ED_Figure5A_plots.R 4.98 KB
Newer Older
Monah Abou Alezz's avatar
Monah Abou Alezz committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(RColorBrewer))

## Input featureCounts file
fcount.file <- "Scarfo_HEC2023/BulkRNAseq_2/featureCounts_results_corrected.txt"
## Contrasts to perform
contr <- "DLL4_plus:CD32_plus"
contr.list <- unlist(strsplit(contr, ","))
## Design Formula
formula.str <- "condition"
design.str <- gsub(",", "+", formula.str)
num_cond <- str_count(formula.str, ",") + 1
design.formula <- as.formula(paste("~", design.str, sep = ""))
## Read featureCounts results
if (!file.exists(fcount.file)) {
  stop("Error: featureCounts file not found.")
} else {
  read.counts <- read.table(fcount.file, 
                            header = TRUE, 
                            check.names = FALSE) %>% 
    column_to_rownames(var = "Geneid") %>% 
    select(-c(1:5))
}
orig.names <- names(read.counts)
ds <- list()
cond <- list()
for (i in seq(orig.names[1:length(orig.names)])) {
  ds[i] <- unlist(strsplit(orig.names[i], ":"))[2]
  cond[i] <- unlist(strsplit(orig.names[i], ":"))[3]
}
ds <- unlist(ds)
cond <- unlist(cond)
## Samples - Conditions
sample_info <- as.data.frame(str_split_fixed(cond, ",", num_cond))
colnames(sample_info) <- str_split_fixed(formula.str, ",", num_cond)
rownames(sample_info) <- ds
colnames(read.counts) <- ds
sample_info$condition <- factor(sample_info$condition)
last_condition <- str_split_fixed(formula.str, ",", num_cond)[num_cond]
## thresholds
adj.pval.thr <- 0.05
logfc.thr <- 0
## DESeq2 
DESeq.ds <- DESeqDataSetFromMatrix(countData = read.counts,
                                   colData = sample_info,
                                   design = design.formula)
## Remove genes without any counts
DESeq.ds <- DESeq.ds[rowSums(counts(DESeq.ds)) > 0, ]
## RunDGE
DESeq.ds <- DESeq(DESeq.ds)
## obtain regularized log-transformed values
DESeq.rlog <- rlogTransformation(DESeq.ds, blind = TRUE)
ctrs <- unlist(strsplit(contr.list[1], ":"))
c1 <- ctrs[1]
c2 <- ctrs[2]
DGE.results <- results(DESeq.ds,
                       pAdjustMethod = "BH",
                       independentFiltering = TRUE,
                       contrast = c(last_condition, c1, c2),
                       alpha = adj.pval.thr)
# Change format
DGE.results.annot <- as.data.frame(DGE.results) %>% 
  dplyr::select(log2FoldChange, lfcSE, baseMean, pvalue, padj)
colnames(DGE.results.annot) <- c("logFC", "lfcSE", "baseMean", "PValue", "FDR")
## sort the results according to the adjusted p-value
DGE.results.sorted <- DGE.results.annot[order(DGE.results.annot$FDR), ]
## Subset for only significant genes
DGE.results.sig <- subset(DGE.results.sorted, 
                          FDR < adj.pval.thr & abs(logFC) > logfc.thr)
DGEgenes <- rownames(DGE.results.sig)

Monah Abou Alezz's avatar
Monah Abou Alezz committed
76

Monah Abou Alezz's avatar
Monah Abou Alezz committed
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# ED_Fig5A -----------------------------------------------------------------

## scorecard
EHT_scorecard = c("CDH5","RUNX1","PTPRC", "SPN","CD44","ITGA2B","HLF","GFI1","MLLT3","HOXA9","MKI67","NRP2","NR2F2","GJA4","HEY1","DLL4","CXCR4","SOX17","SMAD6","GJA5","HES4","MECOM","NID2","PRND","ESM1","PDGFB","PALMD","TMEM100","EDN1","LTBP4","HEY2","SULF1","IL33","CYP26B1","ADGRG6","COL23A1","GATA6","BMX","TMCC3","DKK1","AGTR2","FBN2","ELN","ALDH1A1","NKX2-3","PROCR","GATA3","GBP4","MYCN","KCNK17","MYB","STAT5A","SMIM24","RAB27B","SPINK2")
hm.mat_DGEgenes <- assay(DESeq.rlog)[DGEgenes, ]
counts_eht <- hm.mat_DGEgenes[c(rownames(hm.mat_DGEgenes) %in% EHT_scorecard), ]
eht_sig <- counts_eht[c(rownames(counts_eht) %in% row.names(DGE.results.sig)), ]
eht_sig <- eht_sig[,c(1,3,6,2,5,8)]
annot <- data.frame(row.names = colnames(eht_sig),
                    Sample = c(rep("DLL4+", 3), rep("CD32+", 3)),
                    Donor = rep(c("RS227","RS242","RS243"), 2))

annot_colors <- list(Sample=c("DLL4+" ="blueviolet",
                              "CD32+" = "gold1"),
                     Donor=c("RS227"="darkslategray3",
                             "RS242"="brown3",
                             "RS243"="limegreen"))

pheatmap::pheatmap(eht_sig,
                   scale = "row",
                   annotation_col = annot,
                   annotation_colors = annot_colors,
                   cluster_rows = TRUE,
                   cluster_cols = TRUE,
                   show_rownames = TRUE,
                   show_colnames = TRUE,
                   angle_col = 90,
                   legend = TRUE,
                   legend_breaks = c(-1.5,0,1.5), # legend breaks to be shown
                   legend_labels = c(-1.5,0,1.5), # legend labels to be shown
                   treeheight_row = 25*96/72, # size of row dendogram
                   treeheight_col = 15*96/72, # size of column dendogram
                   fontsize = 10*96/72,
                   fontsize_row = 8*96/72,
                   fontsize_col = 10*96/72,
                   cellwidth = 20*96/72,
                   cellheight = 8*96/72,
                   border_color = NA
)