suppressPackageStartupMessages(library(DESeq2)) suppressPackageStartupMessages(library(stringr)) suppressPackageStartupMessages(library(tidyverse)) suppressPackageStartupMessages(library(dplyr)) suppressPackageStartupMessages(library(pheatmap)) suppressPackageStartupMessages(library(RColorBrewer)) ## Input featureCounts file fcount.file <- "Scarfo_HEC2023/BulkRNAseq_2/featureCounts_results_corrected.txt" ## Contrasts to perform contr <- "DLL4_plus:CD32_plus" contr.list <- unlist(strsplit(contr, ",")) ## Design Formula formula.str <- "condition" design.str <- gsub(",", "+", formula.str) num_cond <- str_count(formula.str, ",") + 1 design.formula <- as.formula(paste("~", design.str, sep = "")) ## Read featureCounts results if (!file.exists(fcount.file)) { stop("Error: featureCounts file not found.") } else { read.counts <- read.table(fcount.file, header = TRUE, check.names = FALSE) %>% column_to_rownames(var = "Geneid") %>% select(-c(1:5)) } orig.names <- names(read.counts) ds <- list() cond <- list() for (i in seq(orig.names[1:length(orig.names)])) { ds[i] <- unlist(strsplit(orig.names[i], ":"))[2] cond[i] <- unlist(strsplit(orig.names[i], ":"))[3] } ds <- unlist(ds) cond <- unlist(cond) ## Samples - Conditions sample_info <- as.data.frame(str_split_fixed(cond, ",", num_cond)) colnames(sample_info) <- str_split_fixed(formula.str, ",", num_cond) rownames(sample_info) <- ds colnames(read.counts) <- ds sample_info$condition <- factor(sample_info$condition) last_condition <- str_split_fixed(formula.str, ",", num_cond)[num_cond] ## thresholds adj.pval.thr <- 0.05 logfc.thr <- 0 ## DESeq2 DESeq.ds <- DESeqDataSetFromMatrix(countData = read.counts, colData = sample_info, design = design.formula) ## Remove genes without any counts DESeq.ds <- DESeq.ds[rowSums(counts(DESeq.ds)) > 0, ] ## RunDGE DESeq.ds <- DESeq(DESeq.ds) ## obtain regularized log-transformed values DESeq.rlog <- rlogTransformation(DESeq.ds, blind = TRUE) ctrs <- unlist(strsplit(contr.list[1], ":")) c1 <- ctrs[1] c2 <- ctrs[2] DGE.results <- results(DESeq.ds, pAdjustMethod = "BH", independentFiltering = TRUE, contrast = c(last_condition, c1, c2), alpha = adj.pval.thr) # Change format DGE.results.annot <- as.data.frame(DGE.results) %>% dplyr::select(log2FoldChange, lfcSE, baseMean, pvalue, padj) colnames(DGE.results.annot) <- c("logFC", "lfcSE", "baseMean", "PValue", "FDR") ## sort the results according to the adjusted p-value DGE.results.sorted <- DGE.results.annot[order(DGE.results.annot$FDR), ] ## Subset for only significant genes DGE.results.sig <- subset(DGE.results.sorted, FDR < adj.pval.thr & abs(logFC) > logfc.thr) DGEgenes <- rownames(DGE.results.sig) # ED_Fig5A ----------------------------------------------------------------- ## scorecard EHT_scorecard = c("CDH5","RUNX1","PTPRC", "SPN","CD44","ITGA2B","HLF","GFI1","MLLT3","HOXA9","MKI67","NRP2","NR2F2","GJA4","HEY1","DLL4","CXCR4","SOX17","SMAD6","GJA5","HES4","MECOM","NID2","PRND","ESM1","PDGFB","PALMD","TMEM100","EDN1","LTBP4","HEY2","SULF1","IL33","CYP26B1","ADGRG6","COL23A1","GATA6","BMX","TMCC3","DKK1","AGTR2","FBN2","ELN","ALDH1A1","NKX2-3","PROCR","GATA3","GBP4","MYCN","KCNK17","MYB","STAT5A","SMIM24","RAB27B","SPINK2") hm.mat_DGEgenes <- assay(DESeq.rlog)[DGEgenes, ] counts_eht <- hm.mat_DGEgenes[c(rownames(hm.mat_DGEgenes) %in% EHT_scorecard), ] eht_sig <- counts_eht[c(rownames(counts_eht) %in% row.names(DGE.results.sig)), ] eht_sig <- eht_sig[,c(1,3,6,2,5,8)] annot <- data.frame(row.names = colnames(eht_sig), Sample = c(rep("DLL4+", 3), rep("CD32+", 3)), Donor = rep(c("RS227","RS242","RS243"), 2)) annot_colors <- list(Sample=c("DLL4+" ="blueviolet", "CD32+" = "gold1"), Donor=c("RS227"="darkslategray3", "RS242"="brown3", "RS243"="limegreen")) pheatmap::pheatmap(eht_sig, scale = "row", annotation_col = annot, annotation_colors = annot_colors, cluster_rows = TRUE, cluster_cols = TRUE, show_rownames = TRUE, show_colnames = TRUE, angle_col = 90, legend = TRUE, legend_breaks = c(-1.5,0,1.5), # legend breaks to be shown legend_labels = c(-1.5,0,1.5), # legend labels to be shown treeheight_row = 25*96/72, # size of row dendogram treeheight_col = 15*96/72, # size of column dendogram fontsize = 10*96/72, fontsize_row = 8*96/72, fontsize_col = 10*96/72, cellwidth = 20*96/72, cellheight = 8*96/72, border_color = NA )