Commit 391e802f authored by Stefano Beretta's avatar Stefano Beretta
Browse files

Added paper plot script.

parent bf0bc939
library(ggplot2)
library(DESeq2)
library(edgeR)
library(RColorBrewer)
library(dplyr)
library(scales)
library(reshape2)
library(stringr)
library(VennDiagram)
library(clusterProfiler)
library(ComplexHeatmap)
wdir <- "Vavassori_LNP2022_RNAseq_electro"
data_dir <- paste(wdir, "paper_data", sep = "/")
out_dir <- paste(wdir, "paper_plots", sep = "/")
dir.create(out_dir)
# PCA Day 4
dge_d4 <- readRDS(paste(data_dir, "Day4_DGE.rds", sep = "/"))
DESeq.rlog <- rlogTransformation(dge_d4$deseq$ds, blind = TRUE)
rlog.norm.counts <- assay(DESeq.rlog)
DESeq.rlog_corr <- DESeq.rlog
assay(DESeq.rlog_corr) <- limma::removeBatchEffect(assay(DESeq.rlog_corr), DESeq.rlog_corr$donor)
# UT, Mock Electro, RNP, RNP+AAV6, AAV6 only
cdata <- as.data.frame(DESeq.rlog_corr@colData)
cdata <- mutate(cdata,
Condition = case_when(cdata$condition == "UT_D4" ~ "UT",
cdata$condition == "UT_electro_D4" ~ "Mock Electro",
cdata$condition == "RNP_D4" ~ "RNP",
cdata$condition == "RNP_AAV6_D4" ~ "RNP+AAV6",
cdata$condition == "AAV6_D4" ~ "AAV6 only"))
DESeq.rlog_corr@colData$condition <- cdata$Condition
i <- 500
P <- plotPCA(DESeq.rlog_corr, intgroup = "condition", ntop = i) +
theme_bw(base_size = 20) +
ggtitle(label = "Principal Component Analysis (PCA)",
subtitle = paste("Top", i, "genes (with batch correction)", sep = " ")) +
geom_point(size = 6) +
scale_color_manual(name = "Condition", values = hue_pal()(length(unique(cdata$Condition))))
#coord_fixed(ratio=3)
ggsave(filename = paste(out_dir, paste0("DESeq-PCA-Day4-ntop", i, "-batchcorr.png"), sep = "/"),
plot = P, height = 10, width = 10, dpi = 150)
# Venn DEGs
dge_thr <- 0.05
contr_degs <- list()
for (contr in c("UT_electro_D4-UT_D4", "RNP_D4-UT_D4", "RNP_AAV6_D4-UT_D4", "AAV6_D4-UT_D4")) {
print(contr)
res <- subset(dge_d4$deseq$dge_res[[contr]], FDR < dge_thr)
print(nrow(res))
contr_degs[[contr]] <- rownames(res)
}
venn.diagram(contr_degs,
category.names = c("Mock Electro", "RNP", "RNP+AAV6", "AAV6 only"),
col = c("#A3A500","#00BF7D","#00B0F6","#F8766D"),
fill = c(alpha("#A3A500", 0.5), alpha("#00BF7D", 0.5),alpha("#00B0F6",0.5),alpha("#F8766D",0.5)),
cex = 1.2,
cat.cex = 1,
output=TRUE,
cat.fontface = "bold",
cat.default.pos = "outer",
cat.pos = c(-10, 10, -10, 0),
cat.dist = c(0.2, 0.2, 0.09, 0.08),
imagetype="png" ,
height = 1200 ,
width = 1200,
resolution = 200,
compression = "lzw",
filename = paste(out_dir, paste0("DESeq-Day4-contrUT-FDR", dge_thr, ".png"), sep = "/"))
# Volcano
for (contr in c("UT_electro_D4-UT_D4", "RNP_D4-UT_D4", "RNP_AAV6_D4-UT_D4", "AAV6_D4-UT_D4", "RNP_D4-UT_electro_D4", "RNP_AAV6_D4-UT_electro_D4")) {
print(contr)
dge_res <- dge_d4$deseq$dge_res[[contr]]
data <- data.frame(gene = row.names(dge_res), pval = -log10(dge_res$FDR), lfc = dge_res$logFC, expr = log10(dge_res$baseMean))
data <- na.omit(data)
data <- mutate(data, color = case_when(data$lfc > dge_d4$logfc.thr & data$pval > -log10(dge_d4$adj.pval.thr) ~ "Overexpressed",
data$lfc < -dge_d4$logfc.thr & data$pval > -log10(dge_d4$adj.pval.thr) ~ "Underexpressed",
abs(data$lfc) < dge_d4$logfc.thr | data$pval < -log10(dge_d4$adj.pval.thr) ~ "NonSignificant"))
data <- data[order(data$pval, decreasing = TRUE),]
num_up <- nrow(subset(data, color == "Overexpressed"))
num_down <- nrow(subset(data, color == "Underexpressed"))
grob_up <- grobTree(textGrob(paste0("Num. Overexpr.: ", num_up),
x = 0.95, y = 0.75, hjust = 1,
gp=gpar(col="#CD4F39", fontsize = 26, fontface="bold")))
grob_down <- grobTree(textGrob(paste0("Num. Underexpr.: ", num_down),
x = 0.95, y = 0.1, hjust = 1,
gp=gpar(col="#008B00", fontsize = 26, fontface="bold")))
map <- ggplot(data, aes(x = expr, y = lfc, colour = color)) +
theme_bw(base_size = 22) +
theme(legend.position = c(0.85,0.9),
legend.background = element_rect(color = "black"),
plot.margin = unit(c(1,1,1,1), "cm")) +
xlab("Mean Expression") +
ylab("log2 Fold Change") +
scale_x_continuous(breaks = seq(0, 6, 1), labels = format(10^seq(0, 6, 1), scientific = T)) +
geom_point(data = subset(data, color == "NonSignificant"), size = 2, alpha = 0.8, na.rm = T) +
geom_point(data = subset(data, color != "NonSignificant"), size = 2, alpha = 0.8, na.rm = T) +
scale_color_manual(name = "Expression",
values = c(Overexpressed = "#CD4F39",
Underexpressed = "#008B00",
NonSignificant = "darkgray")) +
annotation_custom(grob_up) +
annotation_custom(grob_down)
ggsave(filename = paste(out_dir, paste("DESeq", contr, "MAplot.png", sep = "-"), sep = "/"),
plot = map, width = 12, height = 9, dpi = 150)
ggsave(filename = paste(out_dir, paste("DESeq", contr, "MAplot.pdf", sep = "-"), sep = "/"),
plot = map, width = 12, height = 9, dpi = 150)
}
# HM p53/inflammation/apoptosis/TNFa-NFKB
h_gmt <- read.gmt("h.all.v7.2.symbols.gmt")
dge_thr <- 0.05
contr_degs <- c()
for (contr in c("UT_electro_D4-UT_D4", "RNP_D4-UT_D4", "RNP_AAV6_D4-UT_D4", "AAV6_D4-UT_D4")) {
print(contr)
res <- subset(dge_d4$deseq$dge_res[[contr]], FDR < dge_thr)
print(nrow(res))
contr_degs <- unique(c(contr_degs, rownames(res)))
}
expr <- dge_d4$deseq$rlog_corr
expr_degs <- expr[contr_degs, ]
sinfo <- dge_d4$sample_info
sinfo <- mutate(sinfo,
Condition = case_when(sinfo$condition == "UT_D4" ~ "UT",
sinfo$condition == "UT_electro_D4" ~ "Mock Electro",
sinfo$condition == "RNP_D4" ~ "RNP",
sinfo$condition == "RNP_AAV6_D4" ~ "RNP+AAV6",
sinfo$condition == "AAV6_D4" ~ "AAV6 only"))
sinfo$donor <- NULL
sinfo$condition <- NULL
cols <- hue_pal()(5)
names(cols) <- sort(unique(sinfo$Condition))
sinfo$Condition <- factor(sinfo$Condition, levels = c("UT","AAV6 only","Mock Electro","RNP","RNP+AAV6"))
ha_col <- columnAnnotation(df = sinfo, col = list(Condition = cols), annotation_name_side = "right",
show_legend = c(Condition = FALSE), show_annotation_name = c(Condition = FALSE))
clu_cols = list("HALLMARK_TNFA_SIGNALING_VIA_NFKB" = c("forestgreen", "red", "white"),
"HALLMARK_APOPTOSIS" = c("white", "forestgreen", "red"),
"HALLMARK_P53_PATHWAY" = c("white", "white", "forestgreen"),
"HALLMARK_INFLAMMATORY_RESPONSE" = c("forestgreen", "white", "white"))
clu_labs = list("HALLMARK_TNFA_SIGNALING_VIA_NFKB" = c("Cluster1", "Cluster2", ""),
"HALLMARK_APOPTOSIS" = c("", "Cluster1", "Cluster2"),
"HALLMARK_P53_PATHWAY" = c("", "", "Cluster1"),
"HALLMARK_INFLAMMATORY_RESPONSE" = c("Cluster1", "", ""))
for (tt in c("HALLMARK_TNFA_SIGNALING_VIA_NFKB", "HALLMARK_APOPTOSIS", "HALLMARK_P53_PATHWAY", "HALLMARK_INFLAMMATORY_RESPONSE")) {
print(tt)
gmt_term <- subset(h_gmt, term == tt)
term_genes <- gmt_term$gene[gmt_term$gene %in% rownames(expr)]
term_expr <- expr[term_genes,]
mat <- as.matrix(t(scale(t(term_expr))))
head(mat)
hm = Heatmap(matrix = mat,
name = "Expr.",
col = colorRampPalette(rev(brewer.pal(n = 9, name ="RdBu")))(100),
cluster_columns = F,
cluster_rows = TRUE,
rect_gp = gpar(col = "grey", lwd = 0),
border = FALSE,
show_row_names = F,
show_column_names = FALSE,
column_title_gp = gpar(fontsize = 18),
column_split = sinfo$Condition,
column_gap = unit(2.5, "mm"),
row_split = 3,
row_title_gp = gpar(fontsize = 0),
row_gap = unit(2, "mm"),
top_annotation = ha_col,
right_annotation = rowAnnotation(foo = anno_block(gp = gpar(fill = clu_cols[[tt]], col = 0),
labels = clu_labs[[tt]],
labels_gp = gpar(col = "black", fontsize = 10)))
)
png(paste(out_dir, paste0(tt, "_HM_full.png"), sep = "/"), width = 1300, height = 1500, res = 150)
draw(hm, merge_legends = T)
dev.off()
# Save row-clustering info
r.dend <- row_dend(hm) #Extract row dendrogram
rcl.list <- row_order(hm) #Extract clusters (output is a list)
lapply(rcl.list, function(x) length(x)) #check/confirm size clusters
# loop to extract genes for each cluster.
for (i in 1:length(row_order(hm))){
if (i == 1) {
clu <- t(t(row.names(mat[row_order(hm)[[i]],])))
out <- cbind(clu, paste("cluster", i, sep=""))
colnames(out) <- c("GeneID", "Cluster")
} else {
clu <- t(t(row.names(mat[row_order(hm)[[i]],])))
clu <- cbind(clu, paste("cluster", i, sep=""))
out <- rbind(out, clu)
}
}
write.table(x = out, file = paste(out_dir, paste0(tt, "_HM_full-rowCluster.tsv"), sep = "/"),
quote = F, row.names = F, col.names = T, sep = "\t")
}
# Heatmap Enrichment Hallmark (pos e neg) Day 4
hallmark_ds_order <- c("AAV6_D4-UT_D4",
"UT_electro_D4-UT_D4",
"RNP_D4-UT_D4",
"RNP_AAV6_D4-UT_D4",
"UT_electro_D4-AAV6_D4",
"RNP_D4-AAV6_D4",
"RNP_AAV6_D4-AAV6_D4",
"RNP_D4-UT_electro_D4",
"RNP_AAV6_D4-UT_electro_D4",
"RNP_AAV6_D4-RNP_D4"
)
enr_data <- c("pos" = "enr-h.all.v7.2.symbols-pos.txt", "neg" = "enr-h.all.v7.2.symbols-neg.txt")
hallmark.full <- data.frame()
for (gd in names(enr_data)) {
for (ds in hallmark_ds_order) {
#print(ds)
ff <- paste(data_dir, paste(ds, enr_data[[gd]], sep = "-"), sep = "/")
if (file.info(ff)$size < 10) { next }
ds.t <- read.table(ff, header = T, sep = "\t")
if (nrow(ds.t) == 0) { next }
ds.t.filt <- ds.t[,c("ID","Description","GeneRatio","BgRatio","pvalue","p.adjust","qvalue","geneID","Count"), drop = FALSE]
ds.t.filt <- ds.t.filt[!is.na(ds.t.filt$p.adjust) & ds.t.filt$Description != "HALLMARK_HEME_METABOLISM",]
if (nrow(ds.t.filt) == 0) { next }
ds.t.filt$Dataset <- paste0(ds, ":", gd)
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
}
hallmark.full.filt <- hallmark.full[, c("Description", "p.adjust", "Dataset")]
hallmark.full.filt$Description <- gsub(x = hallmark.full.filt$Description, "HALLMARK_", "")
hallmark.order <- hallmark.full.filt %>% group_by(Description) %>% summarise(Pos = median(p.adjust, na.rm = T))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = TRUE), "Description", drop = FALSE]
hallmark.full.filt.tt <- melt(reshape2::dcast(hallmark.full.filt, Description ~ Dataset, value.var = "p.adjust"), id.vars = c("Description"))
colnames(hallmark.full.filt.tt) <- c("Description", "Dataset", "p.adjust")
hallmark.full.filt.tt$Description <- factor(hallmark.full.filt.tt$Description, levels = hallmark.order.terms$Description)
hallmark.full.filt.tt$Enr <- str_split_fixed(hallmark.full.filt.tt$Dataset, ":", 2)[,2]
hallmark.full.filt.tt$Contr <- str_split_fixed(hallmark.full.filt.tt$Dataset, ":", 2)[,1]
unique(hallmark.full.filt.tt$Contr)
hallmark.full.filt.tt$Contr <- factor(hallmark.full.filt.tt$Contr, levels = hallmark_ds_order)
hallmark.full.filt.tt <- mutate(hallmark.full.filt.tt, pval = ifelse(hallmark.full.filt.tt$Enr == "pos",
-log10(hallmark.full.filt.tt$p.adjust),
-1 * (-log10(hallmark.full.filt.tt$p.adjust))))
hallmark.full.filt.tt$pval2 <- case_when(hallmark.full.filt.tt$pval > 10 ~ 10,
hallmark.full.filt.tt$pval < -10 ~ -10,
hallmark.full.filt.tt$pval <= 10 & hallmark.full.filt.tt$pval > -10 ~ hallmark.full.filt.tt$pval)
hallmark.full.filt.tt$Dataset <- gsub("UT_D4", "UT", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("UT_electro_D4", "Mock Electro", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("RNP_D4", "RNP", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("RNP_AAV6_D4", "RNP+AAV6", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("AAV6_D4", "AAV6 only", hallmark.full.filt.tt$Dataset)
head(hallmark.full.filt.tt)
p.hallmark <- ggplot(hallmark.full.filt.tt, aes(x = Dataset, y = Description)) +
theme_bw(base_size = 20) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
strip.text.x = element_blank(),
legend.title = element_text(size = 16),
legend.position = c(-0.4,-0.2),
legend.background = element_rect(color = "black"),
panel.spacing = unit(.3, "lines")) +
geom_tile(aes(fill = pval2), colour = "white") +
scale_fill_gradientn(name = "-log10(p.adj)*sign(Enr)", colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(20), na.value = "grey") +
ylab("") +
xlab("") +
facet_grid(.~Contr, scales = "free_x", space = "free_x")
ggsave(filename = paste(out_dir, "HALLMARK-Day4-enr-heatmap.pdf", sep = "/"), plot = p.hallmark,
width = 13, height = 8, dpi = 300)
# PCA Day12
dge_d12 <- readRDS(paste(data_dir, "Day12_DGE.rds", sep = "/"))
DESeq.rlog <- rlogTransformation(dge_d12$deseq$ds, blind = TRUE)
rlog.norm.counts <- assay(DESeq.rlog)
DESeq.rlog_corr <- DESeq.rlog
assay(DESeq.rlog_corr) <- limma::removeBatchEffect(assay(DESeq.rlog_corr), DESeq.rlog_corr$donor)
# UT, Mock Electro, RNP, RNP+AAV6, AAV6 only
cdata <- as.data.frame(DESeq.rlog_corr@colData)
cdata <- mutate(cdata,
Condition = case_when(cdata$condition == "UT_D12" ~ "UT",
cdata$condition == "UT_electro_D12" ~ "Mock Electro",
cdata$condition == "RNP_D12" ~ "RNP",
cdata$condition == "RNP_AAV6_D12" ~ "RNP+AAV6",
cdata$condition == "AAV6_D12" ~ "AAV6 only"))
DESeq.rlog_corr@colData$condition <- cdata$Condition
i <- 500
P <- plotPCA(DESeq.rlog_corr, intgroup = "condition", ntop = i) +
theme_bw(base_size = 20) +
ggtitle(label = "Principal Component Analysis (PCA)",
subtitle = paste("Top", i, "genes (with batch correction)", sep = " ")) +
geom_point(size = 6) +
scale_color_manual(name = "Condition", values = hue_pal()(length(unique(cdata$Condition)))) +
coord_fixed(ratio = 1.4)
ggsave(filename = paste(out_dir, paste0("DESeq-PCA-Day12-ntop", i, "-batchcorr.png"), sep = "/"),
plot = P, height = 10, width = 10, dpi = 150)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment