Commit 1f7cc998 authored by Stefano Beretta's avatar Stefano Beretta
Browse files

Added paper plot script.

parent c0ca1c22
library(DEP)
library(dplyr)
library(openxlsx)
library(ReactomePA)
library(org.Hs.eg.db)
library(ggfortify)
library(stringr)
library(reshape2)
library(RColorBrewer)
library(clusterProfiler)
wdir <- "vavassori_lnp2022_proteomics"
out_dir <- paste(wdir, "paper_plots", sep = "/")
dir.create(out_dir, showWarnings = F)
data_dir <- paste(wdir, "paper_data", sep = "/")
design <- read.xlsx(xlsxFile = paste(data_dir, "ProteomicsDesign.xlsx", sep = "/"))
str(design)
# Day 4
d4_design <- design[4:18,]
d4_design$label
dd <- read.xlsx(xlsxFile = paste(data_dir, "proteinGroups_SR-TIGET_200112.xlsx", sep = "/"))
colnames(dd)
for (col in d4_design$label) {
dd[[col]] <- as.numeric(dd[[col]])
dd[[col]] <- 2^dd[[col]]
dd[[col]][is.na(dd[[col]])] <- 0
}
head(dd)
dd_unique <- make_unique(dd, "Gene.names", "Protein.IDs", delim = ";")
colnames(dd_unique)
str(dd_unique)
samp_id <- match(d4_design$label, colnames(dd_unique))
print(samp_id)
data_dd <- make_se(proteins_unique = dd_unique, columns = samp_id, expdesign = d4_design)
# Filter for proteins that are identified in all replicates of at least one condition
data_dd_filt <- filter_missval(data_dd, thr = 0)
# Normalize the data
data_dd_norm <- normalize_vsn(se = data_dd_filt)
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
data_dd_imp <- impute(data_dd_norm, fun = "MinProb", q = 0.01)
n_top_genes <- 100
mat <- as.data.frame(data_dd_imp@assays@data@listData)
mat_var <- sort(apply(mat, 1, var), decreasing = T)
mat_filt <- mat[names(mat_var[1:n_top_genes]),]
ss <- limma::removeBatchEffect(x = as.data.frame(data_dd_imp@assays@data@listData), batch = data_dd_imp$replicate)
ss_var <- sort(apply(ss, 1, var), decreasing = T)
ss_filt <- ss[names(ss_var[1:n_top_genes]),]
sinfo <- data.frame(Condition = data_dd_imp$condition, Replicate = data_dd_imp$replicate)
rownames(sinfo) <- data_dd_imp$ID
sinfo <- mutate(sinfo,
Cond = case_when(sinfo$Condition == "UT_D4" ~ "UT",
sinfo$Condition == "UT_electro_D4" ~ "Mock Electro",
sinfo$Condition == "RNP_D4" ~ "RNP",
sinfo$Condition == "RNP_AAV6_D4" ~ "RNP+AAV6",
sinfo$Condition == "AAV6_D4" ~ "AAV6 only"))
sinfo$Condition <- sinfo$Cond
pca2 <- prcomp(x = t(ss_filt), center = TRUE,scale. = T)
pdf(file = paste(out_dir, "DEP-PCA-Day4-top100-batchcorr.pdf", sep = "/"), width = 9, height = 7)
autoplot(object = pca2, data = sinfo, label = FALSE, colour = 'Condition', size = 6) +
ggtitle(label = "Principal Component Analysis (PCA)",
subtitle = paste("Top", n_top_genes, "proteins (with batch correction)", sep = " ")) +
theme_bw(base_size = 20)
dev.off()
con = c("UT_electro_D4_vs_UT_D4",
"RNP_D4_vs_UT_electro_D4",
"RNP_D4_vs_UT_D4",
"AAV6_D4_vs_RNP_D4",
"AAV6_D4_vs_UT_electro_D4",
"AAV6_D4_vs_UT_D4",
"RNP_AAV6_D4_vs_AAV6_D4",
"RNP_AAV6_D4_vs_RNP_D4",
"RNP_AAV6_D4_vs_UT_electro_D4",
"RNP_AAV6_D4_vs_UT_D4")
for (gsea_val in c("_GSEApval")) {
for (gsea_db in c("gsea_HALLMARK")) {
hallmark_ds_order <- c("UT_electro_D4_vs_UT_D4", # UT electro vs UT
"RNP_D4_vs_UT_D4", # RNP vs UT
"RNP_AAV6_D4_vs_UT_D4", # RNP+AAV6 vs UT
"UT_electro_D4_vs_AAV6_D4", # old "AAV6_D4-UT_electro_D4", # Utelectro vs AAV6 !!!
"RNP_D4_vs_AAV6_D4", # old "AAV6_D4-RNP_D4", # RNP vs AAV6 !!!
"RNP_AAV6_D4_vs_AAV6_D4", # RNP+AAV6 vs AAV6
"RNP_D4_vs_UT_electro_D4", # RNP vs UT electro
"RNP_AAV6_D4_vs_UT_electro_D4", # RNP+AAV6 vs Utelectro
"RNP_AAV6_D4_vs_RNP_D4", # RNP+AAV6 vs RNP
"AAV6_D4_vs_UT_D4" # AAV6 vs UT
)
hallmark.full <- data.frame()
for (contr in con) {
print(contr)
ds.t <- read.xlsx(xlsxFile = paste(wdir, "results_DEP", paste0(contr, paste0(gsea_val, ".xlsx")), sep = "/"),
sheet = gsea_db)
if (nrow(ds.t) == 0) { next }
ds.t.filt <- ds.t[,c("Description", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- contr
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
hallmark.full.filt <- hallmark.full[, c("Description", "NES", "Dataset")]
hallmark.full.filt$Description <- gsub(x = hallmark.full.filt$Description, "HALLMARK_", "")
for (dd in hallmark_ds_order) {
if(!dd %in% unique(hallmark.full.filt$Dataset)) {
newdd <- paste(as.character(str_split_fixed(dd, "_vs_", 2)[,2]), as.character(str_split_fixed(dd, "_vs_", 2)[,1]), sep = "_vs_")
if (newdd %in% unique(hallmark.full.filt$Dataset)) {
newvals <- -1 * hallmark.full.filt[which(hallmark.full.filt$Dataset == newdd), "NES"]
hallmark.full.filt[which(hallmark.full.filt$Dataset == newdd), "NES"] <- newvals
hallmark.full.filt[which(hallmark.full.filt$Dataset == newdd), "Dataset"] <- dd
}
}
}
head(hallmark.full.filt)
hallmark.order <- hallmark.full.filt %>% group_by(Description) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "Description", drop = FALSE]
hallmark.full.filt.tt <- melt(reshape2::dcast(hallmark.full.filt, Description ~ Dataset, value.var="NES"), id.vars = c("Description"))
colnames(hallmark.full.filt.tt) <- c("Description", "Dataset", "NES")
hallmark.full.filt.tt$Description <- factor(hallmark.full.filt.tt$Description, levels = hallmark.order.terms$Description)
hallmark.full.filt.tt$Dataset <- gsub("UT_D4", "UT", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("UT_electro_D4", "Mock Electro", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("RNP_D4", "RNP", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("RNP_AAV6_D4", "RNP+AAV6", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("AAV6_D4", "AAV6 only", hallmark.full.filt.tt$Dataset)
hallmark.full.filt.tt$Dataset <- gsub("_vs_", " - ", hallmark.full.filt.tt$Dataset)
hallmark_ds_order <- gsub("UT_D4", "UT", hallmark_ds_order)
hallmark_ds_order <- gsub("UT_electro_D4", "Mock Electro", hallmark_ds_order)
hallmark_ds_order <- gsub("RNP_D4", "RNP", hallmark_ds_order)
hallmark_ds_order <- gsub("RNP_AAV6_D4", "RNP+AAV6", hallmark_ds_order)
hallmark_ds_order <- gsub("AAV6_D4", "AAV6 only", hallmark_ds_order)
hallmark_ds_order <- gsub("_vs_", " - ", hallmark_ds_order)
hallmark.full.filt.tt$Dataset <- factor(hallmark.full.filt.tt$Dataset, levels = hallmark_ds_order)
p.hallmark <- ggplot(hallmark.full.filt.tt, aes(x = Dataset, y = Description)) +
theme_bw(base_size = 20) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_tile(aes(fill = NES), colour = "white") +
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100), na.value = "grey") +
ylab("") +
xlab("")
plot_height <- ifelse(length(levels(hallmark.full.filt.tt$Description)) < 50, 7, 15)
ggsave(filename = paste(out_dir, paste0("HM_D4_", gsea_db, gsea_val, ".pdf"), sep = "/"), plot = p.hallmark,
width = 10, height = plot_height, dpi = 150)
}
}
# Day 12
d12_design <- design[19:33,]
d12_design$label
dd <- read.xlsx(xlsxFile = paste(wdir, "proteinGroups_SR-TIGET_200112.xlsx", sep = "/"))
colnames(dd)
for (col in d12_design$label) {
dd[[col]] <- as.numeric(dd[[col]])
dd[[col]] <- 2^dd[[col]]
dd[[col]][is.na(dd[[col]])] <- 0
}
head(dd)
dd_unique <- make_unique(dd, "Gene.names", "Protein.IDs", delim = ";")
colnames(dd_unique)
str(dd_unique)
samp_id <- match(d12_design$label, colnames(dd_unique))
print(samp_id)
data_dd <- make_se(proteins_unique = dd_unique, columns = samp_id, expdesign = d12_design)
# Filter for proteins that are identified in all replicates of at least one condition
data_dd_filt <- filter_missval(data_dd, thr = 0)
# Normalize the data
data_dd_norm <- normalize_vsn(se = data_dd_filt)
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
data_dd_imp <- impute(data_dd_norm, fun = "MinProb", q = 0.01)
mat <- as.data.frame(data_dd_imp@assays@data@listData)
mat_var <- sort(apply(mat, 1, var), decreasing = T)
mat_filt <- mat[names(mat_var[1:n_top_genes]),]
ss <- limma::removeBatchEffect(x = as.data.frame(data_dd_imp@assays@data@listData), batch = data_dd_imp$replicate)
ss_var <- sort(apply(ss, 1, var), decreasing = T)
ss_filt <- ss[names(ss_var[1:n_top_genes]),]
sinfo <- data.frame(Condition = data_dd_imp$condition, Replicate = data_dd_imp$replicate)
rownames(sinfo) <- data_dd_imp$ID
sinfo <- mutate(sinfo,
Cond = case_when(sinfo$Condition == "UT_D12" ~ "UT",
sinfo$Condition == "UT_electro_D12" ~ "Mock Electro",
sinfo$Condition == "RNP_D12" ~ "RNP",
sinfo$Condition == "RNP_AAV6_D12" ~ "RNP+AAV6",
sinfo$Condition == "AAV6_D12" ~ "AAV6 only"))
sinfo$Condition <- sinfo$Cond
pca2 <- prcomp(x = t(ss_filt), center = TRUE,scale. = T)
pdf(file = paste(out_dir, paste0("DEP-PCA-Day12-top", n_top_genes, "-batchcorr.pdf"), sep = "/"), width = 9, height = 7)
autoplot(object = pca2, data = sinfo, label = FALSE, colour = 'Condition', size = 6) +
ggtitle(label = "Principal Component Analysis (PCA)",
subtitle = paste("Top", n_top_genes, "proteins (with batch correction)", sep = " ")) +
theme_bw(base_size = 20)
dev.off()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment