library(readxl) library(ggplot2) library(ggrepel) library(dplyr) library(fgsea) library(clusterProfiler) library(enrichplot) ##### Directories ##### #us <- "/Users/Squadrito/" us <-"C:/Users/bresesti.chiara/" wdir<-paste0(us,"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/05-DGE-NoOut-Corr") wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2") fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures") #### Figure 3A&B #### #Import df setwd(wdir) miR_ctrl <- read_excel("edgeR_results.xlsx", sheet = "miR342-control") sponge_ctrl <- read_excel("edgeR_results.xlsx", sheet = "spongeBT-control") #Add DEG color and label for volcano plot miR_ctrl$DEG <- "NO" miR_ctrl$DEG[miR_ctrl$logFC > 1 & miR_ctrl$PValue < 0.05] <- "UP" miR_ctrl$DEG[miR_ctrl$logFC < (-1) & miR_ctrl$PValue < 0.05] <- "DOWN" miR_ctrl$DEG_label <- NA miR_ctrl$DEG_label[miR_ctrl$DEG != "NO"] <- miR_ctrl$...1[miR_ctrl$DEG != "NO"] sponge_ctrl$DEG <- "NO" sponge_ctrl$DEG[sponge_ctrl$logFC > 1 & sponge_ctrl$PValue < 0.05] <- "UP" sponge_ctrl$DEG[sponge_ctrl$logFC < (-1) & sponge_ctrl$PValue < 0.05] <- "DOWN" sponge_ctrl$DEG_label <- NA sponge_ctrl$DEG_label[sponge_ctrl$DEG != "NO"] <- sponge_ctrl$...1[sponge_ctrl$DEG != "NO"] #Volcano plot (left panel) data <- miR_ctrl ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) + geom_point() + theme_minimal() + geom_text_repel() + scale_color_manual(values=c("blue", "grey", "red")) + geom_vline(xintercept=c(-1, 1), col="black") + geom_hline(yintercept=-log10(0.05), col="black") + scale_x_continuous(limits = c(-4.1,4.1), breaks = seq(-4,4,1)) + scale_y_continuous(limits = c(0,20)) + xlab(bquote(Log[2](FC))) + ylab(bquote(-Log[10](PVal))) data <- sponge_ctrl ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) + geom_point() + theme_minimal() + geom_text_repel() + scale_color_manual(values=c("blue", "grey", "red")) + geom_vline(xintercept=c(-1, 1), col="black") + geom_hline(yintercept=-log10(0.05), col="black") + scale_x_continuous(limits = c(-4.1,4.1), breaks = seq(-4,4,1)) + scale_y_continuous(limits = c(0,20)) + xlab(bquote(Log[2](FC))) + ylab(bquote(-Log[10](PVal))) #Import miR-342-3p target list (from TargetScan) setwd(wdir_CB) miR342_targets <- read_excel("TargetScan8.0__miR-342-3p.predicted_targets.xlsx") miR342_targets <- filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3)) #ecdf plot (right panel) data <- miR_ctrl test <-filter(data, data$...1 %in% miR342_targets$'Target gene') plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T, ylab = "Fraction of DEG", xlab = bquote(Log[2](FC)), xlim=c(-0.5,0.5), ylim=c(0,1), main="ECDF of gene expression: miR vs mut") + lines(ecdf(test$logFC), col= "blue", do.points=F, lwd = 2, verticals=T) + abline(v=0, col="black") + abline(h=0.5, col="black") + legend("bottomright", c("All genes","miR-342-3p targets"), col = c("black","blue"), lwd=2, cex = 0.7) ks.test(test$logFC, data$logFC, alternative = "g") #Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* greater than average data data <- sponge_ctrl test <-filter(data, data$...1 %in% miR342_targets$'Target gene') plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T, ylab = "Fraction of DEG", xlab = bquote(Log[2](FC)), xlim=c(-0.3,0.3), ylim=c(0,1), main="ECDF of gene expression: sponge vs scr") + lines(ecdf(test$logFC), col= "red", do.points=F, lwd = 2, verticals=T) + abline(v=0, col="black") + abline(h=0.5, col="black") + legend("bottomright", c("All genes","miR-342-3p targets"), col = c("black","red"), lwd=2, cex = 0.7) ks.test(test$logFC, data$logFC, alternative = "l") #Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data #### Figure 3C&D #### #Upload df setwd(wdir) df1<- read_excel("EdgeR_results.xlsx",, sheet = "miR342-control") setwd(wdir_CB) miDB_sig5 <- readRDS("miDB_sig5.MLS.rds") #GO terms db #Rename pathways of interest in miDB_sig5 names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_OXIDATIVE_PHOSPHORYLATION"] <- "Oxydative phosphorylation" names(miDB_sig5)[names(miDB_sig5) == "GOBP_REGULATION_OF_CHOLESTEROL_METABOLIC_PROCESS"] <- "Regulation of cholesterol metabolic process" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_INTERLEUKIN_12"] <- "Response to IL12" names(miDB_sig5)[names(miDB_sig5) == "GOBP_REGULATION_OF_LIPID_BIOSYNTHETIC_PROCESS"] <- "Regulation of lipid biosynthetic process" names(miDB_sig5)[names(miDB_sig5) == "GOMF_MHC_CLASS_I_PROTEIN_BINDING"] <- "MHC-I protein binding" names(miDB_sig5)[names(miDB_sig5) == "GOBP_TUMOR_NECROSIS_FACTOR_MEDIATED_SIGNALING_PATHWAY"] <- "TNFa mediated signalling pathway" names(miDB_sig5)[names(miDB_sig5) == "GOBP_NEGATIVE_REGULATION_OF_CELL_CYCLE_G2_M_PHASE_TRANSITION"] <- "Negative regulation of cell cycle progression" names(miDB_sig5)[names(miDB_sig5) == "GOBP_FATTY_ACYL_COA_BIOSYNTHETIC_PROCESS"] <- "Fatty acyl-CoA biosynthetic process" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_TUMOR_NECROSIS_FACTOR"] <- "Response to TNFa" names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_G2M_CHECKPOINT"] <- "G2M checkpoint" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_TYPE_I_INTERFERON"] <- "Response to type I interferon" names(miDB_sig5)[names(miDB_sig5) == "GOBP_INTERLEUKIN_1_MEDIATED_SIGNALING_PATHWAY"] <- "IL1 mediated signalling pathway" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_INTERLEUKIN_1"] <- "Response to IL1" names(miDB_sig5)[names(miDB_sig5) == "LPS_RO"] <- "LPS response genes" names(miDB_sig5)[names(miDB_sig5) == "GOBP_CYTOKINE_MEDIATED_SIGNALING_PATHWAY"] <- "Cytokine mediated signalling pathway" names(miDB_sig5)[names(miDB_sig5) == "GOBP_MYELOID_CELL_DIFFERENTIATION"] <- "Myeloid cell differentiation" names(miDB_sig5)[names(miDB_sig5) == "GOBP_INTERLEUKIN_10_PRODUCTION"] <- "IL10 production" names(miDB_sig5)[names(miDB_sig5) == "GOBP_POSITIVE_REGULATION_OF_ENDOTHELIAL_CELL_PROLIFERATION"] <- "Regulation of endothelial cell proliferation" names(miDB_sig5)[names(miDB_sig5) == "GOBP_TUMOR_NECROSIS_FACTOR_SUPERFAMILY_CYTOKINE_PRODUCTION"] <- "TNFa superfamily cytokine production" names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_ANGIOGENESIS"] <- "Angiogenesis" names(miDB_sig5)[names(miDB_sig5) == "GOMF_PATTERN_RECOGNITION_RECEPTOR_ACTIVITY"] <- "Pattern recognition receptor activity" names(miDB_sig5)[names(miDB_sig5) == "PGE2_RO"] <- "PGE2 response genes" #Filter low expression, NA and order by FC df1 <- filter(df1, df1$logCPM>5 & !is.na(df1$...1)) df1<-df1[order(df1$logFC),] df2<-df1$logFC names(df2)<-df1$...1 #Run GSEA with fgsea and filter by PVal sig <- miDB_sig5 test1<-fgsea(sig, df2,minSize = 7,maxSize =500, nproc=1) PvalResult<-filter(test1, test1$padj<= 0.05) #DotPlot pathways of interest (Fig.3C) pathways <- c("Oxydative phosphorylation", "Regulation of cholesterol metabolic process", "Response to IL12", "Regulation of lipid biosynthetic process", "MHC-I protein binding", "TNFa mediated signalling pathway", "Negative regulation of cell cycle progression", "Fatty acyl-CoA biosynthetic process", "Response to TNFa", "G2M checkpoint", "Response to type I interferon", "IL1 mediated signalling pathway", "Response to IL1", "LPS response genes", "Cytokine mediated signalling pathway", "Myeloid cell differentiation", "IL10 production", "Regulation of endothelial cell proliferation", "TNFa superfamily cytokine production", "Angiogenesis", "Pattern recognition receptor activity", "PGE2 response genes") genelist <- as.data.frame(PvalResult[PvalResult$pathway %in% pathways,]) ggplot(genelist, aes(x=NES, y=reorder(pathway,NES), size=size ,color=padj)) + geom_point() + scale_size_area(limits=c(10,450), max_size = 15) + scale_colour_gradient(low="red",high="blue") + labs(y='Pathway',x='NES') #Run GSEA with ClusterProfiler genelist <- data.frame(term = rep(names(miDB_sig5), sapply(miDB_sig5, length)), gene = unlist(miDB_sig5)) df2 = sort(df2, decreasing = TRUE) test2 <- GSEA(df2,TERM2GENE = genelist) #Enrichment map (Fig.3D) test3 <- filter(test2, ID %in% pathways) test3 <- pairwise_termsim(test3) emapplot(test3, min_edge = 0.01, color = "NES", layout="fr", repel =T)+ scale_fill_gradient2(name=bquote(NES),low="blue", high="red") #N.B. Produces slightly different plot every time, #but connection between pathways stays the same