library(readxl) library(ggplot2) library(ggrepel) library(dplyr) library(fgsea) library(clusterProfiler) library(enrichplot) ##### Directories ##### # us <-"C:/Users/bresesti.chiara/" # wdir<-paste0(us,"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/05-DGE-NoOut-Corr") # wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2") # fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures") input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference" output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output" ################################################################################ # # R script to generate data and figures for manuscript figures 3A, 3B, 3C, and 3D. # This script performs differential gene expression analysis and gene set enrichment # analysis to generate volcano plots, empirical cumulative distribution function (ECDF) plots, # dot plots, and enrichment maps. # # Figures generated: # - Figure 3A & 3B: Volcano plots of differentially expressed genes in miR-342-3p # overexpression and sponge experiments, and ECDF plots comparing # logFC distributions of all genes vs. miR-342-3p target genes. # - Figure 3C: Dot plot visualizing enriched pathways from gene set enrichment analysis (GSEA). # - Figure 3D: Enrichment map visualizing relationships between enriched pathways. # # Input data files: # - Figure 3A & 3B: "edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control") # (Output from differential gene expression analysis, likely using edgeR) # - Figure 3A & 3B: "TargetScan8.0__miR-342-3p.predicted_targets.xlsx" (Sheet 1) # (List of predicted miR-342-3p target genes from TargetScan) # - Figure 3C & 3D: "EdgeR_results.xlsx" (Sheet "miR342-control") # (Same as input for Figure 3A & 3B, used for GSEA) # - Figure 3C & 3D: "miDB_sig5.MLS.rds" (RDS file containing gene sets for gene set enrichment analysis) # ################################################################################ #### Figure 3A&B #### #Import df miR_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control") sponge_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "spongeBT-control") #Add DEG color and label for volcano plot miR_ctrl$DEG <- "NO" miR_ctrl$DEG[miR_ctrl$logFC > 1 & miR_ctrl$PValue < 0.05] <- "UP" miR_ctrl$DEG[miR_ctrl$logFC < (-1) & miR_ctrl$PValue < 0.05] <- "DOWN" miR_ctrl$DEG_label <- NA miR_ctrl$DEG_label[miR_ctrl$DEG != "NO"] <- miR_ctrl$...1[miR_ctrl$DEG != "NO"] sponge_ctrl$DEG <- "NO" sponge_ctrl$DEG[sponge_ctrl$logFC > 1 & sponge_ctrl$PValue < 0.05] <- "UP" sponge_ctrl$DEG[sponge_ctrl$logFC < (-1) & sponge_ctrl$PValue < 0.05] <- "DOWN" sponge_ctrl$DEG_label <- NA sponge_ctrl$DEG_label[sponge_ctrl$DEG != "NO"] <- sponge_ctrl$...1[sponge_ctrl$DEG != "NO"] #Volcano plot (left panel) data <- miR_ctrl ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) + geom_point() + theme_minimal() + geom_text_repel() + scale_color_manual(values=c("blue", "grey", "red")) + geom_vline(xintercept=c(-1, 1), col="black") + geom_hline(yintercept=-log10(0.05), col="black") + scale_x_continuous(limits = c(-4.1,4.1), breaks = seq(-4,4,1)) + scale_y_continuous(limits = c(0,20)) + xlab(bquote(Log[2](FC))) + ylab(bquote(-Log[10](PVal))) data <- sponge_ctrl ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) + geom_point() + theme_minimal() + geom_text_repel() + scale_color_manual(values=c("blue", "grey", "red")) + geom_vline(xintercept=c(-1, 1), col="black") + geom_hline(yintercept=-log10(0.05), col="black") + scale_x_continuous(limits = c(-4.1,4.1), breaks = seq(-4,4,1)) + scale_y_continuous(limits = c(0,20)) + xlab(bquote(Log[2](FC))) + ylab(bquote(-Log[10](PVal))) # Import miR-342-3p target list (from TargetScan) miR342_targets <- read_excel(paste0(input_dir, "/TargetScan8.0__miR-342-3p.predicted_targets.xlsx")) miR342_targets <- filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3)) #ecdf plot (right panel) data <- miR_ctrl test <-filter(data, data$...1 %in% miR342_targets$'Target gene') plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T, ylab = "Fraction of DEG", xlab = bquote(Log[2](FC)), xlim=c(-0.5,0.5), ylim=c(0,1), main="ECDF of gene expression: miR vs mut") + lines(ecdf(test$logFC), col= "blue", do.points=F, lwd = 2, verticals=T) + abline(v=0, col="black") + abline(h=0.5, col="black") + legend("bottomright", c("All genes","miR-342-3p targets"), col = c("black","blue"), lwd=2, cex = 0.7) ks.test(test$logFC, data$logFC, alternative = "g") #Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* greater than average data data <- sponge_ctrl test <-filter(data, data$...1 %in% miR342_targets$'Target gene') plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T, ylab = "Fraction of DEG", xlab = bquote(Log[2](FC)), xlim=c(-0.3,0.3), ylim=c(0,1), main="ECDF of gene expression: sponge vs scr") + lines(ecdf(test$logFC), col= "red", do.points=F, lwd = 2, verticals=T) + abline(v=0, col="black") + abline(h=0.5, col="black") + legend("bottomright", c("All genes","miR-342-3p targets"), col = c("black","red"), lwd=2, cex = 0.7) ks.test(test$logFC, data$logFC, alternative = "l") # Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data #### Figure 3C&D #### # Load df df1<- read_excel(paste0(input_dir, "/EdgeR_results.xlsx"), sheet = "miR342-control") miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) #GO terms db #Rename pathways of interest in miDB_sig5 names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_OXIDATIVE_PHOSPHORYLATION"] <- "Oxydative phosphorylation" names(miDB_sig5)[names(miDB_sig5) == "GOBP_REGULATION_OF_CHOLESTEROL_METABOLIC_PROCESS"] <- "Regulation of cholesterol metabolic process" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_INTERLEUKIN_12"] <- "Response to IL12" names(miDB_sig5)[names(miDB_sig5) == "GOBP_REGULATION_OF_LIPID_BIOSYNTHETIC_PROCESS"] <- "Regulation of lipid biosynthetic process" names(miDB_sig5)[names(miDB_sig5) == "GOMF_MHC_CLASS_I_PROTEIN_BINDING"] <- "MHC-I protein binding" names(miDB_sig5)[names(miDB_sig5) == "GOBP_TUMOR_NECROSIS_FACTOR_MEDIATED_SIGNALING_PATHWAY"] <- "TNFa mediated signalling pathway" names(miDB_sig5)[names(miDB_sig5) == "GOBP_NEGATIVE_REGULATION_OF_CELL_CYCLE_G2_M_PHASE_TRANSITION"] <- "Negative regulation of cell cycle progression" names(miDB_sig5)[names(miDB_sig5) == "GOBP_FATTY_ACYL_COA_BIOSYNTHETIC_PROCESS"] <- "Fatty acyl-CoA biosynthetic process" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_TUMOR_NECROSIS_FACTOR"] <- "Response to TNFa" names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_G2M_CHECKPOINT"] <- "G2M checkpoint" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_TYPE_I_INTERFERON"] <- "Response to type I interferon" names(miDB_sig5)[names(miDB_sig5) == "GOBP_INTERLEUKIN_1_MEDIATED_SIGNALING_PATHWAY"] <- "IL1 mediated signalling pathway" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_INTERLEUKIN_1"] <- "Response to IL1" names(miDB_sig5)[names(miDB_sig5) == "LPS_RO"] <- "LPS response genes" names(miDB_sig5)[names(miDB_sig5) == "GOBP_CYTOKINE_MEDIATED_SIGNALING_PATHWAY"] <- "Cytokine mediated signalling pathway" names(miDB_sig5)[names(miDB_sig5) == "GOBP_MYELOID_CELL_DIFFERENTIATION"] <- "Myeloid cell differentiation" names(miDB_sig5)[names(miDB_sig5) == "GOBP_INTERLEUKIN_10_PRODUCTION"] <- "IL10 production" names(miDB_sig5)[names(miDB_sig5) == "GOBP_POSITIVE_REGULATION_OF_ENDOTHELIAL_CELL_PROLIFERATION"] <- "Regulation of endothelial cell proliferation" names(miDB_sig5)[names(miDB_sig5) == "GOBP_TUMOR_NECROSIS_FACTOR_SUPERFAMILY_CYTOKINE_PRODUCTION"] <- "TNFa superfamily cytokine production" names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_ANGIOGENESIS"] <- "Angiogenesis" names(miDB_sig5)[names(miDB_sig5) == "GOMF_PATTERN_RECOGNITION_RECEPTOR_ACTIVITY"] <- "Pattern recognition receptor activity" names(miDB_sig5)[names(miDB_sig5) == "PGE2_RO"] <- "PGE2 response genes" # Filter low expression, NA and order by FC df1 <- filter(df1, df1$logCPM>5 & !is.na(df1$...1)) df1<-df1[order(df1$logFC),] df2<-df1$logFC names(df2)<-df1$...1 # Run GSEA with fgsea and filter by PVal sig <- miDB_sig5 test1 <- fgsea(sig, df2, minSize=7, maxSize=500, nproc=1) PvalResult <- filter(test1, test1$padj <= 0.05) # DotPlot pathways of interest (Fig.3C) pathways <- c("Oxydative phosphorylation", "Regulation of cholesterol metabolic process", "Response to IL12", "Regulation of lipid biosynthetic process", "MHC-I protein binding", "TNFa mediated signalling pathway", "Negative regulation of cell cycle progression", "Fatty acyl-CoA biosynthetic process", "Response to TNFa", "G2M checkpoint", "Response to type I interferon", "IL1 mediated signalling pathway", "Response to IL1", "LPS response genes", "Cytokine mediated signalling pathway", "Myeloid cell differentiation", "IL10 production", "Regulation of endothelial cell proliferation", "TNFa superfamily cytokine production", "Angiogenesis", "Pattern recognition receptor activity", "PGE2 response genes") genelist <- as.data.frame(PvalResult[PvalResult$pathway %in% pathways,]) ggplot(genelist, aes(x=NES, y=reorder(pathway,NES), size=size ,color=padj)) + geom_point() + scale_size_area(limits=c(10,450), max_size = 15) + scale_colour_gradient(low="red",high="blue") + labs(y='Pathway', x='NES') #Run GSEA with ClusterProfiler genelist <- data.frame(term = rep(names(miDB_sig5), sapply(miDB_sig5, length)), gene = unlist(miDB_sig5)) df2 = sort(df2, decreasing = TRUE) test2 <- GSEA(df2,TERM2GENE = genelist) #Enrichment map (Fig.3D) test3 <- filter(test2, ID %in% pathways) test3 <- pairwise_termsim(test3) emapplot(test3, min_edge = 0.01, color = "NES", layout="fr", repel =T) + scale_fill_gradient2(name=bquote(NES),low="blue", high="red")