Commit 65cf9977 authored by Marco Monti's avatar Marco Monti
Browse files

I updated the scripts.

parent cad0b7de
...@@ -11,7 +11,7 @@ library("dplyr") ...@@ -11,7 +11,7 @@ library("dplyr")
# wdir1510 <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX") # wdir1510 <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
# output_d <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures") # output_d <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference" input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output" output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
################################################################################ ################################################################################
...@@ -34,7 +34,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -34,7 +34,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
#### Figure 1B #### #### Figure 1B ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) #sheet: umis.genes.polyA-mouse df1 <- as.data.frame(read_excel(paste0(input_dir, "/QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse
df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts
df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM
gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax", gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax",
...@@ -48,14 +48,13 @@ openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), ro ...@@ -48,14 +48,13 @@ openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), ro
# the df was exported and used to create a heatmap using Graphpad # the df was exported and used to create a heatmap using Graphpad
#### Figure 1C #### #### Figure 1C ####
# "edgeR_DGE_res_volcano.pdf" (in the 'wdir1509' folder) was imported in illustrator # "edgeR_DGE_res_volcano.pdf" (in the 'wdir1509' folder) was imported in illustrator and merged with the plot generated by the code below
# and merged with the plot generated by the code below df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) #sheet: miRNA_piRNA df1 <- df1[,1:7] # UMI
df1 <-df1[,1:7] #UMI
rownames(df1)<-df1$miRNA rownames(df1)<-df1$miRNA
df2 <- apply(df1[,-1], 2, function(x) log(x)) #Log counts df2 <- apply(df1[,-1], 2, function(x) log(x)) # Log counts
df2 <- as.data.frame(limma::normalizeCyclicLoess(df2, weights = NULL, span=0.7, iterations = 5, method = "pairs")) #Cyclic loess normalization df2 <- as.data.frame(limma::normalizeCyclicLoess(df2, weights = NULL, span=0.7, iterations = 5, method = "pairs")) # Cyclic loess normalization
colnames(df2)<-c("B cell","RPM","cDC1","cDC2","LSEC","KC") #Rename columns colnames(df2)<-c("B cell","RPM","cDC1","cDC2","LSEC","KC") # Rename columns
upper.panel<-function(x, y, ...){ upper.panel<-function(x, y, ...){
points(((x+y)/2),(x-y), cex=0.6, pch=19, col="grey") points(((x+y)/2),(x-y), cex=0.6, pch=19, col="grey")
above <- (x-y-1)*((x+y)/2-5) > 5 & ((x+y)/2)>5 above <- (x-y-1)*((x+y)/2-5) > 5 & ((x+y)/2)>5
...@@ -63,19 +62,21 @@ upper.panel<-function(x, y, ...){ ...@@ -63,19 +62,21 @@ upper.panel<-function(x, y, ...){
below <- (x-y+1)*((x+y)/2-5) < -5 & ((x+y)/2)>5 below <- (x-y+1)*((x+y)/2-5) < -5 & ((x+y)/2)>5
points(((x+y)/2)[below],(x-y)[below], col="blue", cex=0.6, pch=19) points(((x+y)/2)[below],(x-y)[below], col="blue", cex=0.6, pch=19)
} # function for MA plot } # function for MA plot
pdf(file = paste(output_dir, "Fig_1C.pdf", sep = "/"), width=9, height=9)
pairs(df2[,1:6], lower.panel = NULL, upper.panel = upper.panel, pairs(df2[,1:6], lower.panel = NULL, upper.panel = upper.panel,
ylim=c(-8.5,8.5), xlim=c(2,14), cex.labels = 2) # pairwise plot ylim=c(-8.5,8.5), xlim=c(2,14), cex.labels = 2) # pairwise plot
dev.off()
#### Figure 1D #### #### Figure 1D ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) #sheet: miRNA_piRNA df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
df1 <- df1[-which(grepl("piR",df1[,1])),1:7] # UMI and piRNA removing df1 <- df1[-which(grepl("piR",df1[,1])),1:7] # UMI and piRNA removing
df1[,1] <- gsub("/.*","",df1[,1]) # leave only first miRNA for ambiguous entries df1[,1] <- gsub("/.*","",df1[,1]) # leave only first miRNA for ambiguous entries
df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x,na.rm=T)*1000000}) #UMI normalized by CPM df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x,na.rm=T)*1000000}) # UMI normalized by CPM
df.families <- as.data.frame(read_excel("Analysis CB/miRNA_Family.xlsx",sheet=1,col_names = T))#miRNA families from miRBase df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet=1 ,col_names=T)) # miRNA families from miRBase
df.families <- df.families[df.families[,3]==10090,c(4,1)] #select mouse entries df.families <- df.families[df.families[,3]==10090,c(4,1)] # select mouse entries
df.families[,1] <- sub(pattern = "p.*",replacement ="p",x = df.families[,1]) df.families[,1] <- sub(pattern = "p.*",replacement ="p",x = df.families[,1])
df2 <- merge(df.families,df1,by=1) df2 <- merge(df.families,df1,by=1)
df2 <- aggregate(df2[,-c(1:2)],by = df2["miR family"],FUN = sum,na.rm=T) #sum counts by family df2 <- aggregate(df2[,-c(1:2)],by = df2["miR family"],FUN = sum,na.rm=T) # sum counts by family
gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1", gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1",
"miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p", "miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p",
"miR-15-5p/16-5p/195-5p/322-5p/497-5p","miR-26-5p","miR-138-5p", "miR-15-5p/16-5p/195-5p/322-5p/497-5p","miR-26-5p","miR-138-5p",
...@@ -83,6 +84,6 @@ gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p. ...@@ -83,6 +84,6 @@ gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.
"miR-125-5p/351-5p","miR-126-3p.1","miR-199-3p") "miR-125-5p/351-5p","miR-126-3p.1","miR-199-3p")
df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR family`, levels=gene_vector)) df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR family`, levels=gene_vector))
rownames(df3) <- df3[,1] rownames(df3) <- df3[,1]
df3.scaled <- as.data.frame(t(scale(t(df3[-1])))) #Zscore normalization. Scaled only works on columns, so need to transform df3.scaled <- as.data.frame(t(scale(t(df3[-1])))) # Zscore normalization. Scaled only works on columns, so need to transform
openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1D_table.xlsx"), rowNames=T) openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1D_table.xlsx"), rowNames=T)
#the df was exported and used to create a heatmap using Graphpad # The df was exported and used to create a heatmap using Graphpad
...@@ -12,7 +12,7 @@ library(enrichplot) ...@@ -12,7 +12,7 @@ library(enrichplot)
# wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2") # wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2")
# fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures") # fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference" input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output" output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
################################################################################ ################################################################################
...@@ -32,7 +32,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -32,7 +32,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# Input data files: # Input data files:
# - Figure 3A & 3B: "edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control") # - Figure 3A & 3B: "edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control")
# (Output from differential gene expression analysis, likely using edgeR) # (Output from differential gene expression analysis, likely using edgeR)
# - Figure 3A & 3B: "TargetScan8.0__miR-342-3p.predicted_targets.xlsx" (Sheet 1) # - Figure 3A & 3B: "TargetScan8.0_miR-342-3p.predicted_targets.xlsx" (Sheet 1)
# (List of predicted miR-342-3p target genes from TargetScan) # (List of predicted miR-342-3p target genes from TargetScan)
# - Figure 3C & 3D: "EdgeR_results.xlsx" (Sheet "miR342-control") # - Figure 3C & 3D: "EdgeR_results.xlsx" (Sheet "miR342-control")
# (Same as input for Figure 3A & 3B, used for GSEA) # (Same as input for Figure 3A & 3B, used for GSEA)
...@@ -44,11 +44,11 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -44,11 +44,11 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
#### Figure 3A&B #### #### Figure 3A&B ####
#Import df # Import df
miR_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control") miR_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control")
sponge_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "spongeBT-control") sponge_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "spongeBT-control")
#Add DEG color and label for volcano plot # Add DEG color and label for volcano plot
miR_ctrl$DEG <- "NO" miR_ctrl$DEG <- "NO"
miR_ctrl$DEG[miR_ctrl$logFC > 1 & miR_ctrl$PValue < 0.05] <- "UP" miR_ctrl$DEG[miR_ctrl$logFC > 1 & miR_ctrl$PValue < 0.05] <- "UP"
miR_ctrl$DEG[miR_ctrl$logFC < (-1) & miR_ctrl$PValue < 0.05] <- "DOWN" miR_ctrl$DEG[miR_ctrl$logFC < (-1) & miR_ctrl$PValue < 0.05] <- "DOWN"
...@@ -61,9 +61,9 @@ sponge_ctrl$DEG[sponge_ctrl$logFC < (-1) & sponge_ctrl$PValue < 0.05] <- "DOWN" ...@@ -61,9 +61,9 @@ sponge_ctrl$DEG[sponge_ctrl$logFC < (-1) & sponge_ctrl$PValue < 0.05] <- "DOWN"
sponge_ctrl$DEG_label <- NA sponge_ctrl$DEG_label <- NA
sponge_ctrl$DEG_label[sponge_ctrl$DEG != "NO"] <- sponge_ctrl$...1[sponge_ctrl$DEG != "NO"] sponge_ctrl$DEG_label[sponge_ctrl$DEG != "NO"] <- sponge_ctrl$...1[sponge_ctrl$DEG != "NO"]
#Volcano plot (left panel) # Volcano plot (left panel)
data <- miR_ctrl data <- miR_ctrl
ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) + p1 <- ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
geom_point() + geom_point() +
theme_minimal() + theme_minimal() +
geom_text_repel() + geom_text_repel() +
...@@ -74,9 +74,10 @@ ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) + ...@@ -74,9 +74,10 @@ ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
scale_y_continuous(limits = c(0,20)) + scale_y_continuous(limits = c(0,20)) +
xlab(bquote(Log[2](FC))) + xlab(bquote(Log[2](FC))) +
ylab(bquote(-Log[10](PVal))) ylab(bquote(-Log[10](PVal)))
ggsave(filename = paste0(output_dir, "/Volcano_plot_3A.pdf"), plot=p1, width=7, height=7)
data <- sponge_ctrl data <- sponge_ctrl
ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) + p2 <- ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
geom_point() + geom_point() +
theme_minimal() + theme_minimal() +
geom_text_repel() + geom_text_repel() +
...@@ -87,49 +88,54 @@ ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) + ...@@ -87,49 +88,54 @@ ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
scale_y_continuous(limits = c(0,20)) + scale_y_continuous(limits = c(0,20)) +
xlab(bquote(Log[2](FC))) + xlab(bquote(Log[2](FC))) +
ylab(bquote(-Log[10](PVal))) ylab(bquote(-Log[10](PVal)))
ggsave(filename = paste0(output_dir, "/Volcano_plot_3B.pdf"), plot=p2, width=7, height=7)
# Import miR-342-3p target list (from TargetScan) # Import miR-342-3p target list (from TargetScan)
miR342_targets <- read_excel(paste0(input_dir, "/TargetScan8.0__miR-342-3p.predicted_targets.xlsx")) miR342_targets <- read_excel(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.xlsx"))
miR342_targets <- filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3)) miR342_targets <- filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3))
#ecdf plot (right panel) # ecdf plot (right panel)
data <- miR_ctrl data <- miR_ctrl
test <-filter(data, data$...1 %in% miR342_targets$'Target gene') test <- filter(data, data$...1 %in% miR342_targets$'Target gene')
pdf(file = paste(output_dir, "Fig_3A_dx.pdf", sep = "/"), width=7, height=7)
plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T, plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T,
ylab = "Fraction of DEG", ylab = "Fraction of DEG",
xlab = bquote(Log[2](FC)), xlab = bquote(Log[2](FC)),
xlim=c(-0.5,0.5), xlim=c(-0.5,0.5),
ylim=c(0,1), ylim=c(0,1),
main="ECDF of gene expression: miR vs mut") + main="ECDF of gene expression: miR vs mut")
lines(ecdf(test$logFC), col= "blue", do.points=F, lwd = 2, verticals=T) + lines(ecdf(test$logFC), col= "blue", do.points=F, lwd = 2, verticals=T)
abline(v=0, col="black") + abline(v=0, col="black")
abline(h=0.5, col="black") + abline(h=0.5, col="black")
legend("bottomright", c("All genes","miR-342-3p targets"), legend("bottomright", c("All genes","miR-342-3p targets"), col = c("black","blue"), lwd=2, cex = 0.7)
col = c("black","blue"), lwd=2, cex = 0.7) ks.test(test$logFC, data$logFC, alternative = "g") # Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* greater than average data
ks.test(test$logFC, data$logFC, alternative = "g") #Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* greater than average data dev.off()
data <- sponge_ctrl data <- sponge_ctrl
test <-filter(data, data$...1 %in% miR342_targets$'Target gene') test <-filter(data, data$...1 %in% miR342_targets$'Target gene')
pdf(file = paste(output_dir, "Fig_3B_dx.pdf", sep = "/"), width=7, height=7)
plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T, plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T,
ylab = "Fraction of DEG", ylab = "Fraction of DEG",
xlab = bquote(Log[2](FC)), xlab = bquote(Log[2](FC)),
xlim=c(-0.3,0.3), xlim=c(-0.3,0.3),
ylim=c(0,1), ylim=c(0,1),
main="ECDF of gene expression: sponge vs scr") + main="ECDF of gene expression: sponge vs scr")
lines(ecdf(test$logFC), col= "red", do.points=F, lwd = 2, verticals=T) + lines(ecdf(test$logFC), col= "red", do.points=F, lwd = 2, verticals=T)
abline(v=0, col="black") + abline(v=0, col="black")
abline(h=0.5, col="black") + abline(h=0.5, col="black")
legend("bottomright", c("All genes","miR-342-3p targets"), legend("bottomright", c("All genes","miR-342-3p targets"), col = c("black","red"), lwd=2, cex = 0.7)
col = c("black","red"), lwd=2, cex = 0.7)
ks.test(test$logFC, data$logFC, alternative = "l") # Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data ks.test(test$logFC, data$logFC, alternative = "l") # Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data
dev.off()
#### Figure 3C&D #### #### Figure 3C&D ####
# Load df # Load df
df1<- read_excel(paste0(input_dir, "/EdgeR_results.xlsx"), sheet = "miR342-control") df1 <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control")
miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) #GO terms db miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) # GO terms db
#Rename pathways of interest in miDB_sig5 # Rename pathways of interest in miDB_sig5
names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_OXIDATIVE_PHOSPHORYLATION"] <- "Oxydative phosphorylation" names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_OXIDATIVE_PHOSPHORYLATION"] <- "Oxydative phosphorylation"
names(miDB_sig5)[names(miDB_sig5) == "GOBP_REGULATION_OF_CHOLESTEROL_METABOLIC_PROCESS"] <- "Regulation of cholesterol metabolic process" names(miDB_sig5)[names(miDB_sig5) == "GOBP_REGULATION_OF_CHOLESTEROL_METABOLIC_PROCESS"] <- "Regulation of cholesterol metabolic process"
names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_INTERLEUKIN_12"] <- "Response to IL12" names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_INTERLEUKIN_12"] <- "Response to IL12"
...@@ -189,20 +195,23 @@ pathways <- c("Oxydative phosphorylation", ...@@ -189,20 +195,23 @@ pathways <- c("Oxydative phosphorylation",
"PGE2 response genes") "PGE2 response genes")
genelist <- as.data.frame(PvalResult[PvalResult$pathway %in% pathways,]) genelist <- as.data.frame(PvalResult[PvalResult$pathway %in% pathways,])
ggplot(genelist, aes(x=NES, y=reorder(pathway,NES), size=size ,color=padj)) + p3 <- ggplot(genelist, aes(x=NES, y=reorder(pathway,NES), size=size ,color=padj)) +
geom_point() + geom_point() +
scale_size_area(limits=c(10,450), max_size = 15) + scale_size_area(limits=c(10,450), max_size = 15) +
scale_colour_gradient(low="red",high="blue") + scale_colour_gradient(low="red", high="blue") +
labs(y='Pathway', x='NES') labs(y='Pathway', x='NES')
ggsave(filename = paste0(output_dir, "/gsea_DotPlot_Fig3C.pdf"), plot=p3, width=7, height=7)
#Run GSEA with ClusterProfiler # Run GSEA with ClusterProfiler
genelist <- data.frame(term = rep(names(miDB_sig5), sapply(miDB_sig5, length)), genelist <- data.frame(term = rep(names(miDB_sig5), sapply(miDB_sig5, length)),
gene = unlist(miDB_sig5)) gene = unlist(miDB_sig5))
df2 = sort(df2, decreasing = TRUE) df2 = sort(df2, decreasing = TRUE)
test2 <- GSEA(df2,TERM2GENE = genelist) test2 <- GSEA(df2, TERM2GENE = genelist)
#Enrichment map (Fig.3D) # Enrichment map (Fig.3D)
test3 <- filter(test2, ID %in% pathways) test3 <- filter(test2, ID %in% pathways)
test3 <- pairwise_termsim(test3) test3 <- pairwise_termsim(test3)
emapplot(test3, min_edge = 0.01, color = "NES", layout="fr", repel =T) + pdf(file = paste(output_dir, "emapplot_Fig.3D.pdf", sep = "/"), width=7, height=7)
scale_fill_gradient2(name=bquote(NES),low="blue", high="red") emapplot(test3, min_edge = 0.01, color = "NES", layout="fr") + # repel=T
\ No newline at end of file scale_fill_gradient2(name=bquote(NES), low="blue", high="red")
dev.off()
...@@ -20,13 +20,16 @@ library(cetcolor) ...@@ -20,13 +20,16 @@ library(cetcolor)
wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/results2/CB1_CB3_CB4" wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/results2/CB1_CB3_CB4"
wdir_CB <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts/results2" wdir_CB <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts/results2"
plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts_final/plots and tables used in figures" #plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts_final/plots and tables used in figures"
dirCB <-"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts" dirCB <-"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts"
dir.create(plot_dir, showWarnings=F, recursive=T) #dir.create(plot_dir, showWarnings=F, recursive=T)
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
# 16357 GO terms db. used for GSEA # 16357 GO terms db. used for GSEA
sig <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/reference/miDB_sig5.MLS.rds") sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds"))
################################################################################ ################################################################################
...@@ -68,8 +71,6 @@ obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID==" ...@@ -68,8 +71,6 @@ obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID=="
obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID=="Mouse2"] <- "miR_02" obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID=="Mouse2"] <- "miR_02"
obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID=="Mouse3"] <- "miR_05" obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID=="Mouse3"] <- "miR_05"
obs1<-FindClusters(obs1, graph.name="RNA_snn_h.orig.ident", resolution=4)
# Manually annotate the clusters # Manually annotate the clusters
obs1@meta.data$Annotation.final.CB<-"Undefined" obs1@meta.data$Annotation.final.CB<-"Undefined"
obs1@meta.data$Annotation.final.CB[obs1@meta.data$RNA_snn_h.orig.ident_res.0.6==0]<- "Monocytes" obs1@meta.data$Annotation.final.CB[obs1@meta.data$RNA_snn_h.orig.ident_res.0.6==0]<- "Monocytes"
...@@ -98,21 +99,28 @@ obs1@meta.data$Annotation.final.CB[obs1@meta.data$RNA_snn_h.orig.ident_res.4==51 ...@@ -98,21 +99,28 @@ obs1@meta.data$Annotation.final.CB[obs1@meta.data$RNA_snn_h.orig.ident_res.4==51
# Visualize final annotation in Umap (Fig.5A) # Visualize final annotation in Umap (Fig.5A)
obs1 <- SetIdent(obs1,value="Annotation.final.CB") obs1 <- SetIdent(obs1,value="Annotation.final.CB")
pdf(file = paste(plot_dir, "CB_Annotation.final.CB_Fig.5A.pdf", sep = "/"), width=9, height=7) pdf(file = paste(output_dir, "CB_Annotation.final.CB_Fig.5A.pdf", sep = "/"), width=9, height=7)
DimPlot(obs1, reduction="umap.harmony.orig.ident", group.by="Annotation.final.CB", label=T, repel=T) DimPlot(obs1, reduction="umap.harmony.orig.ident", group.by="Annotation.final.CB", label=T, repel=T)
dev.off() dev.off()
# Adding a simplified annotation # Adding a simplified annotation
obs1@meta.data$Shortlables<-obs1@meta.data$Annotation.final.CB obs1@meta.data$Shortlables <- obs1@meta.data$Annotation.final.CB
DCs<-c("Mig cDCs","pDCs","pre DCs", "cDC1", "cDC2_MoDCs") DCs<-c("Mig cDCs","pDCs","pre DCs", "cDC1", "cDC2_MoDCs")
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%DCs]<- "DCs" obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%DCs] <- "DCs"
granu<-c("Neutrophils","Basophils") granu<-c("Neutrophils","Basophils")
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%granu]<- "Granulocytes" obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%granu] <- "Granulocytes"
TAMs<-c("TAMs_1", "TAMs_2", "TAMs_3", "TAMs_4", "TAMs_5") TAMs<-c("TAMs_1", "TAMs_2", "TAMs_3", "TAMs_4", "TAMs_5")
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%TAMs]<- "TAMs" obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%TAMs] <- "TAMs"
# Export DimPlot as excel
var_list <- c("UMAPh_1", "UMAPh_2","PROP.Group","orig.ident", "Sample","RNA_snn_h.orig.ident_res.0.6", "mOrange_union","Annotation.final.CB", "Shortlables")
obs1_df <- FetchData(obs1, vars = var_list)
obs1_df$cell_ID <- rownames(obs1_df)
write.xlsx(obs1_df, file=paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.xlsx"), overwrite=T)
# Dotplot with markers for population (Suppl.Fig.5A) # Dotplot with markers for population (Suppl.Fig.5A)
obs1 <- SetIdent(obs1,value="Shortlables") obs1 <- SetIdent(obs1, value="Shortlables")
obs1$Shortlables <- factor(x = obs1$Shortlables, obs1$Shortlables <- factor(x = obs1$Shortlables,
levels = c("Cancer cells","Cholangiocytes", levels = c("Cancer cells","Cholangiocytes",
"Hepatocytes", "LSECs", "KC-LSEC doublet", "Hepatocytes", "LSECs", "KC-LSEC doublet",
...@@ -130,7 +138,7 @@ gene <- c("Vil1", "Cdx2","Cldn4","Cldn7","Epcam", ...@@ -130,7 +138,7 @@ gene <- c("Vil1", "Cdx2","Cldn4","Cldn7","Epcam",
"Cd209a","Itgax", "Flt3","H2-Aa","Xcr1", "Clec9a", "Cd209a","Itgax", "Flt3","H2-Aa","Xcr1", "Clec9a",
"Cd19", "Cd22", "Cd79a", "Ms4a1", "Pax5", "Cd19", "Cd22", "Cd79a", "Ms4a1", "Pax5",
"Cd3e", "Cd4", "Cd8a", "Ncr1", "Gzmb") "Cd3e", "Cd4", "Cd8a", "Ncr1", "Gzmb")
pdf(file = paste(plot_dir, "CB_DotPlot_Shortlables_Fig.S5A.pdf", sep="/"), width=12, height=6) pdf(file = paste(output_dir, "CB_DotPlot_Shortlables_Fig.S5A.pdf", sep="/"), width=12, height=6)
DotPlot(obs1,features = unique(gene), cols = "RdBu") + DotPlot(obs1,features = unique(gene), cols = "RdBu") +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1))
dev.off() dev.off()
...@@ -141,8 +149,8 @@ dev.off() ...@@ -141,8 +149,8 @@ dev.off()
cells_labels <- table(obs1@meta.data$Annotation.final.CB, obs1@meta.data$Sample) cells_labels <- table(obs1@meta.data$Annotation.final.CB, obs1@meta.data$Sample)
cells_labels_p <- round(prop.table(table(obs1@meta.data$Annotation.final.CB, obs1@meta.data$Sample), 2) * 100, 1) cells_labels_p <- round(prop.table(table(obs1@meta.data$Annotation.final.CB, obs1@meta.data$Sample), 2) * 100, 1)
write.csv(cells_labels, paste0(plot_dir, "/number_cells_sample_Annotation.final.CB_Fig.S5B.csv"), row.names=T) write.csv(cells_labels, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_Fig.S5B.csv"), row.names=T)
write.csv(cells_labels_p, paste0(plot_dir, "/number_cells_sample_Annotation.final.CB_percentage_Fig.S5B.csv"), row.names=T) write.csv(cells_labels_p, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_percentage_Fig.S5B.csv"), row.names=T)
#### Analysis of mOrange+ fraction (Fig.5B, Suppl.Fig.5C,D) #### #### Analysis of mOrange+ fraction (Fig.5B, Suppl.Fig.5C,D) ####
...@@ -156,8 +164,8 @@ obs2 <- subset(obs1, subset = mOrange_union == "mOrange+") ...@@ -156,8 +164,8 @@ obs2 <- subset(obs1, subset = mOrange_union == "mOrange+")
cells_labels_orange <- table(obs2@meta.data$Annotation.final.CB, obs2@meta.data$Sample) cells_labels_orange <- table(obs2@meta.data$Annotation.final.CB, obs2@meta.data$Sample)
cells_labels_orange_p <- round(prop.table(table(obs2@meta.data$Annotation.final.CB, obs2@meta.data$Sample), 2) * 100, 1) cells_labels_orange_p <- round(prop.table(table(obs2@meta.data$Annotation.final.CB, obs2@meta.data$Sample), 2) * 100, 1)
write.csv(cells_labels_orange, paste0(plot_dir, "/number_cells_sample_Annotation.final.CB_mOrange_fraction_Fig.S5C.csv"), row.names=T) write.csv(cells_labels_orange, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_mOrange_fraction_Fig.S5C.csv"), row.names=T)
write.csv(cells_labels_orange_p, paste0(plot_dir, "/number_cells_sample_Annotation.final.CB_percentage_mOrange_fraction_Fig.S5C.csv"), row.names=T) write.csv(cells_labels_orange_p, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_percentage_mOrange_fraction_Fig.S5C.csv"), row.names=T)
# DimPlot with overlayed mOrange density (Fig.5B) # DimPlot with overlayed mOrange density (Fig.5B)
...@@ -165,7 +173,7 @@ data <- Embeddings(object = obs2[["umap.harmony.orig.ident"]])[(colnames(obs2)), ...@@ -165,7 +173,7 @@ data <- Embeddings(object = obs2[["umap.harmony.orig.ident"]])[(colnames(obs2)),
data <- as.data.frame(data) data <- as.data.frame(data)
obs1 <- SetIdent(obs1, value="cells") obs1 <- SetIdent(obs1, value="cells")
pdf(file = paste(plot_dir, "UMAP_mOrange_cells_Fig.5B.pdf", sep = "/"), width=9, height=7) pdf(file = paste(output_dir, "UMAP_mOrange_cells_Fig.5B.pdf", sep = "/"), width=9, height=7)
DimPlot(obs1, reduction = "umap.harmony.orig.ident", pt.size = 0.1, cols = "azure3") + DimPlot(obs1, reduction = "umap.harmony.orig.ident", pt.size = 0.1, cols = "azure3") +
stat_density_2d(aes_string(x = "UMAPh_1", y = "UMAPh_2", fill = "after_stat(level)"), data=data, stat_density_2d(aes_string(x = "UMAPh_1", y = "UMAPh_2", fill = "after_stat(level)"), data=data,
linewidth = 0.4, geom = "density_2d_filled", linewidth = 0.4, geom = "density_2d_filled",
...@@ -183,9 +191,9 @@ obs3 <- subset(obs1, subset = Shortlables %in% selected_pop) ...@@ -183,9 +191,9 @@ obs3 <- subset(obs1, subset = Shortlables %in% selected_pop)
# Dotplot of Slc7a11 expression by cluster and experimental group (Fig.5C, groups divided in Illustrator) # Dotplot of Slc7a11 expression by cluster and experimental group (Fig.5C, groups divided in Illustrator)
obs3 <- SetIdent(obs3, value="Shortlables") obs3 <- SetIdent(obs3, value="Shortlables")
obs3$Shortlables <- factor(x = obs3$Shortlables, levels = c("LSECs", "Cancer cells", "Monocytes", "TAMs", "KCs", "DCs")) obs3$Shortlables <- factor(x = obs3$Shortlables, levels = c("LSECs", "Cancer cells", "Monocytes", "TAMs", "KCs", "DCs"))
pdf(file = paste(plot_dir, "CB_DotPlot_Shortlables_Slc7a11_Fig.5C.pdf", sep = "/"), width=9, height=5) pdf(file = paste(output_dir, "CB_DotPlot_Shortlables_Slc7a11_Fig.5C.pdf", sep = "/"), width=9, height=5)
DotPlot(obs3, features = "Slc7a11", dot.scale=20, cols = "RdBu", split.by = "PROP.Group") + DotPlot(obs3, features = "Slc7a11", dot.scale=20, cols = "RdBu", split.by = "PROP.Group") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) +
coord_flip() coord_flip()
dev.off() dev.off()
...@@ -203,7 +211,7 @@ for(i in selected_pop){ ...@@ -203,7 +211,7 @@ for(i in selected_pop){
addWorksheet(simpl_DEG, name) addWorksheet(simpl_DEG, name)
writeData(simpl_DEG, name, markers, rowNames = T) writeData(simpl_DEG, name, markers, rowNames = T)
} }
saveWorkbook(simpl_DEG, file = paste0(plot_dir, "/DEG_simpl_Shortlables.xlsx"), overwrite = T) saveWorkbook(simpl_DEG, file = paste0(output_dir, "/DEG_simpl_Shortlables.xlsx"), overwrite = T)
# Run GSEA for each population # Run GSEA for each population
GSEA_simpl <- data.frame(matrix(nrow = 0, ncol = 9)) GSEA_simpl <- data.frame(matrix(nrow = 0, ncol = 9))
...@@ -217,7 +225,7 @@ for(i in selected_pop){ ...@@ -217,7 +225,7 @@ for(i in selected_pop){
colnames(GSEA_simpl) <- colnames(test) colnames(GSEA_simpl) <- colnames(test)
GSEA_simpl <- rbind(GSEA_simpl, test) GSEA_simpl <- rbind(GSEA_simpl, test)
} }
write.xlsx(GSEA_simpl, file = paste0(plot_dir, "/GSEA_simpl_Shortlables.xlsx")) write.xlsx(GSEA_simpl, file = paste0(output_dir, "/GSEA_simpl_Shortlables.xlsx"))
# Plot barplot for selected gene sets (Fig.5D, names and colors edited on illustrator) # Plot barplot for selected gene sets (Fig.5D, names and colors edited on illustrator)
gene_sets <- c("HALLMARK_TNFA_SIGNALING_VIA_NFKB", gene_sets <- c("HALLMARK_TNFA_SIGNALING_VIA_NFKB",
...@@ -262,7 +270,7 @@ gene_sets <- gsub(x=gene_sets, pattern="_", replacement=" ") ...@@ -262,7 +270,7 @@ gene_sets <- gsub(x=gene_sets, pattern="_", replacement=" ")
# Reorder the terms # Reorder the terms
plot$pathway <- factor(plot$pathway, levels = gene_sets) plot$pathway <- factor(plot$pathway, levels = gene_sets)
write.xlsx(plot, file = paste0(plot_dir, "/GSEA_plotted.xlsx")) write.xlsx(plot, file = paste0(output_dir, "/GSEA_plotted.xlsx"))
# Plot the GSEA results (Fig5D) # Plot the GSEA results (Fig5D)
p <- ggplot(data=plot, aes(x=NES, y=pathway, fill=log10(pval))) + p <- ggplot(data=plot, aes(x=NES, y=pathway, fill=log10(pval))) +
...@@ -278,12 +286,13 @@ p <- ggplot(data=plot, aes(x=NES, y=pathway, fill=log10(pval))) + ...@@ -278,12 +286,13 @@ p <- ggplot(data=plot, aes(x=NES, y=pathway, fill=log10(pval))) +
axis.text.y = element_text(colour = "black"))+ axis.text.y = element_text(colour = "black"))+
geom_vline(xintercept=0)+ geom_vline(xintercept=0)+
facet_wrap(~cluster, ncol = length(unique(plot$cluster))) facet_wrap(~cluster, ncol = length(unique(plot$cluster)))
ggsave(filename = paste0(plot_dir, "/GSEA_Fig5D.pdf"), plot=p, width=12, height=7) ggsave(filename = paste0(output_dir, "/GSEA_Fig5D.pdf"), plot=p, width=12, height=7)
# Create signature file with only signature cytokines for the more stringent GSEA in Fig.5E # Create signature file with only signature cytokines for the more stringent GSEA in Fig.5E
sig.cyto <- sig[15061:16357] #sig.cyto <- sig[15061:16357]
sig.cyto <- readRDS(paste0(input_dir, "/miDB_sig5.MLS_cytokines.rds"))
# Define the populations we want to use for this GSEA # Define the populations we want to use for this GSEA
selected_pop2 <- c("B cells","cDC1","cDC2_MoDCs","KCs","Macrophages","Mig cDCs","Monocytes","Neutrophils","pDCs","T and NK cells", "TAMs_1","TAMs_2","TAMs_3","TAMs_4","TAMs_5") selected_pop2 <- c("B cells","cDC1","cDC2_MoDCs","KCs","Macrophages","Mig cDCs","Monocytes","Neutrophils","pDCs","T and NK cells", "TAMs_1","TAMs_2","TAMs_3","TAMs_4","TAMs_5")
...@@ -301,7 +310,7 @@ for(i in selected_pop2){ ...@@ -301,7 +310,7 @@ for(i in selected_pop2){
addWorksheet(simpl_DEG2, name) addWorksheet(simpl_DEG2, name)
writeData(simpl_DEG2, name, markers, rowNames = T) writeData(simpl_DEG2, name, markers, rowNames = T)
} }
saveWorkbook(simpl_DEG2, file = paste0(plot_dir, "/DEG_Annotation.final.CB.xlsx"), overwrite = T) saveWorkbook(simpl_DEG2, file = paste0(output_dir, "/DEG_Annotation.final.CB.xlsx"), overwrite = T)
# Repeat GSEA for cytokines signatures only # Repeat GSEA for cytokines signatures only
...@@ -316,7 +325,7 @@ for(i in selected_pop2){ ...@@ -316,7 +325,7 @@ for(i in selected_pop2){
colnames(GSEA_cyto) <- colnames(test) colnames(GSEA_cyto) <- colnames(test)
GSEA_cyto <- rbind(GSEA_cyto,test) GSEA_cyto <- rbind(GSEA_cyto,test)
} }
write.xlsx(GSEA_cyto, file = paste0(plot_dir, "/GSEA_cyto.xlsx")) write.xlsx(GSEA_cyto, file = paste0(output_dir, "/GSEA_cyto.xlsx"))
# Subset the GSEA_cyto to match the cytokine signature calculated in the populations with the same populations that we have in our scRNAseq. # Subset the GSEA_cyto to match the cytokine signature calculated in the populations with the same populations that we have in our scRNAseq.
# This is necessary because each cytokine was tested and the effect in each population was saved as a different signature. # This is necessary because each cytokine was tested and the effect in each population was saved as a different signature.
...@@ -363,4 +372,4 @@ pheatmap(mat.df, scale = "none", display_numbers = mat.label, ...@@ -363,4 +372,4 @@ pheatmap(mat.df, scale = "none", display_numbers = mat.label,
clustering_method = "ward.D2", clustering_method = "ward.D2",
cutree_rows = 2, cutree_cols = 3, cutree_rows = 2, cutree_cols = 3,
col=myColor, breaks = myBreaks, na_col = "grey", col=myColor, breaks = myBreaks, na_col = "grey",
filename = paste0(plot_dir, "/Heatmap_cytokine_sig_Fig.5E_new.pdf"), cellwidth=10, cellheight=10) # width=12, height=7 filename = paste0(output_dir, "/Heatmap_cytokine_sig_Fig.5E.pdf"), cellwidth=10, cellheight=10) # width=12, height=7
library(data.table) library(data.table)
library(survival)
library(dplyr) library(dplyr)
library(babelgene)
library(openxlsx) library(openxlsx)
library(readxl) library(readxl)
library(survminer)
library(gridExtra) library(gridExtra)
library(ggplot2) library(ggplot2)
library(survival)
library(survminer)
#library(babelgene)
##Load dataset # Set directories
wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts" wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts"
setwd(wdir) setwd(wdir)
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference" input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output" output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
...@@ -47,37 +47,37 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -47,37 +47,37 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# and minimum patient number (n > 100 for HR plot). These thresholds can be adjusted within the script. # and minimum patient number (n > 100 for HR plot). These thresholds can be adjusted within the script.
################################################################################ ################################################################################
# Load dataset
metaD<-data.frame(fread(paste0(input_dir, "/TCGA_phenotype_denseDataOnlyDownload.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE)) metaD <- data.frame(fread(paste0(input_dir, "/TCGA_phenotype_denseDataOnlyDownload.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE))
Surv01<-data.frame(fread(paste0(input_dir, "/Survival_SupplementalTable_S1_20171025_xena_sp"), sep='\t', colClasses=c("character"), data.table=FALSE)) Surv01 <- data.frame(fread(paste0(input_dir, "/Survival_SupplementalTable_S1_20171025_xena_sp"), sep='\t', colClasses=c("character"), data.table=FALSE))
# FPKM01<-fread("tcga_RSEM_Hugo_norm_count") # FPKM01 <- fread(paste0(input_dir, "/tcga_RSEM_Hugo_norm_count"))
FPKM01<-fread(paste0(input_dir, "/pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz")) FPKM01 <- fread(paste0(input_dir, "/pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz"))
FPKM01<-as.data.frame(FPKM01) FPKM01 <- as.data.frame(FPKM01)
# Annot<-data.frame(fread(paste0(input_dir, "/probeMap_gencode.v23.annotation.gene.probemap"), sep='\t',colClasses=c("character"),data.table=FALSE)) # Annot <- data.frame(fread(paste0(input_dir, "/probeMap_gencode.v23.annotation.gene.probemap"), sep='\t',colClasses=c("character"),data.table=FALSE))
allGenes <- FPKM01[,1] allGenes <- FPKM01[,1]
#######################################################################
#####PLOT HR Score of signature for multiple tumor types###############
##################################################################### #####################################################################
humanGenes<-list(c("hsa-miR-342-3p")) #####PLOT HR Score of signature for multiple tumor types#############
#####################################################################
humanGenes <- list(c("hsa-miR-342-3p"))
####create xlsx file with genes, patients and tumor types # Create xlsx file with genes, patients and tumor types
Pan.data3 <- metaD[,] Pan.data3 <- metaD[,]
Pan.data4 <- merge(Surv01,Pan.data3,by=1) Pan.data4 <- merge(Surv01,Pan.data3,by=1)
Genes_selected <- FPKM01[FPKM01[,1]%in%humanGenes,] Genes_selected <- FPKM01[FPKM01[,1]%in%humanGenes,]
rownames(Genes_selected)<-Genes_selected[,1] rownames(Genes_selected)<-Genes_selected[,1]
Genes_selected<-Genes_selected[,-1] Genes_selected <- Genes_selected[,-1]
Genes_selected <- data.frame(t(Genes_selected)) Genes_selected <- data.frame(t(Genes_selected))
Genes_selected$sample<-rownames(Genes_selected) Genes_selected$sample<-rownames(Genes_selected)
Pan.data5 <- merge(Genes_selected,Pan.data4,by.y="sample") Pan.data5 <- merge(Genes_selected, Pan.data4, by.y="sample")
colnames(Pan.data5) colnames(Pan.data5)
write.xlsx(Pan.data5, paste0(ourdir, "/miR342_patients_survival.xlsx")) write.xlsx(Pan.data5, paste0(output_dir, "/miR342_patients_survival.xlsx"))
#############Regression with defined k2 # Regression with defined k2
k2<-0.5 #define quartile for selecting low/high score k2 <- 0.5 # define quartile for selecting low/high score
df2 <- NULL df2 <- NULL
for (i in unique(Pan.data5$cancer.type.abbreviation)){ for (i in unique(Pan.data5$cancer.type.abbreviation)){
Pan.data6 <- Pan.data5[Pan.data5$cancer.type.abbreviation%in%i,] Pan.data6 <- Pan.data5[Pan.data5$cancer.type.abbreviation%in%i,]
...@@ -86,16 +86,16 @@ for (i in unique(Pan.data5$cancer.type.abbreviation)){ ...@@ -86,16 +86,16 @@ for (i in unique(Pan.data5$cancer.type.abbreviation)){
cohort.score<-rep(0,nrow(P.OS)) cohort.score<-rep(0,nrow(P.OS))
M1 <- quantile(Pan.data6$hsa.miR.342.3p,k2) M1 <- quantile(Pan.data6$hsa.miR.342.3p,k2)
cohort.score[Pan.data6$hsa.miR.342.3p>=M1]<-1 cohort.score[Pan.data6$hsa.miR.342.3p>=M1]<-1
a1<-coxph(P.OS ~ as.numeric(cohort.score)) a1 <- coxph(P.OS ~ as.numeric(cohort.score))
df1 <- data.frame(Tumor = unique(Pan.data6$cancer.type.abbreviation),npatients=length(Pan.data6$X_PATIENT), HR = summary(a1)$coefficients[2], SE=summary(a1)$coefficients[3], df1 <- data.frame(Tumor = unique(Pan.data6$cancer.type.abbreviation),npatients=length(Pan.data6$X_PATIENT), HR = summary(a1)$coefficients[2], SE=summary(a1)$coefficients[3],
p = summary(a1)$coefficients[5] ) p = summary(a1)$coefficients[5] )
df2 <- rbind(df2,df1) df2 <- rbind(df2,df1)
df2<-df2[(order(df2$p)),] df2 <- df2[(order(df2$p)),]
}else{}} }else{}}
write.xlsx(df2, paste0(output_dir, "/miR342_HR_by_tumortype.xlsx")) write.xlsx(df2, paste0(output_dir, "/miR342_HR_by_tumortype.xlsx"))
###Filters selected tumors # Filters selected tumors
df3<-df2 df3<-df2
df3<-df3[df3$p<0.1,] df3<-df3[df3$p<0.1,]
df3<-df3[df3$npatients>100,] df3<-df3[df3$npatients>100,]
...@@ -113,13 +113,12 @@ a<-ggplot(df3, aes(x = reorder(Tumor, HR), y = HR, color = significance, label = ...@@ -113,13 +113,12 @@ a<-ggplot(df3, aes(x = reorder(Tumor, HR), y = HR, color = significance, label =
ggsave(filename = paste0(output_dir, "/HR_alltumors_342.pdf"), plot=a, width=9, height=4) ggsave(filename = paste0(output_dir, "/HR_alltumors_342.pdf"), plot=a, width=9, height=4)
####Plot survival of chosen tumors # Plot survival of chosen tumors
###Select type of tumor # Select type of tumor
df3<-df3[order(df3$HR),] df3 <- df3[order(df3$HR),]
df3<-df3[df3$p<0.05,] df3 <- df3[df3$p<0.05,]
#pdf(paste0(output_dir, "/Survival_***_342"), width=10,height = 6) pdf(paste0(output_dir, "/Survival_in_chosen_tumors_342.pdf"), width=10 ,height=6)
par(mfrow = c(2,4)) par(mfrow = c(2,4))
survival<-list() survival<-list()
for (i in df3$Tumor){ for (i in df3$Tumor){
...@@ -129,7 +128,7 @@ for (i in df3$Tumor){ ...@@ -129,7 +128,7 @@ for (i in df3$Tumor){
cohort.score<-rep(0,nrow(P.OS)) cohort.score<-rep(0,nrow(P.OS))
M1 <- quantile(Pan.data6$hsa.miR.342.3p,k2) M1 <- quantile(Pan.data6$hsa.miR.342.3p,k2)
cohort.score[Pan.data6$hsa.miR.342.3p>=M1]<-1 cohort.score[Pan.data6$hsa.miR.342.3p>=M1]<-1
##plotting survival # Plotting survival
fit.score <- survfit(P.OS ~ cohort.score) fit.score <- survfit(P.OS ~ cohort.score)
cox_model <- coxph(P.OS ~ as.numeric(cohort.score)) cox_model <- coxph(P.OS ~ as.numeric(cohort.score))
p_value <- summary(cox_model)$coefficients["as.numeric(cohort.score)", "Pr(>|z|)"] p_value <- summary(cox_model)$coefficients["as.numeric(cohort.score)", "Pr(>|z|)"]
...@@ -143,11 +142,10 @@ for (i in df3$Tumor){ ...@@ -143,11 +142,10 @@ for (i in df3$Tumor){
text(x = 10, y = 0.05, label = paste("n =", length(cohort.score)), adj = c(0, 1)) text(x = 10, y = 0.05, label = paste("n =", length(cohort.score)), adj = c(0, 1))
survival[[i]] <- recordPlot() survival[[i]] <- recordPlot()
} }
# dev.off() dev.off()
###Plot survival of metastastic/primary cancers # Plot survival of metastastic/primary cancers
#k2<-0.5 #k2<-0.5
Pan.data7 <- Pan.data5[Pan.data5$sample_type == "Metastatic",] Pan.data7 <- Pan.data5[Pan.data5$sample_type == "Metastatic",]
#Pan.data7 <- Pan.data5[Pan.data5$sample_type == "Primary Tumor",] #Pan.data7 <- Pan.data5[Pan.data5$sample_type == "Primary Tumor",]
...@@ -162,10 +160,10 @@ formatted_p_value <- ifelse(p_value < 0.0001, "<0.0001", as.numeric(round(p_val ...@@ -162,10 +160,10 @@ formatted_p_value <- ifelse(p_value < 0.0001, "<0.0001", as.numeric(round(p_val
par(las = 0) par(las = 0)
pdf(paste0(output_dir, "/Survival_metastatic_342.pdf"), width=5,height =5) pdf(paste0(output_dir, "/Survival_metastatic_342.pdf"), width=5, height=5)
plot(fit.score, lty = c(1, 1), col = c("blue", "red"), xlab = "Time (d)", ylab = "Overall Survival", lwd = 2, bty = "n", plot(fit.score, lty = c(1, 1), col = c("blue", "red"), xlab = "Time (d)", ylab = "Overall Survival", lwd = 2, bty = "n",
main = "Metastatic tumors") main = "Metastatic tumors")
#Add legend # Add legend
legend(x = max(fit.score$time)/2, y = 1, legend = c("miRNA high", "miRNA low"), lty = c(1, 1), col = c("red", "blue"), lwd = 2,bty = "n") legend(x = max(fit.score$time)/2, y = 1, legend = c("miRNA high", "miRNA low"), lty = c(1, 1), col = c("red", "blue"), lwd = 2,bty = "n")
text(x = max(fit.score$time)/1.8, y = 0.70, label = paste("p=", (formatted_p_value)), adj = c(0, 1)) text(x = max(fit.score$time)/1.8, y = 0.70, label = paste("p=", (formatted_p_value)), adj = c(0, 1))
text(x = 10, y = 0.05, label = paste("n =", length(cohort.score)), adj = c(0, 1)) text(x = 10, y = 0.05, label = paste("n =", length(cohort.score)), adj = c(0, 1))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment