Commit 65cf9977 authored by Marco Monti's avatar Marco Monti
Browse files

I updated the scripts.

parent cad0b7de
......@@ -11,7 +11,7 @@ library("dplyr")
# wdir1510 <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
# output_d <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
################################################################################
......@@ -34,7 +34,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
#### Figure 1B ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) #sheet: umis.genes.polyA-mouse
df1 <- as.data.frame(read_excel(paste0(input_dir, "/QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse
df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts
df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM
gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax",
......@@ -48,14 +48,13 @@ openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), ro
# the df was exported and used to create a heatmap using Graphpad
#### Figure 1C ####
# "edgeR_DGE_res_volcano.pdf" (in the 'wdir1509' folder) was imported in illustrator
# and merged with the plot generated by the code below
df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) #sheet: miRNA_piRNA
df1 <-df1[,1:7] #UMI
# "edgeR_DGE_res_volcano.pdf" (in the 'wdir1509' folder) was imported in illustrator and merged with the plot generated by the code below
df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
df1 <- df1[,1:7] # UMI
rownames(df1)<-df1$miRNA
df2 <- apply(df1[,-1], 2, function(x) log(x)) #Log counts
df2 <- as.data.frame(limma::normalizeCyclicLoess(df2, weights = NULL, span=0.7, iterations = 5, method = "pairs")) #Cyclic loess normalization
colnames(df2)<-c("B cell","RPM","cDC1","cDC2","LSEC","KC") #Rename columns
df2 <- apply(df1[,-1], 2, function(x) log(x)) # Log counts
df2 <- as.data.frame(limma::normalizeCyclicLoess(df2, weights = NULL, span=0.7, iterations = 5, method = "pairs")) # Cyclic loess normalization
colnames(df2)<-c("B cell","RPM","cDC1","cDC2","LSEC","KC") # Rename columns
upper.panel<-function(x, y, ...){
points(((x+y)/2),(x-y), cex=0.6, pch=19, col="grey")
above <- (x-y-1)*((x+y)/2-5) > 5 & ((x+y)/2)>5
......@@ -63,19 +62,21 @@ upper.panel<-function(x, y, ...){
below <- (x-y+1)*((x+y)/2-5) < -5 & ((x+y)/2)>5
points(((x+y)/2)[below],(x-y)[below], col="blue", cex=0.6, pch=19)
} # function for MA plot
pdf(file = paste(output_dir, "Fig_1C.pdf", sep = "/"), width=9, height=9)
pairs(df2[,1:6], lower.panel = NULL, upper.panel = upper.panel,
ylim=c(-8.5,8.5), xlim=c(2,14), cex.labels = 2) # pairwise plot
dev.off()
#### Figure 1D ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) #sheet: miRNA_piRNA
df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
df1 <- df1[-which(grepl("piR",df1[,1])),1:7] # UMI and piRNA removing
df1[,1] <- gsub("/.*","",df1[,1]) # leave only first miRNA for ambiguous entries
df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x,na.rm=T)*1000000}) #UMI normalized by CPM
df.families <- as.data.frame(read_excel("Analysis CB/miRNA_Family.xlsx",sheet=1,col_names = T))#miRNA families from miRBase
df.families <- df.families[df.families[,3]==10090,c(4,1)] #select mouse entries
df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x,na.rm=T)*1000000}) # UMI normalized by CPM
df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet=1 ,col_names=T)) # miRNA families from miRBase
df.families <- df.families[df.families[,3]==10090,c(4,1)] # select mouse entries
df.families[,1] <- sub(pattern = "p.*",replacement ="p",x = df.families[,1])
df2 <- merge(df.families,df1,by=1)
df2 <- aggregate(df2[,-c(1:2)],by = df2["miR family"],FUN = sum,na.rm=T) #sum counts by family
df2 <- aggregate(df2[,-c(1:2)],by = df2["miR family"],FUN = sum,na.rm=T) # sum counts by family
gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1",
"miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p",
"miR-15-5p/16-5p/195-5p/322-5p/497-5p","miR-26-5p","miR-138-5p",
......@@ -83,6 +84,6 @@ gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.
"miR-125-5p/351-5p","miR-126-3p.1","miR-199-3p")
df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR family`, levels=gene_vector))
rownames(df3) <- df3[,1]
df3.scaled <- as.data.frame(t(scale(t(df3[-1])))) #Zscore normalization. Scaled only works on columns, so need to transform
df3.scaled <- as.data.frame(t(scale(t(df3[-1])))) # Zscore normalization. Scaled only works on columns, so need to transform
openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1D_table.xlsx"), rowNames=T)
#the df was exported and used to create a heatmap using Graphpad
# The df was exported and used to create a heatmap using Graphpad
......@@ -12,7 +12,7 @@ library(enrichplot)
# wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2")
# fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
################################################################################
......@@ -32,7 +32,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# Input data files:
# - Figure 3A & 3B: "edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control")
# (Output from differential gene expression analysis, likely using edgeR)
# - Figure 3A & 3B: "TargetScan8.0__miR-342-3p.predicted_targets.xlsx" (Sheet 1)
# - Figure 3A & 3B: "TargetScan8.0_miR-342-3p.predicted_targets.xlsx" (Sheet 1)
# (List of predicted miR-342-3p target genes from TargetScan)
# - Figure 3C & 3D: "EdgeR_results.xlsx" (Sheet "miR342-control")
# (Same as input for Figure 3A & 3B, used for GSEA)
......@@ -44,11 +44,11 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
#### Figure 3A&B ####
#Import df
# Import df
miR_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control")
sponge_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "spongeBT-control")
#Add DEG color and label for volcano plot
# Add DEG color and label for volcano plot
miR_ctrl$DEG <- "NO"
miR_ctrl$DEG[miR_ctrl$logFC > 1 & miR_ctrl$PValue < 0.05] <- "UP"
miR_ctrl$DEG[miR_ctrl$logFC < (-1) & miR_ctrl$PValue < 0.05] <- "DOWN"
......@@ -61,9 +61,9 @@ sponge_ctrl$DEG[sponge_ctrl$logFC < (-1) & sponge_ctrl$PValue < 0.05] <- "DOWN"
sponge_ctrl$DEG_label <- NA
sponge_ctrl$DEG_label[sponge_ctrl$DEG != "NO"] <- sponge_ctrl$...1[sponge_ctrl$DEG != "NO"]
#Volcano plot (left panel)
# Volcano plot (left panel)
data <- miR_ctrl
ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
p1 <- ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
geom_point() +
theme_minimal() +
geom_text_repel() +
......@@ -74,9 +74,10 @@ ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
scale_y_continuous(limits = c(0,20)) +
xlab(bquote(Log[2](FC))) +
ylab(bquote(-Log[10](PVal)))
ggsave(filename = paste0(output_dir, "/Volcano_plot_3A.pdf"), plot=p1, width=7, height=7)
data <- sponge_ctrl
ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
p2 <- ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
geom_point() +
theme_minimal() +
geom_text_repel() +
......@@ -87,49 +88,54 @@ ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
scale_y_continuous(limits = c(0,20)) +
xlab(bquote(Log[2](FC))) +
ylab(bquote(-Log[10](PVal)))
ggsave(filename = paste0(output_dir, "/Volcano_plot_3B.pdf"), plot=p2, width=7, height=7)
# Import miR-342-3p target list (from TargetScan)
miR342_targets <- read_excel(paste0(input_dir, "/TargetScan8.0__miR-342-3p.predicted_targets.xlsx"))
miR342_targets <- read_excel(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.xlsx"))
miR342_targets <- filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3))
#ecdf plot (right panel)
# ecdf plot (right panel)
data <- miR_ctrl
test <-filter(data, data$...1 %in% miR342_targets$'Target gene')
test <- filter(data, data$...1 %in% miR342_targets$'Target gene')
pdf(file = paste(output_dir, "Fig_3A_dx.pdf", sep = "/"), width=7, height=7)
plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T,
ylab = "Fraction of DEG",
xlab = bquote(Log[2](FC)),
xlim=c(-0.5,0.5),
ylim=c(0,1),
main="ECDF of gene expression: miR vs mut") +
lines(ecdf(test$logFC), col= "blue", do.points=F, lwd = 2, verticals=T) +
abline(v=0, col="black") +
abline(h=0.5, col="black") +
legend("bottomright", c("All genes","miR-342-3p targets"),
col = c("black","blue"), lwd=2, cex = 0.7)
ks.test(test$logFC, data$logFC, alternative = "g") #Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* greater than average data
main="ECDF of gene expression: miR vs mut")
lines(ecdf(test$logFC), col= "blue", do.points=F, lwd = 2, verticals=T)
abline(v=0, col="black")
abline(h=0.5, col="black")
legend("bottomright", c("All genes","miR-342-3p targets"), col = c("black","blue"), lwd=2, cex = 0.7)
ks.test(test$logFC, data$logFC, alternative = "g") # Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* greater than average data
dev.off()
data <- sponge_ctrl
test <-filter(data, data$...1 %in% miR342_targets$'Target gene')
pdf(file = paste(output_dir, "Fig_3B_dx.pdf", sep = "/"), width=7, height=7)
plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T,
ylab = "Fraction of DEG",
xlab = bquote(Log[2](FC)),
xlim=c(-0.3,0.3),
ylim=c(0,1),
main="ECDF of gene expression: sponge vs scr") +
lines(ecdf(test$logFC), col= "red", do.points=F, lwd = 2, verticals=T) +
abline(v=0, col="black") +
abline(h=0.5, col="black") +
legend("bottomright", c("All genes","miR-342-3p targets"),
col = c("black","red"), lwd=2, cex = 0.7)
main="ECDF of gene expression: sponge vs scr")
lines(ecdf(test$logFC), col= "red", do.points=F, lwd = 2, verticals=T)
abline(v=0, col="black")
abline(h=0.5, col="black")
legend("bottomright", c("All genes","miR-342-3p targets"), col = c("black","red"), lwd=2, cex = 0.7)
ks.test(test$logFC, data$logFC, alternative = "l") # Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data
dev.off()
#### Figure 3C&D ####
# Load df
df1<- read_excel(paste0(input_dir, "/EdgeR_results.xlsx"), sheet = "miR342-control")
miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) #GO terms db
df1 <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control")
miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) # GO terms db
#Rename pathways of interest in miDB_sig5
# Rename pathways of interest in miDB_sig5
names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_OXIDATIVE_PHOSPHORYLATION"] <- "Oxydative phosphorylation"
names(miDB_sig5)[names(miDB_sig5) == "GOBP_REGULATION_OF_CHOLESTEROL_METABOLIC_PROCESS"] <- "Regulation of cholesterol metabolic process"
names(miDB_sig5)[names(miDB_sig5) == "GOBP_RESPONSE_TO_INTERLEUKIN_12"] <- "Response to IL12"
......@@ -189,20 +195,23 @@ pathways <- c("Oxydative phosphorylation",
"PGE2 response genes")
genelist <- as.data.frame(PvalResult[PvalResult$pathway %in% pathways,])
ggplot(genelist, aes(x=NES, y=reorder(pathway,NES), size=size ,color=padj)) +
p3 <- ggplot(genelist, aes(x=NES, y=reorder(pathway,NES), size=size ,color=padj)) +
geom_point() +
scale_size_area(limits=c(10,450), max_size = 15) +
scale_colour_gradient(low="red",high="blue") +
scale_colour_gradient(low="red", high="blue") +
labs(y='Pathway', x='NES')
ggsave(filename = paste0(output_dir, "/gsea_DotPlot_Fig3C.pdf"), plot=p3, width=7, height=7)
#Run GSEA with ClusterProfiler
# Run GSEA with ClusterProfiler
genelist <- data.frame(term = rep(names(miDB_sig5), sapply(miDB_sig5, length)),
gene = unlist(miDB_sig5))
df2 = sort(df2, decreasing = TRUE)
test2 <- GSEA(df2,TERM2GENE = genelist)
test2 <- GSEA(df2, TERM2GENE = genelist)
#Enrichment map (Fig.3D)
# Enrichment map (Fig.3D)
test3 <- filter(test2, ID %in% pathways)
test3 <- pairwise_termsim(test3)
emapplot(test3, min_edge = 0.01, color = "NES", layout="fr", repel =T) +
scale_fill_gradient2(name=bquote(NES),low="blue", high="red")
\ No newline at end of file
pdf(file = paste(output_dir, "emapplot_Fig.3D.pdf", sep = "/"), width=7, height=7)
emapplot(test3, min_edge = 0.01, color = "NES", layout="fr") + # repel=T
scale_fill_gradient2(name=bquote(NES), low="blue", high="red")
dev.off()
......@@ -20,13 +20,16 @@ library(cetcolor)
wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/results2/CB1_CB3_CB4"
wdir_CB <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts/results2"
plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts_final/plots and tables used in figures"
#plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts_final/plots and tables used in figures"
dirCB <-"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts"
dir.create(plot_dir, showWarnings=F, recursive=T)
#dir.create(plot_dir, showWarnings=F, recursive=T)
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
# 16357 GO terms db. used for GSEA
sig <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/reference/miDB_sig5.MLS.rds")
sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds"))
################################################################################
......@@ -68,8 +71,6 @@ obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID=="
obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID=="Mouse2"] <- "miR_02"
obs1@meta.data$Sample[obs1@meta.data$orig.ident=="CB4"&obs1@meta.data$hash.ID=="Mouse3"] <- "miR_05"
obs1<-FindClusters(obs1, graph.name="RNA_snn_h.orig.ident", resolution=4)
# Manually annotate the clusters
obs1@meta.data$Annotation.final.CB<-"Undefined"
obs1@meta.data$Annotation.final.CB[obs1@meta.data$RNA_snn_h.orig.ident_res.0.6==0]<- "Monocytes"
......@@ -98,21 +99,28 @@ obs1@meta.data$Annotation.final.CB[obs1@meta.data$RNA_snn_h.orig.ident_res.4==51
# Visualize final annotation in Umap (Fig.5A)
obs1 <- SetIdent(obs1,value="Annotation.final.CB")
pdf(file = paste(plot_dir, "CB_Annotation.final.CB_Fig.5A.pdf", sep = "/"), width=9, height=7)
pdf(file = paste(output_dir, "CB_Annotation.final.CB_Fig.5A.pdf", sep = "/"), width=9, height=7)
DimPlot(obs1, reduction="umap.harmony.orig.ident", group.by="Annotation.final.CB", label=T, repel=T)
dev.off()
# Adding a simplified annotation
obs1@meta.data$Shortlables<-obs1@meta.data$Annotation.final.CB
obs1@meta.data$Shortlables <- obs1@meta.data$Annotation.final.CB
DCs<-c("Mig cDCs","pDCs","pre DCs", "cDC1", "cDC2_MoDCs")
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%DCs]<- "DCs"
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%DCs] <- "DCs"
granu<-c("Neutrophils","Basophils")
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%granu]<- "Granulocytes"
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%granu] <- "Granulocytes"
TAMs<-c("TAMs_1", "TAMs_2", "TAMs_3", "TAMs_4", "TAMs_5")
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%TAMs]<- "TAMs"
obs1@meta.data$Shortlables[obs1@meta.data$Annotation.final.CB%in%TAMs] <- "TAMs"
# Export DimPlot as excel
var_list <- c("UMAPh_1", "UMAPh_2","PROP.Group","orig.ident", "Sample","RNA_snn_h.orig.ident_res.0.6", "mOrange_union","Annotation.final.CB", "Shortlables")
obs1_df <- FetchData(obs1, vars = var_list)
obs1_df$cell_ID <- rownames(obs1_df)
write.xlsx(obs1_df, file=paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.xlsx"), overwrite=T)
# Dotplot with markers for population (Suppl.Fig.5A)
obs1 <- SetIdent(obs1,value="Shortlables")
obs1 <- SetIdent(obs1, value="Shortlables")
obs1$Shortlables <- factor(x = obs1$Shortlables,
levels = c("Cancer cells","Cholangiocytes",
"Hepatocytes", "LSECs", "KC-LSEC doublet",
......@@ -130,7 +138,7 @@ gene <- c("Vil1", "Cdx2","Cldn4","Cldn7","Epcam",
"Cd209a","Itgax", "Flt3","H2-Aa","Xcr1", "Clec9a",
"Cd19", "Cd22", "Cd79a", "Ms4a1", "Pax5",
"Cd3e", "Cd4", "Cd8a", "Ncr1", "Gzmb")
pdf(file = paste(plot_dir, "CB_DotPlot_Shortlables_Fig.S5A.pdf", sep="/"), width=12, height=6)
pdf(file = paste(output_dir, "CB_DotPlot_Shortlables_Fig.S5A.pdf", sep="/"), width=12, height=6)
DotPlot(obs1,features = unique(gene), cols = "RdBu") +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1))
dev.off()
......@@ -141,8 +149,8 @@ dev.off()
cells_labels <- table(obs1@meta.data$Annotation.final.CB, obs1@meta.data$Sample)
cells_labels_p <- round(prop.table(table(obs1@meta.data$Annotation.final.CB, obs1@meta.data$Sample), 2) * 100, 1)
write.csv(cells_labels, paste0(plot_dir, "/number_cells_sample_Annotation.final.CB_Fig.S5B.csv"), row.names=T)
write.csv(cells_labels_p, paste0(plot_dir, "/number_cells_sample_Annotation.final.CB_percentage_Fig.S5B.csv"), row.names=T)
write.csv(cells_labels, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_Fig.S5B.csv"), row.names=T)
write.csv(cells_labels_p, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_percentage_Fig.S5B.csv"), row.names=T)
#### Analysis of mOrange+ fraction (Fig.5B, Suppl.Fig.5C,D) ####
......@@ -156,8 +164,8 @@ obs2 <- subset(obs1, subset = mOrange_union == "mOrange+")
cells_labels_orange <- table(obs2@meta.data$Annotation.final.CB, obs2@meta.data$Sample)
cells_labels_orange_p <- round(prop.table(table(obs2@meta.data$Annotation.final.CB, obs2@meta.data$Sample), 2) * 100, 1)
write.csv(cells_labels_orange, paste0(plot_dir, "/number_cells_sample_Annotation.final.CB_mOrange_fraction_Fig.S5C.csv"), row.names=T)
write.csv(cells_labels_orange_p, paste0(plot_dir, "/number_cells_sample_Annotation.final.CB_percentage_mOrange_fraction_Fig.S5C.csv"), row.names=T)
write.csv(cells_labels_orange, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_mOrange_fraction_Fig.S5C.csv"), row.names=T)
write.csv(cells_labels_orange_p, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_percentage_mOrange_fraction_Fig.S5C.csv"), row.names=T)
# DimPlot with overlayed mOrange density (Fig.5B)
......@@ -165,7 +173,7 @@ data <- Embeddings(object = obs2[["umap.harmony.orig.ident"]])[(colnames(obs2)),
data <- as.data.frame(data)
obs1 <- SetIdent(obs1, value="cells")
pdf(file = paste(plot_dir, "UMAP_mOrange_cells_Fig.5B.pdf", sep = "/"), width=9, height=7)
pdf(file = paste(output_dir, "UMAP_mOrange_cells_Fig.5B.pdf", sep = "/"), width=9, height=7)
DimPlot(obs1, reduction = "umap.harmony.orig.ident", pt.size = 0.1, cols = "azure3") +
stat_density_2d(aes_string(x = "UMAPh_1", y = "UMAPh_2", fill = "after_stat(level)"), data=data,
linewidth = 0.4, geom = "density_2d_filled",
......@@ -183,9 +191,9 @@ obs3 <- subset(obs1, subset = Shortlables %in% selected_pop)
# Dotplot of Slc7a11 expression by cluster and experimental group (Fig.5C, groups divided in Illustrator)
obs3 <- SetIdent(obs3, value="Shortlables")
obs3$Shortlables <- factor(x = obs3$Shortlables, levels = c("LSECs", "Cancer cells", "Monocytes", "TAMs", "KCs", "DCs"))
pdf(file = paste(plot_dir, "CB_DotPlot_Shortlables_Slc7a11_Fig.5C.pdf", sep = "/"), width=9, height=5)
pdf(file = paste(output_dir, "CB_DotPlot_Shortlables_Slc7a11_Fig.5C.pdf", sep = "/"), width=9, height=5)
DotPlot(obs3, features = "Slc7a11", dot.scale=20, cols = "RdBu", split.by = "PROP.Group") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) +
coord_flip()
dev.off()
......@@ -203,7 +211,7 @@ for(i in selected_pop){
addWorksheet(simpl_DEG, name)
writeData(simpl_DEG, name, markers, rowNames = T)
}
saveWorkbook(simpl_DEG, file = paste0(plot_dir, "/DEG_simpl_Shortlables.xlsx"), overwrite = T)
saveWorkbook(simpl_DEG, file = paste0(output_dir, "/DEG_simpl_Shortlables.xlsx"), overwrite = T)
# Run GSEA for each population
GSEA_simpl <- data.frame(matrix(nrow = 0, ncol = 9))
......@@ -217,7 +225,7 @@ for(i in selected_pop){
colnames(GSEA_simpl) <- colnames(test)
GSEA_simpl <- rbind(GSEA_simpl, test)
}
write.xlsx(GSEA_simpl, file = paste0(plot_dir, "/GSEA_simpl_Shortlables.xlsx"))
write.xlsx(GSEA_simpl, file = paste0(output_dir, "/GSEA_simpl_Shortlables.xlsx"))
# Plot barplot for selected gene sets (Fig.5D, names and colors edited on illustrator)
gene_sets <- c("HALLMARK_TNFA_SIGNALING_VIA_NFKB",
......@@ -262,7 +270,7 @@ gene_sets <- gsub(x=gene_sets, pattern="_", replacement=" ")
# Reorder the terms
plot$pathway <- factor(plot$pathway, levels = gene_sets)
write.xlsx(plot, file = paste0(plot_dir, "/GSEA_plotted.xlsx"))
write.xlsx(plot, file = paste0(output_dir, "/GSEA_plotted.xlsx"))
# Plot the GSEA results (Fig5D)
p <- ggplot(data=plot, aes(x=NES, y=pathway, fill=log10(pval))) +
......@@ -278,12 +286,13 @@ p <- ggplot(data=plot, aes(x=NES, y=pathway, fill=log10(pval))) +
axis.text.y = element_text(colour = "black"))+
geom_vline(xintercept=0)+
facet_wrap(~cluster, ncol = length(unique(plot$cluster)))
ggsave(filename = paste0(plot_dir, "/GSEA_Fig5D.pdf"), plot=p, width=12, height=7)
ggsave(filename = paste0(output_dir, "/GSEA_Fig5D.pdf"), plot=p, width=12, height=7)
# Create signature file with only signature cytokines for the more stringent GSEA in Fig.5E
sig.cyto <- sig[15061:16357]
#sig.cyto <- sig[15061:16357]
sig.cyto <- readRDS(paste0(input_dir, "/miDB_sig5.MLS_cytokines.rds"))
# Define the populations we want to use for this GSEA
selected_pop2 <- c("B cells","cDC1","cDC2_MoDCs","KCs","Macrophages","Mig cDCs","Monocytes","Neutrophils","pDCs","T and NK cells", "TAMs_1","TAMs_2","TAMs_3","TAMs_4","TAMs_5")
......@@ -301,7 +310,7 @@ for(i in selected_pop2){
addWorksheet(simpl_DEG2, name)
writeData(simpl_DEG2, name, markers, rowNames = T)
}
saveWorkbook(simpl_DEG2, file = paste0(plot_dir, "/DEG_Annotation.final.CB.xlsx"), overwrite = T)
saveWorkbook(simpl_DEG2, file = paste0(output_dir, "/DEG_Annotation.final.CB.xlsx"), overwrite = T)
# Repeat GSEA for cytokines signatures only
......@@ -316,7 +325,7 @@ for(i in selected_pop2){
colnames(GSEA_cyto) <- colnames(test)
GSEA_cyto <- rbind(GSEA_cyto,test)
}
write.xlsx(GSEA_cyto, file = paste0(plot_dir, "/GSEA_cyto.xlsx"))
write.xlsx(GSEA_cyto, file = paste0(output_dir, "/GSEA_cyto.xlsx"))
# Subset the GSEA_cyto to match the cytokine signature calculated in the populations with the same populations that we have in our scRNAseq.
# This is necessary because each cytokine was tested and the effect in each population was saved as a different signature.
......@@ -363,4 +372,4 @@ pheatmap(mat.df, scale = "none", display_numbers = mat.label,
clustering_method = "ward.D2",
cutree_rows = 2, cutree_cols = 3,
col=myColor, breaks = myBreaks, na_col = "grey",
filename = paste0(plot_dir, "/Heatmap_cytokine_sig_Fig.5E_new.pdf"), cellwidth=10, cellheight=10) # width=12, height=7
filename = paste0(output_dir, "/Heatmap_cytokine_sig_Fig.5E.pdf"), cellwidth=10, cellheight=10) # width=12, height=7
library(data.table)
library(survival)
library(dplyr)
library(babelgene)
library(openxlsx)
library(readxl)
library(survminer)
library(gridExtra)
library(ggplot2)
library(survival)
library(survminer)
#library(babelgene)
##Load dataset
# Set directories
wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts"
setwd(wdir)
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
......@@ -47,37 +47,37 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# and minimum patient number (n > 100 for HR plot). These thresholds can be adjusted within the script.
################################################################################
metaD<-data.frame(fread(paste0(input_dir, "/TCGA_phenotype_denseDataOnlyDownload.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE))
Surv01<-data.frame(fread(paste0(input_dir, "/Survival_SupplementalTable_S1_20171025_xena_sp"), sep='\t', colClasses=c("character"), data.table=FALSE))
# FPKM01<-fread("tcga_RSEM_Hugo_norm_count")
FPKM01<-fread(paste0(input_dir, "/pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz"))
FPKM01<-as.data.frame(FPKM01)
# Annot<-data.frame(fread(paste0(input_dir, "/probeMap_gencode.v23.annotation.gene.probemap"), sep='\t',colClasses=c("character"),data.table=FALSE))
# Load dataset
metaD <- data.frame(fread(paste0(input_dir, "/TCGA_phenotype_denseDataOnlyDownload.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE))
Surv01 <- data.frame(fread(paste0(input_dir, "/Survival_SupplementalTable_S1_20171025_xena_sp"), sep='\t', colClasses=c("character"), data.table=FALSE))
# FPKM01 <- fread(paste0(input_dir, "/tcga_RSEM_Hugo_norm_count"))
FPKM01 <- fread(paste0(input_dir, "/pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz"))
FPKM01 <- as.data.frame(FPKM01)
# Annot <- data.frame(fread(paste0(input_dir, "/probeMap_gencode.v23.annotation.gene.probemap"), sep='\t',colClasses=c("character"),data.table=FALSE))
allGenes <- FPKM01[,1]
#######################################################################
#####PLOT HR Score of signature for multiple tumor types###############
#####################################################################
humanGenes<-list(c("hsa-miR-342-3p"))
#####PLOT HR Score of signature for multiple tumor types#############
#####################################################################
humanGenes <- list(c("hsa-miR-342-3p"))
####create xlsx file with genes, patients and tumor types
# Create xlsx file with genes, patients and tumor types
Pan.data3 <- metaD[,]
Pan.data4 <- merge(Surv01,Pan.data3,by=1)
Genes_selected <- FPKM01[FPKM01[,1]%in%humanGenes,]
rownames(Genes_selected)<-Genes_selected[,1]
Genes_selected<-Genes_selected[,-1]
Genes_selected <- Genes_selected[,-1]
Genes_selected <- data.frame(t(Genes_selected))
Genes_selected$sample<-rownames(Genes_selected)
Pan.data5 <- merge(Genes_selected,Pan.data4,by.y="sample")
Pan.data5 <- merge(Genes_selected, Pan.data4, by.y="sample")
colnames(Pan.data5)
write.xlsx(Pan.data5, paste0(ourdir, "/miR342_patients_survival.xlsx"))
write.xlsx(Pan.data5, paste0(output_dir, "/miR342_patients_survival.xlsx"))
#############Regression with defined k2
k2<-0.5 #define quartile for selecting low/high score
# Regression with defined k2
k2 <- 0.5 # define quartile for selecting low/high score
df2 <- NULL
for (i in unique(Pan.data5$cancer.type.abbreviation)){
Pan.data6 <- Pan.data5[Pan.data5$cancer.type.abbreviation%in%i,]
......@@ -86,16 +86,16 @@ for (i in unique(Pan.data5$cancer.type.abbreviation)){
cohort.score<-rep(0,nrow(P.OS))
M1 <- quantile(Pan.data6$hsa.miR.342.3p,k2)
cohort.score[Pan.data6$hsa.miR.342.3p>=M1]<-1
a1<-coxph(P.OS ~ as.numeric(cohort.score))
a1 <- coxph(P.OS ~ as.numeric(cohort.score))
df1 <- data.frame(Tumor = unique(Pan.data6$cancer.type.abbreviation),npatients=length(Pan.data6$X_PATIENT), HR = summary(a1)$coefficients[2], SE=summary(a1)$coefficients[3],
p = summary(a1)$coefficients[5] )
df2 <- rbind(df2,df1)
df2<-df2[(order(df2$p)),]
df2 <- df2[(order(df2$p)),]
}else{}}
write.xlsx(df2, paste0(output_dir, "/miR342_HR_by_tumortype.xlsx"))
###Filters selected tumors
# Filters selected tumors
df3<-df2
df3<-df3[df3$p<0.1,]
df3<-df3[df3$npatients>100,]
......@@ -113,13 +113,12 @@ a<-ggplot(df3, aes(x = reorder(Tumor, HR), y = HR, color = significance, label =
ggsave(filename = paste0(output_dir, "/HR_alltumors_342.pdf"), plot=a, width=9, height=4)
####Plot survival of chosen tumors
###Select type of tumor
df3<-df3[order(df3$HR),]
df3<-df3[df3$p<0.05,]
# Plot survival of chosen tumors
# Select type of tumor
df3 <- df3[order(df3$HR),]
df3 <- df3[df3$p<0.05,]
#pdf(paste0(output_dir, "/Survival_***_342"), width=10,height = 6)
pdf(paste0(output_dir, "/Survival_in_chosen_tumors_342.pdf"), width=10 ,height=6)
par(mfrow = c(2,4))
survival<-list()
for (i in df3$Tumor){
......@@ -129,7 +128,7 @@ for (i in df3$Tumor){
cohort.score<-rep(0,nrow(P.OS))
M1 <- quantile(Pan.data6$hsa.miR.342.3p,k2)
cohort.score[Pan.data6$hsa.miR.342.3p>=M1]<-1
##plotting survival
# Plotting survival
fit.score <- survfit(P.OS ~ cohort.score)
cox_model <- coxph(P.OS ~ as.numeric(cohort.score))
p_value <- summary(cox_model)$coefficients["as.numeric(cohort.score)", "Pr(>|z|)"]
......@@ -143,11 +142,10 @@ for (i in df3$Tumor){
text(x = 10, y = 0.05, label = paste("n =", length(cohort.score)), adj = c(0, 1))
survival[[i]] <- recordPlot()
}
# dev.off()
dev.off()
###Plot survival of metastastic/primary cancers
# Plot survival of metastastic/primary cancers
#k2<-0.5
Pan.data7 <- Pan.data5[Pan.data5$sample_type == "Metastatic",]
#Pan.data7 <- Pan.data5[Pan.data5$sample_type == "Primary Tumor",]
......@@ -162,10 +160,10 @@ formatted_p_value <- ifelse(p_value < 0.0001, "<0.0001", as.numeric(round(p_val
par(las = 0)
pdf(paste0(output_dir, "/Survival_metastatic_342.pdf"), width=5,height =5)
pdf(paste0(output_dir, "/Survival_metastatic_342.pdf"), width=5, height=5)
plot(fit.score, lty = c(1, 1), col = c("blue", "red"), xlab = "Time (d)", ylab = "Overall Survival", lwd = 2, bty = "n",
main = "Metastatic tumors")
#Add legend
# Add legend
legend(x = max(fit.score$time)/2, y = 1, legend = c("miRNA high", "miRNA low"), lty = c(1, 1), col = c("red", "blue"), lwd = 2,bty = "n")
text(x = max(fit.score$time)/1.8, y = 0.70, label = paste("p=", (formatted_p_value)), adj = c(0, 1))
text(x = 10, y = 0.05, label = paste("n =", length(cohort.score)), adj = c(0, 1))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment