Commit 35c9bcca authored by Marco Monti's avatar Marco Monti
Browse files

I updated the scripts.

parent 65cf9977
library("readxl")
library("dplyr") library("dplyr")
library("openxlsx")
library("readxl")
library("stringr")
library("ggplot2")
#####Directories##### #####Directories#####
# us <-"C:/Users/bresesti.chiara/"
# wdir1509<-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
# wdir1510<-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
# fdir <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
#
# input_d <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
# wdir1510 <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
# output_d <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input" input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output" output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
...@@ -22,19 +16,21 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -22,19 +16,21 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# #
# Figures generated/data produced: # Figures generated/data produced:
# - Figure 1B: Heatmap data for selected marker genes (Excel table) # - Figure 1B: Heatmap data for selected marker genes (Excel table)
# - Figure 1C: Pairwise MA plots for miRNA data # - Figure 1C.sx: Pairwise volcano plots for RNAseq data
# - Figure 1C.dx: Pairwise MA plots for miRNA data
# - Figure 1D: Heatmap data for selected miRNA families (Excel table) # - Figure 1D: Heatmap data for selected miRNA families (Excel table)
# #
# Input data files: # Input data files:
# - Figure 1B: "QIAseqUltraplexRNA_181342.xlsx" (Sheet 3: umis.genes.polyA-mouse) # - Figure 1B: "miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx" (Sheet 3: umis.genes.polyA-mouse)
# - Figure 1C & 1D: "173308.all_samples.summary.xlsx" (Sheet 2: miRNA_piRNA) # - Figure 1C.sx: "miRNA_QIAseq_1509_181342_edgeR_results.xlsx"
# - Figure 1C.dx & 1D: "miRNA_QIAseq_1510_173308.all_samples.summary.xlsx" (Sheet 2: miRNA_piRNA)
# - Figure 1D: "miRNA_Family.xlsx" (Sheet 1) # - Figure 1D: "miRNA_Family.xlsx" (Sheet 1)
# #
################################################################################ ################################################################################
#### Figure 1B #### #### Figure 1B ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse
df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts
df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM
gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax", gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax",
...@@ -47,9 +43,85 @@ df2.scaled <- as.data.frame(t(scale(t(df2[-1])))) # Zscore normalization. Scaled ...@@ -47,9 +43,85 @@ df2.scaled <- as.data.frame(t(scale(t(df2[-1])))) # Zscore normalization. Scaled
openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames=T) openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames=T)
# the df was exported and used to create a heatmap using Graphpad # the df was exported and used to create a heatmap using Graphpad
#### Figure 1C ####
# "edgeR_DGE_res_volcano.pdf" (in the 'wdir1509' folder) was imported in illustrator and merged with the plot generated by the code below #### Figure 1C.sx ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
dge_res <- list()
ff <- paste0(input_dir, "/miRNA_QIAseq_1509_181342_edgeR_results.xlsx")
for (contr in getSheetNames(ff)) {
dge_res[[contr]] <- read.xlsx(ff, rowNames = T, sheet = contr)
}
sample_order <- c("RPM", "cDC1", "cDC2", "B220+", "LSEC", "KC")
# Plot DGE volcano
diff_genes_union <- c()
diff_genes_inter <- c()
pva_thr <- 0.001
logfc_thr <- 2
for (contr in names(dge_res)) {
gg <- row.names(subset(dge_res[[contr]], PValue < pva_thr & abs(logFC) > logfc_thr))
diff_genes_union <- union(diff_genes_union, gg)
diff_genes_inter <- intersect(diff_genes_inter, gg)
}
length(diff_genes_union)
length(diff_genes_inter)
full.t <- data.frame()
for (contr in names(dge_res)) {
df <- dge_res[[contr]]
df$Sample1 <- str_split_fixed(contr, "-", 2)[,1]
df$Sample2 <- str_split_fixed(contr, "-", 2)[,2]
full.t <- rbind(full.t, df)
}
head(full.t)
# Volcano plot
data <- full.t
data$pval <- -log10(data$PValue)
data <- mutate(data, color = case_when(data$logFC > logfc_thr & data$pval > -log10(pva_thr) ~ "Overexpressed",
data$logFC < -logfc_thr & data$pval > -log10(pva_thr) ~ "Underexpressed",
abs(data$logFC) < logfc_thr | data$pval < -log10(pva_thr) ~ "NonSignificant"))
data$Sample1 <- factor(data$Sample1, levels = sample_order)
data$Sample2 <- factor(data$Sample2, levels = sample_order)
num_genes <- data.frame()
for (contr in names(dge_res)) {
s1 <- str_split_fixed(contr, "-", 2)[,1]
s2 <- str_split_fixed(contr, "-", 2)[,2]
dd <- subset(data, Sample1 == s1 & Sample2 == s2 & PValue < pva_thr)
n_up <- nrow(subset(dd, logFC > logfc_thr))
n_down <- nrow(subset(dd, logFC < -logfc_thr))
print(nrow(dd))
df <- data.frame("Sample1" = s1,
"Sample2" = s2,
"label" = paste(paste0("Down: ", n_down), paste0("Up: ", n_up), sep = " "))
num_genes <- rbind(num_genes, df)
}
num_genes$Sample1 <- factor(num_genes$Sample1, levels = sample_order)
num_genes$Sample2 <- factor(num_genes$Sample2, levels = sample_order)
vol <- ggplot(data, aes(x = logFC, y = pval, colour = color)) +
theme_bw(base_size = 12) +
theme(legend.position = "none") +
xlab("log2 Fold Change") +
ggtitle(paste0("DGE results: pval < ", pva_thr, " & abs(logFC) > ", logfc_thr)) +
ylab(expression(-log[10]("adjusted p-value"))) +
geom_hline(yintercept = -log10(pva_thr), colour = "darkgray") +
geom_vline(xintercept = logfc_thr, colour = "darkgray") +
geom_vline(xintercept = -logfc_thr, colour = "darkgray") +
geom_point(size = 2, alpha = 0.8, na.rm = T) +
scale_color_manual(name = "Expression",
values = c(Overexpressed = "red", # #CD4F39
Underexpressed = "blue", # #008B00
NonSignificant = "darkgray")) +
geom_text(data = num_genes, mapping = aes(x = -1, y = -3, label = label, color = "black")) +
facet_grid(Sample1 ~ Sample2, scales = "free")
ggsave(filename = paste(output_dir, "edgeR_DGE_res_volcano_Fig.3C.sx.pdf", sep = "/"), plot = vol, width = 15, height = 16)
# "edgeR_DGE_res_volcano_Fig.3C.sx.pdf" was imported in illustrator and merged with the plot generated by the code below
#### Figure 1C.dx ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
df1 <- df1[,1:7] # UMI df1 <- df1[,1:7] # UMI
rownames(df1)<-df1$miRNA rownames(df1)<-df1$miRNA
df2 <- apply(df1[,-1], 2, function(x) log(x)) # Log counts df2 <- apply(df1[,-1], 2, function(x) log(x)) # Log counts
...@@ -62,21 +134,21 @@ upper.panel<-function(x, y, ...){ ...@@ -62,21 +134,21 @@ upper.panel<-function(x, y, ...){
below <- (x-y+1)*((x+y)/2-5) < -5 & ((x+y)/2)>5 below <- (x-y+1)*((x+y)/2-5) < -5 & ((x+y)/2)>5
points(((x+y)/2)[below],(x-y)[below], col="blue", cex=0.6, pch=19) points(((x+y)/2)[below],(x-y)[below], col="blue", cex=0.6, pch=19)
} # function for MA plot } # function for MA plot
pdf(file = paste(output_dir, "Fig_1C.pdf", sep = "/"), width=9, height=9) pdf(file = paste(output_dir, "MA_plots_miRNA_Fig_1C.dx.pdf", sep = "/"), width=9, height=9)
pairs(df2[,1:6], lower.panel = NULL, upper.panel = upper.panel, pairs(df2[,1:6], lower.panel = NULL, upper.panel = upper.panel,
ylim=c(-8.5,8.5), xlim=c(2,14), cex.labels = 2) # pairwise plot ylim=c(-8.5,8.5), xlim=c(2,14), cex.labels = 2) # pairwise plot
dev.off() dev.off()
#### Figure 1D #### #### Figure 1D ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
df1 <- df1[-which(grepl("piR",df1[,1])),1:7] # UMI and piRNA removing df1 <- df1[-which(grepl("piR",df1[,1])),1:7] # UMI and piRNA removing
df1[,1] <- gsub("/.*","",df1[,1]) # leave only first miRNA for ambiguous entries df1[,1] <- gsub("/.*","",df1[,1]) # leave only first miRNA for ambiguous entries
df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x,na.rm=T)*1000000}) # UMI normalized by CPM df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x, na.rm=T)*1000000}) # UMI normalized by CPM
df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet=1 ,col_names=T)) # miRNA families from miRBase df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet=1 ,col_names=T)) # miRNA families from miRBase
df.families <- df.families[df.families[,3]==10090,c(4,1)] # select mouse entries df.families <- df.families[df.families[,3]==10090,c(4,1)] # select mouse entries
df.families[,1] <- sub(pattern = "p.*",replacement ="p",x = df.families[,1]) df.families[,1] <- sub(pattern = "p.*", replacement="p", x = df.families[,1])
df2 <- merge(df.families,df1,by=1) df2 <- merge(df.families,df1,by=1)
df2 <- aggregate(df2[,-c(1:2)],by = df2["miR family"],FUN = sum,na.rm=T) # sum counts by family df2 <- aggregate(df2[,-c(1:2)], by = df2["miR family"], FUN=sum, na.rm=T) # sum counts by family
gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1", gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1",
"miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p", "miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p",
"miR-15-5p/16-5p/195-5p/322-5p/497-5p","miR-26-5p","miR-138-5p", "miR-15-5p/16-5p/195-5p/322-5p/497-5p","miR-26-5p","miR-138-5p",
......
...@@ -7,11 +7,6 @@ library(clusterProfiler) ...@@ -7,11 +7,6 @@ library(clusterProfiler)
library(enrichplot) library(enrichplot)
##### Directories ##### ##### Directories #####
# us <-"C:/Users/bresesti.chiara/"
# wdir<-paste0(us,"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/05-DGE-NoOut-Corr")
# wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2")
# fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input" input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output" output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
...@@ -30,11 +25,10 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -30,11 +25,10 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# - Figure 3D: Enrichment map visualizing relationships between enriched pathways. # - Figure 3D: Enrichment map visualizing relationships between enriched pathways.
# #
# Input data files: # Input data files:
# - Figure 3A & 3B: "edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control") # - Figure 3A & 3B: "RNAseq_90-857433247_edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control") (Output of DGE)
# (Output from differential gene expression analysis, likely using edgeR) # - Figure 3A & 3B: "TargetScan8.0_miR-342-3p.predicted_targets.xlsx"
# - Figure 3A & 3B: "TargetScan8.0_miR-342-3p.predicted_targets.xlsx" (Sheet 1)
# (List of predicted miR-342-3p target genes from TargetScan) # (List of predicted miR-342-3p target genes from TargetScan)
# - Figure 3C & 3D: "EdgeR_results.xlsx" (Sheet "miR342-control") # - Figure 3C & 3D: "RNAseq_90-857433247_edgeR_results.xlsx" (Sheet "miR342-control")
# (Same as input for Figure 3A & 3B, used for GSEA) # (Same as input for Figure 3A & 3B, used for GSEA)
# - Figure 3C & 3D: "miDB_sig5.MLS.rds" (RDS file containing gene sets for gene set enrichment analysis) # - Figure 3C & 3D: "miDB_sig5.MLS.rds" (RDS file containing gene sets for gene set enrichment analysis)
# #
...@@ -45,8 +39,8 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -45,8 +39,8 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
#### Figure 3A&B #### #### Figure 3A&B ####
# Import df # Import df
miR_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control") miR_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "miR342-control")
sponge_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "spongeBT-control") sponge_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "spongeBT-control")
# Add DEG color and label for volcano plot # Add DEG color and label for volcano plot
miR_ctrl$DEG <- "NO" miR_ctrl$DEG <- "NO"
...@@ -132,7 +126,7 @@ dev.off() ...@@ -132,7 +126,7 @@ dev.off()
#### Figure 3C&D #### #### Figure 3C&D ####
# Load df # Load df
df1 <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control") df1 <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "miR342-control")
miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) # GO terms db miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) # GO terms db
# Rename pathways of interest in miDB_sig5 # Rename pathways of interest in miDB_sig5
......
...@@ -9,29 +9,10 @@ library(scales) ...@@ -9,29 +9,10 @@ library(scales)
library(RColorBrewer) library(RColorBrewer)
library(cetcolor) library(cetcolor)
##### Directories #####
#username <- "/Users/Squadrito/"
#username <- "/Users/bresesti.chiara/"
#wdir <- paste0(username, "Dropbox (HSR Global)/90-935462466_scRNAseq_Bresesti/Analysis_MM/results2/CB1_CB3_CB4")
#wdir_CB <- paste0(username, "Dropbox (HSR Global)/90-935462466_scRNAseq_Bresesti/CB_scripts/results2")
#plot_dir <- paste0(username,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures") # fdir previously
#dirCB <-paste0(username,"Dropbox (HSR Global)/90-935462466_scRNAseq_Bresesti/CB_scripts")
#sig <- readRDS(paste0(wdir, "/miDB_sig5.MLS.rds")) #GO terms db
wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/results2/CB1_CB3_CB4"
wdir_CB <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts/results2"
#plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts_final/plots and tables used in figures"
dirCB <-"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts"
#dir.create(plot_dir, showWarnings=F, recursive=T)
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input" input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output" output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
# 16357 GO terms db. used for GSEA
sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds"))
################################################################################ ################################################################################
# R script for single-cell RNA-seq data analysis and figure generation. # R script for single-cell RNA-seq data analysis and figure generation.
# #
...@@ -54,10 +35,13 @@ sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) ...@@ -54,10 +35,13 @@ sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds"))
# #
################################################################################ ################################################################################
set.seed(42) set.seed(42)
obs1 <- readRDS(paste0(wdir, "/CB1_CB3_CB4_final.rds")) # 16357 GO terms db. used for GSEA
sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds"))
# Load the scRNAseq dataset
obs1 <- readRDS(paste0(input_dir, "/CB1_CB3_CB4_final.rds"))
# Annotate each cell with a sample label based on its original identity (library) and hash ID # Annotate each cell with a sample label based on its original identity (library) and hash ID
obs1@meta.data$Sample <- "Undefined" obs1@meta.data$Sample <- "Undefined"
...@@ -153,7 +137,7 @@ write.csv(cells_labels, paste0(output_dir, "/number_cells_sample_Annotation.fina ...@@ -153,7 +137,7 @@ write.csv(cells_labels, paste0(output_dir, "/number_cells_sample_Annotation.fina
write.csv(cells_labels_p, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_percentage_Fig.S5B.csv"), row.names=T) write.csv(cells_labels_p, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_percentage_Fig.S5B.csv"), row.names=T)
#### Analysis of mOrange+ fraction (Fig.5B, Suppl.Fig.5C,D) #### #### Analysis of mOrange+ fraction (Fig.5B, Fig.S5C, Fig.S5D) ####
# The metadata indicating mOrange positivity are: positive_cells (generated with the mapping), mOrange_label (generated with CellRanger), mOrange_union (union of the previous two labels) # The metadata indicating mOrange positivity are: positive_cells (generated with the mapping), mOrange_label (generated with CellRanger), mOrange_union (union of the previous two labels)
...@@ -291,8 +275,8 @@ ggsave(filename = paste0(output_dir, "/GSEA_Fig5D.pdf"), plot=p, width=12, heigh ...@@ -291,8 +275,8 @@ ggsave(filename = paste0(output_dir, "/GSEA_Fig5D.pdf"), plot=p, width=12, heigh
# Create signature file with only signature cytokines for the more stringent GSEA in Fig.5E # Create signature file with only signature cytokines for the more stringent GSEA in Fig.5E
#sig.cyto <- sig[15061:16357] sig.cyto <- sig[15061:16357]
sig.cyto <- readRDS(paste0(input_dir, "/miDB_sig5.MLS_cytokines.rds")) #sig.cyto <- readRDS(paste0(input_dir, "/miDB_sig5.MLS_cytokines.rds"))
# Define the populations we want to use for this GSEA # Define the populations we want to use for this GSEA
selected_pop2 <- c("B cells","cDC1","cDC2_MoDCs","KCs","Macrophages","Mig cDCs","Monocytes","Neutrophils","pDCs","T and NK cells", "TAMs_1","TAMs_2","TAMs_3","TAMs_4","TAMs_5") selected_pop2 <- c("B cells","cDC1","cDC2_MoDCs","KCs","Macrophages","Mig cDCs","Monocytes","Neutrophils","pDCs","T and NK cells", "TAMs_1","TAMs_2","TAMs_3","TAMs_4","TAMs_5")
......
...@@ -21,13 +21,20 @@ GEO: ...@@ -21,13 +21,20 @@ GEO:
- CB2025_figure_3_RNAseq.R - CB2025_figure_3_RNAseq.R
- CB2025_figure_5_scRNAseq.R - CB2025_figure_5_scRNAseq.R
- TCGA_analysis.R - TCGA_analysis.R
- data: results of the analyses - Output: results of the analyses
- plots - Input: input files required to generate the figures
- tables - miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B)
- reference - miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D)
- regev_lab_cell_cycle_mouse.rds: contains the genes involved in the cell cycle in mouse - miRNA_QIAseq_1510_173308.all_samples.summary.xlsx: Differential expression analysis results (Figure 1C.sx)
- miDB_sig5.MLS.rds: reference files for GSEA analysis - miRNA_Family.xlsx: miRNA family information (Figure 1D)
- miDB_sig5.MLS_cytokines.rds: reference files for GSEA analysis - miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5)
- RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D)
- TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B)
- CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5)
- TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis)
- Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis)
- pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis)
## scRNAseq analysis ## scRNAseq analysis
The initial preprocessing of the data, including mapping against the _Mus musculus_ GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1). The initial preprocessing of the data, including mapping against the _Mus musculus_ GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).
......
...@@ -8,10 +8,7 @@ library(survival) ...@@ -8,10 +8,7 @@ library(survival)
library(survminer) library(survminer)
#library(babelgene) #library(babelgene)
# Set directories ##### Directories #####
wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts"
setwd(wdir)
input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input" input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output" output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
...@@ -35,7 +32,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -35,7 +32,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# in this patient subgroup. # in this patient subgroup.
# #
# Input data files: # Input data files:
# - TCGA_phenotype_denseDataOnlyDownload.tsv.gz: TCGA patient phenotype data (metadata), # - TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata),
# downloaded from TCGA or Xena, contains clinical and demographic information. # downloaded from TCGA or Xena, contains clinical and demographic information.
# - Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, provides overall survival (OS) time and status. # - Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, provides overall survival (OS) time and status.
# - pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: # - pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz:
...@@ -48,7 +45,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -48,7 +45,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
################################################################################ ################################################################################
# Load dataset # Load dataset
metaD <- data.frame(fread(paste0(input_dir, "/TCGA_phenotype_denseDataOnlyDownload.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE)) metaD <- data.frame(fread(paste0(input_dir, "/TCGA_phenotype.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE))
Surv01 <- data.frame(fread(paste0(input_dir, "/Survival_SupplementalTable_S1_20171025_xena_sp"), sep='\t', colClasses=c("character"), data.table=FALSE)) Surv01 <- data.frame(fread(paste0(input_dir, "/Survival_SupplementalTable_S1_20171025_xena_sp"), sep='\t', colClasses=c("character"), data.table=FALSE))
# FPKM01 <- fread(paste0(input_dir, "/tcga_RSEM_Hugo_norm_count")) # FPKM01 <- fread(paste0(input_dir, "/tcga_RSEM_Hugo_norm_count"))
FPKM01 <- fread(paste0(input_dir, "/pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz")) FPKM01 <- fread(paste0(input_dir, "/pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz"))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment