I updated the scripts.

35c9bcca · Marco Monti · 65cf9977 · 35c9bcca · 35c9bcca · 35c9bcca
Commit 35c9bcca authored Mar 27, 2025 by Marco Monti
--- a/CB2025_figure_1_RNAseq.R
+++ b/CB2025_figure_1_RNAseq.R
-library("readxl")
 library("dplyr")
+library("openxlsx")
+library("readxl")
+library("stringr")
+library("ggplot2")
 #####Directories#####
-# us <-"C:/Users/bresesti.chiara/"
-# wdir1509<-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
-# wdir1510<-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
-# fdir <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
-# 
-# input_d <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
-# wdir1510 <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
-# output_d <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
 input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
 output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
@@ -22,19 +16,21 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
 #
 # Figures generated/data produced:
 #   - Figure 1B: Heatmap data for selected marker genes (Excel table)
-#   - Figure 1C: Pairwise MA plots for miRNA data
+#   - Figure 1C.sx: Pairwise volcano plots for RNAseq data
+#   - Figure 1C.dx: Pairwise MA plots for miRNA data
 #   - Figure 1D: Heatmap data for selected miRNA families (Excel table)
 #
 # Input data files:
-#   - Figure 1B: "QIAseqUltraplexRNA_181342.xlsx" (Sheet 3: umis.genes.polyA-mouse)
+#   - Figure 1B: "miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx" (Sheet 3: umis.genes.polyA-mouse)
-#   - Figure 1C & 1D: "173308.all_samples.summary.xlsx" (Sheet 2: miRNA_piRNA)
+#   - Figure 1C.sx: "miRNA_QIAseq_1509_181342_edgeR_results.xlsx"
+#   - Figure 1C.dx & 1D: "miRNA_QIAseq_1510_173308.all_samples.summary.xlsx" (Sheet 2: miRNA_piRNA)
 #   - Figure 1D: "miRNA_Family.xlsx" (Sheet 1)
 #
 ################################################################################
 #### Figure 1B ####
-df1 <- as.data.frame(read_excel(paste0(input_dir, "/QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse
+df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse
 df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts
 df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM
 gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax", 
@@ -47,9 +43,85 @@ df2.scaled <- as.data.frame(t(scale(t(df2[-1])))) # Zscore normalization. Scaled
 openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames=T)
 # the df was exported and used to create a heatmap using Graphpad
-#### Figure 1C ####
-# "edgeR_DGE_res_volcano.pdf" (in the 'wdir1509' folder) was imported in illustrator and merged with the plot generated by the code below
+#### Figure 1C.sx ####
-df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
+dge_res <- list()
+ff <- paste0(input_dir, "/miRNA_QIAseq_1509_181342_edgeR_results.xlsx")
+for (contr in getSheetNames(ff)) {
+  dge_res[[contr]] <- read.xlsx(ff, rowNames = T, sheet = contr)
+}
+sample_order <- c("RPM", "cDC1", "cDC2", "B220+", "LSEC", "KC")
+# Plot DGE volcano
+diff_genes_union <- c()
+diff_genes_inter <- c()
+pva_thr <- 0.001
+logfc_thr <- 2
+for (contr in names(dge_res)) {
+  gg <- row.names(subset(dge_res[[contr]], PValue < pva_thr & abs(logFC) > logfc_thr))
+  diff_genes_union <- union(diff_genes_union, gg)
+  diff_genes_inter <- intersect(diff_genes_inter, gg)
+}
+length(diff_genes_union)
+length(diff_genes_inter)
+full.t <- data.frame()
+for (contr in names(dge_res)) {
+  df <- dge_res[[contr]]
+  df$Sample1 <- str_split_fixed(contr, "-", 2)[,1]
+  df$Sample2 <- str_split_fixed(contr, "-", 2)[,2]
+  full.t <- rbind(full.t, df)
+}
+head(full.t)
+# Volcano plot
+data <- full.t
+data$pval <- -log10(data$PValue)
+data <- mutate(data, color = case_when(data$logFC > logfc_thr & data$pval > -log10(pva_thr) ~ "Overexpressed",
+                                       data$logFC < -logfc_thr & data$pval > -log10(pva_thr) ~ "Underexpressed",
+                                       abs(data$logFC) < logfc_thr | data$pval < -log10(pva_thr) ~ "NonSignificant"))
+data$Sample1 <- factor(data$Sample1, levels = sample_order)
+data$Sample2 <- factor(data$Sample2, levels = sample_order)
+num_genes <- data.frame()
+for (contr in names(dge_res)) {
+  s1 <- str_split_fixed(contr, "-", 2)[,1]
+  s2 <- str_split_fixed(contr, "-", 2)[,2]
+  dd <- subset(data, Sample1 == s1 & Sample2 == s2 & PValue < pva_thr)
+  n_up <- nrow(subset(dd, logFC > logfc_thr))
+  n_down <- nrow(subset(dd, logFC < -logfc_thr))
+  print(nrow(dd))
+  df <- data.frame("Sample1" = s1,
+                   "Sample2" = s2,
+                   "label" = paste(paste0("Down: ", n_down), paste0("Up: ", n_up), sep = "               "))
+  num_genes <- rbind(num_genes, df)
+}
+num_genes$Sample1 <- factor(num_genes$Sample1, levels = sample_order)
+num_genes$Sample2 <- factor(num_genes$Sample2, levels = sample_order)
+vol <- ggplot(data, aes(x = logFC, y = pval, colour = color)) +
+  theme_bw(base_size = 12) +
+  theme(legend.position = "none") +
+  xlab("log2 Fold Change") +
+  ggtitle(paste0("DGE results: pval < ", pva_thr, " & abs(logFC) > ", logfc_thr)) +
+  ylab(expression(-log[10]("adjusted p-value"))) +
+  geom_hline(yintercept = -log10(pva_thr), colour = "darkgray") +
+  geom_vline(xintercept = logfc_thr, colour = "darkgray") +
+  geom_vline(xintercept = -logfc_thr, colour = "darkgray") +
+  geom_point(size = 2, alpha = 0.8, na.rm = T) +
+  scale_color_manual(name = "Expression",
+                     values = c(Overexpressed = "red", # #CD4F39
+                                Underexpressed = "blue", # #008B00
+                                NonSignificant = "darkgray")) +
+  geom_text(data = num_genes, mapping = aes(x = -1, y = -3, label = label, color = "black")) +
+  facet_grid(Sample1 ~ Sample2, scales = "free")
+ggsave(filename = paste(output_dir, "edgeR_DGE_res_volcano_Fig.3C.sx.pdf", sep = "/"), plot = vol, width = 15, height = 16)
+# "edgeR_DGE_res_volcano_Fig.3C.sx.pdf" was imported in illustrator and merged with the plot generated by the code below
+#### Figure 1C.dx ####
+df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
 df1 <- df1[,1:7] # UMI 
 rownames(df1)<-df1$miRNA
 df2 <- apply(df1[,-1], 2, function(x) log(x)) # Log counts
@@ -62,21 +134,21 @@ upper.panel<-function(x, y, ...){
  below <- (x-y+1)*((x+y)/2-5) < -5 & ((x+y)/2)>5
  points(((x+y)/2)[below],(x-y)[below], col="blue", cex=0.6, pch=19)
 } # function for MA plot
-pdf(file = paste(output_dir, "Fig_1C.pdf", sep = "/"), width=9, height=9)
+pdf(file = paste(output_dir, "MA_plots_miRNA_Fig_1C.dx.pdf", sep = "/"), width=9, height=9)
 pairs(df2[,1:6], lower.panel = NULL, upper.panel = upper.panel, 
      ylim=c(-8.5,8.5), xlim=c(2,14), cex.labels = 2) # pairwise plot
 dev.off()
 #### Figure 1D ####
-df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
+df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
 df1 <- df1[-which(grepl("piR",df1[,1])),1:7] # UMI and piRNA removing
 df1[,1] <- gsub("/.*","",df1[,1]) # leave only first miRNA for ambiguous entries
-df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x,na.rm=T)*1000000}) # UMI normalized by CPM
+df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x, na.rm=T)*1000000}) # UMI normalized by CPM
 df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet=1 ,col_names=T)) # miRNA families from miRBase
 df.families <- df.families[df.families[,3]==10090,c(4,1)] # select mouse entries
-df.families[,1] <- sub(pattern = "p.*",replacement ="p",x = df.families[,1])
+df.families[,1] <- sub(pattern = "p.*", replacement="p", x = df.families[,1])
 df2 <- merge(df.families,df1,by=1)
-df2 <- aggregate(df2[,-c(1:2)],by = df2["miR family"],FUN = sum,na.rm=T) # sum counts by family
+df2 <- aggregate(df2[,-c(1:2)], by = df2["miR family"], FUN=sum, na.rm=T) # sum counts by family
 gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1",
                 "miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p",
                 "miR-15-5p/16-5p/195-5p/322-5p/497-5p","miR-26-5p","miR-138-5p",

--- a/CB2025_figure_3_RNAseq.R
+++ b/CB2025_figure_3_RNAseq.R
@@ -7,11 +7,6 @@ library(clusterProfiler)
 library(enrichplot)
 ##### Directories #####
-# us <-"C:/Users/bresesti.chiara/"
-# wdir<-paste0(us,"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/05-DGE-NoOut-Corr")
-# wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2")
-# fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
 input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
 output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
@@ -30,11 +25,10 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
 # - Figure 3D: Enrichment map visualizing relationships between enriched pathways.
 #
 # Input data files:
-# - Figure 3A & 3B: "edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control")
+# - Figure 3A & 3B: "RNAseq_90-857433247_edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control") (Output of DGE)
-# (Output from differential gene expression analysis, likely using edgeR)
+# - Figure 3A & 3B: "TargetScan8.0_miR-342-3p.predicted_targets.xlsx"
-# - Figure 3A & 3B: "TargetScan8.0_miR-342-3p.predicted_targets.xlsx" (Sheet 1)
 # (List of predicted miR-342-3p target genes from TargetScan)
-# - Figure 3C & 3D: "EdgeR_results.xlsx" (Sheet "miR342-control")
+# - Figure 3C & 3D: "RNAseq_90-857433247_edgeR_results.xlsx" (Sheet "miR342-control")
 # (Same as input for Figure 3A & 3B, used for GSEA)
 # - Figure 3C & 3D: "miDB_sig5.MLS.rds" (RDS file containing gene sets for gene set enrichment analysis)
 #
@@ -45,8 +39,8 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
 #### Figure 3A&B ####
 # Import df
-miR_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control")
+miR_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "miR342-control")
-sponge_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "spongeBT-control")
+sponge_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "spongeBT-control")
 # Add DEG color and label for volcano plot
 miR_ctrl$DEG <- "NO"
@@ -132,7 +126,7 @@ dev.off()
 #### Figure 3C&D ####
 # Load df
-df1 <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control")
+df1 <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "miR342-control")
 miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) # GO terms db
 # Rename pathways of interest in miDB_sig5

--- a/CB2025_figure_5_scRNAseq.R
+++ b/CB2025_figure_5_scRNAseq.R
@@ -9,29 +9,10 @@ library(scales)
 library(RColorBrewer)
 library(cetcolor)
+##### Directories #####
-#username <- "/Users/Squadrito/"
-#username <- "/Users/bresesti.chiara/"
-#wdir <- paste0(username, "Dropbox (HSR Global)/90-935462466_scRNAseq_Bresesti/Analysis_MM/results2/CB1_CB3_CB4")
-#wdir_CB <- paste0(username, "Dropbox (HSR Global)/90-935462466_scRNAseq_Bresesti/CB_scripts/results2")
-#plot_dir <- paste0(username,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures") # fdir previously
-#dirCB <-paste0(username,"Dropbox (HSR Global)/90-935462466_scRNAseq_Bresesti/CB_scripts")
-#sig <- readRDS(paste0(wdir, "/miDB_sig5.MLS.rds")) #GO terms db
-wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/results2/CB1_CB3_CB4"
-wdir_CB <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts/results2"
-#plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts_final/plots and tables used in figures"
-dirCB <-"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/CB_scripts"
-#dir.create(plot_dir, showWarnings=F, recursive=T)
 input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
 output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
-# 16357 GO terms db. used for GSEA
-sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds"))
 ################################################################################
 # R script for single-cell RNA-seq data analysis and figure generation.
 #
@@ -54,10 +35,13 @@ sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds"))
 #
 ################################################################################
 set.seed(42)
-obs1 <- readRDS(paste0(wdir, "/CB1_CB3_CB4_final.rds"))
+# 16357 GO terms db. used for GSEA
+sig <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds"))
+# Load the scRNAseq dataset
+obs1 <- readRDS(paste0(input_dir, "/CB1_CB3_CB4_final.rds"))
 # Annotate each cell with a sample label based on its original identity (library) and hash ID
 obs1@meta.data$Sample <- "Undefined"
@@ -153,7 +137,7 @@ write.csv(cells_labels, paste0(output_dir, "/number_cells_sample_Annotation.fina
 write.csv(cells_labels_p, paste0(output_dir, "/number_cells_sample_Annotation.final.CB_percentage_Fig.S5B.csv"), row.names=T)
-#### Analysis of mOrange+ fraction (Fig.5B, Suppl.Fig.5C,D) ####
+#### Analysis of mOrange+ fraction (Fig.5B, Fig.S5C, Fig.S5D) ####
 # The metadata indicating mOrange positivity are: positive_cells (generated with the mapping), mOrange_label (generated with CellRanger), mOrange_union (union of the previous two labels)
@@ -291,8 +275,8 @@ ggsave(filename = paste0(output_dir, "/GSEA_Fig5D.pdf"), plot=p, width=12, heigh
 # Create signature file with only signature cytokines for the more stringent GSEA in Fig.5E
-#sig.cyto <- sig[15061:16357]
+sig.cyto <- sig[15061:16357]
-sig.cyto <- readRDS(paste0(input_dir, "/miDB_sig5.MLS_cytokines.rds"))
+#sig.cyto <- readRDS(paste0(input_dir, "/miDB_sig5.MLS_cytokines.rds"))
 # Define the populations we want to use for this GSEA
 selected_pop2 <- c("B cells","cDC1","cDC2_MoDCs","KCs","Macrophages","Mig cDCs","Monocytes","Neutrophils","pDCs","T and NK cells", "TAMs_1","TAMs_2","TAMs_3","TAMs_4","TAMs_5")

--- a/README.md
+++ b/README.md
@@ -21,13 +21,20 @@ GEO:
    - CB2025_figure_3_RNAseq.R
    - CB2025_figure_5_scRNAseq.R
    - TCGA_analysis.R
- data: results of the analyses
+- Output: results of the analyses
-    - plots
+- Input: input files required to generate the figures
-    - tables
+    - miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B)
- reference
+    - miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D)
-    - regev_lab_cell_cycle_mouse.rds: contains the genes involved in the cell cycle in mouse
+    - miRNA_QIAseq_1510_173308.all_samples.summary.xlsx: Differential expression analysis results (Figure 1C.sx)
-    - miDB_sig5.MLS.rds: reference files for GSEA analysis
+    - miRNA_Family.xlsx: miRNA family information (Figure 1D)
-    - miDB_sig5.MLS_cytokines.rds: reference files for GSEA analysis
+    - miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5)
+    - RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D)
+    - TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B)
+    - CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5)
+    - TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis)
+    - Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis)
+    - pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis)
 ## scRNAseq analysis
 The initial preprocessing of the data, including mapping against the _Mus musculus_ GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).

--- a/TCGA_analysis.R
+++ b/TCGA_analysis.R
@@ -8,10 +8,7 @@ library(survival)
 library(survminer)
 #library(babelgene)
-# Set directories
+##### Directories #####
-wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts"
-setwd(wdir)
 input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Input"
 output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
@@ -35,7 +32,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
 #     in this patient subgroup.
 #
 # Input data files:
-#   - TCGA_phenotype_denseDataOnlyDownload.tsv.gz: TCGA patient phenotype data (metadata),
+#   - TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata),
 #     downloaded from TCGA or Xena, contains clinical and demographic information.
 #   - Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, provides overall survival (OS) time and status.
 #   - pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz:
@@ -48,7 +45,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
 ################################################################################
 # Load dataset
-metaD <- data.frame(fread(paste0(input_dir, "/TCGA_phenotype_denseDataOnlyDownload.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE))
+metaD <- data.frame(fread(paste0(input_dir, "/TCGA_phenotype.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE))
 Surv01 <- data.frame(fread(paste0(input_dir, "/Survival_SupplementalTable_S1_20171025_xena_sp"), sep='\t', colClasses=c("character"), data.table=FALSE))
 # FPKM01 <- fread(paste0(input_dir, "/tcga_RSEM_Hugo_norm_count"))
 FPKM01 <- fread(paste0(input_dir, "/pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz"))