library(data.table) library(survival) library(dplyr) library(babelgene) library(openxlsx) library(readxl) library(survminer) library(gridExtra) library(ggplot2) ##Load dataset #username <- "C:/Users/notaro.marco/" username <- "/Users/bresesti.chiara/" wdir<- paste0(username, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al Cell Reports/TCGA_analysis") setwd(wdir) metaD<-data.frame(fread("TCGA_phenotype_denseDataOnlyDownload.tsv.gz", sep='\t',colClasses=c("character"),data.table=FALSE)) Surv01<-data.frame(fread("Survival_SupplementalTable_S1_20171025_xena_sp", sep='\t',colClasses=c("character"),data.table=FALSE)) # FPKM01<-fread("tcga_RSEM_Hugo_norm_count") FPKM01<-fread("pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz") FPKM01<-as.data.frame(FPKM01) # Annot<-data.frame(fread("probeMap_gencode.v23.annotation.gene.probemap", sep='\t',colClasses=c("character"),data.table=FALSE)) allGenes <- FPKM01[,1] ####################################################################### #####PLOT HR Score of signature for multiple tumor types############### ##################################################################### humanGenes<-list(c("hsa-miR-342-3p")) ####create xlsx file with genes, patients and tumor types Pan.data3 <- metaD[,] Pan.data4 <- merge(Surv01,Pan.data3,by=1) Genes_selected <- FPKM01[FPKM01[,1]%in%humanGenes,] rownames(Genes_selected)<-Genes_selected[,1] Genes_selected<-Genes_selected[,-1] Genes_selected <- data.frame(t(Genes_selected)) Genes_selected$sample<-rownames(Genes_selected) Pan.data5 <- merge(Genes_selected,Pan.data4,by.y="sample") colnames(Pan.data5) #setwd(wdir) #write.xlsx(Pan.data5, "miR342_patients_survival.xlsx") #############Regression with defined k2 k2<-0.5 #define quartile for selecting low/high score df2 <- NULL for (i in unique(Pan.data5$cancer.type.abbreviation)){ Pan.data6 <- Pan.data5[Pan.data5$cancer.type.abbreviation%in%i,] if(nrow(Pan.data6)>2){ P.OS <- Surv(as.numeric(Pan.data6$OS.time), as.numeric(Pan.data6$OS)) cohort.score<-rep(0,nrow(P.OS)) M1 <- quantile(Pan.data6$hsa.miR.342.3p,k2) cohort.score[Pan.data6$hsa.miR.342.3p>=M1]<-1 a1<-coxph(P.OS ~ as.numeric(cohort.score)) df1 <- data.frame(Tumor = unique(Pan.data6$cancer.type.abbreviation),npatients=length(Pan.data6$X_PATIENT), HR = summary(a1)$coefficients[2], SE=summary(a1)$coefficients[3], p = summary(a1)$coefficients[5] ) df2 <- rbind(df2,df1) df2<-df2[(order(df2$p)),] }else{}} #write.xlsx(df2, "miR342_HR_by_tumortype.xlsx") ###Filters selected tumors df3<-df2 df3<-df3[df3$p<0.1,] df3<-df3[df3$npatients>100,] df3$significance <- ifelse(df3$p < 0.05, "p<0.05", "NS") a<-ggplot(df3, aes(x = reorder(Tumor, HR), y = HR, color = significance, label = significance)) + geom_point(size = 4) + geom_errorbar(aes(ymin = pmax(HR-SE, 0), ymax = HR + SE), width = 0.2) + scale_color_manual(name = "Color", values = c("red", "blue")) + labs(title = "HR by tumor type", x = "Tumor Type", y = "HR") + geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 1), fill = "lightblue", alpha = 0.01) + geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = 1, ymax = Inf), fill = "pink", alpha = 0.01)+ theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust= 1, color = "Black")) + geom_hline(yintercept = 1, linetype = "dashed", color = "gray") # pdf("HR_alltumors_342.pdf",width=9,height = 4) a # dev.off() ####Plot survival of chosen tumors ###Select type of tumor df3<-df3[order(df3$HR),] df3<-df3[df3$p<0.05,] # pdf("Survival_***_342",width=10,height = 6) par(mfrow = c(2,4)) survival<-list() for (i in df3$Tumor){ tumor.type<-i Pan.data6 <- Pan.data5[Pan.data5$cancer.type.abbreviation%in%i,] P.OS <- Surv(as.numeric(Pan.data6$OS.time), as.numeric(Pan.data6$OS)) cohort.score<-rep(0,nrow(P.OS)) M1 <- quantile(Pan.data6$hsa.miR.342.3p,k2) cohort.score[Pan.data6$hsa.miR.342.3p>=M1]<-1 ##plotting survival fit.score <- survfit(P.OS ~ cohort.score) cox_model <- coxph(P.OS ~ as.numeric(cohort.score)) p_value <- summary(cox_model)$coefficients["as.numeric(cohort.score)", "Pr(>|z|)"] formatted_p_value <- ifelse(p_value < 0.0001, "<0.0001", as.numeric(round(p_value,5))) par(las = 0) plot(fit.score, lty = c(1, 1), col = c("blue", "red"), xlab = "Time (d)", ylab = "Overall Survival", lwd = 2, bty = "n", main = paste0("Survival in ", i)) # Add legend legend(x = max(fit.score$time)/2, y = 1, legend = c("miRNA high", "miRNA low"), lty = c(1, 1), col = c("red", "blue"), lwd = 2,bty = "n") text(x = max(fit.score$time)/1.8, y = 0.70, label = paste("p=", (formatted_p_value)), adj = c(0, 1)) text(x = 10, y = 0.05, label = paste("n =", length(cohort.score)), adj = c(0, 1)) survival[[i]] <- recordPlot() } # dev.off() ###Plot survival of metastastic/primary cancers #k2<-0.5 Pan.data7 <- Pan.data5[Pan.data5$sample_type == "Metastatic",] #Pan.data7 <- Pan.data5[Pan.data5$sample_type == "Primary Tumor",] P.OS <- Surv(as.numeric(Pan.data7$OS.time), as.numeric(Pan.data7$OS)) cohort.score<-rep(0,nrow(P.OS)) M1 <- quantile(Pan.data7$hsa.miR.342.3p,k2) cohort.score[Pan.data7$hsa.miR.342.3p>M1]<-1 fit.score <- survfit(P.OS ~ cohort.score) cox_model <- coxph(P.OS ~ as.numeric(cohort.score)) p_value <- summary(cox_model)$coefficients["as.numeric(cohort.score)", "Pr(>|z|)"] formatted_p_value <- ifelse(p_value < 0.0001, "<0.0001", as.numeric(round(p_value,5))) par(las = 0) pdf("Survival_metastatic_342.pdf",width=5,height =5) plot(fit.score, lty = c(1, 1), col = c("blue", "red"), xlab = "Time (d)", ylab = "Overall Survival", lwd = 2, bty = "n", main = "Metastatic tumors") #Add legend legend(x = max(fit.score$time)/2, y = 1, legend = c("miRNA high", "miRNA low"), lty = c(1, 1), col = c("red", "blue"), lwd = 2,bty = "n") text(x = max(fit.score$time)/1.8, y = 0.70, label = paste("p=", (formatted_p_value)), adj = c(0, 1)) text(x = 10, y = 0.05, label = paste("n =", length(cohort.score)), adj = c(0, 1)) dev.off()