Added script and raw data.

412555ba · Stefano Beretta · 7e39cec6 · 412555ba · 412555ba · 412555ba
Commit 412555ba authored Jul 26, 2023 by Stefano Beretta
--- a/Nanostring_analysis.R
+++ b/Nanostring_analysis.R
+# NanoString Analisys
+library(dplyr)
+library(psych)
+library(ggplot2)
+library(ggpubr)
+
+wdir <- "~/Downloads/tmp/squadrito_livertumor2022_nanostring"
+
+rawdata <- paste(wdir, "Raw_data", sep = "/")
+plots <- paste(wdir, "Plots", sep = "/")
+dir.create(plots, showWarnings = F)
+
+# Leggi tutti i file CSV nella cartella e concatenali
+setwd(rawdata)
+all_data <- NULL
+for (file_name in list.files(pattern = "*.RCC")) {
+    data <- read.csv(file_name, skip = 26)
+    data <- data[1:(nrow(data)-3),]
+    data$file <- file_name
+    data$patient <- substr(file_name,1,5)
+    data$Type_sample <-substr(file_name,7,7)
+    all_data <- rbind(all_data, data)
+}
+
+# TAKING ALL TOGETHER
+A_data <- reshape(
+    data = all_data,
+    direction = "wide",
+    idvar = "Name",
+    timevar = "file",
+    v.names = "Count",
+    drop = c("Type_sample","patient"))
+
+# Normalize quantiles
+df_q <- A_data[rowSums(is.na(A_data))==0 & rowSums(A_data>4)>20,]
+rbind(dim(A_data),dim(df_q))
+df_q <- rbind(df_q,A_data[481,])#add IL10 data, because of importance
+df_q[,4:ncol(df_q)] <- limma::normalizeQuantiles(df_q[,4:ncol(df_q)])
+df_q[,4:ncol(df_q)] <- log10(df_q[,4:ncol(df_q)])
+
+# Split in 2, according to Tumor (T) o peritumor (P) areas
+typeS <- paste("type" , substr(colnames(df_q),13,13),sep=".")
+df_qT <- df_q[,typeS=="type.T"]
+rownames(df_qT) <- df_q[,2]
+df_qP <- df_q[,typeS=="type.P"]
+rownames(df_qP) <- df_q[,2]
+df_qA <- df_q[,4:ncol(df_q)]
+rownames(df_qA) <- df_q[,2]
+
+# Differential expression
+Sigs <- readxl::read_excel(paste(wdir, "Signatures_Table5S.xlsx", sep = "/"))
+IFNa_genes<-intersect(df_q[,2],Sigs[[1]][-1])
+TR1_genes<- intersect(df_q[,2],c(Sigs[[2]][2:17],"IL10RA"))
+HLA_genes <- grep(pattern="HLA",df_q[,2],value = T)
+MS_genes <- intersect(df_q[,2], c("EPCAM","CDH1","SOCS1","STAT1",
+                                  "NLRC5","TGFB1","CD86","TAP1","TAP2",
+                                  "CD3G","CD8A","EOMES","PDCD1","GZMK",
+                                  "CD40","CD74","TREM2","LAG3","IL10RA",
+                                  "HAVCR2","CTLA4","CASP3","CD69","ITGAE","ITGA1",
+                                  "CD7","IL2","TNF","IL10"))
+
+#Figures: P vs T
+#Diff gene expression P vs T, selected genes
+#Reshape wide to long matrix with expression values
+df_qA2 <- reshape(df_qA, 
+                  direction = "long",
+                  v.names="value",
+                  varying = names(df_qA),
+                  times= names(df_qA),
+                  timevar = "file",
+                  idvar="genes",
+                  ids = rownames(df_qA))
+df_qA2$patient <- substr(df_qA2$file,7,11)
+df_qA2$type <- substr(df_qA2$file,13,13)
+
+## Plots
+setwd(plots)
+
+# Preparing_wNormByGene
+GiveSigs <-  function(x1,x2){
+    nor.genes <- x1[x2,]
+    mean1 <- apply(nor.genes,1,geometric.mean)
+    df1<- as.data.frame(t(nor.genes))
+    mean2<- as.data.frame(t(mapply(`/`, df1,mean1)))
+    result1<- apply(mean2,2,geometric.mean)
+    names(result1) <- names(x1)
+    return(result1)}
+IFNa_Score.P <-GiveSigs(df_qP,IFNa_genes)
+TR1_Score.P <- GiveSigs(df_qP,TR1_genes)
+IFNa_Score.T <- GiveSigs(df_qT,IFNa_genes)
+TR1_Score.T <- GiveSigs(df_qT,TR1_genes)
+
+# Add IFNa and TR1 scores to df_qA
+df_qT2 <- df_qT
+colnames(df_qT2) <- substr(colnames(df_qT2), 7, 11)
+df_qT3 <- data.frame(t(df_qT2))
+df_qT3$type <- "T"
+df_qT3$patient <- rownames(df_qT3)
+df_qT3$TR1_Score <- TR1_Score.T
+df_qT3$IFNa_Score <- IFNa_Score.T
+
+df_qP2 <- df_qP
+colnames(df_qP2) <- substr(colnames(df_qP2), 7, 11)
+df_qP3 <- data.frame(t(df_qP2))
+df_qP3$type <- "P"
+df_qP3$patient <- rownames(df_qP3)
+df_qP3$TR1_Score <- TR1_Score.P
+df_qP3$IFNa_Score <- IFNa_Score.P
+df_qAC <- rbind(df_qP3, df_qT3)
+
+# Figure: Correlation TR1score and IFNascore
+pdf(file = "Plot_TR1score_vs_IFNascore.pdf",width = 4, height = 4)
+ggplot(df_qAC, aes(x=IFNa_Score, y=TR1_Score,color=type,fill=type))+
+    geom_point()+
+    geom_smooth(method=lm, se=T, fullrange=TRUE)+
+    stat_cor(method = "spearman")+
+    labs(title = "TR1score vs IFNascore")+
+    xlab(label="IFNa signature score")+
+    ylab(label="Tr1 signature score")
+dev.off()
+
+df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$TR1_Score, df_qAC$type)
+openxlsx::write.xlsx(df.nano,"Plot_TR1score_vs_IFNascore.xlsx", rowNames = F)	
+
+
+# Figure: Correlation CTLA4 and IFNascore
+pdf(file = "Plot_CTLA4_vs_IFNascore.pdf",width = 4, height = 4)
+ggplot(df_qAC, aes(x=IFNa_Score, y=CTLA4,color=type,fill=type))+
+    geom_point()+
+    geom_smooth(method=lm, se=T, fullrange=TRUE)+
+    stat_cor(method = "spearman")+
+    labs(title = "CTLA4 vs IFNascore")+
+    xlab(label = "IFNa signature score")+
+    ylab(label = "Log10 CTLA4 expression")
+dev.off()
+
+df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$CTLA4, df_qAC$type)
+openxlsx::write.xlsx(df.nano, "Plot_CTLA4_vs_IFNascore.xlsx", rowNames = F)
+
+# Figure: Correlation HLA-C and IFNascore
+pdf(file = "Plot_HLA-C_vs_IFNascore.pdf",width = 4, height = 4)
+ggplot(df_qAC, aes(x=IFNa_Score, y=HLA.C,color=type,fill=type))+
+    geom_point()+
+    geom_smooth(method=lm, se=T, fullrange=TRUE)+
+    stat_cor(method = "spearman")+
+    labs(title = "HLA-C vs IFNascore")+
+    xlab(label = "IFNa signature score")+
+    ylab(label = "Log10 HLA-C expression")
+dev.off()
+
+df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$HLA.C, df_qAC$type)
+openxlsx::write.xlsx(df.nano, "Plot_HLA-C vs IFNascore.xlsx", rowNames = F)	
+
+#Correlation all Genes vs IFNa
+x1 <- df_qA
+genesTotest <- subset(rownames(x1), !(rownames(x1)%in%c(IFNa_genes)))
+tt1 <- x1[IFNa_genes,]
+dfx <- data.frame()
+for (i in genesTotest){
+    AllCor <- apply(tt1,1,function(x) cor(x,as.numeric(x1[i,]),method="spearman"))
+    TR1corr<- data.frame(value=AllCor,tested.Gene=rep(i,nrow(tt1)))
+    TR1corr$Type <- "IFNa_genes"
+    TR1corr$Median1 <- median(AllCor)
+    dfx <- rbind(TR1corr,dfx)
+}
+
+dfx <- dfx[rev(order(dfx$Median1)),]
+dfx2 <- dfx[1:(length(IFNa_genes)*100),]
+dfx2 <- dfx2[order(dfx2$Median1, decreasing = T), ]
+dfx2 <- head(dfx2, n = 20*16)
+dfx2$tested.Gene <- factor(dfx2$tested.Gene, levels = unique(dfx2$tested.Gene))
+
+pdf(file = "Plot_Top.corr.IFNaGenes.All.pdf",width = 12, height = 4)
+ggplot(dfx2, aes(x = tested.Gene, y = value)) + 
+    geom_boxplot() + geom_dotplot(binaxis='y', stackdir='center',
+                                  position=position_dodge(0.75),dotsize = 0.5) +
+    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
+    labs(title =  "Correlation against IFNa Genes")+
+    xlab(label="Top genes")+
+    ylab(label="rho coefficient")
+dev.off()
+
+openxlsx::write.xlsx(dfx2, "Plot_Top.corr.IFNaGenes.All.xlsx", rowNames = T)	
--- a/Raw_data/.Rhistory
+++ b/Raw_data/.Rhistory
--- a/Raw_data/CRB02_P.RCC
+++ b/Raw_data/CRB02_P.RCC
--- a/Raw_data/CRB03_P.RCC
+++ b/Raw_data/CRB03_P.RCC
--- a/Raw_data/CRB03_T.RCC
+++ b/Raw_data/CRB03_T.RCC
--- a/Raw_data/CRB04_P.RCC
+++ b/Raw_data/CRB04_P.RCC
--- a/Raw_data/CRB04_T.RCC
+++ b/Raw_data/CRB04_T.RCC
--- a/Raw_data/CRB05_P.RCC
+++ b/Raw_data/CRB05_P.RCC
--- a/Raw_data/CRB05_T.RCC
+++ b/Raw_data/CRB05_T.RCC
--- a/Raw_data/CRB07_P.RCC
+++ b/Raw_data/CRB07_P.RCC
--- a/Raw_data/CRB07_T.RCC
+++ b/Raw_data/CRB07_T.RCC
--- a/Raw_data/CRB08_P.RCC
+++ b/Raw_data/CRB08_P.RCC
--- a/Raw_data/CRB08_T.RCC
+++ b/Raw_data/CRB08_T.RCC
--- a/Raw_data/CRB10_P.RCC
+++ b/Raw_data/CRB10_P.RCC
--- a/Raw_data/CRB10_T.RCC
+++ b/Raw_data/CRB10_T.RCC
--- a/Raw_data/CRB16_P.RCC
+++ b/Raw_data/CRB16_P.RCC
--- a/Raw_data/CRB16_T.RCC
+++ b/Raw_data/CRB16_T.RCC
--- a/Raw_data/CRB21_P.RCC
+++ b/Raw_data/CRB21_P.RCC
--- a/Raw_data/CRB21_T.RCC
+++ b/Raw_data/CRB21_T.RCC
--- a/Raw_data/CRB28_P.RCC
+++ b/Raw_data/CRB28_P.RCC