# NanoString Analisys library(dplyr) library(psych) library(ggplot2) library(ggpubr) wdir <- "~/Downloads/tmp/squadrito_livertumor2022_nanostring" rawdata <- paste(wdir, "Raw_data", sep = "/") plots <- paste(wdir, "Plots", sep = "/") dir.create(plots, showWarnings = F) # Leggi tutti i file CSV nella cartella e concatenali setwd(rawdata) all_data <- NULL for (file_name in list.files(pattern = "*.RCC")) { data <- read.csv(file_name, skip = 26) data <- data[1:(nrow(data)-3),] data$file <- file_name data$patient <- substr(file_name,1,5) data$Type_sample <-substr(file_name,7,7) all_data <- rbind(all_data, data) } # TAKING ALL TOGETHER A_data <- reshape( data = all_data, direction = "wide", idvar = "Name", timevar = "file", v.names = "Count", drop = c("Type_sample","patient")) # Normalize quantiles df_q <- A_data[rowSums(is.na(A_data))==0 & rowSums(A_data>4)>20,] rbind(dim(A_data),dim(df_q)) df_q <- rbind(df_q,A_data[481,])#add IL10 data, because of importance df_q[,4:ncol(df_q)] <- limma::normalizeQuantiles(df_q[,4:ncol(df_q)]) df_q[,4:ncol(df_q)] <- log10(df_q[,4:ncol(df_q)]) # Split in 2, according to Tumor (T) o peritumor (P) areas typeS <- paste("type" , substr(colnames(df_q),13,13),sep=".") df_qT <- df_q[,typeS=="type.T"] rownames(df_qT) <- df_q[,2] df_qP <- df_q[,typeS=="type.P"] rownames(df_qP) <- df_q[,2] df_qA <- df_q[,4:ncol(df_q)] rownames(df_qA) <- df_q[,2] # Differential expression Sigs <- readxl::read_excel(paste(wdir, "Signatures_Table5S.xlsx", sep = "/")) IFNa_genes<-intersect(df_q[,2],Sigs[[1]][-1]) TR1_genes<- intersect(df_q[,2],c(Sigs[[2]][2:17],"IL10RA")) HLA_genes <- grep(pattern="HLA",df_q[,2],value = T) MS_genes <- intersect(df_q[,2], c("EPCAM","CDH1","SOCS1","STAT1", "NLRC5","TGFB1","CD86","TAP1","TAP2", "CD3G","CD8A","EOMES","PDCD1","GZMK", "CD40","CD74","TREM2","LAG3","IL10RA", "HAVCR2","CTLA4","CASP3","CD69","ITGAE","ITGA1", "CD7","IL2","TNF","IL10")) #Figures: P vs T #Diff gene expression P vs T, selected genes #Reshape wide to long matrix with expression values df_qA2 <- reshape(df_qA, direction = "long", v.names="value", varying = names(df_qA), times= names(df_qA), timevar = "file", idvar="genes", ids = rownames(df_qA)) df_qA2$patient <- substr(df_qA2$file,7,11) df_qA2$type <- substr(df_qA2$file,13,13) ## Plots setwd(plots) # Preparing_wNormByGene GiveSigs <- function(x1,x2){ nor.genes <- x1[x2,] mean1 <- apply(nor.genes,1,geometric.mean) df1<- as.data.frame(t(nor.genes)) mean2<- as.data.frame(t(mapply(`/`, df1,mean1))) result1<- apply(mean2,2,geometric.mean) names(result1) <- names(x1) return(result1)} IFNa_Score.P <-GiveSigs(df_qP,IFNa_genes) TR1_Score.P <- GiveSigs(df_qP,TR1_genes) IFNa_Score.T <- GiveSigs(df_qT,IFNa_genes) TR1_Score.T <- GiveSigs(df_qT,TR1_genes) # Add IFNa and TR1 scores to df_qA df_qT2 <- df_qT colnames(df_qT2) <- substr(colnames(df_qT2), 7, 11) df_qT3 <- data.frame(t(df_qT2)) df_qT3$type <- "T" df_qT3$patient <- rownames(df_qT3) df_qT3$TR1_Score <- TR1_Score.T df_qT3$IFNa_Score <- IFNa_Score.T df_qP2 <- df_qP colnames(df_qP2) <- substr(colnames(df_qP2), 7, 11) df_qP3 <- data.frame(t(df_qP2)) df_qP3$type <- "P" df_qP3$patient <- rownames(df_qP3) df_qP3$TR1_Score <- TR1_Score.P df_qP3$IFNa_Score <- IFNa_Score.P df_qAC <- rbind(df_qP3, df_qT3) # Figure: Correlation TR1score and IFNascore pdf(file = "Plot_TR1score_vs_IFNascore.pdf",width = 4, height = 4) ggplot(df_qAC, aes(x=IFNa_Score, y=TR1_Score,color=type,fill=type))+ geom_point()+ geom_smooth(method=lm, se=T, fullrange=TRUE)+ stat_cor(method = "spearman")+ labs(title = "TR1score vs IFNascore")+ xlab(label="IFNa signature score")+ ylab(label="Tr1 signature score") dev.off() df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$TR1_Score, df_qAC$type) openxlsx::write.xlsx(df.nano,"Plot_TR1score_vs_IFNascore.xlsx", rowNames = F) # Figure: Correlation CTLA4 and IFNascore pdf(file = "Plot_CTLA4_vs_IFNascore.pdf",width = 4, height = 4) ggplot(df_qAC, aes(x=IFNa_Score, y=CTLA4,color=type,fill=type))+ geom_point()+ geom_smooth(method=lm, se=T, fullrange=TRUE)+ stat_cor(method = "spearman")+ labs(title = "CTLA4 vs IFNascore")+ xlab(label = "IFNa signature score")+ ylab(label = "Log10 CTLA4 expression") dev.off() df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$CTLA4, df_qAC$type) openxlsx::write.xlsx(df.nano, "Plot_CTLA4_vs_IFNascore.xlsx", rowNames = F) # Figure: Correlation HLA-C and IFNascore pdf(file = "Plot_HLA-C_vs_IFNascore.pdf",width = 4, height = 4) ggplot(df_qAC, aes(x=IFNa_Score, y=HLA.C,color=type,fill=type))+ geom_point()+ geom_smooth(method=lm, se=T, fullrange=TRUE)+ stat_cor(method = "spearman")+ labs(title = "HLA-C vs IFNascore")+ xlab(label = "IFNa signature score")+ ylab(label = "Log10 HLA-C expression") dev.off() df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$HLA.C, df_qAC$type) openxlsx::write.xlsx(df.nano, "Plot_HLA-C vs IFNascore.xlsx", rowNames = F) #Correlation all Genes vs IFNa x1 <- df_qA genesTotest <- subset(rownames(x1), !(rownames(x1)%in%c(IFNa_genes))) tt1 <- x1[IFNa_genes,] dfx <- data.frame() for (i in genesTotest){ AllCor <- apply(tt1,1,function(x) cor(x,as.numeric(x1[i,]),method="spearman")) TR1corr<- data.frame(value=AllCor,tested.Gene=rep(i,nrow(tt1))) TR1corr$Type <- "IFNa_genes" TR1corr$Median1 <- median(AllCor) dfx <- rbind(TR1corr,dfx) } dfx <- dfx[rev(order(dfx$Median1)),] dfx2 <- dfx[1:(length(IFNa_genes)*100),] dfx2 <- dfx2[order(dfx2$Median1, decreasing = T), ] dfx2 <- head(dfx2, n = 20*16) dfx2$tested.Gene <- factor(dfx2$tested.Gene, levels = unique(dfx2$tested.Gene)) pdf(file = "Plot_Top.corr.IFNaGenes.All.pdf",width = 12, height = 4) ggplot(dfx2, aes(x = tested.Gene, y = value)) + geom_boxplot() + geom_dotplot(binaxis='y', stackdir='center', position=position_dodge(0.75),dotsize = 0.5) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ labs(title = "Correlation against IFNa Genes")+ xlab(label="Top genes")+ ylab(label="rho coefficient") dev.off() openxlsx::write.xlsx(dfx2, "Plot_Top.corr.IFNaGenes.All.xlsx", rowNames = T)