Commit 412555ba authored by Stefano Beretta's avatar Stefano Beretta
Browse files

Added script and raw data.

parent 7e39cec6
# NanoString Analisys
library(dplyr)
library(psych)
library(ggplot2)
library(ggpubr)
wdir <- "~/Downloads/tmp/squadrito_livertumor2022_nanostring"
rawdata <- paste(wdir, "Raw_data", sep = "/")
plots <- paste(wdir, "Plots", sep = "/")
dir.create(plots, showWarnings = F)
# Leggi tutti i file CSV nella cartella e concatenali
setwd(rawdata)
all_data <- NULL
for (file_name in list.files(pattern = "*.RCC")) {
data <- read.csv(file_name, skip = 26)
data <- data[1:(nrow(data)-3),]
data$file <- file_name
data$patient <- substr(file_name,1,5)
data$Type_sample <-substr(file_name,7,7)
all_data <- rbind(all_data, data)
}
# TAKING ALL TOGETHER
A_data <- reshape(
data = all_data,
direction = "wide",
idvar = "Name",
timevar = "file",
v.names = "Count",
drop = c("Type_sample","patient"))
# Normalize quantiles
df_q <- A_data[rowSums(is.na(A_data))==0 & rowSums(A_data>4)>20,]
rbind(dim(A_data),dim(df_q))
df_q <- rbind(df_q,A_data[481,])#add IL10 data, because of importance
df_q[,4:ncol(df_q)] <- limma::normalizeQuantiles(df_q[,4:ncol(df_q)])
df_q[,4:ncol(df_q)] <- log10(df_q[,4:ncol(df_q)])
# Split in 2, according to Tumor (T) o peritumor (P) areas
typeS <- paste("type" , substr(colnames(df_q),13,13),sep=".")
df_qT <- df_q[,typeS=="type.T"]
rownames(df_qT) <- df_q[,2]
df_qP <- df_q[,typeS=="type.P"]
rownames(df_qP) <- df_q[,2]
df_qA <- df_q[,4:ncol(df_q)]
rownames(df_qA) <- df_q[,2]
# Differential expression
Sigs <- readxl::read_excel(paste(wdir, "Signatures_Table5S.xlsx", sep = "/"))
IFNa_genes<-intersect(df_q[,2],Sigs[[1]][-1])
TR1_genes<- intersect(df_q[,2],c(Sigs[[2]][2:17],"IL10RA"))
HLA_genes <- grep(pattern="HLA",df_q[,2],value = T)
MS_genes <- intersect(df_q[,2], c("EPCAM","CDH1","SOCS1","STAT1",
"NLRC5","TGFB1","CD86","TAP1","TAP2",
"CD3G","CD8A","EOMES","PDCD1","GZMK",
"CD40","CD74","TREM2","LAG3","IL10RA",
"HAVCR2","CTLA4","CASP3","CD69","ITGAE","ITGA1",
"CD7","IL2","TNF","IL10"))
#Figures: P vs T
#Diff gene expression P vs T, selected genes
#Reshape wide to long matrix with expression values
df_qA2 <- reshape(df_qA,
direction = "long",
v.names="value",
varying = names(df_qA),
times= names(df_qA),
timevar = "file",
idvar="genes",
ids = rownames(df_qA))
df_qA2$patient <- substr(df_qA2$file,7,11)
df_qA2$type <- substr(df_qA2$file,13,13)
## Plots
setwd(plots)
# Preparing_wNormByGene
GiveSigs <- function(x1,x2){
nor.genes <- x1[x2,]
mean1 <- apply(nor.genes,1,geometric.mean)
df1<- as.data.frame(t(nor.genes))
mean2<- as.data.frame(t(mapply(`/`, df1,mean1)))
result1<- apply(mean2,2,geometric.mean)
names(result1) <- names(x1)
return(result1)}
IFNa_Score.P <-GiveSigs(df_qP,IFNa_genes)
TR1_Score.P <- GiveSigs(df_qP,TR1_genes)
IFNa_Score.T <- GiveSigs(df_qT,IFNa_genes)
TR1_Score.T <- GiveSigs(df_qT,TR1_genes)
# Add IFNa and TR1 scores to df_qA
df_qT2 <- df_qT
colnames(df_qT2) <- substr(colnames(df_qT2), 7, 11)
df_qT3 <- data.frame(t(df_qT2))
df_qT3$type <- "T"
df_qT3$patient <- rownames(df_qT3)
df_qT3$TR1_Score <- TR1_Score.T
df_qT3$IFNa_Score <- IFNa_Score.T
df_qP2 <- df_qP
colnames(df_qP2) <- substr(colnames(df_qP2), 7, 11)
df_qP3 <- data.frame(t(df_qP2))
df_qP3$type <- "P"
df_qP3$patient <- rownames(df_qP3)
df_qP3$TR1_Score <- TR1_Score.P
df_qP3$IFNa_Score <- IFNa_Score.P
df_qAC <- rbind(df_qP3, df_qT3)
# Figure: Correlation TR1score and IFNascore
pdf(file = "Plot_TR1score_vs_IFNascore.pdf",width = 4, height = 4)
ggplot(df_qAC, aes(x=IFNa_Score, y=TR1_Score,color=type,fill=type))+
geom_point()+
geom_smooth(method=lm, se=T, fullrange=TRUE)+
stat_cor(method = "spearman")+
labs(title = "TR1score vs IFNascore")+
xlab(label="IFNa signature score")+
ylab(label="Tr1 signature score")
dev.off()
df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$TR1_Score, df_qAC$type)
openxlsx::write.xlsx(df.nano,"Plot_TR1score_vs_IFNascore.xlsx", rowNames = F)
# Figure: Correlation CTLA4 and IFNascore
pdf(file = "Plot_CTLA4_vs_IFNascore.pdf",width = 4, height = 4)
ggplot(df_qAC, aes(x=IFNa_Score, y=CTLA4,color=type,fill=type))+
geom_point()+
geom_smooth(method=lm, se=T, fullrange=TRUE)+
stat_cor(method = "spearman")+
labs(title = "CTLA4 vs IFNascore")+
xlab(label = "IFNa signature score")+
ylab(label = "Log10 CTLA4 expression")
dev.off()
df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$CTLA4, df_qAC$type)
openxlsx::write.xlsx(df.nano, "Plot_CTLA4_vs_IFNascore.xlsx", rowNames = F)
# Figure: Correlation HLA-C and IFNascore
pdf(file = "Plot_HLA-C_vs_IFNascore.pdf",width = 4, height = 4)
ggplot(df_qAC, aes(x=IFNa_Score, y=HLA.C,color=type,fill=type))+
geom_point()+
geom_smooth(method=lm, se=T, fullrange=TRUE)+
stat_cor(method = "spearman")+
labs(title = "HLA-C vs IFNascore")+
xlab(label = "IFNa signature score")+
ylab(label = "Log10 HLA-C expression")
dev.off()
df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$HLA.C, df_qAC$type)
openxlsx::write.xlsx(df.nano, "Plot_HLA-C vs IFNascore.xlsx", rowNames = F)
#Correlation all Genes vs IFNa
x1 <- df_qA
genesTotest <- subset(rownames(x1), !(rownames(x1)%in%c(IFNa_genes)))
tt1 <- x1[IFNa_genes,]
dfx <- data.frame()
for (i in genesTotest){
AllCor <- apply(tt1,1,function(x) cor(x,as.numeric(x1[i,]),method="spearman"))
TR1corr<- data.frame(value=AllCor,tested.Gene=rep(i,nrow(tt1)))
TR1corr$Type <- "IFNa_genes"
TR1corr$Median1 <- median(AllCor)
dfx <- rbind(TR1corr,dfx)
}
dfx <- dfx[rev(order(dfx$Median1)),]
dfx2 <- dfx[1:(length(IFNa_genes)*100),]
dfx2 <- dfx2[order(dfx2$Median1, decreasing = T), ]
dfx2 <- head(dfx2, n = 20*16)
dfx2$tested.Gene <- factor(dfx2$tested.Gene, levels = unique(dfx2$tested.Gene))
pdf(file = "Plot_Top.corr.IFNaGenes.All.pdf",width = 12, height = 4)
ggplot(dfx2, aes(x = tested.Gene, y = value)) +
geom_boxplot() + geom_dotplot(binaxis='y', stackdir='center',
position=position_dodge(0.75),dotsize = 0.5) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(title = "Correlation against IFNa Genes")+
xlab(label="Top genes")+
ylab(label="rho coefficient")
dev.off()
openxlsx::write.xlsx(dfx2, "Plot_Top.corr.IFNaGenes.All.xlsx", rowNames = T)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment