Nanostring_analysis.R 6.42 KB
Newer Older
Stefano Beretta's avatar
Stefano Beretta committed
1
2
3
4
5
6
# NanoString Analisys
library(dplyr)
library(psych)
library(ggplot2)
library(ggpubr)

Stefano Beretta's avatar
Stefano Beretta committed
7
wdir <- "squadrito_livertumor2022_nanostring"
Stefano Beretta's avatar
Stefano Beretta committed
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183

rawdata <- paste(wdir, "Raw_data", sep = "/")
plots <- paste(wdir, "Plots", sep = "/")
dir.create(plots, showWarnings = F)

# Leggi tutti i file CSV nella cartella e concatenali
setwd(rawdata)
all_data <- NULL
for (file_name in list.files(pattern = "*.RCC")) {
    data <- read.csv(file_name, skip = 26)
    data <- data[1:(nrow(data)-3),]
    data$file <- file_name
    data$patient <- substr(file_name,1,5)
    data$Type_sample <-substr(file_name,7,7)
    all_data <- rbind(all_data, data)
}

# TAKING ALL TOGETHER
A_data <- reshape(
    data = all_data,
    direction = "wide",
    idvar = "Name",
    timevar = "file",
    v.names = "Count",
    drop = c("Type_sample","patient"))

# Normalize quantiles
df_q <- A_data[rowSums(is.na(A_data))==0 & rowSums(A_data>4)>20,]
rbind(dim(A_data),dim(df_q))
df_q <- rbind(df_q,A_data[481,])#add IL10 data, because of importance
df_q[,4:ncol(df_q)] <- limma::normalizeQuantiles(df_q[,4:ncol(df_q)])
df_q[,4:ncol(df_q)] <- log10(df_q[,4:ncol(df_q)])

# Split in 2, according to Tumor (T) o peritumor (P) areas
typeS <- paste("type" , substr(colnames(df_q),13,13),sep=".")
df_qT <- df_q[,typeS=="type.T"]
rownames(df_qT) <- df_q[,2]
df_qP <- df_q[,typeS=="type.P"]
rownames(df_qP) <- df_q[,2]
df_qA <- df_q[,4:ncol(df_q)]
rownames(df_qA) <- df_q[,2]

# Differential expression
Sigs <- readxl::read_excel(paste(wdir, "Signatures_Table5S.xlsx", sep = "/"))
IFNa_genes<-intersect(df_q[,2],Sigs[[1]][-1])
TR1_genes<- intersect(df_q[,2],c(Sigs[[2]][2:17],"IL10RA"))
HLA_genes <- grep(pattern="HLA",df_q[,2],value = T)
MS_genes <- intersect(df_q[,2], c("EPCAM","CDH1","SOCS1","STAT1",
                                  "NLRC5","TGFB1","CD86","TAP1","TAP2",
                                  "CD3G","CD8A","EOMES","PDCD1","GZMK",
                                  "CD40","CD74","TREM2","LAG3","IL10RA",
                                  "HAVCR2","CTLA4","CASP3","CD69","ITGAE","ITGA1",
                                  "CD7","IL2","TNF","IL10"))

#Figures: P vs T
#Diff gene expression P vs T, selected genes
#Reshape wide to long matrix with expression values
df_qA2 <- reshape(df_qA, 
                  direction = "long",
                  v.names="value",
                  varying = names(df_qA),
                  times= names(df_qA),
                  timevar = "file",
                  idvar="genes",
                  ids = rownames(df_qA))
df_qA2$patient <- substr(df_qA2$file,7,11)
df_qA2$type <- substr(df_qA2$file,13,13)

## Plots
setwd(plots)

# Preparing_wNormByGene
GiveSigs <-  function(x1,x2){
    nor.genes <- x1[x2,]
    mean1 <- apply(nor.genes,1,geometric.mean)
    df1<- as.data.frame(t(nor.genes))
    mean2<- as.data.frame(t(mapply(`/`, df1,mean1)))
    result1<- apply(mean2,2,geometric.mean)
    names(result1) <- names(x1)
    return(result1)}
IFNa_Score.P <-GiveSigs(df_qP,IFNa_genes)
TR1_Score.P <- GiveSigs(df_qP,TR1_genes)
IFNa_Score.T <- GiveSigs(df_qT,IFNa_genes)
TR1_Score.T <- GiveSigs(df_qT,TR1_genes)

# Add IFNa and TR1 scores to df_qA
df_qT2 <- df_qT
colnames(df_qT2) <- substr(colnames(df_qT2), 7, 11)
df_qT3 <- data.frame(t(df_qT2))
df_qT3$type <- "T"
df_qT3$patient <- rownames(df_qT3)
df_qT3$TR1_Score <- TR1_Score.T
df_qT3$IFNa_Score <- IFNa_Score.T

df_qP2 <- df_qP
colnames(df_qP2) <- substr(colnames(df_qP2), 7, 11)
df_qP3 <- data.frame(t(df_qP2))
df_qP3$type <- "P"
df_qP3$patient <- rownames(df_qP3)
df_qP3$TR1_Score <- TR1_Score.P
df_qP3$IFNa_Score <- IFNa_Score.P
df_qAC <- rbind(df_qP3, df_qT3)

# Figure: Correlation TR1score and IFNascore
pdf(file = "Plot_TR1score_vs_IFNascore.pdf",width = 4, height = 4)
ggplot(df_qAC, aes(x=IFNa_Score, y=TR1_Score,color=type,fill=type))+
    geom_point()+
    geom_smooth(method=lm, se=T, fullrange=TRUE)+
    stat_cor(method = "spearman")+
    labs(title = "TR1score vs IFNascore")+
    xlab(label="IFNa signature score")+
    ylab(label="Tr1 signature score")
dev.off()

df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$TR1_Score, df_qAC$type)
openxlsx::write.xlsx(df.nano,"Plot_TR1score_vs_IFNascore.xlsx", rowNames = F)	


# Figure: Correlation CTLA4 and IFNascore
pdf(file = "Plot_CTLA4_vs_IFNascore.pdf",width = 4, height = 4)
ggplot(df_qAC, aes(x=IFNa_Score, y=CTLA4,color=type,fill=type))+
    geom_point()+
    geom_smooth(method=lm, se=T, fullrange=TRUE)+
    stat_cor(method = "spearman")+
    labs(title = "CTLA4 vs IFNascore")+
    xlab(label = "IFNa signature score")+
    ylab(label = "Log10 CTLA4 expression")
dev.off()

df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$CTLA4, df_qAC$type)
openxlsx::write.xlsx(df.nano, "Plot_CTLA4_vs_IFNascore.xlsx", rowNames = F)

# Figure: Correlation HLA-C and IFNascore
pdf(file = "Plot_HLA-C_vs_IFNascore.pdf",width = 4, height = 4)
ggplot(df_qAC, aes(x=IFNa_Score, y=HLA.C,color=type,fill=type))+
    geom_point()+
    geom_smooth(method=lm, se=T, fullrange=TRUE)+
    stat_cor(method = "spearman")+
    labs(title = "HLA-C vs IFNascore")+
    xlab(label = "IFNa signature score")+
    ylab(label = "Log10 HLA-C expression")
dev.off()

df.nano <- data.frame(rownames(df_qAC), df_qAC$IFNa_Score, df_qAC$HLA.C, df_qAC$type)
openxlsx::write.xlsx(df.nano, "Plot_HLA-C vs IFNascore.xlsx", rowNames = F)	

#Correlation all Genes vs IFNa
x1 <- df_qA
genesTotest <- subset(rownames(x1), !(rownames(x1)%in%c(IFNa_genes)))
tt1 <- x1[IFNa_genes,]
dfx <- data.frame()
for (i in genesTotest){
    AllCor <- apply(tt1,1,function(x) cor(x,as.numeric(x1[i,]),method="spearman"))
    TR1corr<- data.frame(value=AllCor,tested.Gene=rep(i,nrow(tt1)))
    TR1corr$Type <- "IFNa_genes"
    TR1corr$Median1 <- median(AllCor)
    dfx <- rbind(TR1corr,dfx)
}

dfx <- dfx[rev(order(dfx$Median1)),]
dfx2 <- dfx[1:(length(IFNa_genes)*100),]
dfx2 <- dfx2[order(dfx2$Median1, decreasing = T), ]
dfx2 <- head(dfx2, n = 20*16)
dfx2$tested.Gene <- factor(dfx2$tested.Gene, levels = unique(dfx2$tested.Gene))

pdf(file = "Plot_Top.corr.IFNaGenes.All.pdf",width = 12, height = 4)
ggplot(dfx2, aes(x = tested.Gene, y = value)) + 
    geom_boxplot() + geom_dotplot(binaxis='y', stackdir='center',
                                  position=position_dodge(0.75),dotsize = 0.5) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
    labs(title =  "Correlation against IFNa Genes")+
    xlab(label="Top genes")+
    ylab(label="rho coefficient")
dev.off()

openxlsx::write.xlsx(dfx2, "Plot_Top.corr.IFNaGenes.All.xlsx", rowNames = T)