Commit f69fe925 authored by Marco Monti's avatar Marco Monti
Browse files

I added the manuscript's scripts

parent 3bcab94a
library(Seurat)
##########################################################
####DOTPLOT WITH STATISTIC FROM LIVER####
# Create function
PlotStellare <- function(obs1=obs1, features1=features1, valueG="NewlablesG",
i="CD8 T cells", group.by0 ="Clonetype2.Group", controGroup="SIINFEKL", noCtrlvsAll=T){
obs1 <- SetIdent(obs1, value = valueG)
pa1 <- DotPlot(obs1, features = features1, idents = i, dot.scale=15, split.by = group.by0,
cols="RdBu", scale=T)+RotatedAxis()
df1 <- pa1$data
contro0<- paste0(i,"_",controGroup)
a1 <- sub(paste0(i,"_"),"",unique(df1$id))
dfcombo <- NULL
for (ix in a1){if(ix!=controGroup){
m.Combo <- FindMarkers(obs1, ident.1 = ix,ident.2=controGroup, group.by = group.by0,
subset.ident = i,features = features1,
min.pct=0.01, logfc.threshold=0, min.cells.feature=1, min.cells.group=1)
}else{
m.Combo <- FindMarkers(obs1, ident.1 = ix, group.by = group.by0,
subset.ident = i,features = features1,
min.pct=0.01, logfc.threshold=0, min.cells.feature=1, min.cells.group=1)
if(noCtrlvsAll){m.Combo$p_val_adj <- 1}
}
m.Combo <- m.Combo[match(features1,rownames(m.Combo)),]
m.Combo$ix <- ix
dfcombo <- rbind(dfcombo,m.Combo)}
df2 <- cbind(df1,dfcombo)
data.frame(df2$id,df2$ix)
df2$stars <- ifelse(df2$p_val_adj< 0.001,"***",
ifelse(df2$p_val_adj< 0.01,"**",
ifelse(df2$p_val_adj< 0.05,"*","")))
df2$id <- factor(x = df2$id, levels = paste0(i,"_",rev(a1)))
a <- ggplot(df2, aes(y=id, x=features.plot, size=pct.exp ,color=avg.exp.scaled)) +
geom_point() +
scale_size_area(max_size=15) +
scale_colour_gradient2(low="blue",mid="darkgrey",high="darkred") + #"#CCCCFF"
labs(y='',x='')+
geom_text(aes(label=stars,vjust=0.8),col="black",size=4)+
theme_classic()+
theme(axis.text.y = element_text(color = "Black"))+
theme(axis.text.x = element_text(angle=45, hjust=1, color = "Black"))+
theme(axis.line.x=element_line(color="Black"))+
theme(axis.line.y=element_line(color="Black"))+
ggtitle(paste0(unique(obs1$Tissue),".",i," vs ",controGroup))
return(a)}
plotRadar <- function(i){
ploTData<- rbind(max1,min1,get(i))
id <- sub(".db","",i)
radarchart(ploTData, title = id, pcol=colors_border, pfcol=colors_in, plwd=2, plty=1, cglcol="grey", cglty = 1, cglwd=0.8)
} #function to plot radarplot
library(Seurat)
library(openxlsx)
library(readxl)
library(tidyr)
library("RColorBrewer")
library(dplyr)
library(reshape2)
library(fgsea)
library(patchwork)
library(presto)
library(ggplot2)
library(matrixStats)
library(fmsb)
cluster_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-856756828_scRNAseq_Notaro/Analysis_MM3/results2"
wdir <- paste0(cluster_dir, "/Result2_MNOT")
ldir <- paste0(cluster_dir, "/Liver_APCs/")
pdir <- paste0(cluster_dir, "/Result2_MNOT/Plots")
# Custom functions
#PlotStellare <-readRDS(paste0(username,"Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/Notaro et al/Scripts/Functions/PlotStellare.rds"))
source(file = "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/last_scripts_MNOT/TAD018/plot_stellare_function.R")
#############################################################################
##########################LIVER MACS ANALYSIS################################
#############################################################################
apcs.liver <- readRDS(paste0(ldir, "Liver_APCs_final.rds"))
#assign groups
#name and order groups
apcs.liver@meta.data$RNA_Group[apcs.liver@meta.data$PROP.Condition=="li.OVA"] <- "liOVA"
apcs.liver@meta.data$RNA_Group[apcs.liver@meta.data$PROP.Condition=="li.OVA.Combo"] <- "OVA.Combo"
apcs.liver@meta.data$RNA_Group[apcs.liver@meta.data$PROP.Condition=="li.Stop"] <- "SIINFEKL"
apcs.liver@meta.data$RNA_Group[apcs.liver@meta.data$PROP.Condition=="li.Stop.Combo"] <- "SIINFEKL.Combo"
apcs.liver@meta.data$RNA_Group <- factor(x = apcs.liver@meta.data$RNA_Group, levels = c("liOVA", "OVA.Combo","SIINFEKL", "SIINFEKL.Combo"))
apcs.liver@meta.data$Tissue <- "Liver"
#######Figure S4L- PLOT STELLARE################
features1 <- c("H2-Q7", "H2-T22", "H2-Q4", "H2-K1", "H2-T23", "H2-M3", "H2-D1","Tap1","Tap2", #MHCI
"H2-Eb1", "H2-Ab1", "H2-Aa", "H2-DMb1", "H2-DMa", "Ciita", #MHCII
"Ifi44", "Oasl1","Irf7","Isg15", "Isg20","Oas1a","Oas1g", #IFNa
"Ccl5", "Upp1", "Slamf7", "Cxcl9", "Gbp4","Cd74", #IFNg
"Vav2","Tgfb1","Il10","Ccl24","Mmp8","Tmem176b","Trem2","Fn1") #protumor
apcs.liver<-SetIdent(apcs.liver, value = "RNA_Group")
pdf(paste0(ldir, "/Dotplot_stellare_APCs_FigS4L.pdf"), width=13, height=3)
PlotStellare(obs1 = apcs.liver,features1 = features1, valueG = "NewlablesM_12_clu", i="CD8 cDC1", group.by0="RNA_Group" , noCtrlvsAll=T, controGroup="SIINFEKL")
dev.off()
#####################################################################
############Figure S4M - RADAR PLOT MACROCLASSES####################
#####################################################################
apcs.liver@meta.data$Shortlables<-apcs.liver@meta.data$NewlablesM_12_clu
#DCs<-c("CD8 cDC1","moDCs", "Ccr7 DCs","pDCs","pre DCs")
#apcs.liver@meta.data$Shortlables[apcs.liver@meta.data$NewlablesM_12_clu%in%DCs]<- "DCs"
#granu<-c("Neutrophils","Basophils")
#apcs.liver@meta.data$Shortlables[apcs.liver@meta.data$NewlablesM_12_clu%in%granu]<- "Granulocytes"
plotRadar <- function(i){
ploTData<- rbind(max1,min1,get(i))
id <- sub(".db","",i)
radarchart(ploTData, title = id, pcol=colors_border, pfcol=colors_in, plwd=2, plty=1, cglcol="grey", cglty = 1,cglwd=0.8)
} #function to plot radarplot
##############LIVER RADARPLOT##########################
MHCI_genes <- list(c("H2-Q7", "H2-T22", "H2-Q4", "H2-K1", "H2-T23", "H2-M3", "H2-D1","Tap1","Tap2"))
MHCII_genes <- list( c("H2-Eb1", "H2-Ab1", "H2-Aa", "H2-DMb1", "H2-DMa", "Ciita"))
Protumoral_genes <- list(c("Vav2","Tgfb1","Il10","Ccl24","Mmp8","Tmem176b","Trem2","Fn1"))
IFNa_genes <- list(c("Ifi44", "Oasl1","Irf7","Isg15", "Isg20","Oas1a","Oas1g"))
IFNg_genes <-list(c("Ccl5", "Upp1", "Slamf7", "Cxcl9", "Gbp4","Cd74"))
apcs.liver <- AddModuleScore(apcs.liver, features = MHCI_genes, name = "MHCI_genes")
apcs.liver <- AddModuleScore(apcs.liver, features = MHCII_genes, name = "MHCII_genes")
apcs.liver <- AddModuleScore(apcs.liver, features = Protumoral_genes, name = "Protumoral_genes")
apcs.liver <- AddModuleScore(apcs.liver, features = IFNa_genes, name = "IFNa_genes")
apcs.liver <- AddModuleScore(apcs.liver, features = IFNg_genes, name = "IFNg_genes")
#Normalize data
#we needed to find a way to normalize the module scores.
#we used a quantile normalization method.
M1<-apcs.liver@meta.data[,c("Shortlables", "RNA_Group", "MHCI_genes1","MHCII_genes1","Protumoral_genes1","IFNa_genes1","IFNg_genes1")]
M1[,3:7]<-limma::normalizeQuantiles(M1[,3:7])
Groups <-unique(apcs.liver$RNA_Group)
CellType<-unique(M1$Shortlables)
for (i in CellType){
M2<-M1[M1$Shortlables%in%i,]
M3a<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[1],3:7]),na.rm=T)
M3b<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[2],3:7]),na.rm=T)
M3c<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[3],3:7]),na.rm=T)
M3d<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[4],3:7]),na.rm=T)
M4<-rbind(M3a,M3b,M3c,M3d)
rownames(M4)<-Groups
colnames(M4)<-colnames(M2[,3:7])
x<-as.data.frame(M4)
assign(paste0(i,".db"),x)}
lsCells <- grep(".db",ls(),value=T)
MinMax<-lapply(lsCells,get)
max1 <- rep(do.call("max", MinMax),5)
min1 <- rep(do.call("min", MinMax),5)
colors_border=c(rgb(0,0,1,0.9), rgb(0,0.8,0,0.9), rgb(0,0.5,0.5,0.9), rgb(0,0,0,0.9))
colors_in=c(rgb(0,0,0.6,0.1), rgb(0,0.6,0,0.1), rgb(0,0.1,0.1,0.1), rgb(0.1,0.1,0.1,0.1))
pdf(paste0(pdir, "/Radar_plot_liverAPCs_shorterlables_FigS4M_new2.pdf"), width=17, height=6)
par(mfrow=c(2,5))
for (i in 1:length(unique(apcs.liver$Shortlables))){
plotRadar(lsCells[i])
}
dev.off()
This diff is collapsed.
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(harmony))
suppressPackageStartupMessages(library(openxlsx))
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer)) # Color palettes for visualization
suppressPackageStartupMessages(library(dplyr))
##############################################################################
################Figure S4D - Liver clusters dotplot markers###################
dataset <- "18"
Tissue <- "Liver" # "Liver" or "Tumor"
wd <- paste0("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM")
setwd(wd)
# Plot dir
plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/results3/Result3_MNOT/Plots"
dir.create(plot_dir, showWarnings = FALSE)
# Load the RDS
obs1 <- readRDS(paste0(wd, "/results2/", Tissue, "/", Tissue, "_final.rds"))
# Assign groups
obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA_IL12"] <- "OVA.IL12"
obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA_IFNa"] <- "OVA.IFNa"
obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA"] <- "OVA"
obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="Combo3x"] <- "Combo"
#sample origin
obs1$Tube <- paste0(obs1$RNA_Group,obs1$hash.ID)
obs1@meta.data$RNA_Group<- factor(x = obs1@meta.data$RNA_Group, levels = c("OVA","OVA.IFNa","OVA.IL12","Combo"))
obs1@meta.data$Tissue <- obs1@meta.data$PROP.Source
# Liver
obs1 <- SetIdent(obs1, value = "Newlables2")
obs1@meta.data$Newlables2 <- factor(x = obs1@meta.data$Newlables2, levels = c("APCs","TandNK","B cells","Hepatocytes","Endothelial cells","Others"))
obs1_subset <- subset(obs1, Newlables2 != "Others")
# Dot Plot
features_to_plot_manual_liver_MNOT <- unique(c("Cd86", "Itgax", "Clec9a", "Flt3", "Clec4f", "Adgre1", "Lyz2", "Marco", "Cd14", # APCs
"Cd3e", "Cd8a", "Cd4", "Nkg7", "Gzmb", "Cd3g", "Klrb1c", # TandNK
"Cd79a", "Ms4a1", "Cd19", "Ighm", "Cd22", "Ebf1", "Pax5", # B cells
"Alb", "Adh1", "Cyp3a11", "Serpina1a", "Apoa1", "Fgb", "Hamp", # Hepatocytes
"Cdh5", "Pecam1", "Kdr", "Eng", "Nos3", "Esam", "Vwf" # Endothelial cells
))
paletteLength <- 100
myColor <- colorRampPalette(c("blue", "darkgrey", "darkred"))(paletteLength)
pdf(paste0(plot_dir, "/TAD018_", Tissue, "_dotplot.markers.Newlables2_FigS4D.pdf"), width=11, height=4)
dotplot1<-scCustomize::Clustered_DotPlot(obs1_subset,
features = features_to_plot_manual_liver_MNOT,
plot_km_elbow=F,
row_label_size=10,
flip=T, colors_use_exp = myColor,
cluster_feature=F, cluster_ident=F)
dev.off()
##############################################################################
################Figure S4E - Tumor clusters dotplot markers###################
dataset <- "18"
Tissue <- "Tumor" # "Liver" or "Tumor"
wd <- paste0("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A0", dataset, "/Analysis_MM")
setwd(wd)
# Plot dir
plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/results3/Result3_MNOT/Plots"
dir.create(plot_dir, showWarnings = FALSE)
# Load the RDS
obs1 <- readRDS(paste0(wd, "/results2/", Tissue, "/", Tissue, "_final.rds"))
# Assign groups
obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA_IL12"] <- "OVA.IL12"
obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA_IFNa"] <- "OVA.IFNa"
obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA"] <- "OVA"
obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="Combo3x"] <- "Combo"
#sample origin
obs1$Tube <- paste0(obs1$RNA_Group,obs1$hash.ID)
obs1@meta.data$RNA_Group<- factor(x = obs1@meta.data$RNA_Group, levels = c("OVA","OVA.IFNa","OVA.IL12","Combo"))
obs1@meta.data$Tissue <- obs1@meta.data$PROP.Source
# Tumor
obs1 <- SetIdent(obs1, value = "Newlables2")
obs1@meta.data$Newlables2 <- factor(x = obs1@meta.data$Newlables2, levels = c("APCs","TandNK","B cells","Cancer cells","Others"))
obs1_subset <- subset(obs1, Newlables2 != "Others")
# Dot Plot
features_to_plot_manual_tumor_MNOT <- unique(c("Cd86", "Itgax", "Adgre1", "Lyz2", "Cd14", "Cd68", "Fcgr1", # APCs
"Cd3e", "Cd8a", "Cd4", "Nkg7", "Gzmb", "Cd3g", # TandNK
"Cd79a", "Ms4a1", "Cd19", "Ighm", "Cd22", "Ebf1", "Pax5", # B cells
"Cd34", "Serpinf1", "Sparc", "Mxra7", "Fstl1" # Cancer cells
))
paletteLength <- 100
myColor <- colorRampPalette(c("blue", "darkgrey", "darkred"))(paletteLength)
pdf(paste0(plot_dir, "/TAD018_", Tissue, "_dotplot.markers.Newlables2_FigS4E.pdf"), width=10, height=3)
dotplot1<-scCustomize::Clustered_DotPlot(obs1_subset,
features = features_to_plot_manual_tumor_MNOT,
plot_km_elbow=F,
row_label_size=10,
flip=T, colors_use_exp = myColor,
cluster_feature=F, cluster_ident=F)
dev.off()
This diff is collapsed.
#####Fluvial
library(Seurat)
library(openxlsx)
library(readxl)
library(tidyr)
library("RColorBrewer")
library(dplyr)
library(reshape2)
library(fgsea)
library(ggalluvial)
library("gridExtra")
cluster_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/results3"
wdir<- paste0(cluster_dir, "/Result3_MNOT")
ddir<- paste0(cluster_dir, "/Liver_Tumor_TnK/")
pdir<- paste0(cluster_dir, "/Result3_MNOT/Plots")
# Load the RDS
obs1 <- readRDS(paste0(ddir, "Liver_Tumor_TnK_final.rds"))
obs1@meta.data$Tissue <- obs1@meta.data$PROP.Source
obs1@meta.data$RNA_Group <- obs1@meta.data$PROP.Condition
obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "liOVA"] <- "OVA"
obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "liOVA_IL12"] <- "OVA.IL12"
obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "liOVA_IFNa"] <- "OVA.IFNa"
obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "Combo3x"] <- "Combo"
obs1@meta.data$RNA_Group <- factor(x = obs1@meta.data$RNA_Group, levels = c("OVA","OVA.IFNa","OVA.IL12","Combo"))
# Sample origin
obs1$Tube <- paste0(obs1$RNA_Group, obs1$hash.ID)
obs1@meta.data$Orig<-paste0(obs1@meta.data$Tissue, ".", obs1@meta.data$RNA_Group)
obs1@meta.data$RNA_Group <- factor(x = obs1@meta.data$RNA_Group, levels = c("OVA","OVA.IFNa","OVA.IL12","Combo"))
#################Create labels for all CD8 and CD4 T cells#############
obs1@meta.data$NewlablesG <- obs1@meta.data$NewlablesM
obs1@meta.data$NewlablesG[grep("CD4",obs1@meta.data$NewlablesM) ] <- "CD4 T cells"
obs1@meta.data$NewlablesG[grep("CD8",obs1@meta.data$NewlablesM)] <- "CD8 T cells"
###############create labels CD4 and CD8 clusters without naive####################
obs1@meta.data$NewlablesS <- obs1@meta.data$NewlablesM
indCd4<- grepl("CD4",obs1@meta.data$NewlablesM) & !grepl("Naive",obs1@meta.data$NewlablesM)
indCd8<- grepl("CD8",obs1@meta.data$NewlablesM) & !grepl("Naive",obs1@meta.data$NewlablesM)
obs1@meta.data$NewlablesS[indCd4] <- "CD4 T cells"
obs1@meta.data$NewlablesS[indCd8] <- "CD8 T cells"
obs1@meta.data$NewlablesST <- paste0(obs1@meta.data$Tissue, ".", obs1@meta.data$NewlablesS)
################################################
###Calculating hyperexpanded T cells#############
################################################
obs1$aa3.Tube <- paste0(obs1$cdr3_aa2,".",obs1$Tube)
obs1$aa3.Tube[is.na(obs1$cdr3_aa2)] <- NA
obs1@meta.data$ClonoFreq <- obs1@meta.data %>% group_by(aa3.Tube) %>% mutate(count = n()) %>% ungroup() %>%
as.data.frame() %>% select (count)
obs1@meta.data$ClonoFreq[is.na(obs1$cdr3_aa2),] <- NA
obs1@meta.data$Clonetype <- NA
obs1@meta.data$Clonetype[obs1@meta.data$ClonoFreq==1] <- "unique"
obs1@meta.data$Clonetype[obs1@meta.data$ClonoFreq > 1 & obs1@meta.data$ClonoFreq <= 5] <- "small"
obs1@meta.data$Clonetype[obs1@meta.data$ClonoFreq > 5 & obs1@meta.data$ClonoFreq <= 30] <- "large"
obs1@meta.data$Clonetype[obs1@meta.data$ClonoFreq > 30] <- "hyperexpanded"
obs1$Clonetype <- factor(x = obs1$Clonetype, levels = c("unique", "small", "large", "hyperexpanded")) #order clonotype variable
############################################
######FINDING Tetramer reactive TCRs##########
############################################
ind1 <- obs1$ADT.Tetramer
ind1 <- ind1[order(ind1)]
ind1 <- ind1[order(ind1)]
TetramerCount <- obs1@meta.data$ADT.Tetramer[!is.na(obs1@meta.data$ADT.Tetramer)]
TetramerCount <- TetramerCount[TetramerCount > 0]
obs1@meta.data$TetramerT <- "Other"
obs1@meta.data$TetramerT[obs1@meta.data$ADT.Tetramer>2] <- "OVA.TCR"
df1 <- data.frame(obs1$cdr3_aa2, obs1$TetramerT,obs1$ClonoFreq,obs1$Tissue)
df1$aa3.OVA <- paste0(obs1$cdr3_aa2,".",obs1$TetramerT)
df1 <- na.omit(df1)
df1l <- df1[df1$obs1.Tissue=="Liver",]
df2 <- df1l %>% group_by(aa3.OVA) %>% mutate(count = n()) %>% ungroup() %>% as.data.frame()
df3 <- df2 %>% select(obs1.cdr3_aa2,obs1.TetramerT,obs1.ClonoFreq,count) %>% unique() %>%
pivot_wider(names_from = obs1.TetramerT, values_from = count) %>% as.data.frame()
df3[is.na(df3)] <- 0
df3$Tumor <- df3[,2]-df3[,3]-df3[,4]
OVA.TCRs <- df3$obs1.cdr3_aa2[df3[,4]/df3[,3]>1]
df1 <- data.frame(obs1$cdr3_aa2, obs1$TetramerT,obs1$ClonoFreq,obs1$Tissue,obs1$Tube)
df1$aa3.Tube <- paste0(obs1$cdr3_aa2,".",obs1$TetramerT,".",obs1$Tube)
df1 <- na.omit(df1)
df1l <- df1[df1$obs1.Tissue=="Liver",]
df2 <- df1l %>% group_by(aa3.Tube) %>% mutate(count = n()) %>% ungroup() %>% as.data.frame()
df3T <- df2 %>% select(obs1.cdr3_aa2,obs1.TetramerT,obs1.Tube,obs1.ClonoFreq,count) %>% unique() %>%
pivot_wider(names_from = obs1.TetramerT, values_from = count) %>% as.data.frame()
df3T[is.na(df3T)] <- 0
######Indicating Tetramer OVA in obs1######
obs1$OVA.reactive <- ifelse(obs1$cdr3_aa2 %in% OVA.TCRs, "OVA", "Other")
obs1@meta.data$TetramerTS <- paste0(obs1@meta.data$OVA.reactive,".",obs1@meta.data$NewlablesS)
######TISSUE SHARED CLONOTYPES########
#Identify cells with the same Cdr3aa sequence in both tissues
df1<-data.frame(obs1@meta.data$cdr3_aa2,obs1@meta.data$hash.ID,obs1@meta.data$RNA_Group, obs1@meta.data$Tissue)
df2<-na.omit(df1)
df2$cdr3_mouse<-paste0(df2$obs1.meta.data.cdr3_aa2,".",df2$obs1.meta.data.hash.ID,".",df2$obs1.meta.data.RNA_Group)
shared_cdr3aa <- intersect(df2$cdr3_mouse[df2$obs1.meta.data.Tissue == "Liver"], df2$cdr3_mouse[df2$obs1.meta.data.Tissue == "Tumor"])
obs1$cdr3_mouse<-paste0(obs1$cdr3_aa2,".",obs1$hash.ID,".",obs1$RNA_Group)
obs1$Shared <- ifelse(obs1$cdr3_mouse %in% shared_cdr3aa, "Shared", "Unique")
# Create variables share tissue and shared OVA
obs1@meta.data$Sharedtissue<-paste0(obs1@meta.data$Tissue,".",obs1@meta.data$Shared)
obs1@meta.data$SharedOVA<-paste0(obs1@meta.data$Shared,".",obs1@meta.data$OVA.reactive)
####Alluvial plot#####
######Figure S3B-allOVA the same color########
df1 <- obs1@meta.data
df1 <- df1[df1$NewlablesG=="CD8 T cells",]
#df2<- na.omit(df1[,c(13,38,39,49)])
df2<- na.omit(df1[,c("cdr3_aa2","RNA_Group","Tissue","OVA.reactive")])
for (i in unique(df1$RNA_Group)){
# i <- unique(df1$RNA_Group)[1]
df3 <- df2[df2$RNA_Group==i&df2$Tissue=="Liver",]
df4 <- df2[df2$RNA_Group==i&df2$Tissue=="Tumor",]
df3 <- df3 %>% group_by(cdr3_aa2) %>% mutate(ClonoFreq = n()) %>% unique() %>% ungroup() %>% as.data.frame()
df4 <- df4 %>% group_by(cdr3_aa2) %>% mutate(ClonoFreq = n()) %>% unique() %>% ungroup() %>% as.data.frame()
df3 <- df3[df3$ClonoFreq>2,]
df4 <- df4[df4$ClonoFreq>2,]
df3$ClonoFreq.norm <- sapply(df3$ClonoFreq,function(x){100*x/sum(df3$ClonoFreq)})
df4$ClonoFreq.norm <- sapply(df4$ClonoFreq,function(x){100*x/sum(df4$ClonoFreq)})
df6 <- rbind(df3,df4)
df6 <- df6[order(df6$cdr3_aa2),]
df6$col1[!duplicated(df6$cdr3_aa2)] <- "#CCCCCC"
Dtests <- duplicated(df6$cdr3_aa2)|duplicated(df6$cdr3_aa2, fromLast=T)
df6$col1 [df6$OVA.reactive=="OVA"] <- "#3399FF" #"#3399FF"
df6$col1 [df6$OVA.reactive=="Other"&Dtests] <- "#CC6600"
plotA <- ggplot(data = df6,
aes(x=Tissue, y=ClonoFreq.norm, alluvium=cdr3_aa2, stratum=cdr3_aa2)) +
geom_flow(aes(fill=col1), decreasing=F)+
geom_stratum(aes(fill=col1, size=0.2), decreasing=F) +
scale_fill_manual(values=setNames(unique(df6$col), unique(df6$col)))+
theme_bw() + theme(legend.position="none")+
scale_size_identity()+
ggtitle(i)
nameR <- paste0(i, ".plot")
assign(nameR, plotA)}
pdf(paste0(pdir, "/Fluvial.Plot.18.CD8.norm.2clones.OVAblue_FigS3B.pdf"), width=4, height=4)
grid.arrange(Combo.plot,OVA.IFNa.plot,OVA.IL12.plot,OVA.plot, ncol=2, nrow=2)
dev.off()
######CD4 T cells########
####Figure 3H - normalized test CD4#######
df1 <- obs1@meta.data
df1 <- df1[df1$NewlablesG=="CD4 T cells",]
#df2<- na.omit(df1[,c(13,38,39,49)])
df2<- na.omit(df1[,c("cdr3_aa2","RNA_Group","Tissue","OVA.reactive")])
for (i in unique(df1$RNA_Group)){
# i <- unique(df1$RNA_Group)[1]
df3 <- df2[df2$RNA_Group==i&df2$Tissue=="Liver",]
df4 <- df2[df2$RNA_Group==i&df2$Tissue=="Tumor",]
df3 <- df3 %>% group_by(cdr3_aa2) %>% mutate(ClonoFreq = n()) %>% unique() %>% ungroup() %>% as.data.frame()
df4 <- df4 %>% group_by(cdr3_aa2) %>% mutate(ClonoFreq = n()) %>% unique() %>% ungroup() %>% as.data.frame()
df3 <- df3[df3$ClonoFreq>1,]
df4 <- df4[df4$ClonoFreq>1,]
df3$ClonoFreq.norm <- sapply(df3$ClonoFreq,function(x){100*x/sum(df3$ClonoFreq)})
df4$ClonoFreq.norm <- sapply(df4$ClonoFreq,function(x){100*x/sum(df4$ClonoFreq)})
df6 <- rbind(df3,df4)
df6 <- df6[order(df6$cdr3_aa2),]
df6$col1[!duplicated(df6$cdr3_aa2)] <- "#CCCCCC"
Dtests <- duplicated(df6$cdr3_aa2)|duplicated(df6$cdr3_aa2, fromLast=T)
df6$col1 [df6$OVA.reactive=="OVA"&Dtests] <- "#CC6600" #"#3399FF"
df6$col1 [df6$OVA.reactive=="Other"&Dtests] <- "#CC6600"
plotA <- ggplot(data = df6,
aes(x = Tissue, y=ClonoFreq.norm, alluvium = cdr3_aa2,stratum=cdr3_aa2)) +
geom_flow(aes(fill = col1),decreasing = F)+
geom_stratum(aes(fill = col1,size=0.2),decreasing = F) +
scale_fill_manual(values = setNames(unique(df6$col), unique(df6$col)))+
theme_bw() + theme(legend.position = "none")+
scale_size_identity()+
ggtitle(i)
nameR <- paste0(i,".plot")
assign(nameR,plotA)}
pdf(paste0(pdir, "/Fluvial.Plot.18.CD4.norm.2clones_Fig3H.pdf"), width=4, height=4)
grid.arrange(Combo.plot,OVA.IFNa.plot,OVA.IL12.plot,OVA.plot, ncol=2, nrow=2)
dev.off()
library(Seurat)
library(openxlsx)
library(readxl)
library(tidyr)
library("RColorBrewer")
library(dplyr)
library(reshape2)
library(fgsea)
library(matrixStats)
library(fmsb)
cluster_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM"
wdir <- paste0(cluster_dir, "/Result3_MNOT")
ddir <- paste0(cluster_dir, "/results3/Liver_Tumor_APCs/")
pdir <- paste0(cluster_dir, "/Result3_MNOT/Plots")
# Custom functions
source(file = "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/last_scripts_MNOT/TAD018/plot_stellare_function.R")
# Load RDS
obs1 <- readRDS(paste0(ddir, "Liver_Tumor_APCs_final.rds"))
########################
#####Annotations#######
########################
obs1@meta.data$Tissue <- obs1@meta.data$PROP.Source
obs1@meta.data$RNA_Group <- obs1@meta.data$PROP.Condition
obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "TA_only"] <- "TA33"
obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "TA_combo"] <- "TA33.Combo"
obs1@meta.data$RNA_Group <- factor(x = obs1@meta.data$RNA_Group, levels = c("TA33","TA33.Combo"))
# Tissue Sample assign
obs1$Tube <- paste0(obs1$hash.ID,obs1$RNA_Group)
obs1@meta.data$Orig<-paste0(obs1@meta.data$Tissue, ".", obs1@meta.data$RNA_Group)
table(obs1@meta.data$Tissue,obs1@meta.data$RNA_Group)
table(obs1@meta.data$Orig)
obs1@meta.data$allData <- "all"
#############Analysis APCs###################
features1 = c("H2-Q7", "H2-T22", "H2-Q4", "H2-K1", "H2-T23", "H2-M3", "H2-D1","Tap1","Tap2", #MHCI
"H2-Eb1", "H2-Ab1", "H2-Aa", "H2-DMb1", "H2-DMa", "Ciita", #MHCII
"Ifi44", "Oasl1","Irf7","Isg15", "Isg20","Oas1a","Oas1g", #IFNa
"Ccl5", "Upp1", "Slamf7", "Cxcl9", "Gbp4","Cd74", #IFNg
"Vav2","Tgfb1","Il10","Ccl24","Mmp8","Tmem176b","Trem2","Fn1") #protumor
#####Figure 6A##############
pdf(paste0(pdir, "/liver.apc.19.LandT_Fig6A.pdf"), width=14, height=3)
plot.a <- PlotStellare(obs1, features1=features1, valueG="allData", i="all", group.by0="Orig", controGroup="Liver.TA33")
print(plot.a)
par(mfrow=c(1,2))
dev.off()
#######################GSEA#######################################################################
#######################Figure S6B - gsea on Liver and tumor APCs##################################
# Load signature
#miDB_sig4.MLS <- readRDS("C:/Users/notaro.marco/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/05-DGE-Tumor/miDB_sig4.MLS.rds")
miDB_sig4.MLS <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/reference/miDB_sig4.MLS.rds")
sub.obj<-subset(obs1, subset= NewlablesM == "KCs"|
NewlablesM == "Macrophages"|
NewlablesM == "Monocytes")
sub.obj <- SetIdent(sub.obj, value = "Orig")
obs1.dge <- FindMarkers(sub.obj, ident.1 = "Liver.TA33.Combo", ident.2 = "Liver.TA33", min.pct=0.01, logfc.threshold=0, min.cells.feature=1, min.cells.group=1)
obs2.dge <- FindMarkers(sub.obj, ident.1 = "Tumor.TA33.Combo", ident.2 = "Tumor.TA33", min.pct=0.01, logfc.threshold=0, min.cells.feature=1, min.cells.group=1)
obs1.ranks <- obs1.dge$avg_log2FC
obs2.ranks <- obs2.dge$avg_log2FC
names(obs2.ranks) <- rownames(obs2.dge)
names(obs1.ranks) <- rownames(obs1.dge)
res.combo.liver<-fgsea(pathways = miDB_sig4.MLS, stats = obs1.ranks, minSize=0, maxSize=500, eps=0) # nPermSimple=50000
res.combo.tumor<-fgsea(pathways = miDB_sig4.MLS, stats = obs2.ranks, minSize=0, maxSize=500, eps=0) # PermSimple=50000
# Select columns of interest
res.combo.liver2<-res.combo.liver[,c(1,3,6,8)]
res.combo.tumor2<-res.combo.tumor[,c(1,3,6,8)]
# Merge in a single file and save xlsx
merge1 <- merge(x=res.combo.liver2, y=res.combo.tumor2, by="pathway", suffixes = c("-liver", "-tumor"))
write.xlsx(merge1, paste0(pdir, "/GSEA_LandT_KC-mono-macs.xlsx"), asTable=T, rowNames=T)
##################GSEA Heatmap of selected#######################################
#rearrange data to plot heatmap
res.combo.liver$origin<-"Liver"
res.combo.tumor$origin<-"Tumor"
# res.OVA$origin<- factor(x = res.OVA$origin, levels = c("OVA.Combo","OVA.Il12","OVA.IFNa","liOVA"))
merge.results<-rbind(res.combo.liver,res.combo.tumor)
#saveRDS(merge.results, "GSEA.liverandtumormacs.nperm50000.rds")
#create variable with star for statistic
merge.results$stats <- ifelse(merge.results$padj < 0.0005, "***",
ifelse(merge.results$padj < 0.005, "**",
ifelse(merge.results$padj < 0.05, "*", "")))
# Manually selected pathways
a1<-c("IFNa_RO",
"HALLMARK_INTERFERON_GAMMA_RESPONSE",
"GOBP_DEFENSE_RESPONSE_TO_VIRUS",
"GOBP_POSITIVE_REGULATION_OF_LEUKOCYTE_CELL_CELL_ADHESION",
"GOBP_POSITIVE_REGULATION_OF_CELL_KILLING",
"GOCC_MHC_PROTEIN_COMPLEX",
"GOBP_POSITIVE_REGULATION_OF_T_CELL_MEDIATED_CYTOTOXICITY",
"GOCC_MHC_CLASS_II_PROTEIN_COMPLEX",
"GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_ENDOGENOUS_ANTIGEN",
"HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION",
"GOBP_POSITIVE_REGULATION_OF_EPITHELIAL_CELL_PROLIFERATION",
"GOBP_POSITIVE_REGULATION_OF_ENDOTHELIAL_CELL_PROLIFERATION",
"PGE2_RO",
"HALLMARK_ANGIOGENESIS",
"IL4_RO"
)
# Subset selected pathways
merge.results1 <-merge.results[merge.results$pathway %in% a1,]
# Rename pathways
merge.results1[merge.results1 == "IFNa_RO"] <- "IFNa response (Cilenti et all)"
merge.results1[merge.results1 == "HALLMARK_INTERFERON_GAMMA_RESPONSE"] <- "Hallmark_IFNg response"
merge.results1[merge.results1 == "GOBP_DEFENSE_RESPONSE_TO_VIRUS"] <- "Antiviral response"
merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_LEUKOCYTE_CELL_CELL_ADHESION"] <- "Positive regulation of cell adhesion"
merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_CELL_KILLING"] <- "Positive regulation of cell killing"
merge.results1[merge.results1 == "GOCC_MHC_PROTEIN_COMPLEX"] <- "MHC protein complex"
merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_T_CELL_MEDIATED_CYTOTOXICITY"] <- "Positive regulation of T cell cytotoxicity"
merge.results1[merge.results1 == "GOCC_MHC_CLASS_II_PROTEIN_COMPLEX"] <- "MHC-II protein complex"
merge.results1[merge.results1 == "GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_ENDOGENOUS_ANTIGEN"] <- "Antigen processing and presentation"
merge.results1[merge.results1 == "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION"] <- "Hallmark EMT"
merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_EPITHELIAL_CELL_PROLIFERATION"] <- "Positive regulation of epithelial cell proliferation"
merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_ENDOTHELIAL_CELL_PROLIFERATION"] <- "Positive regulation of endothelial cell proliferation"
merge.results1[merge.results1 == "PGE2_RO"] <- "PGE2 signature from Cilenti et al"
merge.results1[merge.results1 == "HALLMARK_ANGIOGENESIS"] <- "Angiogenesis"
merge.results1[merge.results1 == "IL4_RO"] <- "IL-4 signature from Cilenti et al"
# Order in the way we want to see
merge.results1$pathway<- factor(merge.results1$pathway, levels = c("IFNa response (Cilenti et all)",
"Hallmark_IFNg response",
"Antiviral response",
"Positive regulation of cell adhesion",
"Positive regulation of cell killing",
"Positive regulation of T cell cytotoxicity",
"Antigen processing and presentation",
"MHC protein complex",
"MHC-II protein complex",
"Hallmark EMT",
"Positive regulation of epithelial cell proliferation",
"Positive regulation of endothelial cell proliferation",
"Angiogenesis",
"IL-4 signature from Cilenti et al",
"PGE2 signature from Cilenti et al"))
# Heatmap
y<-ggplot(merge.results1, aes (x=origin, y=pathway, fill=NES)) +
geom_tile() +
scale_fill_gradientn(limits = c(-4,5.2), colours=c("Blue","White","Red"))+
geom_text(aes(label = stats), vjust=0.7,angle=0) +
labs(y='',x='')+
theme_classic()+
theme(axis.text.y = element_text(color = "Black"),
axis.text.x = element_text(angle=90, vjust=0.5, hjust=1, color="Black"),
axis.line.x=element_line(color="Black"),
axis.line.y=element_line(color="Black"))+
ggtitle("GSEA_MacroKCandmono vs OVA_Liver")
ggsave(filename = paste0(pdir, "/Liver_and_Tumor_MacroKCsMonovsOVA_FigS6B.pdf"), plot=y, width=5, height=3)
# Unbias heatmap
merge.results <- merge.results[order(merge.results$padj, decreasing = F)]
merge.results_temp <-merge.results[merge.results$padj < 0.0001,]
filter_in <- head(unique(merge.results_temp$pathway), 100)
merge.results2 <-merge.results[merge.results$pathway %in% filter_in,]
merge.results2$origin<- factor(x = merge.results2$origin, levels = c("Liver","Tumor"))
y<-ggplot(merge.results2, aes (x=origin, y=pathway, fill=NES)) +
geom_tile() +
scale_fill_distiller(palette = "RdBu") +
geom_text(aes(label = stats), vjust = 0.7,angle = 0) +
labs(y='',x='')+
theme_classic()+
theme(axis.text.y = element_text(color = "Black"), text = element_text(size = 15),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, color = "Black"),
axis.line.x=element_line(color="Black"),
axis.line.y=element_line(color="Black"))
ggtitle("GSEA_MacroKCandmono vs OVA_Liver")
ggsave(filename = paste0(pdir, "/Liver_and_Tumor_MacroKCsMonovsOVA_unbias.pdf"), plot=y, width=15, height=30) # limitsize = FALSE
##############################################################################
##################Figure S6D - RADAR PLOT######################################
##############################################################################
plotRadar <- function(i){
ploTData<- rbind(max1,min1,get(i))
id <- sub(".db","",i)
radarchart(ploTData, title = id, pcol=colors_border, pfcol=colors_in, plwd=2, plty=1, cglcol="grey", cglty=1, cglwd=0.8)
} #function to plot radarplot
####Create label to cluster dc and granulocytes together
obs1@meta.data$Shortlables<-obs1@meta.data$NewlablesM
#DCs<-c("CD8 cDC1","moDCs", "Ccr7 DCs","pDCs","pre DCs")
#obs1@meta.data$Shortlables[obs1@meta.data$NewlablesM%in%DCs] <- "DCs"
#granu<-c("Neutrophils","Basophils")
#obs1@meta.data$Shortlables[obs1@meta.data$NewlablesM%in%granu] <- "Granulocytes"
obs1$Shortlables <- paste0(obs1$Shortlables,".",obs1$Tissue)
# AddModuleScore
MHCI_genes<- list(c("H2-Q7", "H2-T22", "H2-Q4", "H2-K1", "H2-T23", "H2-M3", "H2-D1","Tap1","Tap2"))
MHCII_genes<- list( c("H2-Eb1", "H2-Ab1", "H2-Aa", "H2-DMb1", "H2-DMa", "Ciita"))
Protumoral_genes<- list(c("Vav2","Tgfb1","Il10","Ccl24","Mmp8","Tmem176b","Trem2","Fn1") )
IFNa_genes<- list(c("Ifi44", "Oasl1","Irf7","Isg15", "Isg20","Oas1a","Oas1g") )
IFNg_genes<-list(c("Ccl5", "Upp1", "Slamf7", "Cxcl9", "Gbp4","Cd74"))
obs1<- AddModuleScore(obs1, features = MHCI_genes, name = "MHCI_genes")
obs1<- AddModuleScore(obs1, features = MHCII_genes, name = "MHCII_genes")
obs1<- AddModuleScore(obs1, features = Protumoral_genes, name = "Protumoral_genes")
obs1<- AddModuleScore(obs1, features = IFNa_genes, name = "IFNa_genes")
obs1<- AddModuleScore(obs1, features = IFNg_genes, name = "IFNg_genes")
# Normalize data
#we needed to find a way to normalize the module scores.
#we used a quantile normalization method.
#M1 <- obs1@meta.data[,c(37,42, 43:47)]
M1 <- obs1@meta.data[,c("Shortlables", "RNA_Group", "MHCI_genes1","MHCII_genes1","Protumoral_genes1","IFNa_genes1","IFNg_genes1")]
M1[,3:7]<-limma::normalizeQuantiles(M1[,3:7])
Groups <-unique(obs1$RNA_Group)
CellType<-unique(M1$Shortlables)
#with Median
#TAMs
#Groups
for (i in CellType){
M2<-M1[M1$Shortlables%in%i,]
M3a<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[1],3:7]),na.rm=T)
M3b<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[2],3:7]),na.rm=T)
M4<-rbind(M3a,M3b)
rownames(M4)<-Groups
colnames(M4)<-colnames(M2[,3:7])
x<-as.data.frame(M4)
assign(paste0(i,".db"),x)}
lsCells <- grep(".db",ls(),value=T)
MinMax<-lapply(lsCells,get)
max1 <- rep(do.call("max", MinMax),5)
min1 <- rep(do.call("min", MinMax),5)
colors_border=c(rgb(0,0.8,0,0.9), rgb(0,0,0,0.9))
colors_in=c(rgb(0,0.3,0,0.), rgb(0,0,0.0,0.1))
pdf(paste0(pdir, "/Radar_plot_liver_and_tumor_APCs_FigS6D_new2.pdf"), width=15, height=12)
par(mfrow=c(4,5))
for (i in 1:length(unique(obs1$Shortlables))){
plotRadar(lsCells[i])
}
dev.off()
#######################Plot UMAP################
liver.apcs <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM/results3/Liver_APCs/Liver_APCs_final.rds")
tumor.apcs <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM/results3/Tumor_APCs/Tumor_APCs_final.rds")
liver.apcs@meta.data$Tissue <- liver.apcs@meta.data$PROP.Source
liver.apcs@meta.data$RNA_Group <- liver.apcs@meta.data$PROP.Condition
liver.apcs@meta.data$RNA_Group[liver.apcs@meta.data$RNA_Group == "TA_only"] <- "TA33"
liver.apcs@meta.data$RNA_Group[liver.apcs@meta.data$RNA_Group == "TA_combo"] <- "TA33.Combo"
liver.apcs@meta.data$RNA_Group <- factor(x = liver.apcs@meta.data$RNA_Group, levels = c("TA33","TA33.Combo"))
liver.apcs$Tube <- paste0(liver.apcs$hash.ID,liver.apcs$RNA_Group)
tumor.apcs@meta.data$Tissue <- tumor.apcs@meta.data$PROP.Source
tumor.apcs@meta.data$RNA_Group <- tumor.apcs@meta.data$PROP.Condition
tumor.apcs@meta.data$RNA_Group[tumor.apcs@meta.data$RNA_Group == "TA_only"] <- "TA33"
tumor.apcs@meta.data$RNA_Group[tumor.apcs@meta.data$RNA_Group == "TA_combo"] <- "TA33.Combo"
tumor.apcs@meta.data$RNA_Group <- factor(x = tumor.apcs@meta.data$RNA_Group, levels = c("TA33","TA33.Combo"))
tumor.apcs$Tube <- paste0(tumor.apcs$hash.ID,tumor.apcs$RNA_Group)
# Save plot
pdf(paste0(pdir, "/liver_apcs_umap_FigS11A-1.pdf"), width=7, height=6) # Figure S11A-1
DimPlot(liver.apcs, reduction = "umap.harmony.orig.ident",group.by = "NewlablesM_clu", pt.size = 0.5, label = T, label.size = 3, repel = T)
dev.off()
pdf(paste0(pdir, "/tumor_apcs_umap_FigS11A-2.pdf"), width=7, height=6) # Figure S11A-2
DimPlot(tumor.apcs, reduction = "umap.harmony.orig.ident",group.by = "NewlablesM_clu", pt.size = 0.5, label = T, label.size = 3, repel = T)
dev.off()
# Export DimPlot as excel
var_list <- c("UMAPh_1", "UMAPh_2","Tissue","orig.ident","Sample","RNA_Group","Tube","RNA_snn_h.orig.ident_res.1.2","NewlablesM_clu")
liver.apcs_df <- FetchData(liver.apcs, vars = var_list)
liver.apcs_df$cell_ID <- rownames(liver.apcs_df)
write.xlsx(liver.apcs_df, file=paste0(pdir, "/liver_apcs_umap_FigS11A-1.xlsx"), overwrite=T)
tumor.apcs_df <- FetchData(tumor.apcs, vars = var_list)
tumor.apcs_df$cell_ID <- rownames(tumor.apcs_df)
write.xlsx(tumor.apcs_df, file=paste0(pdir, "/tumor_apcs_umap_FigS11A-2.xlsx"), overwrite=T)
#################################ENDS here##############################
\ No newline at end of file
This diff is collapsed.
library(SingleCellExperiment)
library(dplyr)
library(ggplot2)
library(multinichenetr)
#####RELATIVE TO FIGURE 6H#########
#############Prepare files
wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM/results3/Result3_MNOT"
setwd(wdir)
all.labels <- readRDS("all.labels.rds")
#####open LIVER Obj
####Prepare files
username <- "C:/Users/notaro.marco/"
username <- "/Users/Squadrito/"
ddir<- paste0(username, "Dropbox (HSR Global)/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM/results2cond/Tumor")
setwd(ddir)
obs <- readRDS("Tumor_final.rds")
obs@meta.data$NewlablesM2 <- all.labels[rownames(obs@meta.data)]
obs$sample_id <- paste0(obs$hash.ID, obs$PROP.Condition)
obs@meta.data$NewlablesG <- obs@meta.data$NewlablesM2
obs@meta.data$NewlablesG[grep("CD4",obs@meta.data$NewlablesM2) ] <- "CD4_Tcells"
obs@meta.data$NewlablesG[grep("CD8",obs@meta.data$NewlablesM2)] <- "CD8_Tcells"
obs@meta.data$NewlablesG[grep("DCs",obs@meta.data$NewlablesM2) ] <- "DCs"
###subset
sub.obj<-subset(obs,
subset= NewlablesG == "Macrophages"|
NewlablesG == "DCs"|
NewlablesG == "CD4_Tcells"|
NewlablesG == "CD8_Tcells")
#sub.obj2 <- subset(sub.obj,cells = sample(1:nrow(sub.obj@meta.data),500))
sub.obj2 <- sub.obj
nrow(sub.obj@meta.data)
###Multinichnet
organism = "mouse"
if(organism == "human"){
lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds"))
lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) %>% mutate(ligand = make.names(ligand), receptor = make.names(receptor))
ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds"))
colnames(ligand_target_matrix) = colnames(ligand_target_matrix) %>% make.names()
rownames(ligand_target_matrix) = rownames(ligand_target_matrix) %>% make.names()
} else if(organism == "mouse"){
lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds"))
lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) %>% mutate(ligand = make.names(ligand), receptor = make.names(receptor))
ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds"))
colnames(ligand_target_matrix) = colnames(ligand_target_matrix) %>% make.names()
rownames(ligand_target_matrix) = rownames(ligand_target_matrix) %>% make.names()
}
obs1 <- Seurat::as.SingleCellExperiment(sub.obj2, assay = "RNA")
obs1 = alias_to_symbol_SCE(obs1, "mouse") %>% makenames_SCE()
celltype_id = "NewlablesG"
group_id = "PROP.Condition"
sample_id = "sample_id"
covariates = NA
batches = NA
senders_oi = SummarizedExperiment::colData(obs1)[,celltype_id] %>% unique()
receivers_oi = SummarizedExperiment::colData(obs1)[,celltype_id] %>% unique()
min_cells = 10
abundance_expression_info = get_abundance_expression_info(sce = obs1,
sample_id = sample_id, group_id = group_id, celltype_id = celltype_id,
min_cells = min_cells, senders_oi = senders_oi,
receivers_oi = receivers_oi, lr_network = lr_network, batches = batches)
abundance_expression_info$abund_plot_sample
contrasts_oi = c("'TA_combo-TA_only','TA_only-TA_combo'")
contrast_tbl = tibble(contrast =
c("TA_combo-TA_only","TA_only-TA_combo"),
group = c("TA_combo","TA_only"))
DE_info = get_DE_info(sce = obs1, sample_id = sample_id,
group_id = group_id, celltype_id = celltype_id,
batches = batches, covariates = covariates,
contrasts_oi = contrasts_oi, min_cells = min_cells)
DE_info$celltype_de$de_output_tidy %>% arrange(p_adj) %>% head()
celltype_de = DE_info$celltype_de$de_output_tidy
sender_receiver_de = combine_sender_receiver_de(
sender_de = celltype_de,
receiver_de = celltype_de,
senders_oi = senders_oi,
receivers_oi = receivers_oi,
lr_network = lr_network)
sender_receiver_de %>% head(20)
logFC_threshold = 0.50
p_val_threshold = 0.05
fraction_cutoff = 0.05
p_val_adj = FALSE
top_n_target = 250
verbose = TRUE
#cores_system = 8
#n.cores = min(cores_system, sender_receiver_de$receiver %>% unique() %>% length()) # use one core per receiver cell type
#takes time
ligand_activities_targets_DEgenes = suppressMessages(suppressWarnings(get_ligand_activities_targets_DEgenes(
receiver_de = celltype_de,
receivers_oi = receivers_oi,
ligand_target_matrix = ligand_target_matrix,
logFC_threshold = logFC_threshold,
p_val_threshold = p_val_threshold,
p_val_adj = p_val_adj,
top_n_target = top_n_target,
verbose = verbose,
n.cores = 1
)))
#prioritazing
prioritizing_weights_DE = c("de_ligand" = 1,
"de_receptor" = 1)
prioritizing_weights_activity = c("activity_scaled" = 2)
prioritizing_weights_expression_specificity = c("exprs_ligand" = 2,
"exprs_receptor" = 2)
prioritizing_weights_expression_sufficiency = c("frac_exprs_ligand_receptor" = 1)
prioritizing_weights_relative_abundance = c( "abund_sender" = 0,
"abund_receiver" = 0)
prioritizing_weights = c(prioritizing_weights_DE,
prioritizing_weights_activity,
prioritizing_weights_expression_specificity,
prioritizing_weights_expression_sufficiency,
prioritizing_weights_relative_abundance)
sender_receiver_tbl = sender_receiver_de %>% dplyr::distinct(sender, receiver)
metadata_combined = SummarizedExperiment::colData(obs1) %>% tibble::as_tibble()
#grouping
if(!is.na(batches)){
grouping_tbl = metadata_combined[,c(sample_id, group_id, batches)] %>% tibble::as_tibble() %>% dplyr::distinct()
colnames(grouping_tbl) = c("sample","group",batches)
} else {
grouping_tbl = metadata_combined[,c(sample_id, group_id)] %>% tibble::as_tibble() %>% dplyr::distinct()
colnames(grouping_tbl) = c("sample","group")}
#run it
prioritization_tables = suppressMessages(generate_prioritization_tables(
sender_receiver_info = abundance_expression_info$sender_receiver_info,
sender_receiver_de = sender_receiver_de,
ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
contrast_tbl = contrast_tbl,
sender_receiver_tbl = sender_receiver_tbl,
grouping_tbl = grouping_tbl,
prioritizing_weights = prioritizing_weights,
fraction_cutoff = fraction_cutoff,
abundance_data_receiver = abundance_expression_info$abundance_data_receiver,
abundance_data_sender = abundance_expression_info$abundance_data_sender
))
prioritization_tables$group_prioritization_tbl %>% head(20)
##known info
lr_target_prior_cor = lr_target_prior_cor_inference(prioritization_tables$group_prioritization_tbl$receiver %>% unique(), abundance_expression_info, celltype_de, grouping_tbl, prioritization_tables, ligand_target_matrix, logFC_threshold = logFC_threshold, p_val_threshold = p_val_threshold, p_val_adj = p_val_adj)
##saving
multinichenet_output = list(
celltype_info = abundance_expression_info$celltype_info,
celltype_de = celltype_de,
sender_receiver_info = abundance_expression_info$sender_receiver_info,
sender_receiver_de = sender_receiver_de,
ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
prioritization_tables = prioritization_tables,
grouping_tbl = grouping_tbl,
lr_target_prior_cor = lr_target_prior_cor
)
multinichenet_output = make_lite_output(multinichenet_output)
seted(wdir)
saveRDS(multinichenet_output, paste0(wdir, "multinichenet_output.Tumor.19.rds"))
##look at top 50
prioritized_tbl_oi_all = get_top_n_lr_pairs(multinichenet_output$prioritization_tables, 50, rank_per_group = FALSE)
prioritized_tbl_oi = multinichenet_output$prioritization_tables$group_prioritization_tbl %>%
filter(id %in% prioritized_tbl_oi_all$id) %>%
distinct(id, sender, receiver, ligand, receptor, group) %>% left_join(prioritized_tbl_oi_all)
prioritized_tbl_oi$prioritization_score[is.na(prioritized_tbl_oi$prioritization_score)] = 0
senders_receivers = union(prioritized_tbl_oi$sender %>% unique(), prioritized_tbl_oi$receiver %>% unique()) %>% sort()
colors_sender = RColorBrewer::brewer.pal(n = length(senders_receivers), name = 'Spectral') %>% magrittr::set_names(senders_receivers)
colors_receiver = RColorBrewer::brewer.pal(n = length(senders_receivers), name = 'Spectral') %>% magrittr::set_names(senders_receivers)
circos_list = make_circos_group_comparison(prioritized_tbl_oi, colors_sender, colors_receiver)
##see interactions
group_oi = "TA_combo"
##plot results
prioritized_tbl_oi_M_50 = get_top_n_lr_pairs(multinichenet_output$prioritization_tables, 50, groups_oi = group_oi)
plot_oi = make_sample_lr_prod_activity_plots(multinichenet_output$prioritization_tables, prioritized_tbl_oi_M_50)
plot_oi
##see interactions
group_oi = "TA_only"
##plot results
prioritized_tbl_oi_M_50 = get_top_n_lr_pairs(multinichenet_output$prioritization_tables, 50, groups_oi = group_oi)
plot_oi = make_sample_lr_prod_activity_plots(multinichenet_output$prioritization_tables, prioritized_tbl_oi_M_50)
plot_oi
sessionInfo()
suppressPackageStartupMessages(library(Seurat))
options(Seurat.object.assay.version = "v5")
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(openxlsx))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(plyr))
wd <- "/beegfs/scratch/ric.squadrito/ric.squadrito/202312011215_VMSC10702-NotaroTAD_A019/Analysis_MM/python_scripts/circles"
setwd(wd)
args <- commandArgs(trailingOnly = TRUE)
sample <- args[1] # "M18", "M21", "M40", "M41"
#sample <- "M18"
out_dir <- paste0(wd, "/distance_table")
dir.create(out_dir, showWarnings = F, recursive = T)
#####################
# Define a function to calculate Euclidean distance
euclidean_dist <- function(x1, y1, x2, y2) {
return(sqrt((x1 - x2)^2 + (y1 - y2)^2))
}
#####################
detected_transcripts <- read.csv(file=paste0(wd, "/detected_transcripts_", sample, ".csv"), header = T, numerals = c("no.loss"))
# "global_x" and "global_y" give the position in um, x and y in pixels: https://vizgen.github.io/vizgen-postprocessing/output_data_formats/detected_transcripts_format.html
df_molecules <- detected_transcripts[c("gene", "global_x", "global_y", "cell_id")]
colnames(df_molecules) <- c("gene", "x", "y", "cell_id")
rm(detected_transcripts)
# Remove transcripts without cell_id
nrow(df_molecules)
df_molecules <- df_molecules %>% filter(! df_molecules$gene %in% grep(x = df_molecules$gene, pattern = "Blank", value = T))
nrow(df_molecules)
df_molecules <- df_molecules[df_molecules$cell_id != "-1",] # It might be possible to include transcripts outside the cells but the results appear similar
nrow(df_molecules)
# M18 transcripts: 4263642 => 4242609 => 3254954
# M21 transcripts: 3508195 => 3490821 => 2430395
# M40 transcripts: 2017821 => 2001106 => 1449746
# M41 transcripts: 4510227 => 4494444 => 3030891
# Number of genes
n_genes <- 500
# Define the distance threshold. Default d=30um
d <- 30
# SelectedGenes contains the list of genes manually selected to identify specific cell types.
# It is possible to run the analysis with all the 500 genes but it takes much more time (5-10 days).
SelectedGenes <- unique(c("S100a6", "Klf4", "Anxa2", "Cdkn2a", "Vim",
# Antigen presentation
"Ciita", "H2-DMb1", "H2-Aa", "H2-Ab1",
# Macrophages
"Clec4f", "C1qa", "Marco",
# T cell activation
"Trac", "Cd3e", "Klf2", "Sell", "Il2rb", "Stat1","Ccr5", "Il12rb1", "Cd40",
# IFN
"Irf1", "Isg15", "Ifit2", "Cxcl9","Il18r1",
# Protumoral
"Serpine1","Cebpb","Pdgfb","Lrg1","Id2", "Vegfa", "Il1r1", "Cd274", "Tgfb1")) # Cd81
df_molecules <- df_molecules[df_molecules$gene %in% SelectedGenes,]
# subset to have a smaller dataframe. This is used mostly for benchmarking and testing
#df_molecules <- df_molecules[df_molecules$gene %in% head(genes_ID, n_genes),]
# Get the unique gene ID
genes_ID <- unique(df_molecules$gene)
# Sort genes_ID
genes_ID <- sort(genes_ID, decreasing=F)
n_genes <- length(genes_ID) # 500
# Initialize an empty distance matrix
distance_matrix <- matrix(nrow = length(genes_ID), ncol = length(genes_ID), 0)
distance_matrix_norm <- matrix(nrow = length(genes_ID), ncol = length(genes_ID), 0)
colnames(distance_matrix) <- genes_ID
rownames(distance_matrix) <- genes_ID
colnames(distance_matrix_norm) <- genes_ID
rownames(distance_matrix_norm) <- genes_ID
# Loop through each gene type combination
for (i in 1:length(genes_ID)) { # 1:length(genes_ID) or 1:n_genes
print(i)
gene_A <- genes_ID[i]
# Filter transcripts for gene A
transcripts_A <- df_molecules[df_molecules$gene == gene_A,]
for (j in 1:length(genes_ID)) { # (i + 1):length(genes_ID)) or 1:n_genes
gene_B <- genes_ID[j]
# Filter transcripts for gene B
transcripts_B <- df_molecules[df_molecules$gene == gene_B,]
# Initialize counters for close and not close transcripts
close_count <- 0
# Loop through transcripts in transcripts_A
for (z in 1:nrow(transcripts_A)) {
a_x <- transcripts_A$x[z]
a_y <- transcripts_A$y[z]
# filter to retain only transcripts_B that are not in the same cell as the transcripts_A analysed. This speed up the script.
transcripts_B_temp <- transcripts_B[transcripts_B$cell_id != transcripts_A$cell_id[z],]
# If you include also transcripts outside the cells add the following lines
#if (transcripts_A$cell_id[z] != "-1") {
# transcripts_B_temp <- transcripts_B[transcripts_B$cell_id != transcripts_A$cell_id[z],]}
#else{transcripts_B_temp <- transcripts_B}
# filter to retain only transcripts_B that are close to the transcripts_A analysed. This speed up the script. It looks only in a square of l=1.1*1.1*2d.
transcripts_B_temp <- transcripts_B_temp[transcripts_B_temp$x > a_x - 1.1*d & transcripts_B_temp$x < a_x + 1.1*d,]
transcripts_B_temp <- transcripts_B_temp[transcripts_B_temp$y > a_y - 1.1*d & transcripts_B_temp$y < a_y + 1.1*d,]
is_close <- FALSE # Flag to track if a close transcript is found
if (nrow(transcripts_B_temp)==0){next} # This transcript of A is not close to any transcript of B
# Loop through transcripts in transcripts_B
for (w in 1:nrow(transcripts_B_temp)) {
b_x <- transcripts_B_temp$x[w]
b_y <- transcripts_B_temp$y[w]
# Calculate distance
distance <- euclidean_dist(a_x, a_y, b_x, b_y)
# Check if distance is less than threshold
if (distance < d) {
is_close <- TRUE
break # Exit inner loop if close transcript found
}
}
# Update counters based on the flag
if (is_close) {
close_count <- close_count + 1
}
}
# Fill the distance matrix. Note: The matrix is not perfectly symmetrical because more A molecules can be close to the same B molecule and viceversa
distance_matrix[i, j] <- close_count
# Normalize for the sum of transcripts A and B.
# Note: I multiplied close_count * 2 because each transcript-transcript interaction involves two transcripts.
distance_matrix_norm[i, j] <- round(close_count*2/((nrow(transcripts_A)+nrow(transcripts_B))),5)
}
}
# Print the distance matrix
print(distance_matrix)
print(distance_matrix_norm)
# Save the matrix
write.csv(distance_matrix, paste0(out_dir, "/", sample, "_transcripts_distance_matrix_inCell_t", d, "_g", n_genes, "_selected.csv"), row.names=T)
write.csv(distance_matrix_norm, paste0(out_dir, "/", sample, "_transcripts_distance_matrix_norm_inCell_t", d, "_g", n_genes, "_selected.csv"), row.names=T)
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(openxlsx))
suppressPackageStartupMessages(library(RColorBrewer))
wd <- "/beegfs/scratch/ric.squadrito/ric.squadrito/202312011215_VMSC10702-NotaroTAD_A019/Analysis_MM/python_scripts/circles"
setwd(wd)
# M21 and M40 are TA_Only and M18 and M41 are TA_Combo
sample <- "M18" # "M18", "M21", "M40", "M41"
# Define the distance threshold. Default d=30um
d <- 30 # 10 or 30
inCell <- "inCell_" # "inCell_" or ""
# _transcripts_distance_matrix_t30_g500.csv _transcripts_distance_matrix_inCell_t10_g500-2.csv _transcripts_distance_matrix_post_t30_g500.csv
td_matrix_norm <- read.csv(paste0(wd, "/distance_table/", sample, "_transcripts_distance_matrix_norm_", inCell, "t", d, "_g35_selected.csv"))
plot_dir <- paste0(wd, "/distance_table/selected/")
dir.create(plot_dir, showWarnings=F, recursive=T)
td_matrix_norm$X <- NULL
genes <- colnames(td_matrix_norm)
H2_genes <- genes[grep(x = genes, pattern = "H2")]
H2_genes_modified <- gsub("\\.", "-", H2_genes) # replace "." with "-"
# Change the column names of the genes that start with H2...
#colnames(td_matrix_norm[H2_genes]) <- H2_genes_modified
genes <- gsub("\\.", "-", genes) # replace "." with "-"
colnames(td_matrix_norm) <- genes
rownames(td_matrix_norm) <- genes
td_matrix_norm$X <- NULL
colnames(td_matrix_norm) <- genes
rownames(td_matrix_norm) <- genes
#Cancer cell signature from top genes
SelectedGenes <- unique(c("S100a6", "Klf4", "Anxa2", "Cdkn2a", "Vim",
#antigen presentation
"Ciita", "H2-DMb1", "H2-Aa", "H2-Ab1",
#Macrophages
"Clec4f", "C1qa", "Marco",
#T cell activation
"Trac", "Cd3e", "Klf2", "Sell", "Il2rb", "Stat1","Ccr5", "Il12rb1", "Cd40",
#IFN
"Irf1", "Isg15", "Ifit2", "Cxcl9","Il18r1",
#protumoral
"Serpine1","Cebpb","Pdgfb","Lrg1","Id2", "Vegfa", "Il1r1", "Cd274", "Tgfb1")) # Cd81
SelectedGenes_ann <- c("Cancer genes", "Cancer genes", "Cancer genes", "Cancer genes", "Cancer genes",
#antigen presentation
"MHCI", "MHCI", "MHCI", "MHCI",
#Macrophages
"Macs", "Macs","Macs",
#T cell activation
"T cell activation", "T cell activation", "T cell activation", "T cell activation", "T cell activation", "T cell activation","T cell activation", "T cell activation", "T cell activation",
#IFN
"IFN", "IFN", "IFN", "IFN","IFN",
#Protumoral
"Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral")
gene_annotation <- data.frame(annotation = SelectedGenes_ann)
row.names(gene_annotation) <- SelectedGenes
signature_levels <- c("Cancer genes", "MHCI", "Macs", "T cell activation", "IFN", "Protumoral")
gene_annotation$annotation <- factor(gene_annotation$annotation, levels = signature_levels, ordered = T)
td_matrix_norm_selected <- td_matrix_norm[SelectedGenes, SelectedGenes]
pheatmap(td_matrix_norm_selected, cluster_rows = F, cluster_cols = F, na_col = "grey", scale = "none",
filename = paste0(plot_dir, sample, "_transcripts_distance_matrix_norm_", inCell, "d", d, "_selected.pdf"), width = 15, height = 15,
fontsize=9, annotation_col = gene_annotation, annotation_row = gene_annotation)
# Compare two samples
# M21 and M40 are TA_Only and M18 and M41 are TA_Combo
sample2 <- "M21" # "M18", "M21", "M40", "M41"
td_matrix_norm_S2 <- read.csv(paste0(wd, "/distance_table/", sample2, "_transcripts_distance_matrix_norm_", inCell, "t", d, "_g35_selected.csv"))
td_matrix_norm_S2$X <- NULL
colnames(td_matrix_norm_S2) <- genes
rownames(td_matrix_norm_S2) <-genes
# Combine the two samples by subtracting the two matrices
td_matrix_norm_S1_S2 <- td_matrix_norm - td_matrix_norm_S2
td_matrix_norm_selected_S1_S2 <- td_matrix_norm_S1_S2[SelectedGenes, SelectedGenes]
# Plot the results
paletteLength <- 50
myColor <- colorRampPalette(c("blue", "white", "red"))(paletteLength)
# Use floor and ceiling to deal with even/odd length pallettelengths
MIN <- min(td_matrix_norm_selected_S1_S2[!is.na(td_matrix_norm_selected_S1_S2)])
MAX <- max(td_matrix_norm_selected_S1_S2[!is.na(td_matrix_norm_selected_S1_S2)])
myBreaks <- c(seq(MIN, 0, length.out=ceiling(paletteLength/2) + 1),
seq(MAX/paletteLength, MAX, length.out=floor(paletteLength/2)))
pheatmap(td_matrix_norm_selected_S1_S2, cluster_rows = F, cluster_cols = F, na_col = "grey", scale = "none",
filename = paste0(plot_dir, sample, "_", sample2, "_transcripts_distance_matrix_norm_", inCell, "d", d, "_selected.pdf"), width = 15, height = 15,
fontsize=9, annotation_col = gene_annotation, annotation_row = gene_annotation, color = myColor, breaks = myBreaks)
library(openxlsx)
library(dplyr)
library(readxl)
library(psych)
library(data.table)
library(GGally)
library(ggplot2)
library(ggpubr)
library(ggrepel)
#WDs
MNOT.gar<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/"
MNOT.pvac<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/pvacseq_mouse/MHC_Class_I"
MNOT.wd1<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/03-aln_GRCm38/"
#WD MLS
#MNOT.gar<-"/Users/Squadrito/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38"
#MNOT.pvac<-"/Users/Squadrito/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/pvacseq_mouse/MHC_Class_I"
#MNOT.wd1<-"/Users/Squadrito/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/03-aln_GRCm38"
#define filtering functions
filter_data_common <- function(df, a1, a2, a3, a4, a5, a6, a7, a8) {
subset(df, (Ensemble_score <= a1 | is.na(Ensemble_score)) &
(IC50.MT <= a2 | is.na(IC50.MT)) &
(DAI >= a3 | is.na(DAI)) &
(agretopicity <= a4 | is.na(agretopicity)) &
(average_vivo >= a5 | is.na(average_vivo)) &
(average_vitro >= a6 | is.na(average_vitro)) &
(dissimilarity >= a7 | is.na(dissimilarity)) &
(foreignness_score >= a8 | is.na(foreignness_score)))
}
filter_data_garnish <- function(df, a1, a3, a5, a6, a7, a8) {
subset(df, (Ensemble_score <= a1 | is.na(Ensemble_score)) &
(DAI >= a3 | is.na(DAI)) &
(average_vivo >= a5 | is.na(average_vivo)) &
(average_vitro >= a6 | is.na(average_vitro)) &
(dissimilarity >= a7 | is.na(dissimilarity)) &
(foreignness_score >= a8 | is.na(foreignness_score)))
}
filter_data_pvac <- function(df, a2, a4, a5, a6, a7, a8) {
subset(df,(IC50.MT <= a2 | is.na(IC50.MT)) &
(agretopicity <= a4 | is.na(agretopicity)) &
(average_vivo >= a5 | is.na(average_vivo)) &
(average_vitro >= a6 | is.na(average_vitro)) &
(dissimilarity >= a7 | is.na(dissimilarity)) &
(foreignness_score >= a8 | is.na(foreignness_score)))
}
##Garnish
#carica la tabella di antigengarnish
setwd(MNOT.gar)
data_garnish<- fread("ant_garnish_res_AKTPFV3only_filt_coding.tsv", sep = "\t", header = TRUE)
###pvac
setwd(MNOT.pvac)
data_pvac <- read_excel("pvac_seq.xlsx")
#Common
common.pep.pvac<-unique(data_pvac$Best.Peptide)
common.pep.garnish<-unique(data_garnish$nmer)
common.pep<-intersect(common.pep.pvac,common.pep.garnish)
data_pvac2<-subset(data_pvac,Best.Peptide%in%common.pep)
data_pvac3<-data_pvac2 %>%
group_by(Best.Peptide) %>%
slice(which.min(IC50.MT))
data_garnish2<-subset(data_garnish,nmer%in%common.pep)
data_garnish3<-data_garnish2 %>%
group_by(nmer_uuid) %>%
slice(which.min(Ensemble_score))
data.common<-merge(data_pvac3,data_garnish3, by.x = "Best.Peptide", by.y= "nmer")
#ADD expression
setwd(MNOT.wd1)
count_tpm <- read_excel("featureCounts_results_tpm.xlsx")
#average expression level to pply filtering
count_tpm$average_vitro<-apply(count_tpm[,7:9],1,function(x)mean(x))
#sum(count_tpm$average_vitro==0)
count_tpm$average_vitro[apply(count_tpm[,7:9],1,function(x)mean(x)*min(x))==0]<-0
count_tpm$average_vivo<-apply(count_tpm[,10:12],1,function(x)mean(x))
#sum(count_tpm$average_vivo==0)
count_tpm$average_vivo[apply(count_tpm[,10:12],1,function(x)mean(x)*min(x))==0]<-0
merged.exp<-merge(data.common,count_tpm[,c("Geneid","average_vivo","average_vitro")], by.x = "Gene",by.y="Geneid")
merged.df<-merged.exp #1244
merged.df[,c(22)]<-as.numeric(as.character(merged.df[,c(22)]))
merged.df[,c(24)]<-as.numeric(as.character(merged.df[,c(24)]))
merged.df$dissimilarity.x[is.na(merged.df$dissimilarity.x)] <- 0
merged.df$foreignness_score.x[is.na(merged.df$foreignness_score.x)] <- 0
merged.df$dissimilarity<-merged.df$dissimilarity.x
merged.df$foreignness_score<-merged.df$foreignness_score.x
#FILTERING common based on expr and foreigness
a1<-quantile(merged.df$Ensemble_score, 0.2) #minor 0.4
a2<-quantile(merged.df$IC50.MT, 1) #minor 0.4
a3<-quantile(merged.df$DAI, 0,na.rm=T) #higher 0.4
a4<-quantile(merged.df$agretopicity, 1) #minor 0.6
a5<-quantile(merged.df$average_vivo, 0.80) #higher 90
a6<-quantile(merged.df$average_vitro, 0.90) #higher 95
a7<-quantile(merged.df$dissimilarity, 0) #higher
a8<-quantile(merged.df$foreignness_score, 0.5) #higher
data_filter1<-filter_data_common(merged.df,a1,a2,a3,a4,a5,a6,a7,a8)
db1 <- data_filter1[,c(1,2,3,6,10,11,131,12,132,138,139,140,141)]
db1$type <- "exp-foreigness"
db1
#FILTERING common based on IC50,Agretopicity
a1<-quantile(merged.df$Ensemble_score, 0.12) #minor
a2<-quantile(merged.df$IC50.MT, 0.12) #minor
a3<-quantile(merged.df$DAI, 0.2,na.rm=T) #higher
a4<-quantile(merged.df$agretopicity, 0.1) #minor
a5<-quantile(merged.df$average_vivo, 0.50) #higher
a6<-quantile(merged.df$average_vitro, 0.60) #higher
a7<-quantile(merged.df$dissimilarity, 0) #higher
a8<-quantile(merged.df$foreignness_score, 0.0) #higher
data_filter2<-filter_data_common(merged.df,a1,a2,a3,a4,a5,a6,a7,a8)
db2 <- data_filter2[,c(1,2,3,6,10,11,131,12,132,138,139,140,141)]
db2$type <- "IC50-agretopicity"
db2
#FILTERING common based on expression and IC50
a1<-quantile(merged.df$Ensemble_score, 0.3) #minor
a2<-quantile(merged.df$IC50.MT, 0.3) #minor
a3<-quantile(merged.df$DAI, 0.4,na.rm=T) #higher
a4<-quantile(merged.df$agretopicity, 0.6) #minor
a5<-quantile(merged.df$average_vivo, 0.90) #higher
a6<-quantile(merged.df$average_vitro, 0.95) #higher
a7<-quantile(merged.df$dissimilarity, 0) #higher
a8<-quantile(merged.df$foreignness_score, 0.0) #higher
data_filter3<-filter_data_common(merged.df,a1,a2,a3,a4,a5,a6,a7,a8)
db3 <- data_filter3[,c(1,2,3,6,10,11,131,12,132,138,139,140,141)]
db3$type <- "IC50-exp"
db3
common.sel<- rbind(db1,db2,db3)
common.sel<-common.sel %>% group_by(Best.Peptide) %>%
slice(which.min(Ensemble_score))
#top scoring in garnish
#take only garnish not present in common
garnish.only<- subset(data_garnish, !(nmer %in% common.pep))
#ADD expression
garnish.only<-merge(garnish.only,count_tpm[,c("Geneid",
"average_vivo","average_vitro")], by.x = "gene",by.y="Geneid")
#rimuovi peptidi wt,più lunghi di 10aa, multipli e di frame diversi, tiene solo quelli con ensemble score piu alto
garnish.only<- subset(garnish.only, pep_type != "wt")
garnish.only<-subset(garnish.only, nmer_l<= 10 )
garnish.only<-garnish.only %>%
group_by(nmer_uuid) %>%
slice(which.min(Ensemble_score))
garnish.only <- garnish.only %>%
group_by(var_uuid) %>%
slice(which.min(Ensemble_score))
garnish.only$foreignness_score[is.na(garnish.only$foreignness_score)]<-0
#FILTERING garnish based on Enseble score
a1<-quantile(garnish.only$Ensemble_score, 0.01) #minor
a3<-quantile(garnish.only$DAI, 0.5,na.rm=T) #higher
a5<-quantile(garnish.only$average_vivo, 0.40) #higher
a6<-quantile(garnish.only$average_vitro, 0.40) #higher
a7<-quantile(garnish.only$dissimilarity, 0 ,na.rm=T ) #higher
a8<-quantile(garnish.only$foreignness_score, .8,na.rm=T ) #higher
data_filter1<-filter_data_garnish(garnish.only,a1,a3,a5,a6,a7,a8)
db1<-data_filter1[,c(1,2,9,10,12,13,57,108,109,111,113,115,116)]
db1$type <- "Escore"
#FILTERING garnish based on expression
a1<-quantile(garnish.only$Ensemble_score, 0.1) #minor
a3<-quantile(garnish.only$DAI, 0.5,na.rm=T) #higher
a5<-quantile(garnish.only$average_vivo, 0.90) #higher
a6<-quantile(garnish.only$average_vitro, 0.95) #higher
a7<-quantile(garnish.only$dissimilarity, 0 ,na.rm=T ) #higher
a8<-quantile(garnish.only$foreignness_score, .8,na.rm=T ) #higher
data_filter2<-filter_data_garnish(garnish.only,a1,a3,a5,a6,a7,a8)
db2<-data_filter2[,c(1,2,9,10,12,13,57,108,109,111,113,115,116)]
db2$type <- "expression"
garnish.sel<- rbind(db1,db2)
garnish.sel<-garnish.sel %>% group_by(nmer) %>%
slice(which.min(Ensemble_score))
#top scoring in pvac
#take only pvac not present in common
pvac.only<- subset(data_pvac, !(Best.Peptide %in% common.pep))
#ADD expression
pvac.only<-merge(pvac.only,count_tpm[,c("Geneid",
"average_vivo","average_vitro")], by.x = "Gene",by.y="Geneid")
pvac.only$nmer_l<-nchar(pvac.only$Best.Peptide)
pvac.only$foreignness_score<-as.numeric(pvac.only$foreignness_score)
pvac.only$foreignness_score[is.na(pvac.only$foreignness_score)]<-0
#FILTERING pvac based on IC50
a2<-quantile(pvac.only$IC50.MT, 0.2) #minor
a4<-quantile(pvac.only$agretopicity, 0.2) #minor
a5<-quantile(pvac.only$average_vivo, 0.2) #higher
a6<-quantile(pvac.only$average_vitro, 0.2) #higher
a7<-quantile(pvac.only$dissimilarity, 0) #higher
a8<-quantile(pvac.only$foreignness_score, 0) #higher
data_filter1<-filter_data_pvac(pvac.only,a2,a4,a5,a6,a7,a8)
db1<-data_filter1[,c(1,2,3,6,10,11,25,26,22,24)]
db1$type <- "IC50"
#FILTERING pvac based on expression
a2<-quantile(pvac.only$IC50.MT, .5) #minor
a4<-quantile(pvac.only$agretopicity, 1) #minor
a5<-quantile(pvac.only$average_vivo, 0.80) #higher
a6<-quantile(pvac.only$average_vitro, 0.90) #higher
a7<-quantile(pvac.only$dissimilarity, 0) #higher
a8<-quantile(pvac.only$foreignness_score, 0) #higher
data_filter2<-filter_data_pvac(pvac.only,a2,a4,a5,a6,a7,a8)
db2<-data_filter2[,c(1,2,3,6,10,11,25,26,22,24)]
db2$type <- "exp"
pvac.sel<- rbind(db1,db2)
pvac.sel<-pvac.sel %>%
group_by(Best.Peptide) %>%
slice(which.min(IC50.MT))
#length of nmer in original db
windows()
par(mfrow=c(3,1))
hist(garnish.only$nmer_l,xlim=c(8,12))
hist(merged.df$nmer_l,xlim=c(8,12))
hist(pvac.only$nmer_l,xlim=c(8,12))
#length of selected nmer
pvac.sel$nmer_l<-nchar(pvac.sel$Best.Peptide)
common.sel$nmer_l<-nchar(common.sel$Best.Peptide)
garnish.sel$nmer_l<-nchar(garnish.sel$nmer)
windows()
par(mfrow=c(3,1))
hist(garnish.sel$nmer_l,xlim=c(8,12))
hist(common.sel$nmer_l,xlim=c(8,12))
hist(pvac.sel$nmer_l,xlim=c(8,12))
#FINAL MERGE of selected adding origin
common.sel$origin<-"common"
garnish.sel$origin<-"garnish"
pvac.sel$origin<-"pvac"
colnames(common.sel)[1:3]<-c("gene","nmer","ID")
colnames(pvac.sel)[c(1,2)]<-c("gene","nmer")
db.All <- merge(garnish.sel,merge(common.sel,pvac.sel,all=TRUE),all=T)
# Crea un nuovo file Excel e salva il dataframe
setwd(MNOT.gar)
write.xlsx(db.All,"FinalTable.xlsx")
#########################################################################################
#####################ENDS HERE###########################################################
###########################################################################################
#plots
FINAL_LIST_PEP <- read_excel("C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/FINAL_LIST_PEP.xlsx")
windows()
par(mfrow=c(4,2))
db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$Ensemble_score),]
plot.ecdf((merged.df$Ensemble_score),main="Ensemble_score", xlab=(""))
abline(v=(FINAL_LIST_PEP$Ensemble_score),col="red")
db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$IC50.MT),]
plot.ecdf((merged.df$IC50.MT),main="IC50.MT", xlab=(""))
abline(v=(FINAL_LIST_PEP$IC50.MT),col="red")
db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$foreignness_score),]
plot.ecdf(merged.df$foreignness_score,main="foreigness_score", xlab=(""))
abline(v=FINAL_LIST_PEP$foreignness_score,col="red")
#db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$DAI),]
#plot.ecdf((merged.df$DAI+0.1),main="DAI")
#abline(v=(db.ES$DAI+0.1),col="red")
db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$agretopicity),]
plot.ecdf((merged.df$agretopicity+0.01),main="agretopicity", xlab=(""))
abline(v=(FINAL_LIST_PEP$agretopicity+0.01),col="red")
db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$average_vitro),]
plot.ecdf((merged.df$average_vitro+0.00001),main="average_vitro", xlab=(""))
abline(v=(FINAL_LIST_PEP$average_vitro+0.00001),col="red")
db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$average_vivo),]
plot.ecdf((merged.df$average_vivo+0.00001),main="average_vivo", xlab=(""))
abline(v=(FINAL_LIST_PEP$average_vivo+0.00001),col="red")
#gg plots
windows()
par(mfrow=c(2,2))
p<- ggplot(merged.df, aes(IC50.MT)) +
geom_line(stat = "ecdf")+
geom_point(stat="ecdf",size=2) +
geom_vline(xintercept=FINAL_LIST_PEP$IC50.MT, colour = "red")
p
#histos
windows()
par(mfrow=c(2,2))
hist(merged.df$IC50.MT, breaks = 100, xlab="IC50", main ="IC50")
abline(v=(FINAL_LIST_PEP$IC50.MT),col="black")
hist(log10(merged.df$average_vivo), breaks = 100, xlab="Average expression (log TPM)", main ="Log TPM")
abline(v=(log10(FINAL_LIST_PEP$average_vivo)),col="black")
hist(merged.df$agretopicity, xlab="Agretopicity", main ="Agretopicity")
abline(v=(FINAL_LIST_PEP$agretopicity),col="red")
hist(merged.df$foreignness_score, xlab="Foreigness", main ="Foreigness")
abline(v=(FINAL_LIST_PEP$foreignness_score),col="red")
#corr
windows()
par(mfrow=c(2,2))
p<-ggscatter(merged.df, x = "IC50.MT", y = "average_vivo",
cor.coef = TRUE, cor.method = "pearson",
xlab = "IC50", ylab = "log10 TPM",
color= "black", fill='grey', shape=21, size=5)+
scale_y_log10()+
scale_x_log10()+
geom_vline(xintercept=1000, colour = "red",linetype = "dashed")+
geom_hline(yintercept=10, colour = "red",linetype = "dashed")+
geom_point(data=FINAL_LIST_PEP, fill= "green",color="black", , shape=21, size=5)+
grids(linetype = "dashed")+
geom_text_repel(
data = FINAL_LIST_PEP,
aes(label = gene),
size = 5,
box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines")
)
p
windows()
par(mfrow=c(2,2))
p<-ggscatter(merged.df, x = "foreignness_score", y = "agretopicity",
cor.coef = TRUE, cor.method = "pearson",
xlab = "Foreigness", ylab = "Agretopicity",
color= "black", fill='grey', shape=21, size=5)+
scale_y_log10()+
scale_x_log10()+
geom_point(data=FINAL_LIST_PEP, fill= "green",color="black", , shape=21, size=5)+
grids(linetype = "dashed")+
geom_text_repel(
data = FINAL_LIST_PEP,
aes(label = gene),
size = 5,
box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines")
)
p
library(openxlsx)
library(dplyr)
library(readxl)
library(psych)
library(data.table)
library(tidyr)
library(stringr)
library(R.utils)
#WDs
MNOT.gar<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/"
MNOT.pvac<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/pvacseq_mouse/MHC_Class_I"
MNOT.wd1<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/03-aln_GRCm38/"
#MLS
MNOT.gar<-"/Users/Squadrito/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38"
MNOT.pvac<-"/Users/Squadrito/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/pvacseq_mouse/MHC_Class_I"
MNOT.wd1<-"/Users/Squadrito/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/03-aln_GRCm38"
mappa_completa <- c("Ala" = "A", "Arg" = "R", "Asn" = "N", "Asp" = "D", "Cys" = "C",
"Gln" = "Q", "Glu" = "E", "Gly" = "G", "His" = "H", "Ile" = "I",
"Leu" = "L", "Lys" = "K", "Met" = "M", "Phe" = "F", "Pro" = "P",
"Ser" = "S", "Thr" = "T", "Trp" = "W", "Tyr" = "Y", "Val" = "V")
##Garnish
#carica la tabella di antigengarnish
#setwd(MNOT.gar)
#data_garnish<- fread("ant_garnish_res_AKTPFV3only_filt_coding.tsv.gz", sep = "\t", header = TRUE)
#clean garnish
#data_garnish<-subset(data_garnish, effect_type == "missense_variant")
#data_garnish<- subset(data_garnish, pep_type != "wt")
#saveRDS(data_garnish,"data_garnish.rds")
data_garnish<-readRDS("data_garnish.rds")
#create protein_pos column
data_garnish <- separate(data_garnish, Protein_position_Protein_len,
into = c("Protein_pos", "Protein_length"), sep = "/")
data_garnish$Protein_pos<-as.numeric(as.character(data_garnish$Protein_pos))
#calculate distance from previous mutation in the same gene, take only mutation that are nearby in the protein (distance<10)
data_garnish2<-data_garnish[,c(50,7,60,57,67)]
data_garnish2 <- unique(data_garnish2)
double_mut <- data_garnish2[order(data_garnish2$transcript_id, data_garnish2$Protein_pos),]
double_mut$next_position1 <- c(double_mut$Protein_pos[-1],NA)
double_mut$next_gene1 <- c(double_mut$transcript_id[-1],NA)
double_mut$next_mut1 <- c(double_mut$protein_change[-1],NA)
double_mut$distance1 <- ifelse(double_mut$transcript_id == double_mut$next_gene1,
double_mut$next_position1-double_mut$Protein_pos, NA)
#genes with 3 concatenate mutations
df1<-double_mut
remove_rows<- c(-1,-2)
df2<- df1[order(df1$transcript_id, df1$Protein_pos),]
df2$next_position2 <- c(df2$Protein_pos[remove_rows],NA,NA)
df2$next_gene2 <- c(df2$transcript_id[remove_rows],NA,NA)
df2$next_mut2 <- c(df2$protein_change[remove_rows],NA,NA)
df2$distance2 <- ifelse(df2$transcript_id == df2$next_gene2,
df2$next_position2 - df2$Protein_pos, NA)
#transcripts with 4 concatenate mutations
remove_rows<- c(-1,-2,-3)
df3<- df2[order(df2$transcript_id, df2$Protein_pos),]
df3$next_position3 <- c(df2$Protein_pos[remove_rows],NA,NA,NA)
df3$next_gene3 <- c(df2$transcript_id[remove_rows],NA,NA,NA)
df3$next_mut3 <- c(df2$protein_change[remove_rows],NA,NA,NA)
df3$distance3 <- ifelse(df3$transcript_id == df3$next_gene3,
df3$next_position3 - df3$Protein_pos, NA)
#transcripts with 5 concatenate mutations
remove_rows<- c(-1,-2,-3,-4)
df4<- df3[order(df3$transcript_id, df3$Protein_pos),]
df4$next_position4 <- c(df3$Protein_pos[remove_rows],NA,NA,NA,NA)
df4$next_gene4 <- c(df3$transcript_id[remove_rows],NA,NA,NA,NA)
df4$next_mut4 <- c(df3$protein_change[remove_rows],NA,NA,NA,NA)
df4$distance4 <- ifelse(df4$transcript_id == df4$next_gene4,
df4$next_position4 - df4$Protein_pos, NA)
#Apply filters
pep01 <- df4 %>% filter(distance1 <= 10 & distance1 >0 &
(is.na(distance2)|distance2>10|
(distance2==distance1 & (is.na(distance3)|distance3>10))))
pep02 <- df4 %>% filter(distance1 == 0 & distance2 <= 10 & distance2 > 0 &
(is.na(distance3)|distance3 > 10|
(distance3 == distance2 & (is.na(distance4)|distance4 > 10))))
pep012 <- df4 %>% filter(distance1 <= 10 & distance1 >0 &
distance2 <= 10 & distance2 >0 &
distance1!=distance2 &
(is.na(distance3)|distance3>10|
(distance3==distance2 & (is.na(distance4)|distance4>10))))
pep013 <- df4 %>% filter(distance1 <= 10 & distance1 >0 &
distance1==distance2 &
distance3 <= 10 & distance3 >0 &
(is.na(distance4)|distance4>10|distance4==distance3))
pep0123 <- df4 %>% filter(distance1 <= 10 & distance1 >0 &
distance1!=distance2 &
distance2 <= 10 & distance2 >0 &
distance2!=distance3 &
distance3 <= 10 & distance3 >0 &
(is.na(distance4)|distance4>10|distance4==distance3))
#Select columns for semplicity
pep01 <- pep01[,c(1,2,3,5,4,8)]
pep02 <- pep02[,c(1,2,3,5,4,12)]
pep012 <- pep012[,c(1,2,3,5,4,8,12)]
pep013<- pep013[,c(1,2,3,5,4,8,16)]
pep0123 <- pep0123[,c(1,2,3,5,4,8,12,16)]
#crea colonne con la singola mutazione e converte in codice ad una lettera
#for
Create_db <- function(dfi,x,name.pep="pep"){
Prot_mut <- dfi[,4]
for (i in x){
aminoacido_sost <- str_extract(dfi[,i], "(?<=\\d)[A-Z][a-z]{2}")
pos_inMut <- as.numeric(as.character(str_sub(dfi[,i], 6, -4)))
aminoacido_sost <- mappa_completa[aminoacido_sost]
Prot_mut<- transform(str_replace(Prot_mut, paste0("^(.{", pos_inMut-1, "})."), paste0("\\1", aminoacido_sost)))
Prot_mut <- Prot_mut[,1]}
Pep_mut <- substr(Prot_mut, dfi[,3]-9, dfi[,3]+10)
Pep_wt <- substr(dfi[,4], dfi[,3]-9, dfi[,3]+10)
Gene_ID <- paste0(dfi[,1],name.pep)
mut0123<- data.frame(Gene_ID,Pep_wt,Pep_mut)
return(mut0123)}
pep01 <- pep01[,c(1,2,3,5,4,8)]
pep02 <- pep02[,c(1,2,3,5,4,12)]
pep012 <- pep012[,c(1,2,3,5,4,8,12)]
pep013<- pep013[,c(1,2,3,5,4,8,16)]
pep0123 <- pep0123[,c(1,2,3,5,4,8,12,16)]
FinalDB <- rbind(Create_db(pep01,5:6,".01"),
Create_db(pep02,5:6,".02"),
Create_db(pep012,5:7,".012"),
Create_db(pep013,5:7,".013"),
Create_db(pep0123,5:8,".0123"))
FinalDB <- unique(FinalDB)
write.xlsx(FinalDB,"multipleMuts_MNOT.xlsx")
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment