I added the manuscript's scripts

f69fe925 · Marco Monti · 3bcab94a · f69fe925 · f69fe925 · f69fe925
Commit f69fe925 authored Mar 11, 2025 by Marco Monti
--- a/Others/plot_stellare_function.R
+++ b/Others/plot_stellare_function.R
+library(Seurat)
+
+
+##########################################################
+####DOTPLOT WITH STATISTIC FROM LIVER####
+# Create function
+PlotStellare <- function(obs1=obs1, features1=features1, valueG="NewlablesG",
+                         i="CD8 T cells", group.by0 ="Clonetype2.Group", controGroup="SIINFEKL", noCtrlvsAll=T){
+  obs1 <- SetIdent(obs1, value = valueG)
+  pa1 <- DotPlot(obs1, features = features1, idents = i, dot.scale=15, split.by = group.by0,
+                 cols="RdBu", scale=T)+RotatedAxis()
+  df1 <- pa1$data
+  contro0<- paste0(i,"_",controGroup)
+  a1 <- sub(paste0(i,"_"),"",unique(df1$id))
+  
+  dfcombo <- NULL
+  for (ix in a1){if(ix!=controGroup){
+    m.Combo <- FindMarkers(obs1, ident.1 = ix,ident.2=controGroup, group.by = group.by0,
+                           subset.ident = i,features = features1,
+                           min.pct=0.01, logfc.threshold=0, min.cells.feature=1, min.cells.group=1)
+  }else{
+    m.Combo <- FindMarkers(obs1, ident.1 = ix,                     group.by = group.by0,
+                           subset.ident = i,features = features1,
+                           min.pct=0.01, logfc.threshold=0, min.cells.feature=1, min.cells.group=1)
+    if(noCtrlvsAll){m.Combo$p_val_adj <- 1}
+  }
+    m.Combo <- m.Combo[match(features1,rownames(m.Combo)),]
+    m.Combo$ix <- ix
+    dfcombo <- rbind(dfcombo,m.Combo)}
+  
+  df2 <- cbind(df1,dfcombo)
+  data.frame(df2$id,df2$ix)
+  df2$stars <-  ifelse(df2$p_val_adj< 0.001,"***",
+                       ifelse(df2$p_val_adj< 0.01,"**",
+                              ifelse(df2$p_val_adj< 0.05,"*","")))
+  
+  df2$id <- factor(x = df2$id, levels = paste0(i,"_",rev(a1)))
+  
+  a <- ggplot(df2, aes(y=id, x=features.plot, size=pct.exp ,color=avg.exp.scaled)) + 
+    geom_point() +
+    scale_size_area(max_size=15) +
+    scale_colour_gradient2(low="blue",mid="darkgrey",high="darkred") +  #"#CCCCFF"
+    labs(y='',x='')+
+    geom_text(aes(label=stars,vjust=0.8),col="black",size=4)+
+    theme_classic()+
+    theme(axis.text.y = element_text(color = "Black"))+
+    theme(axis.text.x = element_text(angle=45, hjust=1, color = "Black"))+
+    theme(axis.line.x=element_line(color="Black"))+
+    theme(axis.line.y=element_line(color="Black"))+
+    ggtitle(paste0(unique(obs1$Tissue),".",i," vs ",controGroup))
+  return(a)}
+
+
+plotRadar <- function(i){
+  ploTData<- rbind(max1,min1,get(i))
+  id <- sub(".db","",i)
+  radarchart(ploTData, title = id, pcol=colors_border, pfcol=colors_in, plwd=2, plty=1, cglcol="grey", cglty = 1, cglwd=0.8)
+}  #function to plot radarplot
+
+
--- a/TAD012/001_TAD012_APCs_analysis_liver.R
+++ b/TAD012/001_TAD012_APCs_analysis_liver.R
+library(Seurat)
+library(openxlsx)
+library(readxl)
+library(tidyr)
+library("RColorBrewer")
+library(dplyr)
+library(reshape2)
+library(fgsea)
+library(patchwork)
+library(presto)
+library(ggplot2)
+library(matrixStats)
+library(fmsb)
+
+cluster_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-856756828_scRNAseq_Notaro/Analysis_MM3/results2"
+
+wdir <- paste0(cluster_dir, "/Result2_MNOT")
+ldir <- paste0(cluster_dir, "/Liver_APCs/")
+pdir <- paste0(cluster_dir, "/Result2_MNOT/Plots")
+
+
+# Custom functions
+#PlotStellare <-readRDS(paste0(username,"Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/Notaro et al/Scripts/Functions/PlotStellare.rds"))
+source(file = "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/last_scripts_MNOT/TAD018/plot_stellare_function.R")
+
+#############################################################################
+##########################LIVER MACS ANALYSIS################################
+#############################################################################
+apcs.liver <- readRDS(paste0(ldir, "Liver_APCs_final.rds"))
+
+#assign groups   
+#name and order groups
+apcs.liver@meta.data$RNA_Group[apcs.liver@meta.data$PROP.Condition=="li.OVA"] <- "liOVA"
+apcs.liver@meta.data$RNA_Group[apcs.liver@meta.data$PROP.Condition=="li.OVA.Combo"] <- "OVA.Combo"
+apcs.liver@meta.data$RNA_Group[apcs.liver@meta.data$PROP.Condition=="li.Stop"] <- "SIINFEKL"
+apcs.liver@meta.data$RNA_Group[apcs.liver@meta.data$PROP.Condition=="li.Stop.Combo"] <- "SIINFEKL.Combo"
+apcs.liver@meta.data$RNA_Group <- factor(x = apcs.liver@meta.data$RNA_Group, levels = c("liOVA", "OVA.Combo","SIINFEKL", "SIINFEKL.Combo"))
+apcs.liver@meta.data$Tissue <- "Liver"
+
+
+#######Figure S4L- PLOT STELLARE################
+features1 <- c("H2-Q7", "H2-T22", "H2-Q4", "H2-K1", "H2-T23", "H2-M3", "H2-D1","Tap1","Tap2", #MHCI
+              "H2-Eb1", "H2-Ab1", "H2-Aa", "H2-DMb1", "H2-DMa", "Ciita", #MHCII
+              "Ifi44", "Oasl1","Irf7","Isg15", "Isg20","Oas1a","Oas1g", #IFNa
+              "Ccl5", "Upp1", "Slamf7", "Cxcl9", "Gbp4","Cd74", #IFNg
+              "Vav2","Tgfb1","Il10","Ccl24","Mmp8","Tmem176b","Trem2","Fn1") #protumor
+
+
+apcs.liver<-SetIdent(apcs.liver, value = "RNA_Group")
+
+pdf(paste0(ldir, "/Dotplot_stellare_APCs_FigS4L.pdf"), width=13, height=3)
+PlotStellare(obs1 = apcs.liver,features1 = features1, valueG = "NewlablesM_12_clu", i="CD8 cDC1", group.by0="RNA_Group" , noCtrlvsAll=T, controGroup="SIINFEKL")
+dev.off()
+
+#####################################################################
+############Figure S4M - RADAR PLOT MACROCLASSES####################
+#####################################################################
+apcs.liver@meta.data$Shortlables<-apcs.liver@meta.data$NewlablesM_12_clu
+#DCs<-c("CD8 cDC1","moDCs", "Ccr7 DCs","pDCs","pre DCs")
+#apcs.liver@meta.data$Shortlables[apcs.liver@meta.data$NewlablesM_12_clu%in%DCs]<- "DCs"
+#granu<-c("Neutrophils","Basophils")
+#apcs.liver@meta.data$Shortlables[apcs.liver@meta.data$NewlablesM_12_clu%in%granu]<- "Granulocytes"
+
+plotRadar <- function(i){
+  ploTData<- rbind(max1,min1,get(i))
+  id <- sub(".db","",i)
+  radarchart(ploTData, title = id, pcol=colors_border, pfcol=colors_in, plwd=2, plty=1, cglcol="grey", cglty = 1,cglwd=0.8)
+}  #function to plot radarplot
+
+##############LIVER RADARPLOT##########################
+MHCI_genes <- list(c("H2-Q7", "H2-T22", "H2-Q4", "H2-K1", "H2-T23", "H2-M3", "H2-D1","Tap1","Tap2"))
+MHCII_genes <- list( c("H2-Eb1", "H2-Ab1", "H2-Aa", "H2-DMb1", "H2-DMa", "Ciita"))
+Protumoral_genes <- list(c("Vav2","Tgfb1","Il10","Ccl24","Mmp8","Tmem176b","Trem2","Fn1"))
+IFNa_genes <- list(c("Ifi44", "Oasl1","Irf7","Isg15", "Isg20","Oas1a","Oas1g"))
+IFNg_genes <-list(c("Ccl5", "Upp1", "Slamf7", "Cxcl9", "Gbp4","Cd74"))
+
+
+apcs.liver <- AddModuleScore(apcs.liver, features = MHCI_genes, name = "MHCI_genes")
+apcs.liver <- AddModuleScore(apcs.liver, features = MHCII_genes, name = "MHCII_genes")
+apcs.liver <- AddModuleScore(apcs.liver, features = Protumoral_genes, name = "Protumoral_genes")
+apcs.liver <- AddModuleScore(apcs.liver, features = IFNa_genes, name = "IFNa_genes")
+apcs.liver <- AddModuleScore(apcs.liver, features = IFNg_genes, name = "IFNg_genes")
+
+#Normalize data
+#we needed to find a way to normalize the module scores. 
+#we used a quantile normalization method.
+
+M1<-apcs.liver@meta.data[,c("Shortlables", "RNA_Group", "MHCI_genes1","MHCII_genes1","Protumoral_genes1","IFNa_genes1","IFNg_genes1")]
+M1[,3:7]<-limma::normalizeQuantiles(M1[,3:7])
+Groups <-unique(apcs.liver$RNA_Group)
+CellType<-unique(M1$Shortlables)
+
+for (i in CellType){
+  M2<-M1[M1$Shortlables%in%i,]
+  M3a<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[1],3:7]),na.rm=T)
+  M3b<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[2],3:7]),na.rm=T)
+  M3c<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[3],3:7]),na.rm=T)
+  M3d<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[4],3:7]),na.rm=T)
+  M4<-rbind(M3a,M3b,M3c,M3d)
+  rownames(M4)<-Groups
+  colnames(M4)<-colnames(M2[,3:7])
+  x<-as.data.frame(M4)
+  assign(paste0(i,".db"),x)}
+
+lsCells <- grep(".db",ls(),value=T)
+MinMax<-lapply(lsCells,get)
+max1 <- rep(do.call("max", MinMax),5)
+min1 <- rep(do.call("min", MinMax),5)
+
+colors_border=c(rgb(0,0,1,0.9), rgb(0,0.8,0,0.9), rgb(0,0.5,0.5,0.9), rgb(0,0,0,0.9))
+colors_in=c(rgb(0,0,0.6,0.1), rgb(0,0.6,0,0.1), rgb(0,0.1,0.1,0.1), rgb(0.1,0.1,0.1,0.1))
+
+pdf(paste0(pdir, "/Radar_plot_liverAPCs_shorterlables_FigS4M_new2.pdf"), width=17, height=6)
+par(mfrow=c(2,5))
+for (i in 1:length(unique(apcs.liver$Shortlables))){
+  plotRadar(lsCells[i])
+}
+dev.off()
+
--- a/TAD012/002_TAD012_Liver_Tetramer_and_CD4_GSEA_DOTPLOT.R
+++ b/TAD012/002_TAD012_Liver_Tetramer_and_CD4_GSEA_DOTPLOT.R
--- a/TAD018/000_TAD018_liver_tumor_dot_plot_markers.R
+++ b/TAD018/000_TAD018_liver_tumor_dot_plot_markers.R
+suppressPackageStartupMessages(library(Seurat))
+suppressPackageStartupMessages(library(harmony))
+suppressPackageStartupMessages(library(openxlsx))
+suppressPackageStartupMessages(library(readxl))
+suppressPackageStartupMessages(library(tidyr))
+suppressPackageStartupMessages(library(ggplot2))
+suppressPackageStartupMessages(library(RColorBrewer))   # Color palettes for visualization
+suppressPackageStartupMessages(library(dplyr))
+
+
+##############################################################################
+################Figure S4D - Liver clusters dotplot markers###################
+
+dataset <- "18"
+Tissue <- "Liver" # "Liver" or "Tumor"
+
+wd <- paste0("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM")
+setwd(wd)
+
+# Plot dir
+plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/results3/Result3_MNOT/Plots"
+dir.create(plot_dir, showWarnings = FALSE)
+
+# Load the RDS
+obs1 <- readRDS(paste0(wd, "/results2/", Tissue, "/", Tissue, "_final.rds"))
+
+# Assign groups 
+
+obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA_IL12"] <- "OVA.IL12"
+obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA_IFNa"]  <- "OVA.IFNa"
+obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA"]  <- "OVA"
+obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="Combo3x"]  <- "Combo"
+
+#sample origin
+obs1$Tube <- paste0(obs1$RNA_Group,obs1$hash.ID)
+obs1@meta.data$RNA_Group<- factor(x = obs1@meta.data$RNA_Group, levels = c("OVA","OVA.IFNa","OVA.IL12","Combo"))
+
+obs1@meta.data$Tissue <- obs1@meta.data$PROP.Source
+
+
+# Liver
+obs1 <- SetIdent(obs1, value = "Newlables2")
+obs1@meta.data$Newlables2 <- factor(x = obs1@meta.data$Newlables2, levels = c("APCs","TandNK","B cells","Hepatocytes","Endothelial cells","Others"))
+
+obs1_subset <- subset(obs1, Newlables2 != "Others")
+
+
+# Dot Plot
+features_to_plot_manual_liver_MNOT <- unique(c("Cd86", "Itgax", "Clec9a", "Flt3", "Clec4f", "Adgre1", "Lyz2", "Marco", "Cd14", # APCs
+                                   "Cd3e", "Cd8a", "Cd4", "Nkg7", "Gzmb", "Cd3g", "Klrb1c", # TandNK
+                                   "Cd79a", "Ms4a1", "Cd19", "Ighm", "Cd22", "Ebf1", "Pax5", # B cells
+                                   "Alb", "Adh1", "Cyp3a11", "Serpina1a", "Apoa1", "Fgb", "Hamp", # Hepatocytes
+                                   "Cdh5", "Pecam1", "Kdr", "Eng", "Nos3", "Esam", "Vwf" # Endothelial cells
+))
+
+
+paletteLength <- 100
+myColor <- colorRampPalette(c("blue", "darkgrey", "darkred"))(paletteLength)
+
+
+pdf(paste0(plot_dir, "/TAD018_", Tissue, "_dotplot.markers.Newlables2_FigS4D.pdf"), width=11, height=4)
+dotplot1<-scCustomize::Clustered_DotPlot(obs1_subset,
+                                         features = features_to_plot_manual_liver_MNOT,
+                                         plot_km_elbow=F,
+                                         row_label_size=10,
+                                         flip=T, colors_use_exp = myColor,
+                                         cluster_feature=F, cluster_ident=F)
+dev.off()
+
+
+
+##############################################################################
+################Figure S4E - Tumor clusters dotplot markers###################
+
+
+dataset <- "18"
+Tissue <- "Tumor" # "Liver" or "Tumor"
+
+wd <- paste0("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A0", dataset, "/Analysis_MM")
+setwd(wd)
+
+# Plot dir
+plot_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/results3/Result3_MNOT/Plots"
+dir.create(plot_dir, showWarnings = FALSE)
+
+# Load the RDS
+obs1 <- readRDS(paste0(wd, "/results2/", Tissue, "/", Tissue, "_final.rds"))
+
+# Assign groups 
+
+obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA_IL12"] <- "OVA.IL12"
+obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA_IFNa"]  <- "OVA.IFNa"
+obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="liOVA"]  <- "OVA"
+obs1@meta.data$RNA_Group[obs1@meta.data$PROP.Condition=="Combo3x"]  <- "Combo"
+
+#sample origin
+obs1$Tube <- paste0(obs1$RNA_Group,obs1$hash.ID)
+obs1@meta.data$RNA_Group<- factor(x = obs1@meta.data$RNA_Group, levels = c("OVA","OVA.IFNa","OVA.IL12","Combo"))
+
+obs1@meta.data$Tissue <- obs1@meta.data$PROP.Source
+
+# Tumor
+obs1 <- SetIdent(obs1, value = "Newlables2")
+obs1@meta.data$Newlables2 <- factor(x = obs1@meta.data$Newlables2, levels = c("APCs","TandNK","B cells","Cancer cells","Others"))
+
+obs1_subset <- subset(obs1, Newlables2 != "Others")
+
+
+
+# Dot Plot
+features_to_plot_manual_tumor_MNOT <- unique(c("Cd86", "Itgax", "Adgre1", "Lyz2", "Cd14", "Cd68", "Fcgr1", # APCs
+                                               "Cd3e", "Cd8a", "Cd4", "Nkg7", "Gzmb", "Cd3g", # TandNK
+                                               "Cd79a", "Ms4a1", "Cd19", "Ighm", "Cd22", "Ebf1", "Pax5", # B cells
+                                               "Cd34", "Serpinf1", "Sparc", "Mxra7", "Fstl1" # Cancer cells
+))
+
+paletteLength <- 100
+myColor <- colorRampPalette(c("blue", "darkgrey", "darkred"))(paletteLength)
+
+
+pdf(paste0(plot_dir, "/TAD018_", Tissue, "_dotplot.markers.Newlables2_FigS4E.pdf"), width=10, height=3)
+dotplot1<-scCustomize::Clustered_DotPlot(obs1_subset,
+                                         features = features_to_plot_manual_tumor_MNOT,
+                                         plot_km_elbow=F,
+                                         row_label_size=10,
+                                         flip=T, colors_use_exp = myColor,
+                                         cluster_feature=F, cluster_ident=F)
+dev.off()
+
+
+
+
--- a/TAD018/001_TAD018_liver_tumor_APCs_MLS_dotplot_stellari_radarplot.R
+++ b/TAD018/001_TAD018_liver_tumor_APCs_MLS_dotplot_stellari_radarplot.R
--- a/TAD018/002_TAD018_Liver_Tumor_TNK_dotplot_radar_GSEA.R
+++ b/TAD018/002_TAD018_Liver_Tumor_TNK_dotplot_radar_GSEA.R
--- a/TAD018/003_TAD018_Alluvial_plot.R
+++ b/TAD018/003_TAD018_Alluvial_plot.R
+#####Fluvial
+library(Seurat)
+library(openxlsx)
+library(readxl)
+library(tidyr)
+library("RColorBrewer")
+library(dplyr)
+library(reshape2)
+library(fgsea)
+library(ggalluvial)
+library("gridExtra")
+
+cluster_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/results3"
+wdir<- paste0(cluster_dir, "/Result3_MNOT")
+ddir<- paste0(cluster_dir, "/Liver_Tumor_TnK/")
+pdir<- paste0(cluster_dir, "/Result3_MNOT/Plots")
+
+# Load the RDS
+obs1 <- readRDS(paste0(ddir, "Liver_Tumor_TnK_final.rds"))
+
+obs1@meta.data$Tissue <- obs1@meta.data$PROP.Source
+obs1@meta.data$RNA_Group <- obs1@meta.data$PROP.Condition
+
+obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "liOVA"] <- "OVA"
+obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "liOVA_IL12"] <- "OVA.IL12"
+obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "liOVA_IFNa"] <- "OVA.IFNa"
+obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "Combo3x"] <- "Combo"
+obs1@meta.data$RNA_Group <- factor(x = obs1@meta.data$RNA_Group, levels = c("OVA","OVA.IFNa","OVA.IL12","Combo"))
+
+
+# Sample origin
+obs1$Tube <- paste0(obs1$RNA_Group, obs1$hash.ID)
+obs1@meta.data$Orig<-paste0(obs1@meta.data$Tissue, ".", obs1@meta.data$RNA_Group)
+
+obs1@meta.data$RNA_Group <- factor(x = obs1@meta.data$RNA_Group, levels = c("OVA","OVA.IFNa","OVA.IL12","Combo"))
+
+
+
+#################Create labels for all CD8 and CD4 T cells#############
+obs1@meta.data$NewlablesG <- obs1@meta.data$NewlablesM
+obs1@meta.data$NewlablesG[grep("CD4",obs1@meta.data$NewlablesM) ] <- "CD4 T cells"
+obs1@meta.data$NewlablesG[grep("CD8",obs1@meta.data$NewlablesM)] <- "CD8 T cells"
+
+
+###############create labels CD4 and CD8 clusters without naive####################
+obs1@meta.data$NewlablesS <- obs1@meta.data$NewlablesM
+indCd4<- grepl("CD4",obs1@meta.data$NewlablesM) & !grepl("Naive",obs1@meta.data$NewlablesM)
+indCd8<- grepl("CD8",obs1@meta.data$NewlablesM) & !grepl("Naive",obs1@meta.data$NewlablesM)
+obs1@meta.data$NewlablesS[indCd4] <- "CD4 T cells"
+obs1@meta.data$NewlablesS[indCd8] <- "CD8 T cells"
+obs1@meta.data$NewlablesST <- paste0(obs1@meta.data$Tissue, ".", obs1@meta.data$NewlablesS)
+
+
+################################################
+###Calculating hyperexpanded T cells#############
+################################################
+obs1$aa3.Tube <- paste0(obs1$cdr3_aa2,".",obs1$Tube)
+obs1$aa3.Tube[is.na(obs1$cdr3_aa2)] <- NA
+obs1@meta.data$ClonoFreq <- obs1@meta.data  %>% group_by(aa3.Tube)  %>% mutate(count = n()) %>% ungroup() %>%
+  as.data.frame() %>% select (count)
+obs1@meta.data$ClonoFreq[is.na(obs1$cdr3_aa2),] <- NA
+
+obs1@meta.data$Clonetype  <- NA
+obs1@meta.data$Clonetype[obs1@meta.data$ClonoFreq==1]  <- "unique"
+obs1@meta.data$Clonetype[obs1@meta.data$ClonoFreq > 1 & obs1@meta.data$ClonoFreq <= 5]  <- "small"
+obs1@meta.data$Clonetype[obs1@meta.data$ClonoFreq > 5 & obs1@meta.data$ClonoFreq <= 30]  <- "large"
+obs1@meta.data$Clonetype[obs1@meta.data$ClonoFreq > 30]  <- "hyperexpanded"
+
+obs1$Clonetype <- factor(x = obs1$Clonetype, levels = c("unique", "small", "large", "hyperexpanded")) #order clonotype variable
+
+
+
+############################################
+######FINDING Tetramer reactive TCRs##########
+############################################
+ind1 <- obs1$ADT.Tetramer
+ind1 <- ind1[order(ind1)]
+ind1 <- ind1[order(ind1)]
+
+TetramerCount <- obs1@meta.data$ADT.Tetramer[!is.na(obs1@meta.data$ADT.Tetramer)]
+TetramerCount <- TetramerCount[TetramerCount > 0]
+
+obs1@meta.data$TetramerT <- "Other"
+obs1@meta.data$TetramerT[obs1@meta.data$ADT.Tetramer>2] <- "OVA.TCR"
+
+df1 <- data.frame(obs1$cdr3_aa2, obs1$TetramerT,obs1$ClonoFreq,obs1$Tissue)
+df1$aa3.OVA <-  paste0(obs1$cdr3_aa2,".",obs1$TetramerT)
+df1 <- na.omit(df1)
+df1l <- df1[df1$obs1.Tissue=="Liver",]
+df2 <- df1l %>% group_by(aa3.OVA) %>% mutate(count = n()) %>% ungroup() %>% as.data.frame()
+df3 <- df2 %>% select(obs1.cdr3_aa2,obs1.TetramerT,obs1.ClonoFreq,count) %>% unique() %>%
+  pivot_wider(names_from = obs1.TetramerT, values_from = count) %>% as.data.frame()
+df3[is.na(df3)] <- 0
+df3$Tumor <- df3[,2]-df3[,3]-df3[,4]
+OVA.TCRs <- df3$obs1.cdr3_aa2[df3[,4]/df3[,3]>1]
+
+df1 <- data.frame(obs1$cdr3_aa2, obs1$TetramerT,obs1$ClonoFreq,obs1$Tissue,obs1$Tube)
+df1$aa3.Tube <-  paste0(obs1$cdr3_aa2,".",obs1$TetramerT,".",obs1$Tube)
+df1 <- na.omit(df1)
+df1l <- df1[df1$obs1.Tissue=="Liver",]
+df2 <- df1l %>% group_by(aa3.Tube) %>% mutate(count = n()) %>% ungroup() %>% as.data.frame()
+df3T <- df2 %>% select(obs1.cdr3_aa2,obs1.TetramerT,obs1.Tube,obs1.ClonoFreq,count) %>% unique() %>%
+  pivot_wider(names_from = obs1.TetramerT, values_from = count) %>% as.data.frame()
+df3T[is.na(df3T)] <- 0
+
+
+######Indicating Tetramer OVA in obs1######
+obs1$OVA.reactive <- ifelse(obs1$cdr3_aa2 %in% OVA.TCRs, "OVA", "Other")
+obs1@meta.data$TetramerTS <- paste0(obs1@meta.data$OVA.reactive,".",obs1@meta.data$NewlablesS)
+
+
+######TISSUE SHARED CLONOTYPES########
+#Identify cells with the same Cdr3aa sequence in both tissues
+df1<-data.frame(obs1@meta.data$cdr3_aa2,obs1@meta.data$hash.ID,obs1@meta.data$RNA_Group, obs1@meta.data$Tissue)
+df2<-na.omit(df1)
+df2$cdr3_mouse<-paste0(df2$obs1.meta.data.cdr3_aa2,".",df2$obs1.meta.data.hash.ID,".",df2$obs1.meta.data.RNA_Group)
+shared_cdr3aa <- intersect(df2$cdr3_mouse[df2$obs1.meta.data.Tissue == "Liver"], df2$cdr3_mouse[df2$obs1.meta.data.Tissue == "Tumor"])
+obs1$cdr3_mouse<-paste0(obs1$cdr3_aa2,".",obs1$hash.ID,".",obs1$RNA_Group)
+obs1$Shared <- ifelse(obs1$cdr3_mouse %in% shared_cdr3aa, "Shared", "Unique")
+
+# Create variables share tissue and shared OVA
+obs1@meta.data$Sharedtissue<-paste0(obs1@meta.data$Tissue,".",obs1@meta.data$Shared)
+obs1@meta.data$SharedOVA<-paste0(obs1@meta.data$Shared,".",obs1@meta.data$OVA.reactive)
+
+####Alluvial plot#####
+######Figure S3B-allOVA the same color########
+df1 <- obs1@meta.data
+df1 <- df1[df1$NewlablesG=="CD8 T cells",]
+#df2<- na.omit(df1[,c(13,38,39,49)])
+df2<- na.omit(df1[,c("cdr3_aa2","RNA_Group","Tissue","OVA.reactive")])
+for (i in unique(df1$RNA_Group)){
+  # i <- unique(df1$RNA_Group)[1]
+  df3 <- df2[df2$RNA_Group==i&df2$Tissue=="Liver",]
+  df4 <- df2[df2$RNA_Group==i&df2$Tissue=="Tumor",]
+  df3 <- df3 %>% group_by(cdr3_aa2) %>% mutate(ClonoFreq = n()) %>% unique() %>% ungroup() %>% as.data.frame() 
+  df4 <- df4 %>% group_by(cdr3_aa2) %>% mutate(ClonoFreq = n()) %>% unique() %>% ungroup() %>% as.data.frame()
+  df3 <- df3[df3$ClonoFreq>2,]
+  df4 <- df4[df4$ClonoFreq>2,]
+  df3$ClonoFreq.norm <- sapply(df3$ClonoFreq,function(x){100*x/sum(df3$ClonoFreq)})
+  df4$ClonoFreq.norm <- sapply(df4$ClonoFreq,function(x){100*x/sum(df4$ClonoFreq)})
+  df6 <- rbind(df3,df4)
+  df6 <- df6[order(df6$cdr3_aa2),]
+  df6$col1[!duplicated(df6$cdr3_aa2)] <- "#CCCCCC"
+  Dtests <- duplicated(df6$cdr3_aa2)|duplicated(df6$cdr3_aa2, fromLast=T)
+  df6$col1 [df6$OVA.reactive=="OVA"] <- "#3399FF" #"#3399FF"
+  df6$col1 [df6$OVA.reactive=="Other"&Dtests] <- "#CC6600"
+  plotA <- ggplot(data = df6,
+                  aes(x=Tissue, y=ClonoFreq.norm, alluvium=cdr3_aa2, stratum=cdr3_aa2)) + 
+    geom_flow(aes(fill=col1), decreasing=F)+
+    geom_stratum(aes(fill=col1, size=0.2), decreasing=F) +
+    scale_fill_manual(values=setNames(unique(df6$col), unique(df6$col)))+
+    theme_bw() + theme(legend.position="none")+
+    scale_size_identity()+
+    ggtitle(i)
+  nameR <- paste0(i, ".plot") 
+  assign(nameR, plotA)}
+
+pdf(paste0(pdir, "/Fluvial.Plot.18.CD8.norm.2clones.OVAblue_FigS3B.pdf"), width=4, height=4)
+grid.arrange(Combo.plot,OVA.IFNa.plot,OVA.IL12.plot,OVA.plot, ncol=2, nrow=2)
+dev.off()
+
+######CD4 T cells########
+####Figure 3H - normalized test CD4#######
+df1 <- obs1@meta.data
+df1 <- df1[df1$NewlablesG=="CD4 T cells",]
+#df2<- na.omit(df1[,c(13,38,39,49)])
+df2<- na.omit(df1[,c("cdr3_aa2","RNA_Group","Tissue","OVA.reactive")])
+for (i in unique(df1$RNA_Group)){
+ # i <- unique(df1$RNA_Group)[1]
+  df3 <- df2[df2$RNA_Group==i&df2$Tissue=="Liver",]
+  df4 <- df2[df2$RNA_Group==i&df2$Tissue=="Tumor",]
+  df3 <- df3 %>%  group_by(cdr3_aa2) %>% mutate(ClonoFreq = n()) %>% unique() %>% ungroup() %>% as.data.frame() 
+  df4 <- df4 %>%  group_by(cdr3_aa2) %>% mutate(ClonoFreq = n()) %>% unique() %>% ungroup() %>% as.data.frame()
+  df3 <- df3[df3$ClonoFreq>1,]
+  df4 <- df4[df4$ClonoFreq>1,]
+  df3$ClonoFreq.norm <- sapply(df3$ClonoFreq,function(x){100*x/sum(df3$ClonoFreq)})
+  df4$ClonoFreq.norm <- sapply(df4$ClonoFreq,function(x){100*x/sum(df4$ClonoFreq)})
+  df6 <- rbind(df3,df4)
+  df6 <- df6[order(df6$cdr3_aa2),]
+  df6$col1[!duplicated(df6$cdr3_aa2)] <- "#CCCCCC"
+  Dtests <- duplicated(df6$cdr3_aa2)|duplicated(df6$cdr3_aa2, fromLast=T)
+  df6$col1 [df6$OVA.reactive=="OVA"&Dtests] <- "#CC6600" #"#3399FF"
+  df6$col1 [df6$OVA.reactive=="Other"&Dtests] <- "#CC6600"
+  plotA <- ggplot(data = df6,
+                  aes(x = Tissue, y=ClonoFreq.norm, alluvium = cdr3_aa2,stratum=cdr3_aa2)) + 
+    geom_flow(aes(fill = col1),decreasing = F)+
+    geom_stratum(aes(fill = col1,size=0.2),decreasing = F) +
+    scale_fill_manual(values = setNames(unique(df6$col), unique(df6$col)))+
+    theme_bw() + theme(legend.position = "none")+
+    scale_size_identity()+
+    ggtitle(i)
+  nameR <- paste0(i,".plot") 
+  assign(nameR,plotA)}
+
+
+pdf(paste0(pdir, "/Fluvial.Plot.18.CD4.norm.2clones_Fig3H.pdf"), width=4, height=4)
+grid.arrange(Combo.plot,OVA.IFNa.plot,OVA.IL12.plot,OVA.plot, ncol=2, nrow=2)
+dev.off()
+
+
+
+
--- a/TAD019/001_TAD019_APCs_analysis.R
+++ b/TAD019/001_TAD019_APCs_analysis.R
+library(Seurat)
+library(openxlsx)
+library(readxl)
+library(tidyr)
+library("RColorBrewer")
+library(dplyr)
+library(reshape2)
+library(fgsea)
+library(matrixStats)
+library(fmsb)
+
+cluster_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM"
+
+wdir <- paste0(cluster_dir, "/Result3_MNOT")
+ddir <- paste0(cluster_dir, "/results3/Liver_Tumor_APCs/")
+pdir <- paste0(cluster_dir, "/Result3_MNOT/Plots")
+
+# Custom functions
+source(file = "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/Analysis_MM/last_scripts_MNOT/TAD018/plot_stellare_function.R")
+
+# Load RDS
+obs1 <- readRDS(paste0(ddir, "Liver_Tumor_APCs_final.rds"))
+
+########################
+#####Annotations#######
+########################
+
+obs1@meta.data$Tissue <- obs1@meta.data$PROP.Source
+obs1@meta.data$RNA_Group <- obs1@meta.data$PROP.Condition
+
+obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "TA_only"] <- "TA33"
+obs1@meta.data$RNA_Group[obs1@meta.data$RNA_Group == "TA_combo"] <- "TA33.Combo"
+
+obs1@meta.data$RNA_Group <- factor(x = obs1@meta.data$RNA_Group, levels = c("TA33","TA33.Combo"))
+
+
+# Tissue Sample assign
+obs1$Tube <- paste0(obs1$hash.ID,obs1$RNA_Group)
+obs1@meta.data$Orig<-paste0(obs1@meta.data$Tissue, ".", obs1@meta.data$RNA_Group)
+table(obs1@meta.data$Tissue,obs1@meta.data$RNA_Group)
+table(obs1@meta.data$Orig)
+
+obs1@meta.data$allData <- "all"
+
+#############Analysis APCs###################
+features1 = c("H2-Q7", "H2-T22", "H2-Q4", "H2-K1", "H2-T23", "H2-M3", "H2-D1","Tap1","Tap2", #MHCI
+              "H2-Eb1", "H2-Ab1", "H2-Aa", "H2-DMb1", "H2-DMa", "Ciita", #MHCII
+              "Ifi44", "Oasl1","Irf7","Isg15", "Isg20","Oas1a","Oas1g", #IFNa
+              "Ccl5", "Upp1", "Slamf7", "Cxcl9", "Gbp4","Cd74", #IFNg
+              "Vav2","Tgfb1","Il10","Ccl24","Mmp8","Tmem176b","Trem2","Fn1") #protumor
+
+#####Figure 6A##############
+pdf(paste0(pdir, "/liver.apc.19.LandT_Fig6A.pdf"), width=14, height=3)
+plot.a <- PlotStellare(obs1, features1=features1, valueG="allData", i="all", group.by0="Orig", controGroup="Liver.TA33")
+print(plot.a)
+par(mfrow=c(1,2))
+dev.off()
+
+
+#######################GSEA#######################################################################
+#######################Figure S6B - gsea on Liver and tumor APCs##################################
+# Load signature
+#miDB_sig4.MLS <- readRDS("C:/Users/notaro.marco/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/05-DGE-Tumor/miDB_sig4.MLS.rds")
+miDB_sig4.MLS <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A018/reference/miDB_sig4.MLS.rds")
+
+sub.obj<-subset(obs1, subset= NewlablesM == "KCs"|
+                  NewlablesM == "Macrophages"|
+                  NewlablesM == "Monocytes")
+
+
+sub.obj <- SetIdent(sub.obj, value = "Orig")
+obs1.dge <- FindMarkers(sub.obj, ident.1 = "Liver.TA33.Combo", ident.2 = "Liver.TA33", min.pct=0.01, logfc.threshold=0, min.cells.feature=1, min.cells.group=1)
+obs2.dge <- FindMarkers(sub.obj, ident.1 = "Tumor.TA33.Combo", ident.2 = "Tumor.TA33", min.pct=0.01, logfc.threshold=0, min.cells.feature=1, min.cells.group=1)
+
+obs1.ranks <- obs1.dge$avg_log2FC
+obs2.ranks <- obs2.dge$avg_log2FC
+
+names(obs2.ranks) <- rownames(obs2.dge)
+names(obs1.ranks) <- rownames(obs1.dge)
+
+res.combo.liver<-fgsea(pathways = miDB_sig4.MLS, stats = obs1.ranks, minSize=0, maxSize=500, eps=0) # nPermSimple=50000
+res.combo.tumor<-fgsea(pathways = miDB_sig4.MLS, stats = obs2.ranks, minSize=0, maxSize=500, eps=0) # PermSimple=50000
+
+# Select columns of interest
+res.combo.liver2<-res.combo.liver[,c(1,3,6,8)]
+res.combo.tumor2<-res.combo.tumor[,c(1,3,6,8)]
+
+# Merge in a single file and save xlsx
+merge1 <- merge(x=res.combo.liver2, y=res.combo.tumor2, by="pathway", suffixes = c("-liver", "-tumor"))
+
+write.xlsx(merge1, paste0(pdir, "/GSEA_LandT_KC-mono-macs.xlsx"), asTable=T, rowNames=T)
+
+
+##################GSEA Heatmap of selected#######################################
+#rearrange data to plot heatmap
+res.combo.liver$origin<-"Liver"
+res.combo.tumor$origin<-"Tumor"
+
+
+# res.OVA$origin<- factor(x = res.OVA$origin, levels = c("OVA.Combo","OVA.Il12","OVA.IFNa","liOVA")) 
+merge.results<-rbind(res.combo.liver,res.combo.tumor)
+
+#saveRDS(merge.results, "GSEA.liverandtumormacs.nperm50000.rds")
+
+#create variable with star for statistic
+merge.results$stats <- ifelse(merge.results$padj < 0.0005, "***",
+                              ifelse(merge.results$padj < 0.005, "**",
+                                     ifelse(merge.results$padj < 0.05, "*", "")))
+
+
+# Manually selected pathways
+a1<-c("IFNa_RO",
+      "HALLMARK_INTERFERON_GAMMA_RESPONSE",
+      "GOBP_DEFENSE_RESPONSE_TO_VIRUS",
+      "GOBP_POSITIVE_REGULATION_OF_LEUKOCYTE_CELL_CELL_ADHESION",
+      "GOBP_POSITIVE_REGULATION_OF_CELL_KILLING",
+      "GOCC_MHC_PROTEIN_COMPLEX",
+      "GOBP_POSITIVE_REGULATION_OF_T_CELL_MEDIATED_CYTOTOXICITY",
+      "GOCC_MHC_CLASS_II_PROTEIN_COMPLEX",
+      "GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_ENDOGENOUS_ANTIGEN",
+      "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION",
+      "GOBP_POSITIVE_REGULATION_OF_EPITHELIAL_CELL_PROLIFERATION",
+      "GOBP_POSITIVE_REGULATION_OF_ENDOTHELIAL_CELL_PROLIFERATION",
+      "PGE2_RO",
+      "HALLMARK_ANGIOGENESIS",
+      "IL4_RO"
+      )
+
+# Subset selected pathways
+merge.results1 <-merge.results[merge.results$pathway %in% a1,]
+
+# Rename pathways
+merge.results1[merge.results1 == "IFNa_RO"] <- "IFNa response (Cilenti et all)"
+merge.results1[merge.results1 == "HALLMARK_INTERFERON_GAMMA_RESPONSE"] <- "Hallmark_IFNg response"
+merge.results1[merge.results1 == "GOBP_DEFENSE_RESPONSE_TO_VIRUS"] <- "Antiviral response"
+merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_LEUKOCYTE_CELL_CELL_ADHESION"] <- "Positive regulation of cell adhesion"
+merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_CELL_KILLING"] <- "Positive regulation of cell killing"
+merge.results1[merge.results1 == "GOCC_MHC_PROTEIN_COMPLEX"] <- "MHC protein complex"
+merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_T_CELL_MEDIATED_CYTOTOXICITY"] <- "Positive regulation of T cell cytotoxicity"
+merge.results1[merge.results1 == "GOCC_MHC_CLASS_II_PROTEIN_COMPLEX"] <- "MHC-II protein complex"
+merge.results1[merge.results1 == "GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_ENDOGENOUS_ANTIGEN"] <- "Antigen processing and presentation"
+merge.results1[merge.results1 == "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION"] <- "Hallmark EMT"
+merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_EPITHELIAL_CELL_PROLIFERATION"] <- "Positive regulation of epithelial cell proliferation"
+merge.results1[merge.results1 == "GOBP_POSITIVE_REGULATION_OF_ENDOTHELIAL_CELL_PROLIFERATION"] <- "Positive regulation of endothelial cell proliferation"
+merge.results1[merge.results1 == "PGE2_RO"] <- "PGE2 signature from Cilenti et al"
+merge.results1[merge.results1 == "HALLMARK_ANGIOGENESIS"] <- "Angiogenesis"
+merge.results1[merge.results1 == "IL4_RO"] <- "IL-4 signature from Cilenti et al"
+
+
+# Order in the way we want to see
+merge.results1$pathway<- factor(merge.results1$pathway, levels = c("IFNa response (Cilenti et all)",
+                                                                   "Hallmark_IFNg response",
+                                                                   "Antiviral response",
+                                                                   "Positive regulation of cell adhesion",
+                                                                   "Positive regulation of cell killing",
+                                                                   "Positive regulation of T cell cytotoxicity",
+                                                                   "Antigen processing and presentation",
+                                                                   "MHC protein complex",
+                                                                   "MHC-II protein complex",
+                                                                   "Hallmark EMT",
+                                                                   "Positive regulation of epithelial cell proliferation",
+                                                                   "Positive regulation of endothelial cell proliferation",
+                                                                   "Angiogenesis",
+                                                                   "IL-4 signature from Cilenti et al",
+                                                                   "PGE2 signature from Cilenti et al"))
+
+# Heatmap
+y<-ggplot(merge.results1, aes (x=origin, y=pathway, fill=NES)) + 
+  geom_tile() +
+  scale_fill_gradientn(limits = c(-4,5.2), colours=c("Blue","White","Red"))+
+  geom_text(aes(label = stats), vjust=0.7,angle=0) +
+  labs(y='',x='')+
+  theme_classic()+
+  theme(axis.text.y = element_text(color = "Black"),
+        axis.text.x = element_text(angle=90, vjust=0.5, hjust=1, color="Black"),
+        axis.line.x=element_line(color="Black"),
+        axis.line.y=element_line(color="Black"))+
+  ggtitle("GSEA_MacroKCandmono vs OVA_Liver")
+ggsave(filename = paste0(pdir, "/Liver_and_Tumor_MacroKCsMonovsOVA_FigS6B.pdf"), plot=y, width=5, height=3)
+
+# Unbias heatmap
+merge.results <- merge.results[order(merge.results$padj, decreasing = F)]
+merge.results_temp <-merge.results[merge.results$padj < 0.0001,]
+filter_in <- head(unique(merge.results_temp$pathway), 100)
+merge.results2 <-merge.results[merge.results$pathway %in% filter_in,]
+merge.results2$origin<- factor(x = merge.results2$origin, levels = c("Liver","Tumor"))
+
+y<-ggplot(merge.results2, aes (x=origin, y=pathway, fill=NES)) + 
+  geom_tile() +
+  scale_fill_distiller(palette = "RdBu") +
+  geom_text(aes(label = stats), vjust = 0.7,angle = 0) +
+  labs(y='',x='')+
+  theme_classic()+
+  theme(axis.text.y = element_text(color = "Black"), text = element_text(size = 15), 
+        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, color = "Black"), 
+        axis.line.x=element_line(color="Black"),
+        axis.line.y=element_line(color="Black"))
+ggtitle("GSEA_MacroKCandmono vs OVA_Liver")
+ggsave(filename = paste0(pdir, "/Liver_and_Tumor_MacroKCsMonovsOVA_unbias.pdf"), plot=y, width=15, height=30) # limitsize = FALSE
+
+
+##############################################################################
+##################Figure S6D - RADAR PLOT######################################
+##############################################################################
+plotRadar <- function(i){
+  ploTData<- rbind(max1,min1,get(i))
+  id <- sub(".db","",i)
+  radarchart(ploTData, title = id, pcol=colors_border, pfcol=colors_in, plwd=2, plty=1, cglcol="grey", cglty=1, cglwd=0.8)
+}  #function to plot radarplot
+
+####Create label to cluster dc and granulocytes together
+obs1@meta.data$Shortlables<-obs1@meta.data$NewlablesM
+#DCs<-c("CD8 cDC1","moDCs", "Ccr7 DCs","pDCs","pre DCs")
+#obs1@meta.data$Shortlables[obs1@meta.data$NewlablesM%in%DCs] <- "DCs"
+#granu<-c("Neutrophils","Basophils")
+#obs1@meta.data$Shortlables[obs1@meta.data$NewlablesM%in%granu] <- "Granulocytes"
+obs1$Shortlables <- paste0(obs1$Shortlables,".",obs1$Tissue)
+
+
+# AddModuleScore
+MHCI_genes<- list(c("H2-Q7", "H2-T22", "H2-Q4", "H2-K1", "H2-T23", "H2-M3", "H2-D1","Tap1","Tap2"))
+MHCII_genes<- list( c("H2-Eb1", "H2-Ab1", "H2-Aa", "H2-DMb1", "H2-DMa", "Ciita"))
+Protumoral_genes<- list(c("Vav2","Tgfb1","Il10","Ccl24","Mmp8","Tmem176b","Trem2","Fn1") )
+IFNa_genes<- list(c("Ifi44", "Oasl1","Irf7","Isg15", "Isg20","Oas1a","Oas1g") )
+IFNg_genes<-list(c("Ccl5", "Upp1", "Slamf7", "Cxcl9", "Gbp4","Cd74"))
+
+obs1<- AddModuleScore(obs1, features = MHCI_genes, name = "MHCI_genes")
+obs1<- AddModuleScore(obs1, features = MHCII_genes, name = "MHCII_genes")
+obs1<- AddModuleScore(obs1, features = Protumoral_genes, name = "Protumoral_genes")
+obs1<- AddModuleScore(obs1, features = IFNa_genes, name = "IFNa_genes")
+obs1<- AddModuleScore(obs1, features = IFNg_genes, name = "IFNg_genes")
+
+# Normalize data
+#we needed to find a way to normalize the module scores. 
+#we used a quantile normalization method.
+
+#M1 <- obs1@meta.data[,c(37,42, 43:47)]
+M1 <- obs1@meta.data[,c("Shortlables", "RNA_Group", "MHCI_genes1","MHCII_genes1","Protumoral_genes1","IFNa_genes1","IFNg_genes1")]
+M1[,3:7]<-limma::normalizeQuantiles(M1[,3:7])
+Groups <-unique(obs1$RNA_Group)
+CellType<-unique(M1$Shortlables)
+
+#with Median
+#TAMs
+#Groups 
+for (i in CellType){
+  M2<-M1[M1$Shortlables%in%i,]
+  M3a<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[1],3:7]),na.rm=T)
+  M3b<-colMedians(as.matrix(M2[M2$RNA_Group%in%Groups[2],3:7]),na.rm=T)
+  M4<-rbind(M3a,M3b)
+  rownames(M4)<-Groups
+  colnames(M4)<-colnames(M2[,3:7])
+  x<-as.data.frame(M4)
+  assign(paste0(i,".db"),x)}
+
+lsCells <- grep(".db",ls(),value=T)
+MinMax<-lapply(lsCells,get)
+max1 <- rep(do.call("max", MinMax),5)
+min1 <- rep(do.call("min", MinMax),5)
+
+colors_border=c(rgb(0,0.8,0,0.9), rgb(0,0,0,0.9))
+colors_in=c(rgb(0,0.3,0,0.), rgb(0,0,0.0,0.1))
+
+
+pdf(paste0(pdir, "/Radar_plot_liver_and_tumor_APCs_FigS6D_new2.pdf"), width=15, height=12)
+
+par(mfrow=c(4,5))
+
+for (i in 1:length(unique(obs1$Shortlables))){
+  plotRadar(lsCells[i])
+}
+dev.off()
+
+
+
+
+#######################Plot UMAP################
+
+liver.apcs <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM/results3/Liver_APCs/Liver_APCs_final.rds")
+tumor.apcs <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM/results3/Tumor_APCs/Tumor_APCs_final.rds")
+
+liver.apcs@meta.data$Tissue <- liver.apcs@meta.data$PROP.Source
+liver.apcs@meta.data$RNA_Group <- liver.apcs@meta.data$PROP.Condition
+liver.apcs@meta.data$RNA_Group[liver.apcs@meta.data$RNA_Group == "TA_only"] <- "TA33"
+liver.apcs@meta.data$RNA_Group[liver.apcs@meta.data$RNA_Group == "TA_combo"] <- "TA33.Combo"
+liver.apcs@meta.data$RNA_Group <- factor(x = liver.apcs@meta.data$RNA_Group, levels = c("TA33","TA33.Combo"))
+liver.apcs$Tube <- paste0(liver.apcs$hash.ID,liver.apcs$RNA_Group)
+
+
+tumor.apcs@meta.data$Tissue <- tumor.apcs@meta.data$PROP.Source
+tumor.apcs@meta.data$RNA_Group <- tumor.apcs@meta.data$PROP.Condition
+tumor.apcs@meta.data$RNA_Group[tumor.apcs@meta.data$RNA_Group == "TA_only"] <- "TA33"
+tumor.apcs@meta.data$RNA_Group[tumor.apcs@meta.data$RNA_Group == "TA_combo"] <- "TA33.Combo"
+tumor.apcs@meta.data$RNA_Group <- factor(x = tumor.apcs@meta.data$RNA_Group, levels = c("TA33","TA33.Combo"))
+tumor.apcs$Tube <- paste0(tumor.apcs$hash.ID,tumor.apcs$RNA_Group)
+
+
+
+# Save plot
+pdf(paste0(pdir, "/liver_apcs_umap_FigS11A-1.pdf"), width=7, height=6) # Figure S11A-1
+DimPlot(liver.apcs, reduction = "umap.harmony.orig.ident",group.by = "NewlablesM_clu", pt.size = 0.5, label = T, label.size = 3, repel = T)
+dev.off()
+
+pdf(paste0(pdir, "/tumor_apcs_umap_FigS11A-2.pdf"), width=7, height=6) # Figure S11A-2
+DimPlot(tumor.apcs, reduction = "umap.harmony.orig.ident",group.by = "NewlablesM_clu", pt.size = 0.5, label = T, label.size = 3, repel = T)
+dev.off()
+
+
+# Export DimPlot as excel
+var_list <- c("UMAPh_1", "UMAPh_2","Tissue","orig.ident","Sample","RNA_Group","Tube","RNA_snn_h.orig.ident_res.1.2","NewlablesM_clu")
+liver.apcs_df <- FetchData(liver.apcs, vars = var_list)
+liver.apcs_df$cell_ID <- rownames(liver.apcs_df)
+write.xlsx(liver.apcs_df, file=paste0(pdir, "/liver_apcs_umap_FigS11A-1.xlsx"), overwrite=T)
+
+tumor.apcs_df <- FetchData(tumor.apcs, vars = var_list)
+tumor.apcs_df$cell_ID <- rownames(tumor.apcs_df)
+write.xlsx(tumor.apcs_df, file=paste0(pdir, "/tumor_apcs_umap_FigS11A-2.xlsx"), overwrite=T)
+
+
+
+#################################ENDS here##############################
\ No newline at end of file
--- a/TAD019/002_TAD019_TandNK_analysis.R
+++ b/TAD019/002_TAD019_TandNK_analysis.R
--- a/TAD019/003_TAD019_Multinichenet_Tumor.R
+++ b/TAD019/003_TAD019_Multinichenet_Tumor.R
+library(SingleCellExperiment)
+library(dplyr)
+library(ggplot2)
+library(multinichenetr)
+
+#####RELATIVE TO FIGURE 6H#########
+
+#############Prepare files
+wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM/results3/Result3_MNOT"
+setwd(wdir)
+all.labels <- readRDS("all.labels.rds")
+
+#####open LIVER Obj
+####Prepare files
+username <- "C:/Users/notaro.marco/"
+username <- "/Users/Squadrito/"
+ddir<- paste0(username, "Dropbox (HSR Global)/90-935462466_scRNAseq_NotaroTAD_A019/Analysis_MM/results2cond/Tumor")
+setwd(ddir)
+obs <- readRDS("Tumor_final.rds")
+
+obs@meta.data$NewlablesM2 <- all.labels[rownames(obs@meta.data)]
+obs$sample_id <- paste0(obs$hash.ID, obs$PROP.Condition)
+
+
+obs@meta.data$NewlablesG <- obs@meta.data$NewlablesM2
+obs@meta.data$NewlablesG[grep("CD4",obs@meta.data$NewlablesM2) ] <- "CD4_Tcells"
+obs@meta.data$NewlablesG[grep("CD8",obs@meta.data$NewlablesM2)] <- "CD8_Tcells"
+obs@meta.data$NewlablesG[grep("DCs",obs@meta.data$NewlablesM2) ] <- "DCs"
+
+###subset
+sub.obj<-subset(obs,
+                  subset= NewlablesG == "Macrophages"|
+                  NewlablesG == "DCs"|
+                  NewlablesG == "CD4_Tcells"|
+                  NewlablesG == "CD8_Tcells")
+
+#sub.obj2 <- subset(sub.obj,cells = sample(1:nrow(sub.obj@meta.data),500))
+sub.obj2 <- sub.obj
+
+nrow(sub.obj@meta.data)
+
+###Multinichnet
+organism = "mouse"
+if(organism == "human"){
+  lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds"))
+  lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) %>% mutate(ligand = make.names(ligand), receptor = make.names(receptor))
+  ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds"))
+  colnames(ligand_target_matrix) = colnames(ligand_target_matrix) %>% make.names()
+  rownames(ligand_target_matrix) = rownames(ligand_target_matrix) %>% make.names()
+} else if(organism == "mouse"){
+  lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds"))
+  lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) %>% mutate(ligand = make.names(ligand), receptor = make.names(receptor))
+  ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds"))
+  colnames(ligand_target_matrix) = colnames(ligand_target_matrix) %>% make.names()
+  rownames(ligand_target_matrix) = rownames(ligand_target_matrix) %>% make.names()
+}
+
+
+obs1 <- Seurat::as.SingleCellExperiment(sub.obj2, assay = "RNA")
+obs1 = alias_to_symbol_SCE(obs1, "mouse") %>% makenames_SCE()
+
+
+celltype_id = "NewlablesG"
+group_id = "PROP.Condition"
+sample_id = "sample_id"
+covariates = NA
+batches = NA
+
+senders_oi = SummarizedExperiment::colData(obs1)[,celltype_id] %>% unique()
+receivers_oi = SummarizedExperiment::colData(obs1)[,celltype_id] %>% unique()
+
+min_cells = 10
+
+abundance_expression_info = get_abundance_expression_info(sce = obs1, 
+                    sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, 
+                    min_cells = min_cells, senders_oi = senders_oi, 
+                    receivers_oi = receivers_oi, lr_network = lr_network, batches = batches)
+
+abundance_expression_info$abund_plot_sample
+
+contrasts_oi = c("'TA_combo-TA_only','TA_only-TA_combo'")
+
+contrast_tbl = tibble(contrast = 
+                        c("TA_combo-TA_only","TA_only-TA_combo"), 
+                      group = c("TA_combo","TA_only"))
+
+DE_info = get_DE_info(sce = obs1, sample_id = sample_id, 
+                      group_id = group_id, celltype_id = celltype_id, 
+                      batches = batches, covariates = covariates, 
+                      contrasts_oi = contrasts_oi, min_cells = min_cells)
+
+
+DE_info$celltype_de$de_output_tidy %>% arrange(p_adj) %>% head()
+
+celltype_de = DE_info$celltype_de$de_output_tidy
+
+sender_receiver_de = combine_sender_receiver_de(
+  sender_de = celltype_de,
+  receiver_de = celltype_de,
+  senders_oi = senders_oi,
+  receivers_oi = receivers_oi,
+  lr_network = lr_network)
+
+sender_receiver_de %>% head(20)
+
+logFC_threshold = 0.50
+p_val_threshold = 0.05
+fraction_cutoff = 0.05
+
+p_val_adj = FALSE 
+
+top_n_target = 250
+
+verbose = TRUE
+#cores_system = 8
+#n.cores = min(cores_system, sender_receiver_de$receiver %>% unique() %>% length()) # use one core per receiver cell type
+
+#takes time
+
+ligand_activities_targets_DEgenes = suppressMessages(suppressWarnings(get_ligand_activities_targets_DEgenes(
+  receiver_de = celltype_de,
+  receivers_oi = receivers_oi,
+  ligand_target_matrix = ligand_target_matrix,
+  logFC_threshold = logFC_threshold,
+  p_val_threshold = p_val_threshold,
+  p_val_adj = p_val_adj,
+  top_n_target = top_n_target,
+  verbose = verbose, 
+  n.cores = 1
+)))
+
+
+#prioritazing
+prioritizing_weights_DE = c("de_ligand" = 1,
+                            "de_receptor" = 1)
+prioritizing_weights_activity = c("activity_scaled" = 2)
+prioritizing_weights_expression_specificity = c("exprs_ligand" = 2,
+                                                "exprs_receptor" = 2)
+prioritizing_weights_expression_sufficiency = c("frac_exprs_ligand_receptor" = 1)
+prioritizing_weights_relative_abundance = c( "abund_sender" = 0,
+                                             "abund_receiver" = 0)
+
+prioritizing_weights = c(prioritizing_weights_DE, 
+                         prioritizing_weights_activity, 
+                         prioritizing_weights_expression_specificity,
+                         prioritizing_weights_expression_sufficiency, 
+                         prioritizing_weights_relative_abundance)
+
+sender_receiver_tbl = sender_receiver_de %>% dplyr::distinct(sender, receiver)
+
+metadata_combined = SummarizedExperiment::colData(obs1) %>% tibble::as_tibble()
+
+#grouping
+if(!is.na(batches)){
+  grouping_tbl = metadata_combined[,c(sample_id, group_id, batches)] %>% tibble::as_tibble() %>% dplyr::distinct()
+  colnames(grouping_tbl) = c("sample","group",batches)
+} else {
+  grouping_tbl = metadata_combined[,c(sample_id, group_id)] %>% tibble::as_tibble() %>% dplyr::distinct()
+  colnames(grouping_tbl) = c("sample","group")}
+
+#run it
+prioritization_tables = suppressMessages(generate_prioritization_tables(
+  sender_receiver_info = abundance_expression_info$sender_receiver_info,
+  sender_receiver_de = sender_receiver_de,
+  ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
+  contrast_tbl = contrast_tbl,
+  sender_receiver_tbl = sender_receiver_tbl,
+  grouping_tbl = grouping_tbl,
+  prioritizing_weights = prioritizing_weights,
+  fraction_cutoff = fraction_cutoff, 
+  abundance_data_receiver = abundance_expression_info$abundance_data_receiver,
+  abundance_data_sender = abundance_expression_info$abundance_data_sender
+))
+prioritization_tables$group_prioritization_tbl %>% head(20)
+
+##known info
+lr_target_prior_cor = lr_target_prior_cor_inference(prioritization_tables$group_prioritization_tbl$receiver %>% unique(), abundance_expression_info, celltype_de, grouping_tbl, prioritization_tables, ligand_target_matrix, logFC_threshold = logFC_threshold, p_val_threshold = p_val_threshold, p_val_adj = p_val_adj)
+
+##saving
+multinichenet_output = list(
+  celltype_info = abundance_expression_info$celltype_info,
+  celltype_de = celltype_de,
+  sender_receiver_info = abundance_expression_info$sender_receiver_info,
+  sender_receiver_de =  sender_receiver_de,
+  ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
+  prioritization_tables = prioritization_tables,
+  grouping_tbl = grouping_tbl,
+  lr_target_prior_cor = lr_target_prior_cor
+) 
+multinichenet_output = make_lite_output(multinichenet_output)
+
+seted(wdir)
+saveRDS(multinichenet_output, paste0(wdir, "multinichenet_output.Tumor.19.rds"))
+  
+
+##look at top 50
+prioritized_tbl_oi_all = get_top_n_lr_pairs(multinichenet_output$prioritization_tables, 50, rank_per_group = FALSE)
+
+prioritized_tbl_oi = multinichenet_output$prioritization_tables$group_prioritization_tbl %>%
+  filter(id %in% prioritized_tbl_oi_all$id) %>%
+  distinct(id, sender, receiver, ligand, receptor, group) %>% left_join(prioritized_tbl_oi_all)
+prioritized_tbl_oi$prioritization_score[is.na(prioritized_tbl_oi$prioritization_score)] = 0
+
+senders_receivers = union(prioritized_tbl_oi$sender %>% unique(), prioritized_tbl_oi$receiver %>% unique()) %>% sort()
+
+colors_sender = RColorBrewer::brewer.pal(n = length(senders_receivers), name = 'Spectral') %>% magrittr::set_names(senders_receivers)
+colors_receiver = RColorBrewer::brewer.pal(n = length(senders_receivers), name = 'Spectral') %>% magrittr::set_names(senders_receivers)
+
+
+circos_list = make_circos_group_comparison(prioritized_tbl_oi, colors_sender, colors_receiver)
+
+##see interactions
+group_oi = "TA_combo"
+
+##plot results
+prioritized_tbl_oi_M_50 = get_top_n_lr_pairs(multinichenet_output$prioritization_tables, 50, groups_oi = group_oi)
+
+plot_oi = make_sample_lr_prod_activity_plots(multinichenet_output$prioritization_tables, prioritized_tbl_oi_M_50)
+plot_oi
+
+##see interactions
+group_oi = "TA_only"
+
+##plot results
+prioritized_tbl_oi_M_50 = get_top_n_lr_pairs(multinichenet_output$prioritization_tables, 50, groups_oi = group_oi)
+
+plot_oi = make_sample_lr_prod_activity_plots(multinichenet_output$prioritization_tables, prioritized_tbl_oi_M_50)
+plot_oi
+
+sessionInfo()
+
+
--- a/Vizgen/Vizgen_transcript_distance.R
+++ b/Vizgen/Vizgen_transcript_distance.R
+suppressPackageStartupMessages(library(Seurat))
+options(Seurat.object.assay.version = "v5")
+suppressPackageStartupMessages(library(ggplot2))
+suppressPackageStartupMessages(library(openxlsx))
+suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(plyr))
+
+wd <- "/beegfs/scratch/ric.squadrito/ric.squadrito/202312011215_VMSC10702-NotaroTAD_A019/Analysis_MM/python_scripts/circles"
+setwd(wd)
+
+args <- commandArgs(trailingOnly = TRUE)
+
+sample <- args[1] # "M18", "M21", "M40", "M41"
+#sample <- "M18"
+
+out_dir <- paste0(wd, "/distance_table")
+dir.create(out_dir, showWarnings = F, recursive = T)
+
+#####################
+
+# Define a function to calculate Euclidean distance
+euclidean_dist <- function(x1, y1, x2, y2) {
+  return(sqrt((x1 - x2)^2 + (y1 - y2)^2))
+}
+
+#####################
+
+detected_transcripts <- read.csv(file=paste0(wd, "/detected_transcripts_", sample, ".csv"), header = T, numerals = c("no.loss"))
+
+# "global_x" and "global_y" give the position in um, x and y in pixels: https://vizgen.github.io/vizgen-postprocessing/output_data_formats/detected_transcripts_format.html
+df_molecules <- detected_transcripts[c("gene", "global_x", "global_y", "cell_id")]
+colnames(df_molecules) <- c("gene", "x", "y", "cell_id")
+rm(detected_transcripts)
+
+# Remove transcripts without cell_id
+nrow(df_molecules)
+df_molecules <- df_molecules %>% filter(! df_molecules$gene %in% grep(x = df_molecules$gene, pattern = "Blank", value = T))
+nrow(df_molecules)
+df_molecules <- df_molecules[df_molecules$cell_id != "-1",] # It might be possible to include transcripts outside the cells but the results appear similar
+nrow(df_molecules)
+
+# M18 transcripts: 4263642 => 4242609 => 3254954
+# M21 transcripts: 3508195 => 3490821 => 2430395
+# M40 transcripts: 2017821 => 2001106 => 1449746
+# M41 transcripts: 4510227 => 4494444 => 3030891
+
+# Number of genes
+n_genes <- 500
+
+# Define the distance threshold. Default d=30um
+d <- 30
+
+# SelectedGenes contains the list of genes manually selected to identify specific cell types.
+# It is possible to run the analysis with all the 500 genes but it takes much more time (5-10 days).
+SelectedGenes <- unique(c("S100a6", "Klf4", "Anxa2", "Cdkn2a", "Vim",
+                          # Antigen presentation
+                          "Ciita", "H2-DMb1", "H2-Aa", "H2-Ab1", 
+                          # Macrophages
+                          "Clec4f", "C1qa", "Marco",
+                          # T cell activation
+                          "Trac", "Cd3e", "Klf2", "Sell", "Il2rb", "Stat1","Ccr5", "Il12rb1", "Cd40",
+                          # IFN
+                          "Irf1", "Isg15", "Ifit2", "Cxcl9","Il18r1",
+                          # Protumoral
+                          "Serpine1","Cebpb","Pdgfb","Lrg1","Id2", "Vegfa", "Il1r1", "Cd274", "Tgfb1")) # Cd81
+
+df_molecules <- df_molecules[df_molecules$gene %in% SelectedGenes,]
+
+# subset to have a smaller dataframe. This is used mostly for benchmarking and testing
+#df_molecules <- df_molecules[df_molecules$gene %in% head(genes_ID, n_genes),]
+
+# Get the unique gene ID
+genes_ID <- unique(df_molecules$gene)
+
+# Sort genes_ID
+genes_ID <- sort(genes_ID, decreasing=F)
+
+n_genes <- length(genes_ID) # 500
+
+# Initialize an empty distance matrix
+distance_matrix <- matrix(nrow = length(genes_ID), ncol = length(genes_ID), 0)
+distance_matrix_norm <- matrix(nrow = length(genes_ID), ncol = length(genes_ID), 0)
+colnames(distance_matrix) <- genes_ID
+rownames(distance_matrix) <- genes_ID
+colnames(distance_matrix_norm) <- genes_ID
+rownames(distance_matrix_norm) <- genes_ID
+
+# Loop through each gene type combination
+for (i in 1:length(genes_ID)) { # 1:length(genes_ID) or 1:n_genes
+  print(i)
+  gene_A <- genes_ID[i]
+  
+  # Filter transcripts for gene A
+  transcripts_A <- df_molecules[df_molecules$gene == gene_A,]
+  for (j in 1:length(genes_ID)) { # (i + 1):length(genes_ID)) or 1:n_genes
+    gene_B <- genes_ID[j]
+    
+    # Filter transcripts for gene B
+    transcripts_B <- df_molecules[df_molecules$gene == gene_B,]
+    
+    # Initialize counters for close and not close transcripts
+    close_count <- 0
+    
+    # Loop through transcripts in transcripts_A
+    for (z in 1:nrow(transcripts_A)) {
+      a_x <- transcripts_A$x[z]
+      a_y <- transcripts_A$y[z]
+      
+      # filter to retain only transcripts_B that are not in the same cell as the transcripts_A analysed. This speed up the script.
+      transcripts_B_temp <- transcripts_B[transcripts_B$cell_id != transcripts_A$cell_id[z],]
+      # If you include also transcripts outside the cells add the following lines
+      #if (transcripts_A$cell_id[z] != "-1") {
+      #  transcripts_B_temp <- transcripts_B[transcripts_B$cell_id != transcripts_A$cell_id[z],]}
+      #else{transcripts_B_temp <- transcripts_B}
+      
+      # filter to retain only transcripts_B that are close to the transcripts_A analysed. This speed up the script. It looks only in a square of l=1.1*1.1*2d.
+      transcripts_B_temp <- transcripts_B_temp[transcripts_B_temp$x > a_x - 1.1*d & transcripts_B_temp$x < a_x + 1.1*d,]
+      transcripts_B_temp <- transcripts_B_temp[transcripts_B_temp$y > a_y - 1.1*d & transcripts_B_temp$y < a_y + 1.1*d,]
+      
+      is_close <- FALSE  # Flag to track if a close transcript is found
+      
+      if (nrow(transcripts_B_temp)==0){next} # This transcript of A is not close to any transcript of B
+      
+      # Loop through transcripts in transcripts_B
+      for (w in 1:nrow(transcripts_B_temp)) {
+        b_x <- transcripts_B_temp$x[w]
+        b_y <- transcripts_B_temp$y[w]
+        
+        # Calculate distance
+        distance <- euclidean_dist(a_x, a_y, b_x, b_y)
+        
+        # Check if distance is less than threshold
+        if (distance < d) {
+          is_close <- TRUE
+          break  # Exit inner loop if close transcript found
+        }
+      }
+
+      # Update counters based on the flag
+      if (is_close) {
+        close_count <- close_count + 1
+      }
+    
+    }
+  # Fill the distance matrix. Note: The matrix is not perfectly symmetrical because more A molecules can be close to the same B molecule and viceversa
+  distance_matrix[i, j] <- close_count
+  # Normalize for the sum of transcripts A and B.
+  # Note: I multiplied close_count * 2 because each transcript-transcript interaction involves two transcripts.
+  distance_matrix_norm[i, j] <- round(close_count*2/((nrow(transcripts_A)+nrow(transcripts_B))),5)
+  }
+}
+
+# Print the distance matrix
+print(distance_matrix)
+print(distance_matrix_norm)
+
+# Save the matrix
+write.csv(distance_matrix, paste0(out_dir, "/", sample, "_transcripts_distance_matrix_inCell_t", d, "_g", n_genes, "_selected.csv"), row.names=T)
+write.csv(distance_matrix_norm, paste0(out_dir, "/", sample, "_transcripts_distance_matrix_norm_inCell_t", d, "_g", n_genes, "_selected.csv"), row.names=T)
+
+
+
--- a/Vizgen/Vizgen_transcript_distance_heatmap.R
+++ b/Vizgen/Vizgen_transcript_distance_heatmap.R
+suppressPackageStartupMessages(library(pheatmap))
+suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(openxlsx))
+suppressPackageStartupMessages(library(RColorBrewer))
+
+
+wd <- "/beegfs/scratch/ric.squadrito/ric.squadrito/202312011215_VMSC10702-NotaroTAD_A019/Analysis_MM/python_scripts/circles"
+setwd(wd)
+
+# M21 and M40 are TA_Only and M18 and M41 are TA_Combo 
+sample <- "M18" # "M18", "M21", "M40", "M41"
+
+# Define the distance threshold. Default d=30um
+d <- 30 # 10 or 30
+
+inCell <- "inCell_" # "inCell_" or ""
+
+# _transcripts_distance_matrix_t30_g500.csv _transcripts_distance_matrix_inCell_t10_g500-2.csv _transcripts_distance_matrix_post_t30_g500.csv
+td_matrix_norm <- read.csv(paste0(wd, "/distance_table/", sample, "_transcripts_distance_matrix_norm_", inCell, "t", d, "_g35_selected.csv"))
+plot_dir <- paste0(wd, "/distance_table/selected/")
+dir.create(plot_dir, showWarnings=F, recursive=T)
+
+td_matrix_norm$X <- NULL
+genes <- colnames(td_matrix_norm)
+
+H2_genes <- genes[grep(x = genes, pattern = "H2")]
+H2_genes_modified <- gsub("\\.", "-", H2_genes) # replace "." with "-"
+
+# Change the column names of the genes that start with H2...
+#colnames(td_matrix_norm[H2_genes]) <- H2_genes_modified
+genes <- gsub("\\.", "-", genes) # replace "." with "-"
+
+colnames(td_matrix_norm) <- genes
+rownames(td_matrix_norm) <- genes
+
+td_matrix_norm$X <- NULL
+colnames(td_matrix_norm) <- genes
+rownames(td_matrix_norm) <- genes
+
+#Cancer cell signature from top genes
+SelectedGenes <- unique(c("S100a6", "Klf4", "Anxa2", "Cdkn2a", "Vim",
+                          #antigen presentation
+                          "Ciita", "H2-DMb1", "H2-Aa", "H2-Ab1", 
+                          #Macrophages
+                          "Clec4f", "C1qa", "Marco",
+                          #T cell activation
+                          "Trac", "Cd3e", "Klf2", "Sell", "Il2rb", "Stat1","Ccr5", "Il12rb1", "Cd40",
+                          #IFN
+                          "Irf1", "Isg15", "Ifit2", "Cxcl9","Il18r1",
+                          #protumoral
+                          "Serpine1","Cebpb","Pdgfb","Lrg1","Id2", "Vegfa", "Il1r1", "Cd274", "Tgfb1")) # Cd81
+
+SelectedGenes_ann <- c("Cancer genes", "Cancer genes", "Cancer genes", "Cancer genes", "Cancer genes",
+                       #antigen presentation
+                       "MHCI", "MHCI", "MHCI", "MHCI", 
+                       #Macrophages
+                       "Macs", "Macs","Macs",
+                       #T cell activation
+                       "T cell activation", "T cell activation", "T cell activation", "T cell activation", "T cell activation", "T cell activation","T cell activation", "T cell activation", "T cell activation",
+                       #IFN
+                       "IFN", "IFN", "IFN", "IFN","IFN",
+                       #Protumoral
+                       "Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral")
+
+gene_annotation <- data.frame(annotation = SelectedGenes_ann)
+row.names(gene_annotation) <- SelectedGenes
+
+signature_levels <- c("Cancer genes", "MHCI", "Macs", "T cell activation", "IFN", "Protumoral")
+gene_annotation$annotation <- factor(gene_annotation$annotation, levels = signature_levels, ordered = T)
+
+td_matrix_norm_selected <- td_matrix_norm[SelectedGenes, SelectedGenes]
+
+pheatmap(td_matrix_norm_selected, cluster_rows = F, cluster_cols = F, na_col = "grey",  scale = "none",
+         filename = paste0(plot_dir, sample, "_transcripts_distance_matrix_norm_", inCell, "d", d, "_selected.pdf"), width = 15, height = 15,
+         fontsize=9, annotation_col = gene_annotation, annotation_row = gene_annotation)
+
+
+# Compare two samples
+
+# M21 and M40 are TA_Only and M18 and M41 are TA_Combo 
+sample2 <- "M21" # "M18", "M21", "M40", "M41"
+
+td_matrix_norm_S2 <- read.csv(paste0(wd, "/distance_table/", sample2, "_transcripts_distance_matrix_norm_", inCell, "t", d, "_g35_selected.csv"))
+
+td_matrix_norm_S2$X <- NULL
+colnames(td_matrix_norm_S2) <- genes
+rownames(td_matrix_norm_S2) <-genes
+
+# Combine the two samples by subtracting the two matrices
+td_matrix_norm_S1_S2 <- td_matrix_norm - td_matrix_norm_S2
+
+td_matrix_norm_selected_S1_S2 <- td_matrix_norm_S1_S2[SelectedGenes, SelectedGenes]
+
+# Plot the results
+paletteLength <- 50
+myColor <- colorRampPalette(c("blue", "white", "red"))(paletteLength)
+
+# Use floor and ceiling to deal with even/odd length pallettelengths
+MIN <- min(td_matrix_norm_selected_S1_S2[!is.na(td_matrix_norm_selected_S1_S2)])
+MAX <- max(td_matrix_norm_selected_S1_S2[!is.na(td_matrix_norm_selected_S1_S2)])
+myBreaks <- c(seq(MIN, 0, length.out=ceiling(paletteLength/2) + 1), 
+              seq(MAX/paletteLength, MAX, length.out=floor(paletteLength/2)))
+
+
+pheatmap(td_matrix_norm_selected_S1_S2, cluster_rows = F, cluster_cols = F, na_col = "grey",  scale = "none",
+         filename = paste0(plot_dir, sample, "_", sample2, "_transcripts_distance_matrix_norm_", inCell, "d", d, "_selected.pdf"), width = 15, height = 15,
+         fontsize=9, annotation_col = gene_annotation, annotation_row = gene_annotation, color = myColor, breaks = myBreaks)
--- a/antigen_discovery/001_antigen_discovery_filtering.R
+++ b/antigen_discovery/001_antigen_discovery_filtering.R
+library(openxlsx)
+library(dplyr)
+library(readxl)
+library(psych)
+library(data.table)
+library(GGally)
+library(ggplot2)
+library(ggpubr)
+library(ggrepel)
+
+#WDs
+MNOT.gar<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/"
+MNOT.pvac<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/pvacseq_mouse/MHC_Class_I"
+MNOT.wd1<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/03-aln_GRCm38/"
+
+
+#WD MLS
+#MNOT.gar<-"/Users/Squadrito/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38"
+#MNOT.pvac<-"/Users/Squadrito/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/pvacseq_mouse/MHC_Class_I"
+#MNOT.wd1<-"/Users/Squadrito/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/03-aln_GRCm38"
+
+
+
+
+#define filtering functions
+
+filter_data_common <- function(df, a1, a2, a3, a4, a5, a6, a7, a8) {
+  subset(df, (Ensemble_score <= a1 | is.na(Ensemble_score)) &
+           (IC50.MT <= a2 | is.na(IC50.MT)) &
+           (DAI >= a3 | is.na(DAI)) &
+           (agretopicity <= a4 | is.na(agretopicity)) &
+           (average_vivo >= a5 | is.na(average_vivo)) &
+           (average_vitro >= a6 | is.na(average_vitro)) &
+           (dissimilarity >= a7 | is.na(dissimilarity)) &
+           (foreignness_score >= a8 | is.na(foreignness_score)))
+}
+
+filter_data_garnish <- function(df, a1, a3, a5, a6, a7, a8) {
+  subset(df, (Ensemble_score <= a1 | is.na(Ensemble_score)) &
+           (DAI >= a3 | is.na(DAI)) &
+           (average_vivo >= a5 | is.na(average_vivo)) &
+           (average_vitro >= a6 | is.na(average_vitro)) &
+           (dissimilarity >= a7 | is.na(dissimilarity)) &
+           (foreignness_score >= a8 | is.na(foreignness_score)))
+}
+
+
+filter_data_pvac <- function(df, a2, a4, a5, a6, a7, a8) {
+  subset(df,(IC50.MT <= a2 | is.na(IC50.MT)) &
+            (agretopicity <= a4 | is.na(agretopicity)) &
+            (average_vivo >= a5 | is.na(average_vivo)) &
+            (average_vitro >= a6 | is.na(average_vitro)) &
+            (dissimilarity >= a7 | is.na(dissimilarity)) &
+            (foreignness_score >= a8 | is.na(foreignness_score)))
+}
+
+
+
+
+##Garnish
+#carica la tabella di antigengarnish
+setwd(MNOT.gar)
+data_garnish<- fread("ant_garnish_res_AKTPFV3only_filt_coding.tsv", sep = "\t", header = TRUE)
+
+###pvac 
+setwd(MNOT.pvac)
+data_pvac <- read_excel("pvac_seq.xlsx")
+
+#Common
+common.pep.pvac<-unique(data_pvac$Best.Peptide)
+common.pep.garnish<-unique(data_garnish$nmer)
+common.pep<-intersect(common.pep.pvac,common.pep.garnish)
+
+data_pvac2<-subset(data_pvac,Best.Peptide%in%common.pep)
+data_pvac3<-data_pvac2 %>%
+  group_by(Best.Peptide) %>%
+  slice(which.min(IC50.MT))
+
+data_garnish2<-subset(data_garnish,nmer%in%common.pep)
+data_garnish3<-data_garnish2 %>%
+  group_by(nmer_uuid) %>%
+  slice(which.min(Ensemble_score))
+
+data.common<-merge(data_pvac3,data_garnish3, by.x = "Best.Peptide", by.y= "nmer")
+
+
+#ADD expression
+setwd(MNOT.wd1)
+count_tpm <- read_excel("featureCounts_results_tpm.xlsx")
+
+#average expression level to pply filtering
+count_tpm$average_vitro<-apply(count_tpm[,7:9],1,function(x)mean(x))
+#sum(count_tpm$average_vitro==0)
+count_tpm$average_vitro[apply(count_tpm[,7:9],1,function(x)mean(x)*min(x))==0]<-0
+
+count_tpm$average_vivo<-apply(count_tpm[,10:12],1,function(x)mean(x))
+#sum(count_tpm$average_vivo==0)
+count_tpm$average_vivo[apply(count_tpm[,10:12],1,function(x)mean(x)*min(x))==0]<-0
+
+
+merged.exp<-merge(data.common,count_tpm[,c("Geneid","average_vivo","average_vitro")], by.x = "Gene",by.y="Geneid")
+merged.df<-merged.exp #1244
+
+merged.df[,c(22)]<-as.numeric(as.character(merged.df[,c(22)]))
+merged.df[,c(24)]<-as.numeric(as.character(merged.df[,c(24)]))
+merged.df$dissimilarity.x[is.na(merged.df$dissimilarity.x)] <- 0
+merged.df$foreignness_score.x[is.na(merged.df$foreignness_score.x)] <- 0
+merged.df$dissimilarity<-merged.df$dissimilarity.x
+merged.df$foreignness_score<-merged.df$foreignness_score.x
+
+
+
+#FILTERING common based on expr and foreigness
+a1<-quantile(merged.df$Ensemble_score, 0.2) #minor 0.4
+a2<-quantile(merged.df$IC50.MT, 1) #minor 0.4
+a3<-quantile(merged.df$DAI, 0,na.rm=T) #higher 0.4
+a4<-quantile(merged.df$agretopicity, 1) #minor 0.6
+a5<-quantile(merged.df$average_vivo, 0.80) #higher 90
+a6<-quantile(merged.df$average_vitro, 0.90)  #higher 95
+a7<-quantile(merged.df$dissimilarity, 0)  #higher 
+a8<-quantile(merged.df$foreignness_score, 0.5)  #higher 
+
+
+data_filter1<-filter_data_common(merged.df,a1,a2,a3,a4,a5,a6,a7,a8)
+  
+
+db1 <- data_filter1[,c(1,2,3,6,10,11,131,12,132,138,139,140,141)]
+db1$type <- "exp-foreigness"
+db1
+
+#FILTERING common based on IC50,Agretopicity
+
+a1<-quantile(merged.df$Ensemble_score, 0.12) #minor
+a2<-quantile(merged.df$IC50.MT, 0.12) #minor
+a3<-quantile(merged.df$DAI, 0.2,na.rm=T) #higher
+a4<-quantile(merged.df$agretopicity, 0.1) #minor
+a5<-quantile(merged.df$average_vivo, 0.50) #higher
+a6<-quantile(merged.df$average_vitro, 0.60)  #higher
+a7<-quantile(merged.df$dissimilarity, 0)  #higher
+a8<-quantile(merged.df$foreignness_score, 0.0)  #higher
+
+data_filter2<-filter_data_common(merged.df,a1,a2,a3,a4,a5,a6,a7,a8)
+
+db2 <- data_filter2[,c(1,2,3,6,10,11,131,12,132,138,139,140,141)]
+db2$type <- "IC50-agretopicity"
+db2
+
+#FILTERING common based on expression and IC50
+
+a1<-quantile(merged.df$Ensemble_score, 0.3) #minor
+a2<-quantile(merged.df$IC50.MT, 0.3) #minor
+a3<-quantile(merged.df$DAI, 0.4,na.rm=T) #higher
+a4<-quantile(merged.df$agretopicity, 0.6) #minor
+a5<-quantile(merged.df$average_vivo, 0.90) #higher
+a6<-quantile(merged.df$average_vitro, 0.95)  #higher
+a7<-quantile(merged.df$dissimilarity, 0)  #higher
+a8<-quantile(merged.df$foreignness_score, 0.0)  #higher
+
+data_filter3<-filter_data_common(merged.df,a1,a2,a3,a4,a5,a6,a7,a8)
+
+db3 <- data_filter3[,c(1,2,3,6,10,11,131,12,132,138,139,140,141)]
+db3$type <- "IC50-exp" 
+db3
+
+common.sel<- rbind(db1,db2,db3)
+common.sel<-common.sel %>% group_by(Best.Peptide) %>%
+  slice(which.min(Ensemble_score))
+
+#top scoring in garnish
+#take only garnish not present in common
+garnish.only<- subset(data_garnish, !(nmer %in% common.pep))
+
+#ADD expression
+garnish.only<-merge(garnish.only,count_tpm[,c("Geneid",
+          "average_vivo","average_vitro")], by.x = "gene",by.y="Geneid")
+
+#rimuovi peptidi wt,più lunghi di 10aa, multipli e di frame diversi, tiene solo quelli con ensemble score piu alto
+garnish.only<- subset(garnish.only, pep_type != "wt")
+garnish.only<-subset(garnish.only, nmer_l<= 10 )
+
+garnish.only<-garnish.only %>%
+  group_by(nmer_uuid) %>%
+  slice(which.min(Ensemble_score))
+
+garnish.only <- garnish.only %>%
+  group_by(var_uuid) %>%
+  slice(which.min(Ensemble_score))
+
+garnish.only$foreignness_score[is.na(garnish.only$foreignness_score)]<-0
+
+#FILTERING garnish based on Enseble score
+a1<-quantile(garnish.only$Ensemble_score, 0.01) #minor
+a3<-quantile(garnish.only$DAI, 0.5,na.rm=T) #higher
+a5<-quantile(garnish.only$average_vivo, 0.40) #higher
+a6<-quantile(garnish.only$average_vitro, 0.40)  #higher
+a7<-quantile(garnish.only$dissimilarity, 0 ,na.rm=T )  #higher
+a8<-quantile(garnish.only$foreignness_score, .8,na.rm=T )  #higher
+
+
+data_filter1<-filter_data_garnish(garnish.only,a1,a3,a5,a6,a7,a8)
+db1<-data_filter1[,c(1,2,9,10,12,13,57,108,109,111,113,115,116)]
+db1$type <- "Escore" 
+
+#FILTERING garnish based on expression
+a1<-quantile(garnish.only$Ensemble_score, 0.1) #minor
+a3<-quantile(garnish.only$DAI, 0.5,na.rm=T) #higher
+a5<-quantile(garnish.only$average_vivo, 0.90) #higher
+a6<-quantile(garnish.only$average_vitro, 0.95)  #higher
+a7<-quantile(garnish.only$dissimilarity, 0 ,na.rm=T )  #higher
+a8<-quantile(garnish.only$foreignness_score, .8,na.rm=T )  #higher
+
+
+data_filter2<-filter_data_garnish(garnish.only,a1,a3,a5,a6,a7,a8)
+db2<-data_filter2[,c(1,2,9,10,12,13,57,108,109,111,113,115,116)]
+db2$type <- "expression" 
+
+
+garnish.sel<- rbind(db1,db2)
+garnish.sel<-garnish.sel %>% group_by(nmer) %>%
+  slice(which.min(Ensemble_score))
+
+#top scoring in pvac
+#take only pvac not present in common
+pvac.only<- subset(data_pvac, !(Best.Peptide %in% common.pep))
+
+#ADD expression
+pvac.only<-merge(pvac.only,count_tpm[,c("Geneid",
+            "average_vivo","average_vitro")], by.x = "Gene",by.y="Geneid")
+
+pvac.only$nmer_l<-nchar(pvac.only$Best.Peptide)
+
+pvac.only$foreignness_score<-as.numeric(pvac.only$foreignness_score)
+pvac.only$foreignness_score[is.na(pvac.only$foreignness_score)]<-0
+
+
+
+#FILTERING pvac based on IC50 
+a2<-quantile(pvac.only$IC50.MT, 0.2) #minor
+a4<-quantile(pvac.only$agretopicity, 0.2) #minor
+a5<-quantile(pvac.only$average_vivo, 0.2) #higher
+a6<-quantile(pvac.only$average_vitro, 0.2)  #higher
+a7<-quantile(pvac.only$dissimilarity, 0)  #higher
+a8<-quantile(pvac.only$foreignness_score, 0)  #higher
+
+data_filter1<-filter_data_pvac(pvac.only,a2,a4,a5,a6,a7,a8)
+db1<-data_filter1[,c(1,2,3,6,10,11,25,26,22,24)]
+
+db1$type <- "IC50" 
+
+#FILTERING pvac based on expression
+
+a2<-quantile(pvac.only$IC50.MT, .5) #minor
+a4<-quantile(pvac.only$agretopicity, 1) #minor
+a5<-quantile(pvac.only$average_vivo, 0.80) #higher
+a6<-quantile(pvac.only$average_vitro, 0.90)  #higher
+a7<-quantile(pvac.only$dissimilarity, 0)  #higher
+a8<-quantile(pvac.only$foreignness_score, 0)  #higher
+
+data_filter2<-filter_data_pvac(pvac.only,a2,a4,a5,a6,a7,a8)
+db2<-data_filter2[,c(1,2,3,6,10,11,25,26,22,24)]
+db2$type <- "exp" 
+
+
+pvac.sel<- rbind(db1,db2)
+
+pvac.sel<-pvac.sel %>%
+  group_by(Best.Peptide) %>%
+  slice(which.min(IC50.MT))
+
+#length of nmer in original db
+windows()
+par(mfrow=c(3,1))
+hist(garnish.only$nmer_l,xlim=c(8,12))
+hist(merged.df$nmer_l,xlim=c(8,12))
+hist(pvac.only$nmer_l,xlim=c(8,12))
+
+#length of selected nmer
+pvac.sel$nmer_l<-nchar(pvac.sel$Best.Peptide)
+common.sel$nmer_l<-nchar(common.sel$Best.Peptide)
+garnish.sel$nmer_l<-nchar(garnish.sel$nmer)
+
+windows()
+par(mfrow=c(3,1))
+hist(garnish.sel$nmer_l,xlim=c(8,12))
+hist(common.sel$nmer_l,xlim=c(8,12))
+hist(pvac.sel$nmer_l,xlim=c(8,12))
+
+
+#FINAL MERGE of selected adding origin
+
+common.sel$origin<-"common"
+garnish.sel$origin<-"garnish"
+pvac.sel$origin<-"pvac"
+
+colnames(common.sel)[1:3]<-c("gene","nmer","ID")
+colnames(pvac.sel)[c(1,2)]<-c("gene","nmer")
+
+db.All <- merge(garnish.sel,merge(common.sel,pvac.sel,all=TRUE),all=T)
+
+
+# Crea un nuovo file Excel e salva il dataframe 
+setwd(MNOT.gar)
+write.xlsx(db.All,"FinalTable.xlsx")
+
+
+#########################################################################################
+#####################ENDS HERE###########################################################
+###########################################################################################
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#plots
+
+FINAL_LIST_PEP <- read_excel("C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/FINAL_LIST_PEP.xlsx")
+
+
+
+windows()
+par(mfrow=c(4,2))
+db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$Ensemble_score),]
+plot.ecdf((merged.df$Ensemble_score),main="Ensemble_score", xlab=(""))
+abline(v=(FINAL_LIST_PEP$Ensemble_score),col="red")
+
+db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$IC50.MT),]
+plot.ecdf((merged.df$IC50.MT),main="IC50.MT", xlab=(""))
+abline(v=(FINAL_LIST_PEP$IC50.MT),col="red")
+
+db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$foreignness_score),]
+plot.ecdf(merged.df$foreignness_score,main="foreigness_score", xlab=(""))
+abline(v=FINAL_LIST_PEP$foreignness_score,col="red")
+
+#db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$DAI),]
+#plot.ecdf((merged.df$DAI+0.1),main="DAI")
+#abline(v=(db.ES$DAI+0.1),col="red")
+
+db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$agretopicity),]
+plot.ecdf((merged.df$agretopicity+0.01),main="agretopicity", xlab=(""))
+abline(v=(FINAL_LIST_PEP$agretopicity+0.01),col="red")
+
+db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$average_vitro),]
+plot.ecdf((merged.df$average_vitro+0.00001),main="average_vitro", xlab=(""))
+abline(v=(FINAL_LIST_PEP$average_vitro+0.00001),col="red")
+
+db.ES<- FINAL_LIST_PEP[!is.na(FINAL_LIST_PEP$average_vivo),]
+plot.ecdf((merged.df$average_vivo+0.00001),main="average_vivo", xlab=(""))
+abline(v=(FINAL_LIST_PEP$average_vivo+0.00001),col="red")
+
+#gg plots
+
+windows()
+par(mfrow=c(2,2))
+
+p<- ggplot(merged.df, aes(IC50.MT)) +
+  geom_line(stat = "ecdf")+
+  geom_point(stat="ecdf",size=2) +
+  geom_vline(xintercept=FINAL_LIST_PEP$IC50.MT, colour = "red")
+p
+
+
+#histos
+
+windows()
+par(mfrow=c(2,2))
+
+hist(merged.df$IC50.MT, breaks = 100, xlab="IC50", main ="IC50")
+abline(v=(FINAL_LIST_PEP$IC50.MT),col="black")
+
+
+hist(log10(merged.df$average_vivo), breaks = 100, xlab="Average expression (log TPM)", main ="Log TPM")
+abline(v=(log10(FINAL_LIST_PEP$average_vivo)),col="black")
+
+hist(merged.df$agretopicity, xlab="Agretopicity", main ="Agretopicity")
+abline(v=(FINAL_LIST_PEP$agretopicity),col="red")
+
+hist(merged.df$foreignness_score, xlab="Foreigness", main ="Foreigness")
+abline(v=(FINAL_LIST_PEP$foreignness_score),col="red")
+
+
+#corr
+windows()
+par(mfrow=c(2,2))
+p<-ggscatter(merged.df, x = "IC50.MT", y = "average_vivo", 
+          cor.coef = TRUE, cor.method = "pearson",
+          xlab = "IC50", ylab = "log10 TPM",
+          color= "black", fill='grey', shape=21, size=5)+
+            scale_y_log10()+
+  scale_x_log10()+
+  geom_vline(xintercept=1000, colour = "red",linetype = "dashed")+
+  geom_hline(yintercept=10, colour = "red",linetype = "dashed")+
+  geom_point(data=FINAL_LIST_PEP, fill= "green",color="black", , shape=21, size=5)+
+  grids(linetype = "dashed")+
+  geom_text_repel(
+    data = FINAL_LIST_PEP,
+    aes(label = gene),
+    size = 5,
+    box.padding = unit(0.35, "lines"),
+    point.padding = unit(0.3, "lines")
+  )
+p
+
+
+windows()
+par(mfrow=c(2,2))
+p<-ggscatter(merged.df, x = "foreignness_score", y = "agretopicity", 
+             cor.coef = TRUE, cor.method = "pearson",
+             xlab = "Foreigness", ylab = "Agretopicity",
+             color= "black", fill='grey', shape=21, size=5)+
+  scale_y_log10()+
+  scale_x_log10()+
+  geom_point(data=FINAL_LIST_PEP, fill= "green",color="black", , shape=21, size=5)+
+  grids(linetype = "dashed")+ 
+  geom_text_repel(
+    data = FINAL_LIST_PEP,
+    aes(label = gene),
+    size = 5,
+    box.padding = unit(0.35, "lines"),
+    point.padding = unit(0.3, "lines")
+  )
+p  
+  
+
+
--- a/antigen_discovery/002_antigen_discovery_VCF_double_mutations.R
+++ b/antigen_discovery/002_antigen_discovery_VCF_double_mutations.R
+library(openxlsx)
+library(dplyr)
+library(readxl)
+library(psych)
+library(data.table)
+library(tidyr)
+library(stringr)
+library(R.utils)
+
+#WDs
+MNOT.gar<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/"
+MNOT.pvac<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/pvacseq_mouse/MHC_Class_I"
+MNOT.wd1<-"C:/Users/notaro.marco/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/03-aln_GRCm38/"
+
+#MLS
+MNOT.gar<-"/Users/Squadrito/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38"
+MNOT.pvac<-"/Users/Squadrito/Dropbox (HSR Global)/90-756147447_WES_Squadrito/WES_07-diff_GRCm38/pvacseq_mouse/MHC_Class_I"
+MNOT.wd1<-"/Users/Squadrito/Dropbox (HSR Global)/90-756142658_RNAseq_Squadrito/03-aln_GRCm38"
+
+mappa_completa <- c("Ala" = "A", "Arg" = "R", "Asn" = "N", "Asp" = "D", "Cys" = "C", 
+                    "Gln" = "Q", "Glu" = "E", "Gly" = "G", "His" = "H", "Ile" = "I", 
+                    "Leu" = "L", "Lys" = "K", "Met" = "M", "Phe" = "F", "Pro" = "P", 
+                    "Ser" = "S", "Thr" = "T", "Trp" = "W", "Tyr" = "Y", "Val" = "V")
+
+##Garnish
+#carica la tabella di antigengarnish
+#setwd(MNOT.gar)
+#data_garnish<- fread("ant_garnish_res_AKTPFV3only_filt_coding.tsv.gz", sep = "\t", header = TRUE)
+
+#clean garnish
+#data_garnish<-subset(data_garnish, effect_type == "missense_variant")
+#data_garnish<- subset(data_garnish, pep_type != "wt")
+#saveRDS(data_garnish,"data_garnish.rds")
+data_garnish<-readRDS("data_garnish.rds")
+
+
+#create protein_pos column
+data_garnish <- separate(data_garnish, Protein_position_Protein_len, 
+                         into = c("Protein_pos", "Protein_length"), sep = "/")
+data_garnish$Protein_pos<-as.numeric(as.character(data_garnish$Protein_pos))
+
+
+#calculate distance from previous mutation in the same gene, take only mutation that are nearby in the protein (distance<10)
+data_garnish2<-data_garnish[,c(50,7,60,57,67)]
+data_garnish2 <- unique(data_garnish2)
+double_mut <- data_garnish2[order(data_garnish2$transcript_id, data_garnish2$Protein_pos),]
+double_mut$next_position1 <- c(double_mut$Protein_pos[-1],NA)
+double_mut$next_gene1 <- c(double_mut$transcript_id[-1],NA)
+double_mut$next_mut1 <- c(double_mut$protein_change[-1],NA)
+double_mut$distance1 <- ifelse(double_mut$transcript_id == double_mut$next_gene1, 
+                            double_mut$next_position1-double_mut$Protein_pos, NA)
+
+#genes with 3 concatenate mutations
+df1<-double_mut
+remove_rows<- c(-1,-2)
+df2<- df1[order(df1$transcript_id, df1$Protein_pos),]
+df2$next_position2 <- c(df2$Protein_pos[remove_rows],NA,NA)
+df2$next_gene2 <- c(df2$transcript_id[remove_rows],NA,NA)
+df2$next_mut2 <- c(df2$protein_change[remove_rows],NA,NA)
+df2$distance2 <- ifelse(df2$transcript_id == df2$next_gene2, 
+                        df2$next_position2 - df2$Protein_pos, NA)
+
+#transcripts with 4 concatenate mutations
+remove_rows<- c(-1,-2,-3)
+df3<- df2[order(df2$transcript_id, df2$Protein_pos),]
+df3$next_position3 <- c(df2$Protein_pos[remove_rows],NA,NA,NA)
+df3$next_gene3 <- c(df2$transcript_id[remove_rows],NA,NA,NA)
+df3$next_mut3 <- c(df2$protein_change[remove_rows],NA,NA,NA)
+df3$distance3 <- ifelse(df3$transcript_id == df3$next_gene3, 
+                       df3$next_position3 - df3$Protein_pos, NA)
+
+#transcripts with 5 concatenate mutations
+remove_rows<- c(-1,-2,-3,-4)
+df4<- df3[order(df3$transcript_id, df3$Protein_pos),]
+df4$next_position4 <- c(df3$Protein_pos[remove_rows],NA,NA,NA,NA)
+df4$next_gene4 <- c(df3$transcript_id[remove_rows],NA,NA,NA,NA)
+df4$next_mut4 <- c(df3$protein_change[remove_rows],NA,NA,NA,NA)
+df4$distance4 <- ifelse(df4$transcript_id == df4$next_gene4, 
+                        df4$next_position4 - df4$Protein_pos, NA)
+
+#Apply filters
+pep01 <- df4 %>% filter(distance1 <= 10 & distance1 >0 &
+                  (is.na(distance2)|distance2>10|
+                   (distance2==distance1 & (is.na(distance3)|distance3>10))))
+
+pep02 <- df4 %>% filter(distance1 == 0 & distance2 <= 10 & distance2 > 0 &
+                  (is.na(distance3)|distance3 > 10|
+                    (distance3 == distance2 & (is.na(distance4)|distance4 > 10))))
+
+pep012 <- df4 %>% filter(distance1 <= 10 & distance1 >0 &
+                      distance2 <= 10 & distance2 >0 &
+                        distance1!=distance2 &
+                          (is.na(distance3)|distance3>10|
+                             (distance3==distance2 & (is.na(distance4)|distance4>10))))
+
+pep013 <- df4 %>% filter(distance1 <= 10 & distance1 >0 &
+                           distance1==distance2 &
+                            distance3 <= 10 & distance3 >0 &
+                             (is.na(distance4)|distance4>10|distance4==distance3))
+
+pep0123 <- df4 %>% filter(distance1 <= 10 & distance1 >0 &
+                            distance1!=distance2 &
+                        distance2 <= 10 & distance2 >0 &
+                          distance2!=distance3 &
+                          distance3 <= 10 & distance3 >0 &
+                           (is.na(distance4)|distance4>10|distance4==distance3))
+
+#Select columns for semplicity
+pep01 <- pep01[,c(1,2,3,5,4,8)]
+pep02 <- pep02[,c(1,2,3,5,4,12)]
+pep012 <- pep012[,c(1,2,3,5,4,8,12)]
+pep013<- pep013[,c(1,2,3,5,4,8,16)]
+pep0123 <- pep0123[,c(1,2,3,5,4,8,12,16)]
+
+#crea colonne con la singola mutazione e converte in codice ad una lettera
+#for 
+Create_db <- function(dfi,x,name.pep="pep"){
+Prot_mut <- dfi[,4]
+  for (i in x){
+  aminoacido_sost <- str_extract(dfi[,i], "(?<=\\d)[A-Z][a-z]{2}")
+  pos_inMut <- as.numeric(as.character(str_sub(dfi[,i], 6, -4)))
+  aminoacido_sost <- mappa_completa[aminoacido_sost]
+  Prot_mut<- transform(str_replace(Prot_mut, paste0("^(.{", pos_inMut-1, "})."), paste0("\\1", aminoacido_sost)))
+  Prot_mut <- Prot_mut[,1]} 
+  Pep_mut <- substr(Prot_mut, dfi[,3]-9, dfi[,3]+10)
+  Pep_wt <- substr(dfi[,4], dfi[,3]-9, dfi[,3]+10)
+  Gene_ID <- paste0(dfi[,1],name.pep)
+  mut0123<- data.frame(Gene_ID,Pep_wt,Pep_mut)
+      return(mut0123)}
+
+pep01 <- pep01[,c(1,2,3,5,4,8)]
+pep02 <- pep02[,c(1,2,3,5,4,12)]
+pep012 <- pep012[,c(1,2,3,5,4,8,12)]
+pep013<- pep013[,c(1,2,3,5,4,8,16)]
+pep0123 <- pep0123[,c(1,2,3,5,4,8,12,16)]
+
+FinalDB <- rbind(Create_db(pep01,5:6,".01"),
+                 Create_db(pep02,5:6,".02"),
+                 Create_db(pep012,5:7,".012"),
+                 Create_db(pep013,5:7,".013"),
+                 Create_db(pep0123,5:8,".0123"))
+
+FinalDB <- unique(FinalDB)
+write.xlsx(FinalDB,"multipleMuts_MNOT.xlsx")
+  
+  
\ No newline at end of file
--- a/environment_singlecell5.yml
+++ b/environment_singlecell5.yml
--- a/reference/miDB_sig4.MLS.rds
+++ b/reference/miDB_sig4.MLS.rds
--- a/reference/regev_lab_cell_cycle_mouse.rds
+++ b/reference/regev_lab_cell_cycle_mouse.rds