Initial commit

f4a3560c · Ivan Merelli · f4a3560c · f4a3560c · f4a3560c · f4a3560c
Commit f4a3560c authored Sep 14, 2024 by Ivan Merelli
--- a/BM/heatmap_BM.R
+++ b/BM/heatmap_BM.R
+library('dplyr')
+library('ggplot2')
+library('reshape2')
+library('RColorBrewer')
+#heatmap THR vs CTRL
+hallmark_ds <- c("Thr_CTRL")
+clusters <- dir ("/DATA/31/molteni/vexas_bm/results/integration/GSEA_OSR_Wu/Thr_CTRL/")
+clusters <- gsub("Thr_CTRL_markers_cluster_", "", clusters)
+wdir="/DATA/31/molteni/vexas_bm/results/integration"
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  for (i in clusters) {
+    s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
+    ff <- paste(wdir, "GSEA_OSR_Wu/Thr_CTRL", s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
+    ds.t <- read.table(ff, header = T, sep = "\t")
+    ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+    ds.t.filt$Dataset <- paste(ds,i,sep="_")
+    if (nrow(hallmark.full) == 0) {
+      hallmark.full <- ds.t.filt
+    } else {
+      hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    }
+  }
+}
+hallmark.full_1 <- hallmark.full
+### ONE HEATMAP ###
+all_hallmark <- hallmark.full_1
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+level_order <- c("Thr_CTRL_Late_erythroid_cells",
+                 "Thr_CTRL_Mid_erythroid_cells",
+                 "Thr_CTRL_Early_erythroid", 
+                 "Thr_CTRL_Erythroid_progenitors",
+                 "Thr_CTRL_MEP",
+                 "Thr_CTRL_MEP_BASO_MAST_prog",
+                 "Thr_CTRL_Plasmacytoid_dentritic_cells" ,
+                 "Thr_CTRL_CD14+_CD16_low_monocytes",
+                 "Thr_CTRL_CD14+_monocytes_2",
+                 "Thr_CTRL_CD14+_monocytes_1",
+                 "Thr_CTRL_Myeloid_dendritic_cells",
+                 "Thr_CTRL_Immature_Neutrophil",
+                 "Thr_CTRL_Prolif_GP",
+                 "Thr_CTRL_GP",
+                 "Thr_CTRL_HSC_MPP",
+                 "Thr_CTRL_MLP",
+                 "Thr_CTRL_CD4_T_cells",  
+                 "Thr_CTRL_CD8_T_cells", 
+                 "Thr_CTRL_B_cells",
+                 "Thr_CTRL_Plasma_cells", 
+                 "Thr_CTRL_VEXAS_GP", 
+                 "Thr_CTRL_VEXAS_NK_cells")
+#ALL 
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.x=element_text(size=10))+
+  theme(axis.text.y=element_text(size=8))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=2) +
+  ggtitle("GSEA OSR Thr Patients vs Control Pool") + 
+  theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
+  theme(legend.title = element_text(size = 10))+
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       #limits = c(-3.5, 3.5),
+                       limits = c(-4.5, 4.5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
+ggsave(filename = paste(plot.dir, paste("hallmark_Thr_CTRL", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 15, height = 15, dpi = 600)
+#heatmap VAL vs CTRL
+hallmark_ds <- c("Val_CTRL")
+clusters <- dir ("/DATA/31/molteni/vexas_bm/results/integration/GSEA_OSR_Wu/Val_CTRL/")
+clusters <- gsub("Val_CTRL_markers_cluster_", "", clusters)
+wdir="/DATA/31/molteni/vexas_bm/results/integration"
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  for (i in clusters) {
+    s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
+    ff <- paste(wdir, "GSEA_OSR_Wu/Val_CTRL", s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
+    ds.t <- read.table(ff, header = T, sep = "\t")
+    ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+    ds.t.filt$Dataset <- paste(ds,i,sep="_")
+    if (nrow(hallmark.full) == 0) {
+      hallmark.full <- ds.t.filt
+    } else {
+      hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    }
+  }
+}
+hallmark.full_2 <- hallmark.full
+### ONE HEATMAP ###
+all_hallmark <- hallmark.full_2
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+#
+level_order <- c("Val_CTRL_Late_erythroid_cells",
+                 "Val_CTRL_Mid_erythroid_cells",
+                 "Val_CTRL_Early_erythroid", 
+                 "Val_CTRL_Erythroid_progenitors",
+                 "Val_CTRL_MEP",
+                 "Val_CTRL_MEP_BASO_MAST_prog",
+                 "Val_CTRL_Plasmacytoid_dentritic_cells" ,
+                 "Val_CTRL_CD14+_CD16_low_monocytes",
+                 "Val_CTRL_CD14+_monocytes_2",
+                 "Val_CTRL_CD14+_monocytes_1",
+                 "Val_CTRL_Myeloid_dendritic_cells",
+                 "Val_CTRL_Immature_Neutrophil",
+                 "Val_CTRL_Prolif_GP",
+                 "Val_CTRL_GP",
+                 "Val_CTRL_HSC_MPP",
+                 "Val_CTRL_MLP",
+                 "Val_CTRL_CD4_T_cells",  
+                 "Val_CTRL_CD8_T_cells", 
+                 "Val_CTRL_B_cells",
+                 "Val_CTRL_Plasma_cells",
+                 "Val_CTRL_Endothelial_cells",
+                 "Val_CTRL_VEXAS_GP", 
+                 "Val_CTRL_VEXAS_NK_cells")
+#ALL 
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.x=element_text(size=10))+
+  theme(axis.text.y=element_text(size=8))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=2) +
+  ggtitle("GSEA Val OSR Patients vs Control Pool") + 
+  theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
+  theme(legend.title = element_text(size = 10))+
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       #limits = c(-3.5, 3.5),
+                       limits = c(-4.5, 4.5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
+ggsave(filename = paste(plot.dir, paste("hallmark_Val_CTRL", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 15, height = 15, dpi = 600)
+#heatmap LEU vs CTRL
+hallmark_ds <- c("Leu_CTRL")
+clusters <- dir ("/DATA/31/molteni/vexas_bm/results/integration/GSEA_OSR_Wu/Leu_CTRL/")
+clusters <- gsub("Leu_CTRL_markers_cluster_", "", clusters)
+wdir="/DATA/31/molteni/vexas_bm/results/integration"
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  for (i in clusters) {
+    s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
+    ff <- paste(wdir, "GSEA_OSR_Wu/Leu_CTRL", s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
+    ds.t <- read.table(ff, header = T, sep = "\t")
+    ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+    ds.t.filt$Dataset <- paste(ds,i,sep="_")
+    if (nrow(hallmark.full) == 0) {
+      hallmark.full <- ds.t.filt
+    } else {
+      hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    }
+  }
+}
+hallmark.full_3 <- hallmark.full
+### ONE HEATMAP ###
+all_hallmark <- hallmark.full_3
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+#
+level_order <- c("Leu_CTRL_Late_erythroid_cells",
+                 "Leu_CTRL_Mid_erythroid_cells",
+                 "Leu_CTRL_Early_erythroid", 
+                 "Leu_CTRL_Erythroid_progenitors",
+                 "Leu_CTRL_MEP",
+                 "Leu_CTRL_MEP_BASO_MAST_prog",
+                 "Leu_CTRL_Plasmacytoid_dentritic_cells" ,
+                 "Leu_CTRL_CD14+_CD16_low_monocytes",
+                 "Leu_CTRL_CD14+_monocytes_2",
+                 "Leu_CTRL_CD14+_monocytes_1",
+                 "Leu_CTRL_Myeloid_dendritic_cells",
+                 "Leu_CTRL_Immature_Neutrophil",
+                 "Leu_CTRL_Prolif_GP",
+                 "Leu_CTRL_GP",
+                 "Leu_CTRL_HSC_MPP",
+                 "Leu_CTRL_MLP",
+                 "Leu_CTRL_CD4_T_cells",  
+                 "Leu_CTRL_CD8_T_cells", 
+                 "Leu_CTRL_B_cells",
+                 "Leu_CTRL_Plasma_cells",
+                 "Leu_CTRL_Endothelial_cells",
+                 "Leu_CTRL_VEXAS_GP", 
+                 "Leu_CTRL_VEXAS_NK_cells")
+#ALL 
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.x=element_text(size=10))+
+  theme(axis.text.y=element_text(size=8))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=2) +
+  ggtitle("GSEA Leu OSR Patients vs Control Pool") + 
+  theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
+  theme(legend.title = element_text(size = 10))+
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       #limits = c(-3.5, 3.5),
+                       limits = c(-4.5, 4.5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
+ggsave(filename = paste(plot.dir, paste("hallmark_Leu_CTRL", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 15, height = 15, dpi = 600)
+###ONE HEATMAP 
+all_hallmark <- rbind (hallmark.full_1, hallmark.full_2, hallmark.full_3)
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+hallmark.order.terms <- hallmark.order[order(hallmark.order$ID, decreasing = TRUE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+#ONE VS ALL COMPARISON ##
+level_order <- c("Thr_CTRL_Late_erythroid_cells","Val_CTRL_Late_erythroid_cells","Leu_CTRL_Late_erythroid_cells",
+                 "Thr_CTRL_Mid_erythroid_cells","Val_CTRL_Mid_erythroid_cells","Leu_CTRL_Mid_erythroid_cells",
+                 "Thr_CTRL_Early_erythroid",  "Val_CTRL_Early_erythroid", "Leu_CTRL_Early_erythroid", 
+                 "Thr_CTRL_Erythroid_progenitors", "Val_CTRL_Erythroid_progenitors","Leu_CTRL_Erythroid_progenitors",
+                 "Thr_CTRL_MEP","Val_CTRL_MEP","Leu_CTRL_MEP",
+                 "Thr_CTRL_MEP_BASO_MAST_prog","Val_CTRL_MEP_BASO_MAST_prog","Leu_CTRL_MEP_BASO_MAST_prog",
+                 "Thr_CTRL_Plasmacytoid_dentritic_cells", "Val_CTRL_Plasmacytoid_dentritic_cells","Leu_CTRL_Plasmacytoid_dentritic_cells",
+                 "Thr_CTRL_CD14+_CD16_low_monocytes","Val_CTRL_CD14+_CD16_low_monocytes","Leu_CTRL_CD14+_CD16_low_monocytes",
+                 "Thr_CTRL_CD14+_monocytes_2","Val_CTRL_CD14+_monocytes_2","Leu_CTRL_CD14+_monocytes_2",
+                 "Thr_CTRL_CD14+_monocytes_1", "Val_CTRL_CD14+_monocytes_1","Leu_CTRL_CD14+_monocytes_1",
+                 "Thr_CTRL_Myeloid_dendritic_cells", "Val_CTRL_Myeloid_dendritic_cells","Leu_CTRL_Myeloid_dendritic_cells",
+                 "Thr_CTRL_Immature_Neutrophil","Val_CTRL_Immature_Neutrophil","Leu_CTRL_Immature_Neutrophil",
+                 "Thr_CTRL_Prolif_GP","Val_CTRL_Prolif_GP","Leu_CTRL_Prolif_GP",
+                 "Thr_CTRL_GP","Val_CTRL_GP","Leu_CTRL_GP",
+                 "Thr_CTRL_HSC_MPP", "Val_CTRL_HSC_MPP","Leu_CTRL_HSC_MPP",
+                 "Thr_CTRL_MLP", "Val_CTRL_MLP","Leu_CTRL_MLP",
+                 "Thr_CTRL_CD4_T_cells", "Val_CTRL_CD4_T_cells","Leu_CTRL_CD4_T_cells",
+                 "Thr_CTRL_CD8_T_cells", "Val_CTRL_CD8_T_cells", "Leu_CTRL_CD8_T_cells", 
+                 "Thr_CTRL_B_cells","Val_CTRL_B_cells","Leu_CTRL_B_cells",
+                 "Thr_CTRL_Plasma_cells","Val_CTRL_Plasma_cells","Leu_CTRL_Plasma_cells", 
+                 "Val_CTRL_Endothelial_cells", "Leu_CTRL_Endothelial_cells",
+                 "Thr_CTRL_VEXAS_GP", "Val_CTRL_VEXAS_GP","Leu_CTRL_VEXAS_GP",
+                 "Thr_CTRL_VEXAS_NK_cells", "Val_CTRL_VEXAS_NK_cells", "Leu_CTRL_VEXAS_NK_cells")
+#ALL 
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.x=element_text(size=10))+
+  theme(axis.text.y=element_text(size=8))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=2) +
+  ggtitle("GSEA - All Mutations OSR Patients vs Control") + 
+  theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
+  theme(legend.title = element_text(size = 10))+
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       #limits = c(-3.5, 3.5),
+                       limits = c(-4.5, 4.5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
+ggsave(filename = paste(plot.dir, paste("hallmark_ALLMut", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 20, height = 10, dpi = 600)
+## PAZIENTI vs CONTROLLI ##
+#heatmap PZ vs CTRL
+hallmark_ds <- c("Pz_CTRL")
+clusters <- dir ("/DATA/31/molteni/vexas_bm/results/integration/GSEA_OSR_Wu/Pz_CTRL/")
+clusters <- gsub("Pz_CTRL_markers_cluster_", "", clusters)
+wdir="/DATA/31/molteni/vexas_bm/results/integration"
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  for (i in clusters) {
+    s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
+    ff <- paste(wdir, "GSEA_OSR_Wu/Pz_CTRL", s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
+    ds.t <- read.table(ff, header = T, sep = "\t")
+    ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+    ds.t.filt$Dataset <- paste(ds,i,sep="_")
+    if (nrow(hallmark.full) == 0) {
+      hallmark.full <- ds.t.filt
+    } else {
+      hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    }
+  }
+}
+hallmark.full_4 <- hallmark.full
+### ONE HEATMAP ###
+all_hallmark <- hallmark.full_4
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+level_order <- c("Pz_CTRL_Late_erythroid_cells",
+                 "Pz_CTRL_Mid_erythroid_cells",
+                 "Pz_CTRL_Early_erythroid", 
+                 "Pz_CTRL_Erythroid_progenitors",
+                 "Pz_CTRL_MEP",
+                 "Pz_CTRL_MEP_BASO_MAST_prog",
+                 "Pz_CTRL_Plasmacytoid_dentritic_cells" ,
+                 "Pz_CTRL_CD14+_CD16_low_monocytes",
+                 "Pz_CTRL_CD14+_monocytes_2",
+                 "Pz_CTRL_CD14+_monocytes_1",
+                 "Pz_CTRL_Myeloid_dendritic_cells",
+                 "Pz_CTRL_Immature_Neutrophil",
+                 "Pz_CTRL_Prolif_GP",
+                 "Pz_CTRL_GP",
+                 "Pz_CTRL_HSC_MPP",
+                 "Pz_CTRL_MLP",
+                 "Pz_CTRL_CD4_T_cells",  
+                 "Pz_CTRL_CD8_T_cells", 
+                 "Pz_CTRL_B_cells",
+                 "Pz_CTRL_Plasma_cells",
+                 "Pz_CTRL_Endothelial_cells",
+                 "Pz_CTRL_VEXAS_GP", 
+                 "Pz_CTRL_VEXAS_NK_cells")
+#ALL 
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.x=element_text(size=10))+
+  theme(axis.text.y=element_text(size=8))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=2) +
+  ggtitle("GSEA OSR Patients vs Control Pool") + 
+  theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
+  theme(legend.title = element_text(size = 10))+
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       #limits = c(-3.5, 3.5),
+                       limits = c(-4.5, 4.5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
+ggsave(filename = paste(plot.dir, paste("hallmark_Pz_CTRL", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 15, height = 15, dpi = 600)
--- a/BM/integration_BM.R
+++ b/BM/integration_BM.R
+library(Seurat)
+library(ggplot2)
+library(harmony)
+library(RColorBrewer)
+library(plotly)
+library(htmlwidgets)
+library(ComplexHeatmap)
+library(openxlsx)
+library(dplyr)
+library(future)
+library(gridExtra)
+library(SeuratWrappers)
+## Wu et al. public data (GSE196052)
+countsData_bm <- read.delim(file = "/DATA/31/molteni/vexas_sc/public/GSE196052_dataCount_BM.csv", header = TRUE, sep = ",", row.names = 1)
+sampleData <- read.delim(file = "/DATA/31/molteni/vexas_sc/public/BM_metaDatatSNECellType_ALiceManual.csv", header = TRUE, sep = ",")
+sampleData <- sampleData[,c(1:6)]
+obj_BM <- CreateSeuratObject(counts = countsData_bm)
+dataset_numbers <- sub(".*\\.", "", colnames(obj_BM))
+padded_numbers <- ifelse(as.numeric(dataset_numbers) < 10, 
+                         paste0("0", dataset_numbers),
+                         dataset_numbers)
+final_dataset_labels <- paste0("BM-PU-", padded_numbers)
+obj_BM$dataset <- final_dataset_labels
+obj_BM@meta.data$subject <- sampleData$subject
+obj_BM@meta.data$orig.ident <- obj_BM@meta.data$subject
+obj_BM@meta.data$sample_info <- "public"
+obj_BM@meta.data$subject <- NULL
+obj_BM@meta.data$dataset <- NULL
+## our vexas data
+BM_minimal <- readRDS("/DATA/31/molteni/vexas_bm/results/Full/Full_minimal.rds")
+BM_minimal@meta.data$scDblFinder_class <- NULL
+BM_minimal@meta.data$scds_class <- NULL
+BM_minimal@meta.data$RNA_Source <- NULL
+BM_minimal@meta.data$sample_info <- "vexas"
+BM.combo <- merge(BM_minimal, y = obj_BM, add.cell.ids = c("vexas", "vexas_public"))
+# normalize data and find variable features
+# select only 2000 genes as variable
+varfeatures <- 2000
+BM.combo <- NormalizeData(object = BM.combo)
+BM.combo <- FindVariableFeatures(BM.combo, selection.method = "vst",
+                                 nfeatures = varfeatures,
+                                 verbose = T)
+# scaling
+reg_vars = c("percent.mt", "nCount_RNA")
+BM.combo <- ScaleData(object = BM.combo, vars.to.regress = reg_vars,  display.progress = T, features = rownames(BM.combo))
+saveRDS(BM.combo, file = "combined_preIntegration_scaled.rds")
+BM.combo <- readRDS("/DATA/31/molteni/vexas_bm/results/integration/combined_preIntegration_scaled.rds")
+#PCA
+max_pca <- 70
+BM.combo <- RunPCA(object = BM.combo, features = VariableFeatures(object = BM.combo), npcs = max_pca, reduction.name="pca", reduction.key="PC_")
+BM.combo@meta.data$condition <- NA
+BM.combo@meta.data$condition <- with(BM.combo@meta.data,
+                                     ifelse(BM.combo@meta.data$orig.ident == "BM-01","PZ", 
+                                            ifelse(BM.combo@meta.data$orig.ident == "BM-02","PZ",
+                                                   ifelse(BM.combo@meta.data$orig.ident == "BM-03","PZ",
+                                                          ifelse(BM.combo@meta.data$orig.ident == "BM-04","PZ",
+                                                                 ifelse(BM.combo@meta.data$orig.ident == "BM-08","PZ",
+                                                                        ifelse(BM.combo@meta.data$orig.ident == "BM-09","PZ",
+                                                                               ifelse(BM.combo@meta.data$orig.ident == "PT1","PZ",
+                                                                                      ifelse(BM.combo@meta.data$orig.ident == "PT2","PZ",
+                                                                                             ifelse(BM.combo@meta.data$orig.ident == "PT3","PZ",
+                                                                                                    ifelse(BM.combo@meta.data$orig.ident == "PT4","PZ",
+                                                                                                           ifelse(BM.combo@meta.data$orig.ident == "PT5","PZ",
+                                                                                                                  ifelse(BM.combo@meta.data$orig.ident == "PT6","PZ",
+                                                                                                                         ifelse(BM.combo@meta.data$orig.ident == "PT7","PZ",
+                                                                                                                                ifelse(BM.combo@meta.data$orig.ident == "PT8","PZ",
+                                                                                                                                       ifelse(BM.combo@meta.data$orig.ident == "PT9","PZ",
+                                                                                                                                              ifelse(BM.combo@meta.data$orig.ident == "HD1","HD",
+                                                                                                                                                     ifelse(BM.combo@meta.data$orig.ident == "HD2","HD",
+                                                                                                                                                            ifelse(BM.combo@meta.data$orig.ident == "HD3","HD",
+                                                                                                                                                                   ifelse(BM.combo@meta.data$orig.ident == "HD4","HD", "NA"))))))))))))))))))))
+integration.var <- c("orig.ident", "sample_info", "condition") ## + condition 
+BM.combo <- RunHarmony(object = BM.combo, 
+                       group.by.vars = integration.var, 
+                       max.iter.harmony = 30, plot_convergence = FALSE, reduction.save = "harmony")
+#PC = 15
+setwd("/DATA/31/molteni/vexas_bm/results/integration/pc15")
+BM.combo <- RunUMAP(object = BM.combo, seed.use = 123, dims = 1:15, 
+                    reduction = "harmony", reduction.name = "harmony_umap", reduction.key = "UMAPh_", return.model = TRUE)
+g <- DimPlot(BM.combo, split.by = "orig.ident", group.by = "sample_info", reduction = "harmony_umap", raster = F, ncol = 6) + ggtitle("UMAP post-harmony")
+ggsave(g, filename = 'UMAP_PostHarmony_donor_cond.png', width = 12, height = 6) 
+# split by
+Idents(BM.combo) <- "sample_info"
+g <- DimPlot(BM.combo, split.by = "sample_info", reduction = "harmony_umap") + ggtitle("UMAP post-harmony")
+ggsave(g, filename = 'UMAP_PostHarmony_batch_cond2.png', width = 7, height = 6)
+Idents(BM.combo) <- "condition"
+g <- DimPlot(BM.combo, split.by = "condition", reduction = "harmony_umap") + ggtitle("UMAP post-harmony")
+ggsave(g, filename = 'UMAP_PostHarmony_cond_2.png', width = 7, height = 6)
+#PC=15
+BM.combo <- FindNeighbors(object = BM.combo,
+                          dims = 1:15,
+                          force.recalc = T,
+                          reduction = "harmony",
+                          graph.name = c("RNA_nn_h.", "RNA_snn_h."))
+BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.4)
+BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.6)
+BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.8)
+# SPLIT CLUSTER 20 OF RES=0.8
+Idents(BM.combo) <- "RNA_snn_h._res.0.8"
+BM.combo <- FindSubCluster(BM.combo, cluster= "20", graph.name = "RNA_snn_h.", subcluster.name = "subcluster", resolution = 0.05)
+table(BM.combo$subcluster)
+Idents(BM.combo) <- "subcluster"
+mapping_vector <- c("20_0" = "20", 
+                    "20_1" = "25")
+# Update the reference object 
+for(old_ident in names(mapping_vector)) {
+  BM.combo$subcluster[Idents(BM.combo) == old_ident] <- mapping_vector[old_ident]
+}
+BM.combo$RNA_snn_h._res.0.8 <- BM.combo$subcluster
+BM.combo@misc$markers <- FindAllMarkers(object = BM.combo, min.pct = 0.2, logfc.threshold = 0.25, only.pos = FALSE,
+                           min.cells.group = 10, test.use = "wilcox", return.thresh = 0.05, fc.name = "avg_logFC")
+markers_h <- list(BM.combo@misc$markers_h)
+names(markers_h) <- "RNA_snn_h._res.0.8"
+BM.combo@misc$markers_h <- markers_h
+saveRDS(BM.combo, file = "/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_final.rds")
+#custom cluster annotation
+Idents(BM.combo) <- "RNA_snn_h._res.0.8"
+# mapping vector
+new.cluster.ids <- c("CD8_T_cells","Early_erythroid","CD14+_monocytes_1","CD4_T_cells",
+                     "GP","Immature_Neutrophil","HSC_MPP","VEXAS_NK_cells","B_cells","MEP",
+                     "Erythroid_progenitors","Mid_erythroid_cells","Prolif_GP","VEXAS_GP",
+                     "CD14+_monocytes_2","Early_erythroid","Plasma_cells","Late_erythroid_cells",
+                     "CD14+_CD16_low_monocytes","Myeloid_dendritic_cells","Plasmacytoid_dentritic_cells",
+                     "CD8_T_cells","MLP","MEP_BASO_MAST_prog","Endothelial_cells","B_cells")
+cluster_mapping <- setNames(new.cluster.ids, c(1:length(new.cluster.ids)))
+BM.combo$celltypes_annot <- new.cluster.ids[as.numeric(as.character(BM.combo$RNA_snn_h._res.0.8)) + 1]
+Idents(BM.combo) <- "celltypes_annot"
+ggsave("UMAP_label.png", width = 10, height = 7)
+DimPlot(BM.combo, reduction = "harmony_umap", label = TRUE, label.box = TRUE,  pt.size = 0.5, repel = TRUE) + NoLegend()
+saveRDS(BM.combo, file = "/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_anno.rds")
--- a/BM/markers_BM.R
+++ b/BM/markers_BM.R
+library(Seurat)
+library(dplyr)
+BM.combo <- readRDS("/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_anno.rds")
+BM.combo@meta.data$mutations <- with(BM.combo@meta.data,
+                                     ifelse(BM.combo@meta.data$orig.ident == "BM-01","THR",
+                                            ifelse(BM.combo@meta.data$orig.ident == "BM-02","VAL",
+                                                   ifelse(BM.combo@meta.data$orig.ident == "BM-03","VAL", 
+                                                          ifelse(BM.combo@meta.data$orig.ident == "BM-04","LEU",
+                                                                 ifelse(BM.combo@meta.data$orig.ident == "BM-08","VAL",
+                                                                        ifelse(BM.combo@meta.data$orig.ident == "BM-09","LEU",
+                                                                               ifelse(BM.combo@meta.data$orig.ident == "PT1","VAL",
+                                                                                      ifelse(BM.combo@meta.data$orig.ident == "PT2","THR",
+                                                                                             ifelse(BM.combo@meta.data$orig.ident == "PT3","THR",
+                                                                                                    ifelse(BM.combo@meta.data$orig.ident == "PT4","THR",
+                                                                                                           ifelse(BM.combo@meta.data$orig.ident == "PT5","THR",
+                                                                                                                  ifelse(BM.combo@meta.data$orig.ident == "PT6","LEU",
+                                                                                                                         ifelse(BM.combo@meta.data$orig.ident == "PT7","LEU",
+                                                                                                                                ifelse(BM.combo@meta.data$orig.ident == "PT8","THR",
+                                                                                                                                       ifelse(BM.combo@meta.data$orig.ident == "PT9","THR",
+                                                                                                                                                     ifelse(BM.combo@meta.data$orig.ident == "HD1","CTRL",
+                                                                                                                                                            ifelse(BM.combo@meta.data$orig.ident == "HD2","CTRL",
+                                                                                                                                                                   ifelse(BM.combo@meta.data$orig.ident == "HD3","CTRL",
+                                                                                                                                                                          ifelse(BM.combo@meta.data$orig.ident == "HD4","CTRL","NA"))))))))))))))))))))
+#"BM-01","BM-02","BM-03","BM-04", "BM-08","BM-09",
+#COMPARISON THR VEXAS PATIENTS vs HEALTHY CONTROLS WU
+#select only the subjects involved in the comparison by mutations
+Idents(BM.combo) <- "orig.ident"
+BM.combo_subset <- subset(BM.combo, idents = c("BM-01","HD1", "HD2", "HD3", "HD4"))
+clusters <- unique(BM.combo_subset$celltypes_annot)
+Idents(BM.combo_subset) <- "celltypes_annot"
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Thr_CTRL/")
+for(i in clusters) {
+  cells_group1 = sum(BM.combo_subset$mutations == "THR" & BM.combo_subset$celltypes_annot == i)
+  cells_group2 = sum(BM.combo_subset$mutations == "CTRL" & BM.combo_subset$celltypes_annot == i)
+  if(cells_group1 >= 3 & cells_group2 >= 3 ) {
+    markers <- FindMarkers(BM.combo_subset, ident.1 = "THR", group.by = "mutations", subset.ident = i)
+    markers['cluster'] <- i
+    filename=paste("markers_cluster_",i,".csv", sep="")
+    write.csv(markers, file=filename)
+  }
+  else {
+    print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
+  }
+}
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers/Thr_CTRL")
+df <- list.files(path="/DATA/31/molteni/vexas_bm/results/integration/DE_markers/Thr_CTRL") %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/summary")
+write.csv(df, file="THRvsCTRL_sigMarkers.csv")
+#COMPARISON VAL VEXAS PATIENTS vs HEALTHY CONTROLS WU
+#select only the subjects involved in the comparison
+Idents(BM.combo) <- "orig.ident"
+BM.combo_subset <- subset(BM.combo, idents = c("BM-02","BM-03","BM-08","HD1", "HD2", "HD3", "HD4"))
+clusters <- unique(BM.combo_subset$celltypes_annot)
+Idents(BM.combo_subset) <- "celltypes_annot"
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Val_CTRL/")
+for(i in clusters) {
+  cells_group1 = sum(BM.combo_subset$mutations == "VAL" & BM.combo_subset$celltypes_annot == i)
+  cells_group2 = sum(BM.combo_subset$mutations == "CTRL" & BM.combo_subset$celltypes_annot == i)
+  if(cells_group1 >= 3 & cells_group2 >= 3 ) {
+    markers <- FindMarkers(BM.combo_subset, ident.1 = "VAL", group.by = "mutations", subset.ident = i)
+    markers['cluster'] <- i
+    filename=paste("markers_cluster_",i,".csv", sep="")
+    write.csv(markers, file=filename)
+  }
+  else {
+    print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
+  }
+}
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Val_CTRL")
+df <- list.files(path="/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Val_CTRL") %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/summary")
+write.csv(df, file="VALvsCTRL_sigMarkers.csv")
+#COMPARISON LEU VEXAS PATIENTS vs HEALTHY CONTROLS WU
+#select only the subjects involved in the comparison
+Idents(BM.combo) <- "orig.ident"
+BM.combo_subset <- subset(BM.combo, idents = c("BM-04", "BM-09","HD1", "HD2", "HD3", "HD4"))
+clusters <- unique(BM.combo_subset$celltypes_annot)
+Idents(BM.combo_subset) <- "celltypes_annot"
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Leu_CTRL/")
+for(i in clusters) {
+  cells_group1 = sum(BM.combo_subset$mutations == "LEU" & BM.combo_subset$celltypes_annot == i)
+  cells_group2 = sum(BM.combo_subset$mutations == "CTRL" & BM.combo_subset$celltypes_annot == i)
+  if(cells_group1 >= 3 & cells_group2 >= 3 ) {
+    markers <- FindMarkers(BM.combo_subset, ident.1 = "LEU", group.by = "mutations", subset.ident = i)
+    markers['cluster'] <- i
+    filename=paste("markers_cluster_",i,".csv", sep="")
+    write.csv(markers, file=filename)
+  }
+  else {
+    print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
+  }
+}
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Leu_CTRL")
+df <- list.files(path="/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Leu_CTRL") %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/summary")
+write.csv(df, file="LEUvsCTRL_sigMarkers.csv")
+#COMPARISON VEXAS PATIENTS vs HEALTHY CONTROLS WU
+#select only the subjects involved in the comparison
+Idents(BM.combo) <- "orig.ident"
+BM.combo_subset <- subset(BM.combo, idents = c("BM-01", "BM-02", "BM-03", "BM-04", "BM-08","BM-09","HD1", "HD2", "HD3", "HD4"))
+clusters <- unique(BM.combo_subset$celltypes_annot)
+Idents(BM.combo_subset) <- "celltypes_annot"
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Pz_CTRL/")
+for(i in clusters) {
+  cells_group1 = sum(BM.combo_subset$condition == "PZ" & BM.combo_subset$celltypes_annot == i)
+  cells_group2 = sum(BM.combo_subset$condition == "HD" & BM.combo_subset$celltypes_annot == i)
+  if(cells_group1 >= 3 & cells_group2 >= 3 ) {
+    markers <- FindMarkers(BM.combo_subset, ident.1 = "PZ", group.by = "condition", subset.ident = i)
+    markers['cluster'] <- i
+    filename=paste("markers_cluster_",i,".csv", sep="")
+    write.csv(markers, file=filename)
+  }
+  else {
+    print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
+  }
+}
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Pz_CTRL")
+df <- list.files(path="/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Pz_CTRL") %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/summary")
+write.csv(df, file="PZvsCTRL_sigMarkers.csv")
--- a/BM/vulcano.R
+++ b/BM/vulcano.R
+library(dplyr)
+library(ggplot2)
+library(ggrepel)
+args <- commandArgs(trailingOnly = TRUE)
+file_path <- args[1]
+max_log2FC <- 5
+min_log2FC <- -5
+min_p_val_adj <-  Il valore minimo di p-value adjusted passato da Python
+# Se il minimo p-value adjusted ?? inferiore a 10^-100, fissalo a 10^-100
+min_p_val_adj <- 10^-100
+raw <- read.table("/DATA/31/molteni/vexas_bm/results/integration/DE_markers/Pz_CTRL/markers_cluster_Late_erythroid_cells.csv", header = TRUE, sep = ",", quote = "\"", dec = ".")
+adj.pval.thr <- 0.05
+# Imposta un limite inferiore per i p-value adjusted direttamente nei dati
+raw$p_val_adj <- pmax(raw$p_val_adj, min_p_val_adj)
+data <- data.frame(gene = raw$X, pval = -log10(raw$p_val_adj), lfc = raw$avg_log2FC)
+data <- na.omit(data)
+data <- mutate(data, color = case_when(
+  data$lfc > 0 & data$pval > -log10(adj.pval.thr) ~ "Overexpressed",
+  data$lfc < 0 & data$pval > -log10(adj.pval.thr) ~ "Underexpressed",
+  TRUE ~ "NonSignificant"
+))
+# Seleziona i geni di interesse per etichettarli
+# HBG1/2, ALAS2, HBB, HBD, FECH, NCOA4, SLC25A37, GLRX5, EIF2AK1.
+selected_genes <- c("HBG1" ,"HBG2", "ALAS2", "HBB", "HBD", "FECH", "NCOA4", "SLC25A37", "GLRX5", "EIF2AK1")
+highl <-  data %>% filter(gene %in% selected_genes)
+highl <- head(subset(data, color != "NonSignificant"), 20)
+vol <- ggplot(data, aes(x = lfc, y = pval, colour = color)) +
+  geom_point(size = 2, alpha = 0.8, na.rm = TRUE) +
+  geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray", linetype = "dashed") +
+  scale_color_manual(name = "Expression", values = c(Overexpressed = "#CD4F39", Underexpressed = "#008B00", NonSignificant = "darkgray")) +
+  geom_label_repel(data = highl, aes(label = gene), box.padding = 0.35, point.padding = 0.5, size = 5, max.overlaps = 10) +
+  theme_bw(base_size = 12) +
+  theme(legend.position = "right") +
+  xlab("log2 Fold Change") +
+  ylab(expression(-log[10]("adjusted p-value"))) +
+  xlim(min_log2FC, max_log2FC) +
+  ylim(0, max(-log10(min_p_val_adj), -log10(raw$p_val_adj)) + 1) # Assicura che il plot includa tutti i valori di p-value
+# Salva il plot
+ggsave("volcano_late_ery.pdf", plot = vol, width = 11, height = 8, dpi = 300)
--- a/vexas_mouse_3/heatmap.R
+++ b/vexas_mouse_3/heatmap.R
+# HALLMARK GSEA
+library('dplyr')
+library('ggplot2')
+library('reshape2')
+library('RColorBrewer')
+plot.dir="/DATA/31/molteni/vexas_mouse_3/results/heatmap/"
+wdir="/DATA/31/molteni/vexas_mouse_3/results"
+#heatmap custom cluster & order (05/06/24)
+wdir="/DATA/31/molteni/vexas_mouse_3/results"
+plot.dir="/DATA/31/molteni/vexas_mouse_3/results/heatmap/"
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
+hallmark_ds <- c("A_C")
+clusters <- unique(Full_final$celltypes_annot)
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  for (i in clusters) {
+    s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
+    ff <- paste(wdir, "GSEA", "AvsC",s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
+    ds.t <- read.table(ff, header = T, sep = "\t")
+    ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+    ds.t.filt$Dataset <- paste(ds,i,sep="_")
+    if (nrow(hallmark.full) == 0) {
+      hallmark.full <- ds.t.filt
+    } else {
+      hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    }
+  }
+}
+hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
+##
+all_hallmark <- hallmark.full.filt
+all_hallmark$Dataset <- gsub("A_C", "VEXAS-like_WT", all_hallmark$Dataset)
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+level_order <- c("VEXAS-like_WT_Erythroid_prog", "VEXAS-like_WT_Prolif_Myelo_NK_prec", "VEXAS-like_WT_Promyelocyte",
+                 "VEXAS-like_WT_Monocyte_biased_prog","VEXAS-like_WT_pDC_biased_prog","VEXAS-like_WT_GMP_2",
+                 "VEXAS-like_WT_GMP_1","VEXAS-like_WT_CMP","VEXAS-like_WT_HSC_MPP",
+                 "VEXAS-like_WT_LMPP","VEXAS-like_WT_CLP", "VEXAS-like_WT_Prolif_lymphoid_prec", 
+                 "VEXAS-like_WT_B_cell_progenitors_1", "VEXAS-like_WT_B_cell_progenitors_2")
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.y=element_text(size=6))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=3) +
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       limits = c(-5, 5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+ggsave(filename = paste(plot.dir, paste("hallmark_AvsC_ordered_last", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 10, height = 8, dpi = 600)
+#heatmap specific comparison GMP2 vs GMP1 (05/06/24)
+wdir="/DATA/31/molteni/vexas_mouse_3/results"
+plot.dir="/DATA/31/molteni/vexas_mouse_3/results/heatmap/"
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
+hallmark_ds <- c("A", "C", "Full")
+clusters <- unique(Full_final$celltypes_annot)
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  #for (i in clusters) {
+    #s <- paste("_",ds,sep="") #qui cambiare 1 con i e inserire ciclo for
+    ff <- paste(wdir, "GSEA_GMP2_GMP1", paste("GMP2vsGMP1" , ds , sep="_"),"tables", paste(paste("GMP2vsGMP1" , ds , sep="_"),"NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
+    ds.t <- read.table(ff, header = T, sep = "\t")
+    ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+    ds.t.filt$Dataset <- paste(ds,"GMP2vsGMP1",sep="_")
+    if (nrow(hallmark.full) == 0) {
+      hallmark.full <- ds.t.filt
+    } else {
+      hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    #}
+  }
+}
+hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
+##
+all_hallmark <- hallmark.full.filt
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+level_order <- c("A_GMP2vsGMP1", "C_GMP2vsGMP1", "Full_GMP2vsGMP1")
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.y=element_text(size=6))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=3) +
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       limits = c(-5, 5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+ggsave(filename = paste(plot.dir, paste("hallmark_GMP2_GMP1", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 10, height = 8, dpi = 600)
+#heatmap specific comparison Bprog1 vs BProg2 (05/06/24)
+wdir="/DATA/31/molteni/vexas_mouse_3/results"
+plot.dir="/DATA/31/molteni/vexas_mouse_3/results/heatmap/"
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
+hallmark_ds <- c("A", "C", "Full")
+clusters <- unique(Full_final$celltypes_annot)
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  #for (i in clusters) {
+  #s <- paste("_",ds,sep="") #qui cambiare 1 con i e inserire ciclo for
+  ff <- paste(wdir, "GSEA_BP1_BP2", paste("BP1vsBP2" , ds , sep="_"),"tables", paste(paste("BP1vsBP2" , ds , sep="_"),"NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
+  ds.t <- read.table(ff, header = T, sep = "\t")
+  ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+  ds.t.filt$Dataset <- paste(ds,"BP1vsBP2",sep="_")
+  if (nrow(hallmark.full) == 0) {
+    hallmark.full <- ds.t.filt
+  } else {
+    hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    #}
+  }
+}
+hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
+##
+all_hallmark <- hallmark.full.filt
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+level_order <- c("A_BP1vsBP2", "C_BP1vsBP2", "Full_BP1vsBP2")
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.y=element_text(size=6))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=3) +
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       limits = c(-5, 5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+ggsave(filename = paste(plot.dir, paste("hallmark_BP1_BP2", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 10, height = 8, dpi = 600)
--- a/vexas_mouse_3/markers_mouse.R
+++ b/vexas_mouse_3/markers_mouse.R
+library(Seurat)
+library(dplyr)
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final.rds")
+# computation on cluster numbers
+setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_markers/")
+Idents(Full_final)<-"RNA_snn_h.orig.ident_res.0.6"
+clusters <- levels(Full_final@meta.data$RNA_snn_h.orig.ident_res.0.6)
+for(i in 0:length(clusters)) {
+  markers <- FindMarkers(Full_final, ident.1 = "A", group.by = "orig.ident", subset.ident = i)
+  markers['cluster'] <- i
+  filename=paste("markers_cluster_",i,".csv", sep="")
+  write.csv(markers, file=filename)
+}
+df <- list.files(path='/DATA/31/molteni/vexas_mouse_3/results/DE_markers/') %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+write.csv(df, file="AvsC_sigMarkers.csv")
+cells_by_sample <- table(Full_final@meta.data$RNA_snn_h.orig.ident_res.0.6, Full_final@meta.data$orig.ident)
+write.csv(cells_by_sample, file="cells_bysample.csv")
+# computation on custom annotation
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot.rds")
+setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_markers2/new/")
+clusters <- unique (Full_final$celltypes_annot)
+for(i in clusters) {
+  markers <- FindMarkers(Full_final, ident.1 = "A", group.by = "orig.ident", subset.ident = i)
+  markers['cluster'] <- i
+  filename=paste("markers_cluster_",i,".csv", sep="")
+  write.csv(markers, file=filename)
+}
+df <- list.files(path='/DATA/31/molteni/vexas_mouse_3/results/DE_markers2/new/') %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_mouse_3/results/summary")
+write.csv(df, file="AvsC_sigMarkers_custom.csv")
+cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$celltypes_annot))
+colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
+write.csv(cc_distribution, file="cell_cycle.csv")
+# computation on custom annotation after filtering cluster 19, res 1.2
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
+setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_markers/AvsC/")
+clusters <- unique (Full_final$celltypes_annot)
+for(i in clusters) {
+  markers <- FindMarkers(Full_final, ident.1 = "A", group.by = "orig.ident", subset.ident = i)
+  markers['cluster'] <- i
+  filename=paste("markers_cluster_",i,".csv", sep="")
+  write.csv(markers, file=filename)
+}
+df <- list.files(path='/DATA/31/molteni/vexas_mouse_3/results/DE_markers/AvsC/') %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_mouse_3/results/summary")
+write.csv(df, file="AvsC_sigMarkers_custom.csv")
+cells_by_sample <- table(Full_final@meta.data$celltypes_annot, Full_final@meta.data$orig.ident)
+write.csv(cells_by_sample, file="cells_bysample.csv")
+cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$celltypes_annot))
+colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
+write.csv(cc_distribution, file="cell_cycle.csv")
+#markers tra cluster specifici
+## GMP2 VS GMP2
+setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_GMP2_GMP1")
+Idents(Full_final) <- "celltypes_annot"
+markers <- FindMarkers(Full_final, ident.1 = "GMP_2", ident.2 = "GMP_1")
+write.csv(markers, file="GMP2vsGMP1_Full.csv")
+Idents(Full_final) <- "orig.ident"
+Full_final_A <- subset(Full_final, idents = "A")
+Idents(Full_final_A) <- "celltypes_annot"
+markers <- FindMarkers(Full_final_A, ident.1 = "GMP_2", ident.2 = "GMP_1")
+write.csv(markers, file="GMP2vsGMP1_A.csv")
+Idents(Full_final) <- "orig.ident"
+Full_final_C <- subset(Full_final, idents ="C")
+Idents(Full_final_C) <- "celltypes_annot"
+markers <- FindMarkers(Full_final_C, ident.1 = "GMP_2", ident.2 = "GMP_1")
+write.csv(markers, file="GMP2vsGMP1_C.csv")
+## B Cell Prog 1 vs B Cell Prog 2
+setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_BP1_BP2")
+Idents(Full_final) <- "celltypes_annot"
+markers <- FindMarkers(Full_final, ident.1 = "B_cell_progenitors_1", ident.2 = "B_cell_progenitors_2")
+write.csv(markers, file="BP1vsBP2_Full.csv")
+Idents(Full_final_A) <- "celltypes_annot"
+markers <- FindMarkers(Full_final_A, ident.1 = "B_cell_progenitors_1", ident.2 = "B_cell_progenitors_2")
+write.csv(markers, file="BP1vsBP2_A.csv")
+Idents(Full_final_C) <- "celltypes_annot"
+markers <- FindMarkers(Full_final_C, ident.1 = "B_cell_progenitors_1", ident.2 = "B_cell_progenitors_2")
+write.csv(markers, file="BP1vsBP2_C.csv")
+setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_markers/AvsC/")
+df <- list.files(path='/DATA/31/molteni/vexas_mouse_3/results/DE_markers/AvsC/') %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+#df <- df[df$p_val_adj<0.05,]
+top50 <- df %>%
+  group_by(cluster) %>%
+  dplyr::filter(avg_log2FC > 0) %>%
+  slice_head(n = 50) %>%
+  ungroup() -> top50
+write.csv(top50, file="top50_by_cluster.csv")
--- a/vexas_mouse_3/pseudotime.R
+++ b/vexas_mouse_3/pseudotime.R
+obj <- readRDS ("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
+assay = "RNA"
+reduction <- toupper(paste0("umap.harmony.orig.ident"))
+clu_res <- "celltypes_annot"
+obj <- SetIdent(obj, value = clu_res)
+# convert to scExperiment object
+obj_sc <- as.SingleCellExperiment(obj)
+out_dir <- "/DATA/31/molteni/vexas_mouse_3/results/pseudotime_filt2_FINAL/"
+cell_pal <- function(cell_vars, pal_fun,...) {
+  if (is.numeric(cell_vars)) {
+    pal <- pal_fun(100, ...)
+    return(pal[cut(cell_vars, breaks = 100)])
+  } else {
+    categories <- sort(unique(cell_vars))
+    pal <- setNames(pal_fun(length(categories), ...), categories)
+    return(pal[cell_vars])
+  }
+}
+#my_cols <- hue_pal()(length(levels(obj_sc[[clu_res]])))
+#my_cols  <- colorRampPalette(pal_npg("nrc")(10))(16) # nature publishing group palette
+my_cols <- colorRampPalette(brewer.pal(12, "Paired"))(length(levels(obj_sc[[clu_res]])))
+my_cols <- brewer.pal(12, "Paired")[1:14] #inserire numero di cluster
+names(my_cols) <- levels(obj_sc[[clu_res]])
+#cell_colors_clust <- "black"
+cell_colors_clust <- my_cols[obj_sc[[clu_res]]]
+# Settings: resolution 1.2 harmony - starting point - cluster 3
+namex <- toupper("umap.harmony.orig.ident")
+scs <- slingshot(data = obj_sc,
+                 reducedDim = reduction, 
+                 start.clus = "HSC_MPP", 
+                 extend = 'n', 
+                 clusterLabels = obj_sc[[clu_res]])
+pal <- viridis::inferno(100,end = 0.95)
+sl_name <- 'Slingshot'
+pt <- slingPseudotime(scs)
+head(pt)
+#se clu_res non è un numero#####
+# Assuming clu_res is a character vector with cluster assignments like "A", "B", "C", etc.
+# Convert it to a factor
+factor_clu_res <- as.factor(obj_sc[[clu_res]])
+# Then convert the factor to numeric
+# The levels will be assigned numbers in the order they appear in the factor
+numeric_clu_res <- as.numeric(factor_clu_res)
+# Now you can use numeric_clu_res to index into your colors
+cell_colors_clust <- my_cols[numeric_clu_res]
+#grafici
+for (num_curve in seq(1, length(colnames(pt)))) {
+  lineagename <- paste('Curve',as.character(num_curve),sep='')
+  colors <- pal[cut(pt[,num_curve], breaks = 100)]
+  # Set the filename and create a PNG device
+  pdf(paste(out_dir, paste0(sl_name, "_branch_", num_curve, ".pdf"), sep = "/"), width = 1500/90, height = 750/90) 
+  # Start a new plot
+  plot.new()
+  #grid.newpage()
+  pushViewport(viewport(layout = grid.layout(1, 2))) # Adjusted layout grid to 1 row and 2 columns
+  # Pseudotime plot with adjusted margins
+  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
+  par(mar = c(5, 5, 5, 2) + 0.1) # Adjust margins here; default is c(5, 4, 4, 2) + 0.1
+  par(fig = gridFIG(), new = TRUE)
+  plot(reducedDim(scs, type = namex), col = colors, 
+       pch = 16, cex = 1, main = "Pseudotime", cex.axis=2.0, cex.lab=2.0, cex.main=2.2) # Adjusted cex.lab for label size
+  lines(SlingshotDataSet(scs), type = 'lineages', show.constraints = TRUE, col = "black")
+  lines(slingCurves(scs)[[num_curve]], lwd = 2, col = 'red')
+  popViewport()
+  # Sample density plot with adjusted margins
+  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
+  par(mar = c(5, 5, 5, 2) + 0.1) # Adjust margins here; default is c(5, 4, 4, 2) + 0.1
+  par(fig = gridFIG(), new = TRUE)
+  ds <- list()
+  for (samp in unique(colData(scs)$sample_label)) {
+    ds[[samp]] <- density(pt[colData(scs)$sample_label == samp, num_curve], na.rm = TRUE)
+  }
+  # Setting the plot limits
+  x_min <- min(sapply(ds, function(x) min(x$x, na.rm = TRUE)))
+  x_max <- max(sapply(ds, function(x) max(x$x, na.rm = TRUE)))
+  y_min <- min(sapply(ds, function(x) min(x$y, na.rm = TRUE)))
+  y_max <- max(sapply(ds, function(x) max(x$y, na.rm = TRUE)))
+  xlim <- range(c(x_min, x_max))
+  ylim <- range(c(y_min, y_max))
+  plot(xlim, ylim, type = "n", xlab = "Pseudotime", ylab = "", main = paste0("Sample density on branch ", num_curve),
+       cex.axis=2.0, cex.lab=2.0, cex.main=2.2)
+  for (samp in names(ds)) {
+    polygon(c(min(ds[[samp]]$x), ds[[samp]]$x, max(ds[[samp]]$x)), c(0, ds[[samp]]$y, 0),
+            col = alpha(brewer.pal(length(names(ds)), "Set1")[which(names(ds) == samp)], alpha = .5))
+  }
+  legend("topright", legend = names(ds), fill = alpha(brewer.pal(length(names(ds)), "Set1"), alpha = .5), bty = "n", cex = 2.0)
+  popViewport()
+  # Close the graphical device
+  dev.off()
+}
--- a/vexas_mouse_4/heatmap.R
+++ b/vexas_mouse_4/heatmap.R
+# HALLMARK GSEA
+library('dplyr')
+library('ggplot2')
+library('reshape2')
+library('RColorBrewer')
+plot.dir="/DATA/31/molteni/vexas_mouse_4/results/heatmap/"
+wdir="/DATA/31/molteni/vexas_mouse_4/results"
+# 
+# #heatmap custom cluster
+# 
+# 
+plot.dir="/DATA/31/molteni/vexas_mouse_4/results/heatmap/"
+wdir="/DATA/31/molteni/vexas_mouse_4/results"
+#custom
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_4/results/Full/01-seurat/Full_final_annot.rds")
+hallmark_ds <- c("A_B")
+clusters <- unique(Full_final$celltypes_annot)
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  for (i in clusters) {
+    s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
+    ff <- paste(wdir, "GSEA", "AvsB",s, "tables", paste(s, "NA-gsea-h.all.v7.2.sym_mouse.txt", sep = "-"), sep = "/")
+    ds.t <- read.table(ff, header = T, sep = "\t")
+    ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+    ds.t.filt$Dataset <- paste(ds,i,sep="_")
+    if (nrow(hallmark.full) == 0) {
+      hallmark.full <- ds.t.filt
+    } else {
+      hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    }
+  }
+}
+hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
+##
+all_hallmark <- hallmark.full.filt
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+level_order <- c("A_B_HSC_MPP", "A_B_CMP", "A_B_GMP", "A_B_GMP_MEP", "A_B_Myelocytes", "A_B_Neutrophils", 
+                 "A_B_MEP_Baso_Mast_cells", "A_B_Plasmacytoid_DC_prog", "A_B_Conventional_DC_prog", "A_B_Proliferating_myeloid_cells",
+                 "A_B_Pro_monocyte", "A_B_Monocytes", "A_B_Dendritic_cells", "A_B_Macrophages", "A_B_Erythroid_cells",
+                 "A_B_Undefined_1", "A_B_Undefined_2")
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.y=element_text(size=6))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=3) +
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       limits = c(-4.5, 4.5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+ggsave(filename = paste(plot.dir, paste("hallmark_AvsB", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 10, height = 10, dpi = 600)
--- a/vexas_mouse_4/markers.R
+++ b/vexas_mouse_4/markers.R
+library(Seurat)
+library(dplyr)
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_4/results/Full/01-seurat/Full_final.rds")
+# #########################################################################################################################
+# # computation on custom annotation
+# 
+Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_4/results/Full/01-seurat/Full_final_annot.rds")
+setwd("/DATA/31/molteni/vexas_mouse_4/results/DE_markers/AvsB/")
+clusters <- unique(Full_final@meta.data$celltypes_annot)
+for(i in clusters) {
+  markers <- FindMarkers(Full_final, ident.1 = "Pool_A_m", group.by = "orig.ident", subset.ident = i)
+  markers['cluster'] <- i
+  filename=paste("markers_cluster_",i,".csv", sep="")
+  write.csv(markers, file=filename)
+}
+df <- list.files(path="/DATA/31/molteni/vexas_mouse_4/results/DE_markers/AvsB") %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_mouse_4/results/summary/")
+write.csv(df, file="PoolA_mvsPoolB_m_sigMarkers.csv")
+cells_by_sample <- table(Full_final@meta.data$celltypes_annot, Full_final@meta.data$orig.ident)
+write.csv(cells_by_sample, file="cells_bysample.csv")
+cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$celltypes_annot))
+colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
+write.csv(cc_distribution, file="cell_cycle.csv")
--- a/vexas_sorted_4/heatmap_vexas_sorted_4.R
+++ b/vexas_sorted_4/heatmap_vexas_sorted_4.R
+# HALLMARK GSEA
+library('dplyr')
+library('ggplot2')
+library('reshape2')
+library('RColorBrewer')
+plot.dir="/DATA/31/molteni/vexas_sorted_4/results/heatmap/"
+wdir="/DATA/31/molteni/vexas_sorted_4/results"
+# #heatmap custom cluster
+# 
+Full_final <- readRDS("/DATA/31/molteni/vexas_sorted_4/results/Full/01-seurat/Full_final_annot.rds")
+hallmark_ds <- c("A_B")
+clusters <- unique(Full_final$celltypes_annot)
+hallmark.full <- data.frame()
+for (ds in hallmark_ds) {
+  for (i in clusters) {
+    s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
+    ff <- paste(wdir, "GSEA", "AvsB",s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
+    ds.t <- read.table(ff, header = T, sep = "\t")
+    ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
+    ds.t.filt$Dataset <- paste(ds,i,sep="_")
+    if (nrow(hallmark.full) == 0) {
+      hallmark.full <- ds.t.filt
+    } else {
+      hallmark.full <- rbind(hallmark.full, ds.t.filt)
+    }
+  }
+}
+hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
+##
+all_hallmark <- hallmark.full.filt
+all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", ""))  # Create column of significance labels
+all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
+hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
+hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
+all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
+colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
+all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
+colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
+all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
+all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
+level_order <- c("A_B_Erythroid_cells", "A_B_Plasmacytoid_DC", "A_B_Conv_DC1", "A_B_Conv_DC2", "A_B_Monocytes",
+                 "A_B_GMP", "A_B_MEP_Baso_Mast", "A_B_HSC_MPP", "A_B_Cycling_prog", "A_B_Cycling_lymph_prog", "A_B_PreBNK",
+                 "A_B_ProB_cells_1","A_B_ProB_cells_2","A_B_ProB_cells_3", "A_B_Immature_B_cells", "A_B_Undefined1", "A_B_Undefined2","A_B_Undefined3")
+all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
+p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
+  theme_bw(base_size = 12) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  theme(axis.text.y=element_text(size=6))+
+  geom_tile(aes(fill = NES), colour = "white") +
+  geom_text(aes(label = STARS), color="black", size=3) +
+  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
+                       limits = c(-5, 5),
+                       na.value = "grey"
+                       #guide = guide_colourbar(reverse = TRUE)
+  ) +
+  ylab("") + 
+  xlab("") +
+  coord_fixed(ratio = 0.4)
+ggsave(filename = paste(plot.dir, paste("hallmark_AvsB", "heatmap.pdf", sep = "-"), sep = "/"), 
+       plot = p.hallmark,
+       width = 10, height = 8, dpi = 600)
--- a/vexas_sorted_4/markers_vexas_sorted_4.R
+++ b/vexas_sorted_4/markers_vexas_sorted_4.R
+library(Seurat)
+library(dplyr)
+Full_final <- readRDS("/DATA/31/molteni/vexas_sorted_4/results/Full/01-seurat/Full_final.rds")
+# computation on cluster numbers 
+# res 0.6
+setwd("/DATA/31/molteni/vexas_sorted_4/results/DE_markers_06/")
+Idents(Full_final)<-"RNA_snn_h.orig.ident_res.0.6"
+clusters <- levels(Full_final@meta.data$RNA_snn_h.orig.ident_res.0.6)
+for(i in 0:length(clusters)) {
+  markers <- FindMarkers(Full_final, ident.1 = "Pool_A_h", group.by = "orig.ident", subset.ident = i)
+  markers['cluster'] <- i
+  filename=paste("markers_cluster_",i,".csv", sep="")
+  write.csv(markers, file=filename)
+}
+df <- list.files(path='/DATA/31/molteni/vexas_sorted_4/results/DE_markers_06/') %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_sorted_4/results/summary/")
+write.csv(df, file="PoolA_hvsPoolB_h_sigMarkers_06.csv")
+cells_by_sample <- table(Full_final@meta.data$RNA_snn_h.orig.ident_res.0.6, Full_final@meta.data$orig.ident)
+write.csv(cells_by_sample, file="cells_bysample.csv")
+cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$RNA_snn_h.orig.ident_res.0.6))
+colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
+write.csv(cc_distribution, file="cell_cycle_06.csv")
+### res 1.2
+setwd("/DATA/31/molteni/vexas_sorted_4/results/DE_markers_12/")
+Idents(Full_final)<-"RNA_snn_h.orig.ident_res.1.2"
+clusters <- levels(Full_final@meta.data$RNA_snn_h.orig.ident_res.1.2)
+for(i in clusters) {
+  cells_group1 = sum(Full_final$orig.ident == "Pool_A_h" & Full_final$RNA_snn_h.orig.ident_res.1.2 == i)
+  cells_group2 = sum(Full_final$orig.ident == "Pool_B_h" & Full_final$RNA_snn_h.orig.ident_res.1.2 == i)
+  if(cells_group1 >= 3 & cells_group2 >= 3 ) {
+    markers <- FindMarkers(Full_final, ident.1 = "Pool_A_h", group.by = "orig.ident", subset.ident = i)
+    markers['cluster'] <- i
+    filename=paste("markers_cluster_",i,".csv", sep="")
+    write.csv(markers, file=filename)
+  }
+  else {
+    print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
+  }
+}
+df <- list.files(path='/DATA/31/molteni/vexas_sorted_4/results/DE_markers_12/') %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_sorted_4/results/summary/")
+write.csv(df, file="PoolA_hvsPoolB_h_sigMarkers_12.csv")
+cells_by_sample <- table(Full_final@meta.data$RNA_snn_h.orig.ident_res.1.2, Full_final@meta.data$orig.ident)
+write.csv(cells_by_sample, file="cells_bysample_12.csv")
+cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$RNA_snn_h.orig.ident_res.1.2))
+colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
+write.csv(cc_distribution, file="cell_cycle_12.csv")
+###############################################################################################################
+# computation on custom annotation
+setwd("/DATA/31/molteni/vexas_sorted_4/results/DE_markers/AvsB/")
+Full_final <- readRDS("/DATA/31/molteni/vexas_sorted_4/results/Full/01-seurat/Full_final_annot.rds")
+clusters <- unique(Full_final@meta.data$celltypes_annot)
+for(i in clusters) {
+  markers <- FindMarkers(Full_final, ident.1 = "Pool_A_h", group.by = "orig.ident", subset.ident = i)
+  markers['cluster'] <- i
+  filename=paste("markers_cluster_",i,".csv", sep="")
+  write.csv(markers, file=filename)
+}
+df <- list.files(path='/DATA/31/molteni/vexas_sorted_4/results/DE_markers/AvsB/') %>% 
+  lapply(read.csv) %>% 
+  bind_rows
+df <- df[df$p_val_adj<0.05,]
+setwd("/DATA/31/molteni/vexas_sorted_4/results/summary/")
+write.csv(df, file="PoolA_hvsPoolB_h_sigMarkers.csv")
+cells_by_sample <- table(Full_final@meta.data$celltypes_annot, Full_final@meta.data$orig.ident)
+write.csv(cells_by_sample, file="cells_bysample.csv")
+cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$celltypes_annot))
+colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
+write.csv(cc_distribution, file="cell_cycle.csv")
--- a/vexas_sorted_4/pseudotime_vexas_sorted_4.R
+++ b/vexas_sorted_4/pseudotime_vexas_sorted_4.R
+obj <- readRDS("/DATA/31/molteni/vexas_sorted_4/results/Full/01-seurat/Full_final_annot.rds")
+Idents(obj) <- "celltypes_annot"
+assay = "RNA"
+reduction <- toupper(paste0("umap.harmony.orig.ident"))
+clu_res <- "celltypes_annot"
+obj <- SetIdent(obj, value = clu_res)
+# convert to scExperiment object
+obj_sc <- as.SingleCellExperiment(obj)
+out_dir <- "/DATA/31/molteni/vexas_sorted_4/results/pseudotime"
+cell_pal <- function(cell_vars, pal_fun,...) {
+  if (is.numeric(cell_vars)) {
+    pal <- pal_fun(100, ...)
+    return(pal[cut(cell_vars, breaks = 100)])
+  } else {
+    categories <- sort(unique(cell_vars))
+    pal <- setNames(pal_fun(length(categories), ...), categories)
+    return(pal[cell_vars])
+  }
+}
+my_cols <- colorRampPalette(brewer.pal(12, "Paired"))(length(levels(obj_sc[[clu_res]])))
+my_cols <- brewer.pal(12, "Paired")[1:14] #inserire numero di cluster
+names(my_cols) <- levels(obj_sc[[clu_res]])
+cell_colors_clust <- my_cols[obj_sc[[clu_res]]]
+# Settings: resolution 1.2 harmony - starting point - cluster 3
+namex <- toupper("umap.harmony.orig.ident")
+scs <- slingshot(data = obj_sc,
+                 reducedDim = reduction, 
+                 start.clus = "HSC_MPP", 
+                 extend = 'n', 
+                 clusterLabels = obj_sc[[clu_res]])
+pal <- viridis::inferno(100,end = 0.95)
+sl_name <- 'Slingshot'
+pt <- slingPseudotime(scs)
+head(pt)
+#se clu_res non è un numero#####
+# Assuming clu_res is a character vector with cluster assignments like "A", "B", "C", etc.
+# Convert it to a factor
+factor_clu_res <- as.factor(obj_sc[[clu_res]])
+# Then convert the factor to numeric
+# The levels will be assigned numbers in the order they appear in the factor
+numeric_clu_res <- as.numeric(factor_clu_res)
+# Now you can use numeric_clu_res to index into your colors
+cell_colors_clust <- my_cols[numeric_clu_res]
+#graphics
+for (num_curve in seq(1, length(colnames(pt)))) {
+  lineagename <- paste('Curve',as.character(num_curve),sep='')
+  colors <- pal[cut(pt[,num_curve], breaks = 100)]
+  # Set the filename and create a PDF device
+  pdf(paste(out_dir, paste0(sl_name, "_branch_", num_curve, ".pdf"), sep = "/"), width = 1500/90, height = 750/90) 
+  # Start a new plot
+  plot.new()
+  #grid.newpage()
+  pushViewport(viewport(layout = grid.layout(1, 2))) # Adjusted layout grid to 1 row and 2 columns
+  # Pseudotime plot with adjusted margins
+  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
+  par(mar = c(5, 5, 5, 2) + 0.1) # Adjust margins here; default is c(5, 4, 4, 2) + 0.1
+  par(fig = gridFIG(), new = TRUE)
+  plot(reducedDim(scs, type = namex), col = colors, 
+       pch = 16, cex = 1, main = "Pseudotime", cex.axis=2.0, cex.lab=2.0, cex.main=2.2) # Adjusted cex.lab for label size
+  lines(SlingshotDataSet(scs), type = 'lineages', show.constraints = TRUE, col = "black")
+  lines(slingCurves(scs)[[num_curve]], lwd = 2, col = 'red')
+  popViewport()
+  # Sample density plot with adjusted margins
+  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
+  par(mar = c(5, 5, 5, 2) + 0.1) # Adjust margins here; default is c(5, 4, 4, 2) + 0.1
+  par(fig = gridFIG(), new = TRUE)
+  ds <- list()
+  for (samp in unique(colData(scs)$sample_label)) {
+    ds[[samp]] <- density(pt[colData(scs)$sample_label == samp, num_curve], na.rm = TRUE)
+  }
+  # Setting the plot limits
+  x_min <- min(sapply(ds, function(x) min(x$x, na.rm = TRUE)))
+  x_max <- max(sapply(ds, function(x) max(x$x, na.rm = TRUE)))
+  y_min <- min(sapply(ds, function(x) min(x$y, na.rm = TRUE)))
+  y_max <- max(sapply(ds, function(x) max(x$y, na.rm = TRUE)))
+  xlim <- range(c(x_min, x_max))
+  ylim <- range(c(y_min, y_max))
+  plot(xlim, ylim, type = "n", xlab = "Pseudotime", ylab = "", main = paste0("Sample density on branch ", num_curve),
+       cex.axis=2.0, cex.lab=2.0, cex.main=2.2)
+  for (samp in names(ds)) {
+    polygon(c(min(ds[[samp]]$x), ds[[samp]]$x, max(ds[[samp]]$x)), c(0, ds[[samp]]$y, 0),
+            col = alpha(brewer.pal(length(names(ds)), "Set1")[which(names(ds) == samp)], alpha = .5))
+  }
+  legend("topright", legend = names(ds), fill = alpha(brewer.pal(length(names(ds)), "Set1"), alpha = .5), bty = "n", cex = 2.0)
+  popViewport()
+  # Close the graphical device
+  dev.off()
+}
--- a/vexas_sorted_4/violin_signature_vexas_sorted_4.R
+++ b/vexas_sorted_4/violin_signature_vexas_sorted_4.R
+library(UCell)
+library(Seurat)
+library(ggplot2)
+library(dplyr)
+library(tidyr)
+### load signatures
+vexas_50 <- read.gmt("/DATA/31/molteni/vexas_bm/reference/custom_vexas_50.gmt")
+vexas_100 <- read.gmt("/DATA/31/molteni/vexas_bm/reference/custom_vexas_100.gmt")
+vexas_signature <- list(vexas_50 = vexas_50[[2]],
+                        vexas_100 = vexas_100[[2]])
+# load dataset
+query <- readRDS("/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_anno.rds")
+### UCell
+# compute modules
+names(vexas_signature) <- paste0("VEXAS_Xenograft_sig", c("50", "100"))
+ncol <- ncol(query@meta.data)
+query <- AddModuleScore_UCell(query, features = vexas_signature)
+colnames(query@meta.data) <- c(colnames(query@meta.data)[seq_len(ncol)], names(vexas_signature))
+# wilcoxon test
+x <- melt(data = query@meta.data, id.vars = c("condition", "celltypes_annot"), measure.vars = c("VEXAS_Xenograft_sig50"))
+t_predpop <- x %>% dplyr::group_by(variable, celltypes_annot) %>% dplyr::summarise(pvalue = wilcox.test(x = value[condition == "PZ"], y = value[condition == "HD"])$p.value)
+t_predpop$bonferroni_adjpvalue <- p.adjust(p = t_predpop$pvalue)
+t_predpop <- t_predpop[order(t_predpop$bonferroni_adjpvalue, decreasing = F),]
+write.table(x = t_predpop, file = "VexasXenograftSignatures_SeuratAnnotatedClustersWilcoxonTest_VEXASvsHD_sig50.txt", sep = "\t", row.names = F, col.names = T, quote = F)
+# violin plots: VEXAS vs HD
+t_predpop$sig_label <- ifelse(t_predpop$bonferroni_adjpvalue > 0.05, "ns",
+                              ifelse(t_predpop$bonferroni_adjpvalue < 0.05, "*",
+                                     ifelse(t_predpop$bonferroni_adjpvalue < 0.01, "**",
+                                            ifelse(t_predpop$bonferroni_adjpvalue < 0.001, "***", "***"))))
+g <- query@meta.data %>%
+  ggplot() +
+  theme_classic() +
+  geom_violin(aes(x = celltypes_annot, y = VEXAS_Xenograft_sig50, fill = condition), scale = "width") +
+  geom_text(data = t_predpop, aes(x = celltypes_annot, y = 0.7, label = sig_label)) +
+  theme(plot.title = element_text(hjust = 0.5, size = 10),
+        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
+        legend.position = "right") +
+  ylab("UCell Module score") + xlab("") +
+  ggtitle("VEXAS xenograft signature (n = 50)") +
+  theme(axis.text.x = element_text(angle = 45, , vjust = 1, hjust = 1)) +
+  scale_fill_manual(values = adjustcolor(col = c("#F8766D", "#02818a"), alpha.f = 0.8), name = "")
+ggsave(g,
+       filename = "ViolinPlots_VexasXenograftSignatureTop50.pdf",
+       width = length(unique(query$celltypes_annot))*5*0.1,
+       height = 5,
+       limitsize = FALSE)
\ No newline at end of file