Commit f4a3560c authored by Ivan Merelli's avatar Ivan Merelli
Browse files

Initial commit

parents
library('dplyr')
library('ggplot2')
library('reshape2')
library('RColorBrewer')
#heatmap THR vs CTRL
hallmark_ds <- c("Thr_CTRL")
clusters <- dir ("/DATA/31/molteni/vexas_bm/results/integration/GSEA_OSR_Wu/Thr_CTRL/")
clusters <- gsub("Thr_CTRL_markers_cluster_", "", clusters)
wdir="/DATA/31/molteni/vexas_bm/results/integration"
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
for (i in clusters) {
s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA_OSR_Wu/Thr_CTRL", s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,i,sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
}
hallmark.full_1 <- hallmark.full
### ONE HEATMAP ###
all_hallmark <- hallmark.full_1
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
level_order <- c("Thr_CTRL_Late_erythroid_cells",
"Thr_CTRL_Mid_erythroid_cells",
"Thr_CTRL_Early_erythroid",
"Thr_CTRL_Erythroid_progenitors",
"Thr_CTRL_MEP",
"Thr_CTRL_MEP_BASO_MAST_prog",
"Thr_CTRL_Plasmacytoid_dentritic_cells" ,
"Thr_CTRL_CD14+_CD16_low_monocytes",
"Thr_CTRL_CD14+_monocytes_2",
"Thr_CTRL_CD14+_monocytes_1",
"Thr_CTRL_Myeloid_dendritic_cells",
"Thr_CTRL_Immature_Neutrophil",
"Thr_CTRL_Prolif_GP",
"Thr_CTRL_GP",
"Thr_CTRL_HSC_MPP",
"Thr_CTRL_MLP",
"Thr_CTRL_CD4_T_cells",
"Thr_CTRL_CD8_T_cells",
"Thr_CTRL_B_cells",
"Thr_CTRL_Plasma_cells",
"Thr_CTRL_VEXAS_GP",
"Thr_CTRL_VEXAS_NK_cells")
#ALL
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.x=element_text(size=10))+
theme(axis.text.y=element_text(size=8))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=2) +
ggtitle("GSEA OSR Thr Patients vs Control Pool") +
theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
theme(legend.title = element_text(size = 10))+
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
#limits = c(-3.5, 3.5),
limits = c(-4.5, 4.5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
ggsave(filename = paste(plot.dir, paste("hallmark_Thr_CTRL", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 15, height = 15, dpi = 600)
#heatmap VAL vs CTRL
hallmark_ds <- c("Val_CTRL")
clusters <- dir ("/DATA/31/molteni/vexas_bm/results/integration/GSEA_OSR_Wu/Val_CTRL/")
clusters <- gsub("Val_CTRL_markers_cluster_", "", clusters)
wdir="/DATA/31/molteni/vexas_bm/results/integration"
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
for (i in clusters) {
s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA_OSR_Wu/Val_CTRL", s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,i,sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
}
hallmark.full_2 <- hallmark.full
### ONE HEATMAP ###
all_hallmark <- hallmark.full_2
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
#
level_order <- c("Val_CTRL_Late_erythroid_cells",
"Val_CTRL_Mid_erythroid_cells",
"Val_CTRL_Early_erythroid",
"Val_CTRL_Erythroid_progenitors",
"Val_CTRL_MEP",
"Val_CTRL_MEP_BASO_MAST_prog",
"Val_CTRL_Plasmacytoid_dentritic_cells" ,
"Val_CTRL_CD14+_CD16_low_monocytes",
"Val_CTRL_CD14+_monocytes_2",
"Val_CTRL_CD14+_monocytes_1",
"Val_CTRL_Myeloid_dendritic_cells",
"Val_CTRL_Immature_Neutrophil",
"Val_CTRL_Prolif_GP",
"Val_CTRL_GP",
"Val_CTRL_HSC_MPP",
"Val_CTRL_MLP",
"Val_CTRL_CD4_T_cells",
"Val_CTRL_CD8_T_cells",
"Val_CTRL_B_cells",
"Val_CTRL_Plasma_cells",
"Val_CTRL_Endothelial_cells",
"Val_CTRL_VEXAS_GP",
"Val_CTRL_VEXAS_NK_cells")
#ALL
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.x=element_text(size=10))+
theme(axis.text.y=element_text(size=8))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=2) +
ggtitle("GSEA Val OSR Patients vs Control Pool") +
theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
theme(legend.title = element_text(size = 10))+
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
#limits = c(-3.5, 3.5),
limits = c(-4.5, 4.5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
ggsave(filename = paste(plot.dir, paste("hallmark_Val_CTRL", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 15, height = 15, dpi = 600)
#heatmap LEU vs CTRL
hallmark_ds <- c("Leu_CTRL")
clusters <- dir ("/DATA/31/molteni/vexas_bm/results/integration/GSEA_OSR_Wu/Leu_CTRL/")
clusters <- gsub("Leu_CTRL_markers_cluster_", "", clusters)
wdir="/DATA/31/molteni/vexas_bm/results/integration"
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
for (i in clusters) {
s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA_OSR_Wu/Leu_CTRL", s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,i,sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
}
hallmark.full_3 <- hallmark.full
### ONE HEATMAP ###
all_hallmark <- hallmark.full_3
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
#
level_order <- c("Leu_CTRL_Late_erythroid_cells",
"Leu_CTRL_Mid_erythroid_cells",
"Leu_CTRL_Early_erythroid",
"Leu_CTRL_Erythroid_progenitors",
"Leu_CTRL_MEP",
"Leu_CTRL_MEP_BASO_MAST_prog",
"Leu_CTRL_Plasmacytoid_dentritic_cells" ,
"Leu_CTRL_CD14+_CD16_low_monocytes",
"Leu_CTRL_CD14+_monocytes_2",
"Leu_CTRL_CD14+_monocytes_1",
"Leu_CTRL_Myeloid_dendritic_cells",
"Leu_CTRL_Immature_Neutrophil",
"Leu_CTRL_Prolif_GP",
"Leu_CTRL_GP",
"Leu_CTRL_HSC_MPP",
"Leu_CTRL_MLP",
"Leu_CTRL_CD4_T_cells",
"Leu_CTRL_CD8_T_cells",
"Leu_CTRL_B_cells",
"Leu_CTRL_Plasma_cells",
"Leu_CTRL_Endothelial_cells",
"Leu_CTRL_VEXAS_GP",
"Leu_CTRL_VEXAS_NK_cells")
#ALL
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.x=element_text(size=10))+
theme(axis.text.y=element_text(size=8))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=2) +
ggtitle("GSEA Leu OSR Patients vs Control Pool") +
theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
theme(legend.title = element_text(size = 10))+
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
#limits = c(-3.5, 3.5),
limits = c(-4.5, 4.5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
ggsave(filename = paste(plot.dir, paste("hallmark_Leu_CTRL", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 15, height = 15, dpi = 600)
###ONE HEATMAP
all_hallmark <- rbind (hallmark.full_1, hallmark.full_2, hallmark.full_3)
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
hallmark.order.terms <- hallmark.order[order(hallmark.order$ID, decreasing = TRUE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
#ONE VS ALL COMPARISON ##
level_order <- c("Thr_CTRL_Late_erythroid_cells","Val_CTRL_Late_erythroid_cells","Leu_CTRL_Late_erythroid_cells",
"Thr_CTRL_Mid_erythroid_cells","Val_CTRL_Mid_erythroid_cells","Leu_CTRL_Mid_erythroid_cells",
"Thr_CTRL_Early_erythroid", "Val_CTRL_Early_erythroid", "Leu_CTRL_Early_erythroid",
"Thr_CTRL_Erythroid_progenitors", "Val_CTRL_Erythroid_progenitors","Leu_CTRL_Erythroid_progenitors",
"Thr_CTRL_MEP","Val_CTRL_MEP","Leu_CTRL_MEP",
"Thr_CTRL_MEP_BASO_MAST_prog","Val_CTRL_MEP_BASO_MAST_prog","Leu_CTRL_MEP_BASO_MAST_prog",
"Thr_CTRL_Plasmacytoid_dentritic_cells", "Val_CTRL_Plasmacytoid_dentritic_cells","Leu_CTRL_Plasmacytoid_dentritic_cells",
"Thr_CTRL_CD14+_CD16_low_monocytes","Val_CTRL_CD14+_CD16_low_monocytes","Leu_CTRL_CD14+_CD16_low_monocytes",
"Thr_CTRL_CD14+_monocytes_2","Val_CTRL_CD14+_monocytes_2","Leu_CTRL_CD14+_monocytes_2",
"Thr_CTRL_CD14+_monocytes_1", "Val_CTRL_CD14+_monocytes_1","Leu_CTRL_CD14+_monocytes_1",
"Thr_CTRL_Myeloid_dendritic_cells", "Val_CTRL_Myeloid_dendritic_cells","Leu_CTRL_Myeloid_dendritic_cells",
"Thr_CTRL_Immature_Neutrophil","Val_CTRL_Immature_Neutrophil","Leu_CTRL_Immature_Neutrophil",
"Thr_CTRL_Prolif_GP","Val_CTRL_Prolif_GP","Leu_CTRL_Prolif_GP",
"Thr_CTRL_GP","Val_CTRL_GP","Leu_CTRL_GP",
"Thr_CTRL_HSC_MPP", "Val_CTRL_HSC_MPP","Leu_CTRL_HSC_MPP",
"Thr_CTRL_MLP", "Val_CTRL_MLP","Leu_CTRL_MLP",
"Thr_CTRL_CD4_T_cells", "Val_CTRL_CD4_T_cells","Leu_CTRL_CD4_T_cells",
"Thr_CTRL_CD8_T_cells", "Val_CTRL_CD8_T_cells", "Leu_CTRL_CD8_T_cells",
"Thr_CTRL_B_cells","Val_CTRL_B_cells","Leu_CTRL_B_cells",
"Thr_CTRL_Plasma_cells","Val_CTRL_Plasma_cells","Leu_CTRL_Plasma_cells",
"Val_CTRL_Endothelial_cells", "Leu_CTRL_Endothelial_cells",
"Thr_CTRL_VEXAS_GP", "Val_CTRL_VEXAS_GP","Leu_CTRL_VEXAS_GP",
"Thr_CTRL_VEXAS_NK_cells", "Val_CTRL_VEXAS_NK_cells", "Leu_CTRL_VEXAS_NK_cells")
#ALL
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.x=element_text(size=10))+
theme(axis.text.y=element_text(size=8))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=2) +
ggtitle("GSEA - All Mutations OSR Patients vs Control") +
theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
theme(legend.title = element_text(size = 10))+
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
#limits = c(-3.5, 3.5),
limits = c(-4.5, 4.5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
ggsave(filename = paste(plot.dir, paste("hallmark_ALLMut", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 20, height = 10, dpi = 600)
## PAZIENTI vs CONTROLLI ##
#heatmap PZ vs CTRL
hallmark_ds <- c("Pz_CTRL")
clusters <- dir ("/DATA/31/molteni/vexas_bm/results/integration/GSEA_OSR_Wu/Pz_CTRL/")
clusters <- gsub("Pz_CTRL_markers_cluster_", "", clusters)
wdir="/DATA/31/molteni/vexas_bm/results/integration"
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
for (i in clusters) {
s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA_OSR_Wu/Pz_CTRL", s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,i,sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
}
hallmark.full_4 <- hallmark.full
### ONE HEATMAP ###
all_hallmark <- hallmark.full_4
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
level_order <- c("Pz_CTRL_Late_erythroid_cells",
"Pz_CTRL_Mid_erythroid_cells",
"Pz_CTRL_Early_erythroid",
"Pz_CTRL_Erythroid_progenitors",
"Pz_CTRL_MEP",
"Pz_CTRL_MEP_BASO_MAST_prog",
"Pz_CTRL_Plasmacytoid_dentritic_cells" ,
"Pz_CTRL_CD14+_CD16_low_monocytes",
"Pz_CTRL_CD14+_monocytes_2",
"Pz_CTRL_CD14+_monocytes_1",
"Pz_CTRL_Myeloid_dendritic_cells",
"Pz_CTRL_Immature_Neutrophil",
"Pz_CTRL_Prolif_GP",
"Pz_CTRL_GP",
"Pz_CTRL_HSC_MPP",
"Pz_CTRL_MLP",
"Pz_CTRL_CD4_T_cells",
"Pz_CTRL_CD8_T_cells",
"Pz_CTRL_B_cells",
"Pz_CTRL_Plasma_cells",
"Pz_CTRL_Endothelial_cells",
"Pz_CTRL_VEXAS_GP",
"Pz_CTRL_VEXAS_NK_cells")
#ALL
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.x=element_text(size=10))+
theme(axis.text.y=element_text(size=8))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=2) +
ggtitle("GSEA OSR Patients vs Control Pool") +
theme(plot.title = element_text(hjust = 0.5, vjust = 10), title = element_text(size=20, face = 'bold'))+
theme(legend.title = element_text(size = 10))+
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
#limits = c(-3.5, 3.5),
limits = c(-4.5, 4.5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
plot.dir="/DATA/31/molteni/vexas_bm/results/integration/heatmap_OSR_Wu/"
ggsave(filename = paste(plot.dir, paste("hallmark_Pz_CTRL", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 15, height = 15, dpi = 600)
library(Seurat)
library(ggplot2)
library(harmony)
library(RColorBrewer)
library(plotly)
library(htmlwidgets)
library(ComplexHeatmap)
library(openxlsx)
library(dplyr)
library(future)
library(gridExtra)
library(SeuratWrappers)
## Wu et al. public data (GSE196052)
countsData_bm <- read.delim(file = "/DATA/31/molteni/vexas_sc/public/GSE196052_dataCount_BM.csv", header = TRUE, sep = ",", row.names = 1)
sampleData <- read.delim(file = "/DATA/31/molteni/vexas_sc/public/BM_metaDatatSNECellType_ALiceManual.csv", header = TRUE, sep = ",")
sampleData <- sampleData[,c(1:6)]
obj_BM <- CreateSeuratObject(counts = countsData_bm)
dataset_numbers <- sub(".*\\.", "", colnames(obj_BM))
padded_numbers <- ifelse(as.numeric(dataset_numbers) < 10,
paste0("0", dataset_numbers),
dataset_numbers)
final_dataset_labels <- paste0("BM-PU-", padded_numbers)
obj_BM$dataset <- final_dataset_labels
obj_BM@meta.data$subject <- sampleData$subject
obj_BM@meta.data$orig.ident <- obj_BM@meta.data$subject
obj_BM@meta.data$sample_info <- "public"
obj_BM@meta.data$subject <- NULL
obj_BM@meta.data$dataset <- NULL
## our vexas data
BM_minimal <- readRDS("/DATA/31/molteni/vexas_bm/results/Full/Full_minimal.rds")
BM_minimal@meta.data$scDblFinder_class <- NULL
BM_minimal@meta.data$scds_class <- NULL
BM_minimal@meta.data$RNA_Source <- NULL
BM_minimal@meta.data$sample_info <- "vexas"
BM.combo <- merge(BM_minimal, y = obj_BM, add.cell.ids = c("vexas", "vexas_public"))
# normalize data and find variable features
# select only 2000 genes as variable
varfeatures <- 2000
BM.combo <- NormalizeData(object = BM.combo)
BM.combo <- FindVariableFeatures(BM.combo, selection.method = "vst",
nfeatures = varfeatures,
verbose = T)
# scaling
reg_vars = c("percent.mt", "nCount_RNA")
BM.combo <- ScaleData(object = BM.combo, vars.to.regress = reg_vars, display.progress = T, features = rownames(BM.combo))
saveRDS(BM.combo, file = "combined_preIntegration_scaled.rds")
BM.combo <- readRDS("/DATA/31/molteni/vexas_bm/results/integration/combined_preIntegration_scaled.rds")
#PCA
max_pca <- 70
BM.combo <- RunPCA(object = BM.combo, features = VariableFeatures(object = BM.combo), npcs = max_pca, reduction.name="pca", reduction.key="PC_")
BM.combo@meta.data$condition <- NA
BM.combo@meta.data$condition <- with(BM.combo@meta.data,
ifelse(BM.combo@meta.data$orig.ident == "BM-01","PZ",
ifelse(BM.combo@meta.data$orig.ident == "BM-02","PZ",
ifelse(BM.combo@meta.data$orig.ident == "BM-03","PZ",
ifelse(BM.combo@meta.data$orig.ident == "BM-04","PZ",
ifelse(BM.combo@meta.data$orig.ident == "BM-08","PZ",
ifelse(BM.combo@meta.data$orig.ident == "BM-09","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT1","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT2","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT3","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT4","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT5","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT6","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT7","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT8","PZ",
ifelse(BM.combo@meta.data$orig.ident == "PT9","PZ",
ifelse(BM.combo@meta.data$orig.ident == "HD1","HD",
ifelse(BM.combo@meta.data$orig.ident == "HD2","HD",
ifelse(BM.combo@meta.data$orig.ident == "HD3","HD",
ifelse(BM.combo@meta.data$orig.ident == "HD4","HD", "NA"))))))))))))))))))))
integration.var <- c("orig.ident", "sample_info", "condition") ## + condition
BM.combo <- RunHarmony(object = BM.combo,
group.by.vars = integration.var,
max.iter.harmony = 30, plot_convergence = FALSE, reduction.save = "harmony")
#PC = 15
setwd("/DATA/31/molteni/vexas_bm/results/integration/pc15")
BM.combo <- RunUMAP(object = BM.combo, seed.use = 123, dims = 1:15,
reduction = "harmony", reduction.name = "harmony_umap", reduction.key = "UMAPh_", return.model = TRUE)
g <- DimPlot(BM.combo, split.by = "orig.ident", group.by = "sample_info", reduction = "harmony_umap", raster = F, ncol = 6) + ggtitle("UMAP post-harmony")
ggsave(g, filename = 'UMAP_PostHarmony_donor_cond.png', width = 12, height = 6)
# split by
Idents(BM.combo) <- "sample_info"
g <- DimPlot(BM.combo, split.by = "sample_info", reduction = "harmony_umap") + ggtitle("UMAP post-harmony")
ggsave(g, filename = 'UMAP_PostHarmony_batch_cond2.png', width = 7, height = 6)
Idents(BM.combo) <- "condition"
g <- DimPlot(BM.combo, split.by = "condition", reduction = "harmony_umap") + ggtitle("UMAP post-harmony")
ggsave(g, filename = 'UMAP_PostHarmony_cond_2.png', width = 7, height = 6)
#PC=15
BM.combo <- FindNeighbors(object = BM.combo,
dims = 1:15,
force.recalc = T,
reduction = "harmony",
graph.name = c("RNA_nn_h.", "RNA_snn_h."))
BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.4)
BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.6)
BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.8)
# SPLIT CLUSTER 20 OF RES=0.8
Idents(BM.combo) <- "RNA_snn_h._res.0.8"
BM.combo <- FindSubCluster(BM.combo, cluster= "20", graph.name = "RNA_snn_h.", subcluster.name = "subcluster", resolution = 0.05)
table(BM.combo$subcluster)
Idents(BM.combo) <- "subcluster"
mapping_vector <- c("20_0" = "20",
"20_1" = "25")
# Update the reference object
for(old_ident in names(mapping_vector)) {
BM.combo$subcluster[Idents(BM.combo) == old_ident] <- mapping_vector[old_ident]
}
BM.combo$RNA_snn_h._res.0.8 <- BM.combo$subcluster
BM.combo@misc$markers <- FindAllMarkers(object = BM.combo, min.pct = 0.2, logfc.threshold = 0.25, only.pos = FALSE,
min.cells.group = 10, test.use = "wilcox", return.thresh = 0.05, fc.name = "avg_logFC")
markers_h <- list(BM.combo@misc$markers_h)
names(markers_h) <- "RNA_snn_h._res.0.8"
BM.combo@misc$markers_h <- markers_h
saveRDS(BM.combo, file = "/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_final.rds")
#custom cluster annotation
Idents(BM.combo) <- "RNA_snn_h._res.0.8"
# mapping vector
new.cluster.ids <- c("CD8_T_cells","Early_erythroid","CD14+_monocytes_1","CD4_T_cells",
"GP","Immature_Neutrophil","HSC_MPP","VEXAS_NK_cells","B_cells","MEP",
"Erythroid_progenitors","Mid_erythroid_cells","Prolif_GP","VEXAS_GP",
"CD14+_monocytes_2","Early_erythroid","Plasma_cells","Late_erythroid_cells",
"CD14+_CD16_low_monocytes","Myeloid_dendritic_cells","Plasmacytoid_dentritic_cells",
"CD8_T_cells","MLP","MEP_BASO_MAST_prog","Endothelial_cells","B_cells")
cluster_mapping <- setNames(new.cluster.ids, c(1:length(new.cluster.ids)))
BM.combo$celltypes_annot <- new.cluster.ids[as.numeric(as.character(BM.combo$RNA_snn_h._res.0.8)) + 1]
Idents(BM.combo) <- "celltypes_annot"
ggsave("UMAP_label.png", width = 10, height = 7)
DimPlot(BM.combo, reduction = "harmony_umap", label = TRUE, label.box = TRUE, pt.size = 0.5, repel = TRUE) + NoLegend()
saveRDS(BM.combo, file = "/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_anno.rds")
library(Seurat)
library(dplyr)
BM.combo <- readRDS("/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_anno.rds")
BM.combo@meta.data$mutations <- with(BM.combo@meta.data,
ifelse(BM.combo@meta.data$orig.ident == "BM-01","THR",
ifelse(BM.combo@meta.data$orig.ident == "BM-02","VAL",
ifelse(BM.combo@meta.data$orig.ident == "BM-03","VAL",
ifelse(BM.combo@meta.data$orig.ident == "BM-04","LEU",
ifelse(BM.combo@meta.data$orig.ident == "BM-08","VAL",
ifelse(BM.combo@meta.data$orig.ident == "BM-09","LEU",
ifelse(BM.combo@meta.data$orig.ident == "PT1","VAL",
ifelse(BM.combo@meta.data$orig.ident == "PT2","THR",
ifelse(BM.combo@meta.data$orig.ident == "PT3","THR",
ifelse(BM.combo@meta.data$orig.ident == "PT4","THR",
ifelse(BM.combo@meta.data$orig.ident == "PT5","THR",
ifelse(BM.combo@meta.data$orig.ident == "PT6","LEU",
ifelse(BM.combo@meta.data$orig.ident == "PT7","LEU",
ifelse(BM.combo@meta.data$orig.ident == "PT8","THR",
ifelse(BM.combo@meta.data$orig.ident == "PT9","THR",
ifelse(BM.combo@meta.data$orig.ident == "HD1","CTRL",
ifelse(BM.combo@meta.data$orig.ident == "HD2","CTRL",
ifelse(BM.combo@meta.data$orig.ident == "HD3","CTRL",
ifelse(BM.combo@meta.data$orig.ident == "HD4","CTRL","NA"))))))))))))))))))))
#"BM-01","BM-02","BM-03","BM-04", "BM-08","BM-09",
#COMPARISON THR VEXAS PATIENTS vs HEALTHY CONTROLS WU
#select only the subjects involved in the comparison by mutations
Idents(BM.combo) <- "orig.ident"
BM.combo_subset <- subset(BM.combo, idents = c("BM-01","HD1", "HD2", "HD3", "HD4"))
clusters <- unique(BM.combo_subset$celltypes_annot)
Idents(BM.combo_subset) <- "celltypes_annot"
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Thr_CTRL/")
for(i in clusters) {
cells_group1 = sum(BM.combo_subset$mutations == "THR" & BM.combo_subset$celltypes_annot == i)
cells_group2 = sum(BM.combo_subset$mutations == "CTRL" & BM.combo_subset$celltypes_annot == i)
if(cells_group1 >= 3 & cells_group2 >= 3 ) {
markers <- FindMarkers(BM.combo_subset, ident.1 = "THR", group.by = "mutations", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
else {
print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
}
}
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers/Thr_CTRL")
df <- list.files(path="/DATA/31/molteni/vexas_bm/results/integration/DE_markers/Thr_CTRL") %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/summary")
write.csv(df, file="THRvsCTRL_sigMarkers.csv")
#COMPARISON VAL VEXAS PATIENTS vs HEALTHY CONTROLS WU
#select only the subjects involved in the comparison
Idents(BM.combo) <- "orig.ident"
BM.combo_subset <- subset(BM.combo, idents = c("BM-02","BM-03","BM-08","HD1", "HD2", "HD3", "HD4"))
clusters <- unique(BM.combo_subset$celltypes_annot)
Idents(BM.combo_subset) <- "celltypes_annot"
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Val_CTRL/")
for(i in clusters) {
cells_group1 = sum(BM.combo_subset$mutations == "VAL" & BM.combo_subset$celltypes_annot == i)
cells_group2 = sum(BM.combo_subset$mutations == "CTRL" & BM.combo_subset$celltypes_annot == i)
if(cells_group1 >= 3 & cells_group2 >= 3 ) {
markers <- FindMarkers(BM.combo_subset, ident.1 = "VAL", group.by = "mutations", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
else {
print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
}
}
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Val_CTRL")
df <- list.files(path="/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Val_CTRL") %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/summary")
write.csv(df, file="VALvsCTRL_sigMarkers.csv")
#COMPARISON LEU VEXAS PATIENTS vs HEALTHY CONTROLS WU
#select only the subjects involved in the comparison
Idents(BM.combo) <- "orig.ident"
BM.combo_subset <- subset(BM.combo, idents = c("BM-04", "BM-09","HD1", "HD2", "HD3", "HD4"))
clusters <- unique(BM.combo_subset$celltypes_annot)
Idents(BM.combo_subset) <- "celltypes_annot"
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Leu_CTRL/")
for(i in clusters) {
cells_group1 = sum(BM.combo_subset$mutations == "LEU" & BM.combo_subset$celltypes_annot == i)
cells_group2 = sum(BM.combo_subset$mutations == "CTRL" & BM.combo_subset$celltypes_annot == i)
if(cells_group1 >= 3 & cells_group2 >= 3 ) {
markers <- FindMarkers(BM.combo_subset, ident.1 = "LEU", group.by = "mutations", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
else {
print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
}
}
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Leu_CTRL")
df <- list.files(path="/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Leu_CTRL") %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/summary")
write.csv(df, file="LEUvsCTRL_sigMarkers.csv")
#COMPARISON VEXAS PATIENTS vs HEALTHY CONTROLS WU
#select only the subjects involved in the comparison
Idents(BM.combo) <- "orig.ident"
BM.combo_subset <- subset(BM.combo, idents = c("BM-01", "BM-02", "BM-03", "BM-04", "BM-08","BM-09","HD1", "HD2", "HD3", "HD4"))
clusters <- unique(BM.combo_subset$celltypes_annot)
Idents(BM.combo_subset) <- "celltypes_annot"
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Pz_CTRL/")
for(i in clusters) {
cells_group1 = sum(BM.combo_subset$condition == "PZ" & BM.combo_subset$celltypes_annot == i)
cells_group2 = sum(BM.combo_subset$condition == "HD" & BM.combo_subset$celltypes_annot == i)
if(cells_group1 >= 3 & cells_group2 >= 3 ) {
markers <- FindMarkers(BM.combo_subset, ident.1 = "PZ", group.by = "condition", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
else {
print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
}
}
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Pz_CTRL")
df <- list.files(path="/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/Pz_CTRL") %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_bm/results/integration/DE_markers_OSR_Wu/summary")
write.csv(df, file="PZvsCTRL_sigMarkers.csv")
library(dplyr)
library(ggplot2)
library(ggrepel)
args <- commandArgs(trailingOnly = TRUE)
file_path <- args[1]
max_log2FC <- 5
min_log2FC <- -5
min_p_val_adj <- Il valore minimo di p-value adjusted passato da Python
# Se il minimo p-value adjusted ?? inferiore a 10^-100, fissalo a 10^-100
min_p_val_adj <- 10^-100
raw <- read.table("/DATA/31/molteni/vexas_bm/results/integration/DE_markers/Pz_CTRL/markers_cluster_Late_erythroid_cells.csv", header = TRUE, sep = ",", quote = "\"", dec = ".")
adj.pval.thr <- 0.05
# Imposta un limite inferiore per i p-value adjusted direttamente nei dati
raw$p_val_adj <- pmax(raw$p_val_adj, min_p_val_adj)
data <- data.frame(gene = raw$X, pval = -log10(raw$p_val_adj), lfc = raw$avg_log2FC)
data <- na.omit(data)
data <- mutate(data, color = case_when(
data$lfc > 0 & data$pval > -log10(adj.pval.thr) ~ "Overexpressed",
data$lfc < 0 & data$pval > -log10(adj.pval.thr) ~ "Underexpressed",
TRUE ~ "NonSignificant"
))
# Seleziona i geni di interesse per etichettarli
# HBG1/2, ALAS2, HBB, HBD, FECH, NCOA4, SLC25A37, GLRX5, EIF2AK1.
selected_genes <- c("HBG1" ,"HBG2", "ALAS2", "HBB", "HBD", "FECH", "NCOA4", "SLC25A37", "GLRX5", "EIF2AK1")
highl <- data %>% filter(gene %in% selected_genes)
highl <- head(subset(data, color != "NonSignificant"), 20)
vol <- ggplot(data, aes(x = lfc, y = pval, colour = color)) +
geom_point(size = 2, alpha = 0.8, na.rm = TRUE) +
geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray", linetype = "dashed") +
scale_color_manual(name = "Expression", values = c(Overexpressed = "#CD4F39", Underexpressed = "#008B00", NonSignificant = "darkgray")) +
geom_label_repel(data = highl, aes(label = gene), box.padding = 0.35, point.padding = 0.5, size = 5, max.overlaps = 10) +
theme_bw(base_size = 12) +
theme(legend.position = "right") +
xlab("log2 Fold Change") +
ylab(expression(-log[10]("adjusted p-value"))) +
xlim(min_log2FC, max_log2FC) +
ylim(0, max(-log10(min_p_val_adj), -log10(raw$p_val_adj)) + 1) # Assicura che il plot includa tutti i valori di p-value
# Salva il plot
ggsave("volcano_late_ery.pdf", plot = vol, width = 11, height = 8, dpi = 300)
# HALLMARK GSEA
library('dplyr')
library('ggplot2')
library('reshape2')
library('RColorBrewer')
plot.dir="/DATA/31/molteni/vexas_mouse_3/results/heatmap/"
wdir="/DATA/31/molteni/vexas_mouse_3/results"
#heatmap custom cluster & order (05/06/24)
wdir="/DATA/31/molteni/vexas_mouse_3/results"
plot.dir="/DATA/31/molteni/vexas_mouse_3/results/heatmap/"
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
hallmark_ds <- c("A_C")
clusters <- unique(Full_final$celltypes_annot)
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
for (i in clusters) {
s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA", "AvsC",s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,i,sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
}
hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
##
all_hallmark <- hallmark.full.filt
all_hallmark$Dataset <- gsub("A_C", "VEXAS-like_WT", all_hallmark$Dataset)
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
level_order <- c("VEXAS-like_WT_Erythroid_prog", "VEXAS-like_WT_Prolif_Myelo_NK_prec", "VEXAS-like_WT_Promyelocyte",
"VEXAS-like_WT_Monocyte_biased_prog","VEXAS-like_WT_pDC_biased_prog","VEXAS-like_WT_GMP_2",
"VEXAS-like_WT_GMP_1","VEXAS-like_WT_CMP","VEXAS-like_WT_HSC_MPP",
"VEXAS-like_WT_LMPP","VEXAS-like_WT_CLP", "VEXAS-like_WT_Prolif_lymphoid_prec",
"VEXAS-like_WT_B_cell_progenitors_1", "VEXAS-like_WT_B_cell_progenitors_2")
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.y=element_text(size=6))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=3) +
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
limits = c(-5, 5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
ggsave(filename = paste(plot.dir, paste("hallmark_AvsC_ordered_last", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 10, height = 8, dpi = 600)
#heatmap specific comparison GMP2 vs GMP1 (05/06/24)
wdir="/DATA/31/molteni/vexas_mouse_3/results"
plot.dir="/DATA/31/molteni/vexas_mouse_3/results/heatmap/"
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
hallmark_ds <- c("A", "C", "Full")
clusters <- unique(Full_final$celltypes_annot)
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
#for (i in clusters) {
#s <- paste("_",ds,sep="") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA_GMP2_GMP1", paste("GMP2vsGMP1" , ds , sep="_"),"tables", paste(paste("GMP2vsGMP1" , ds , sep="_"),"NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,"GMP2vsGMP1",sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
#}
}
}
hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
##
all_hallmark <- hallmark.full.filt
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
level_order <- c("A_GMP2vsGMP1", "C_GMP2vsGMP1", "Full_GMP2vsGMP1")
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.y=element_text(size=6))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=3) +
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
limits = c(-5, 5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
ggsave(filename = paste(plot.dir, paste("hallmark_GMP2_GMP1", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 10, height = 8, dpi = 600)
#heatmap specific comparison Bprog1 vs BProg2 (05/06/24)
wdir="/DATA/31/molteni/vexas_mouse_3/results"
plot.dir="/DATA/31/molteni/vexas_mouse_3/results/heatmap/"
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
hallmark_ds <- c("A", "C", "Full")
clusters <- unique(Full_final$celltypes_annot)
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
#for (i in clusters) {
#s <- paste("_",ds,sep="") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA_BP1_BP2", paste("BP1vsBP2" , ds , sep="_"),"tables", paste(paste("BP1vsBP2" , ds , sep="_"),"NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,"BP1vsBP2",sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
#}
}
}
hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
##
all_hallmark <- hallmark.full.filt
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
level_order <- c("A_BP1vsBP2", "C_BP1vsBP2", "Full_BP1vsBP2")
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.y=element_text(size=6))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=3) +
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
limits = c(-5, 5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
ggsave(filename = paste(plot.dir, paste("hallmark_BP1_BP2", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 10, height = 8, dpi = 600)
library(Seurat)
library(dplyr)
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final.rds")
# computation on cluster numbers
setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_markers/")
Idents(Full_final)<-"RNA_snn_h.orig.ident_res.0.6"
clusters <- levels(Full_final@meta.data$RNA_snn_h.orig.ident_res.0.6)
for(i in 0:length(clusters)) {
markers <- FindMarkers(Full_final, ident.1 = "A", group.by = "orig.ident", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
df <- list.files(path='/DATA/31/molteni/vexas_mouse_3/results/DE_markers/') %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
write.csv(df, file="AvsC_sigMarkers.csv")
cells_by_sample <- table(Full_final@meta.data$RNA_snn_h.orig.ident_res.0.6, Full_final@meta.data$orig.ident)
write.csv(cells_by_sample, file="cells_bysample.csv")
# computation on custom annotation
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot.rds")
setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_markers2/new/")
clusters <- unique (Full_final$celltypes_annot)
for(i in clusters) {
markers <- FindMarkers(Full_final, ident.1 = "A", group.by = "orig.ident", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
df <- list.files(path='/DATA/31/molteni/vexas_mouse_3/results/DE_markers2/new/') %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_mouse_3/results/summary")
write.csv(df, file="AvsC_sigMarkers_custom.csv")
cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$celltypes_annot))
colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
write.csv(cc_distribution, file="cell_cycle.csv")
# computation on custom annotation after filtering cluster 19, res 1.2
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_markers/AvsC/")
clusters <- unique (Full_final$celltypes_annot)
for(i in clusters) {
markers <- FindMarkers(Full_final, ident.1 = "A", group.by = "orig.ident", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
df <- list.files(path='/DATA/31/molteni/vexas_mouse_3/results/DE_markers/AvsC/') %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_mouse_3/results/summary")
write.csv(df, file="AvsC_sigMarkers_custom.csv")
cells_by_sample <- table(Full_final@meta.data$celltypes_annot, Full_final@meta.data$orig.ident)
write.csv(cells_by_sample, file="cells_bysample.csv")
cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$celltypes_annot))
colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
write.csv(cc_distribution, file="cell_cycle.csv")
#markers tra cluster specifici
## GMP2 VS GMP2
setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_GMP2_GMP1")
Idents(Full_final) <- "celltypes_annot"
markers <- FindMarkers(Full_final, ident.1 = "GMP_2", ident.2 = "GMP_1")
write.csv(markers, file="GMP2vsGMP1_Full.csv")
Idents(Full_final) <- "orig.ident"
Full_final_A <- subset(Full_final, idents = "A")
Idents(Full_final_A) <- "celltypes_annot"
markers <- FindMarkers(Full_final_A, ident.1 = "GMP_2", ident.2 = "GMP_1")
write.csv(markers, file="GMP2vsGMP1_A.csv")
Idents(Full_final) <- "orig.ident"
Full_final_C <- subset(Full_final, idents ="C")
Idents(Full_final_C) <- "celltypes_annot"
markers <- FindMarkers(Full_final_C, ident.1 = "GMP_2", ident.2 = "GMP_1")
write.csv(markers, file="GMP2vsGMP1_C.csv")
## B Cell Prog 1 vs B Cell Prog 2
setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_BP1_BP2")
Idents(Full_final) <- "celltypes_annot"
markers <- FindMarkers(Full_final, ident.1 = "B_cell_progenitors_1", ident.2 = "B_cell_progenitors_2")
write.csv(markers, file="BP1vsBP2_Full.csv")
Idents(Full_final_A) <- "celltypes_annot"
markers <- FindMarkers(Full_final_A, ident.1 = "B_cell_progenitors_1", ident.2 = "B_cell_progenitors_2")
write.csv(markers, file="BP1vsBP2_A.csv")
Idents(Full_final_C) <- "celltypes_annot"
markers <- FindMarkers(Full_final_C, ident.1 = "B_cell_progenitors_1", ident.2 = "B_cell_progenitors_2")
write.csv(markers, file="BP1vsBP2_C.csv")
setwd("/DATA/31/molteni/vexas_mouse_3/results/DE_markers/AvsC/")
df <- list.files(path='/DATA/31/molteni/vexas_mouse_3/results/DE_markers/AvsC/') %>%
lapply(read.csv) %>%
bind_rows
#df <- df[df$p_val_adj<0.05,]
top50 <- df %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 0) %>%
slice_head(n = 50) %>%
ungroup() -> top50
write.csv(top50, file="top50_by_cluster.csv")
obj <- readRDS ("/DATA/31/molteni/vexas_mouse_3/results/Full/01-seurat/Full_final_annot_filt.rds")
assay = "RNA"
reduction <- toupper(paste0("umap.harmony.orig.ident"))
clu_res <- "celltypes_annot"
obj <- SetIdent(obj, value = clu_res)
# convert to scExperiment object
obj_sc <- as.SingleCellExperiment(obj)
out_dir <- "/DATA/31/molteni/vexas_mouse_3/results/pseudotime_filt2_FINAL/"
cell_pal <- function(cell_vars, pal_fun,...) {
if (is.numeric(cell_vars)) {
pal <- pal_fun(100, ...)
return(pal[cut(cell_vars, breaks = 100)])
} else {
categories <- sort(unique(cell_vars))
pal <- setNames(pal_fun(length(categories), ...), categories)
return(pal[cell_vars])
}
}
#my_cols <- hue_pal()(length(levels(obj_sc[[clu_res]])))
#my_cols <- colorRampPalette(pal_npg("nrc")(10))(16) # nature publishing group palette
my_cols <- colorRampPalette(brewer.pal(12, "Paired"))(length(levels(obj_sc[[clu_res]])))
my_cols <- brewer.pal(12, "Paired")[1:14] #inserire numero di cluster
names(my_cols) <- levels(obj_sc[[clu_res]])
#cell_colors_clust <- "black"
cell_colors_clust <- my_cols[obj_sc[[clu_res]]]
# Settings: resolution 1.2 harmony - starting point - cluster 3
namex <- toupper("umap.harmony.orig.ident")
scs <- slingshot(data = obj_sc,
reducedDim = reduction,
start.clus = "HSC_MPP",
extend = 'n',
clusterLabels = obj_sc[[clu_res]])
pal <- viridis::inferno(100,end = 0.95)
sl_name <- 'Slingshot'
pt <- slingPseudotime(scs)
head(pt)
#se clu_res non è un numero#####
# Assuming clu_res is a character vector with cluster assignments like "A", "B", "C", etc.
# Convert it to a factor
factor_clu_res <- as.factor(obj_sc[[clu_res]])
# Then convert the factor to numeric
# The levels will be assigned numbers in the order they appear in the factor
numeric_clu_res <- as.numeric(factor_clu_res)
# Now you can use numeric_clu_res to index into your colors
cell_colors_clust <- my_cols[numeric_clu_res]
#grafici
for (num_curve in seq(1, length(colnames(pt)))) {
lineagename <- paste('Curve',as.character(num_curve),sep='')
colors <- pal[cut(pt[,num_curve], breaks = 100)]
# Set the filename and create a PNG device
pdf(paste(out_dir, paste0(sl_name, "_branch_", num_curve, ".pdf"), sep = "/"), width = 1500/90, height = 750/90)
# Start a new plot
plot.new()
#grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 2))) # Adjusted layout grid to 1 row and 2 columns
# Pseudotime plot with adjusted margins
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
par(mar = c(5, 5, 5, 2) + 0.1) # Adjust margins here; default is c(5, 4, 4, 2) + 0.1
par(fig = gridFIG(), new = TRUE)
plot(reducedDim(scs, type = namex), col = colors,
pch = 16, cex = 1, main = "Pseudotime", cex.axis=2.0, cex.lab=2.0, cex.main=2.2) # Adjusted cex.lab for label size
lines(SlingshotDataSet(scs), type = 'lineages', show.constraints = TRUE, col = "black")
lines(slingCurves(scs)[[num_curve]], lwd = 2, col = 'red')
popViewport()
# Sample density plot with adjusted margins
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
par(mar = c(5, 5, 5, 2) + 0.1) # Adjust margins here; default is c(5, 4, 4, 2) + 0.1
par(fig = gridFIG(), new = TRUE)
ds <- list()
for (samp in unique(colData(scs)$sample_label)) {
ds[[samp]] <- density(pt[colData(scs)$sample_label == samp, num_curve], na.rm = TRUE)
}
# Setting the plot limits
x_min <- min(sapply(ds, function(x) min(x$x, na.rm = TRUE)))
x_max <- max(sapply(ds, function(x) max(x$x, na.rm = TRUE)))
y_min <- min(sapply(ds, function(x) min(x$y, na.rm = TRUE)))
y_max <- max(sapply(ds, function(x) max(x$y, na.rm = TRUE)))
xlim <- range(c(x_min, x_max))
ylim <- range(c(y_min, y_max))
plot(xlim, ylim, type = "n", xlab = "Pseudotime", ylab = "", main = paste0("Sample density on branch ", num_curve),
cex.axis=2.0, cex.lab=2.0, cex.main=2.2)
for (samp in names(ds)) {
polygon(c(min(ds[[samp]]$x), ds[[samp]]$x, max(ds[[samp]]$x)), c(0, ds[[samp]]$y, 0),
col = alpha(brewer.pal(length(names(ds)), "Set1")[which(names(ds) == samp)], alpha = .5))
}
legend("topright", legend = names(ds), fill = alpha(brewer.pal(length(names(ds)), "Set1"), alpha = .5), bty = "n", cex = 2.0)
popViewport()
# Close the graphical device
dev.off()
}
# HALLMARK GSEA
library('dplyr')
library('ggplot2')
library('reshape2')
library('RColorBrewer')
plot.dir="/DATA/31/molteni/vexas_mouse_4/results/heatmap/"
wdir="/DATA/31/molteni/vexas_mouse_4/results"
#
# #heatmap custom cluster
#
#
plot.dir="/DATA/31/molteni/vexas_mouse_4/results/heatmap/"
wdir="/DATA/31/molteni/vexas_mouse_4/results"
#custom
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_4/results/Full/01-seurat/Full_final_annot.rds")
hallmark_ds <- c("A_B")
clusters <- unique(Full_final$celltypes_annot)
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
for (i in clusters) {
s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA", "AvsB",s, "tables", paste(s, "NA-gsea-h.all.v7.2.sym_mouse.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,i,sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
}
hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
##
all_hallmark <- hallmark.full.filt
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
level_order <- c("A_B_HSC_MPP", "A_B_CMP", "A_B_GMP", "A_B_GMP_MEP", "A_B_Myelocytes", "A_B_Neutrophils",
"A_B_MEP_Baso_Mast_cells", "A_B_Plasmacytoid_DC_prog", "A_B_Conventional_DC_prog", "A_B_Proliferating_myeloid_cells",
"A_B_Pro_monocyte", "A_B_Monocytes", "A_B_Dendritic_cells", "A_B_Macrophages", "A_B_Erythroid_cells",
"A_B_Undefined_1", "A_B_Undefined_2")
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.y=element_text(size=6))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=3) +
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
limits = c(-4.5, 4.5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
ggsave(filename = paste(plot.dir, paste("hallmark_AvsB", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 10, height = 10, dpi = 600)
library(Seurat)
library(dplyr)
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_4/results/Full/01-seurat/Full_final.rds")
# #########################################################################################################################
# # computation on custom annotation
#
Full_final <- readRDS("/DATA/31/molteni/vexas_mouse_4/results/Full/01-seurat/Full_final_annot.rds")
setwd("/DATA/31/molteni/vexas_mouse_4/results/DE_markers/AvsB/")
clusters <- unique(Full_final@meta.data$celltypes_annot)
for(i in clusters) {
markers <- FindMarkers(Full_final, ident.1 = "Pool_A_m", group.by = "orig.ident", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
df <- list.files(path="/DATA/31/molteni/vexas_mouse_4/results/DE_markers/AvsB") %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_mouse_4/results/summary/")
write.csv(df, file="PoolA_mvsPoolB_m_sigMarkers.csv")
cells_by_sample <- table(Full_final@meta.data$celltypes_annot, Full_final@meta.data$orig.ident)
write.csv(cells_by_sample, file="cells_bysample.csv")
cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$celltypes_annot))
colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
write.csv(cc_distribution, file="cell_cycle.csv")
# HALLMARK GSEA
library('dplyr')
library('ggplot2')
library('reshape2')
library('RColorBrewer')
plot.dir="/DATA/31/molteni/vexas_sorted_4/results/heatmap/"
wdir="/DATA/31/molteni/vexas_sorted_4/results"
# #heatmap custom cluster
#
Full_final <- readRDS("/DATA/31/molteni/vexas_sorted_4/results/Full/01-seurat/Full_final_annot.rds")
hallmark_ds <- c("A_B")
clusters <- unique(Full_final$celltypes_annot)
hallmark.full <- data.frame()
for (ds in hallmark_ds) {
for (i in clusters) {
s <- paste(ds,"markers_cluster", i,sep="_") #qui cambiare 1 con i e inserire ciclo for
ff <- paste(wdir, "GSEA", "AvsB",s, "tables", paste(s, "NA-gsea-h.all.v7.2.symbols.txt", sep = "-"), sep = "/")
ds.t <- read.table(ff, header = T, sep = "\t")
ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalues"), drop = FALSE]
ds.t.filt$Dataset <- paste(ds,i,sep="_")
if (nrow(hallmark.full) == 0) {
hallmark.full <- ds.t.filt
} else {
hallmark.full <- rbind(hallmark.full, ds.t.filt)
}
}
}
hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset","p.adjust")]
##
all_hallmark <- hallmark.full.filt
all_hallmark$star <- cut(all_hallmark$p.adjust, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) # Create column of significance labels
all_hallmark$ID <- gsub(x = all_hallmark$ID, "HALLMARK_", "")
hallmark.order <- all_hallmark %>% group_by(ID) %>% summarise(Pos = sum(NES))
hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE]
all_hallmark.nn <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="NES"), id.vars = c("ID"))
colnames(all_hallmark.nn) <- c("ID", "Dataset", "NES")
all_hallmark.pp <- melt(reshape2::dcast(all_hallmark, ID ~ Dataset, value.var="star"), id.vars = c("ID"))
colnames(all_hallmark.pp) <- c("ID", "Dataset", "STARS")
all_hallmark.tt <- merge(all_hallmark.nn,all_hallmark.pp)
all_hallmark.tt$ID <- factor(all_hallmark.tt$ID, levels = hallmark.order.terms$ID)
level_order <- c("A_B_Erythroid_cells", "A_B_Plasmacytoid_DC", "A_B_Conv_DC1", "A_B_Conv_DC2", "A_B_Monocytes",
"A_B_GMP", "A_B_MEP_Baso_Mast", "A_B_HSC_MPP", "A_B_Cycling_prog", "A_B_Cycling_lymph_prog", "A_B_PreBNK",
"A_B_ProB_cells_1","A_B_ProB_cells_2","A_B_ProB_cells_3", "A_B_Immature_B_cells", "A_B_Undefined1", "A_B_Undefined2","A_B_Undefined3")
all_hallmark.tt$Dataset <- factor(all_hallmark.tt$Dataset, levels =level_order )
p.hallmark <- ggplot(all_hallmark.tt, aes(x = Dataset, y = ID)) +
theme_bw(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.y=element_text(size=6))+
geom_tile(aes(fill = NES), colour = "white") +
geom_text(aes(label = STARS), color="black", size=3) +
scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100),
limits = c(-5, 5),
na.value = "grey"
#guide = guide_colourbar(reverse = TRUE)
) +
ylab("") +
xlab("") +
coord_fixed(ratio = 0.4)
ggsave(filename = paste(plot.dir, paste("hallmark_AvsB", "heatmap.pdf", sep = "-"), sep = "/"),
plot = p.hallmark,
width = 10, height = 8, dpi = 600)
library(Seurat)
library(dplyr)
Full_final <- readRDS("/DATA/31/molteni/vexas_sorted_4/results/Full/01-seurat/Full_final.rds")
# computation on cluster numbers
# res 0.6
setwd("/DATA/31/molteni/vexas_sorted_4/results/DE_markers_06/")
Idents(Full_final)<-"RNA_snn_h.orig.ident_res.0.6"
clusters <- levels(Full_final@meta.data$RNA_snn_h.orig.ident_res.0.6)
for(i in 0:length(clusters)) {
markers <- FindMarkers(Full_final, ident.1 = "Pool_A_h", group.by = "orig.ident", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
df <- list.files(path='/DATA/31/molteni/vexas_sorted_4/results/DE_markers_06/') %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_sorted_4/results/summary/")
write.csv(df, file="PoolA_hvsPoolB_h_sigMarkers_06.csv")
cells_by_sample <- table(Full_final@meta.data$RNA_snn_h.orig.ident_res.0.6, Full_final@meta.data$orig.ident)
write.csv(cells_by_sample, file="cells_bysample.csv")
cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$RNA_snn_h.orig.ident_res.0.6))
colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
write.csv(cc_distribution, file="cell_cycle_06.csv")
### res 1.2
setwd("/DATA/31/molteni/vexas_sorted_4/results/DE_markers_12/")
Idents(Full_final)<-"RNA_snn_h.orig.ident_res.1.2"
clusters <- levels(Full_final@meta.data$RNA_snn_h.orig.ident_res.1.2)
for(i in clusters) {
cells_group1 = sum(Full_final$orig.ident == "Pool_A_h" & Full_final$RNA_snn_h.orig.ident_res.1.2 == i)
cells_group2 = sum(Full_final$orig.ident == "Pool_B_h" & Full_final$RNA_snn_h.orig.ident_res.1.2 == i)
if(cells_group1 >= 3 & cells_group2 >= 3 ) {
markers <- FindMarkers(Full_final, ident.1 = "Pool_A_h", group.by = "orig.ident", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
else {
print(paste("Skipping comparison for", i, "due to insufficient cell count in one or both groups."))
}
}
df <- list.files(path='/DATA/31/molteni/vexas_sorted_4/results/DE_markers_12/') %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_sorted_4/results/summary/")
write.csv(df, file="PoolA_hvsPoolB_h_sigMarkers_12.csv")
cells_by_sample <- table(Full_final@meta.data$RNA_snn_h.orig.ident_res.1.2, Full_final@meta.data$orig.ident)
write.csv(cells_by_sample, file="cells_bysample_12.csv")
cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$RNA_snn_h.orig.ident_res.1.2))
colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
write.csv(cc_distribution, file="cell_cycle_12.csv")
###############################################################################################################
# computation on custom annotation
setwd("/DATA/31/molteni/vexas_sorted_4/results/DE_markers/AvsB/")
Full_final <- readRDS("/DATA/31/molteni/vexas_sorted_4/results/Full/01-seurat/Full_final_annot.rds")
clusters <- unique(Full_final@meta.data$celltypes_annot)
for(i in clusters) {
markers <- FindMarkers(Full_final, ident.1 = "Pool_A_h", group.by = "orig.ident", subset.ident = i)
markers['cluster'] <- i
filename=paste("markers_cluster_",i,".csv", sep="")
write.csv(markers, file=filename)
}
df <- list.files(path='/DATA/31/molteni/vexas_sorted_4/results/DE_markers/AvsB/') %>%
lapply(read.csv) %>%
bind_rows
df <- df[df$p_val_adj<0.05,]
setwd("/DATA/31/molteni/vexas_sorted_4/results/summary/")
write.csv(df, file="PoolA_hvsPoolB_h_sigMarkers.csv")
cells_by_sample <- table(Full_final@meta.data$celltypes_annot, Full_final@meta.data$orig.ident)
write.csv(cells_by_sample, file="cells_bysample.csv")
cc_distribution <-as.data.frame (table(Full_final$Phase, Full_final$orig.ident, Full_final$celltypes_annot))
colnames(cc_distribution) <- c("Phase","orig.ident","cluster","CellNumb")
write.csv(cc_distribution, file="cell_cycle.csv")
obj <- readRDS("/DATA/31/molteni/vexas_sorted_4/results/Full/01-seurat/Full_final_annot.rds")
Idents(obj) <- "celltypes_annot"
assay = "RNA"
reduction <- toupper(paste0("umap.harmony.orig.ident"))
clu_res <- "celltypes_annot"
obj <- SetIdent(obj, value = clu_res)
# convert to scExperiment object
obj_sc <- as.SingleCellExperiment(obj)
out_dir <- "/DATA/31/molteni/vexas_sorted_4/results/pseudotime"
cell_pal <- function(cell_vars, pal_fun,...) {
if (is.numeric(cell_vars)) {
pal <- pal_fun(100, ...)
return(pal[cut(cell_vars, breaks = 100)])
} else {
categories <- sort(unique(cell_vars))
pal <- setNames(pal_fun(length(categories), ...), categories)
return(pal[cell_vars])
}
}
my_cols <- colorRampPalette(brewer.pal(12, "Paired"))(length(levels(obj_sc[[clu_res]])))
my_cols <- brewer.pal(12, "Paired")[1:14] #inserire numero di cluster
names(my_cols) <- levels(obj_sc[[clu_res]])
cell_colors_clust <- my_cols[obj_sc[[clu_res]]]
# Settings: resolution 1.2 harmony - starting point - cluster 3
namex <- toupper("umap.harmony.orig.ident")
scs <- slingshot(data = obj_sc,
reducedDim = reduction,
start.clus = "HSC_MPP",
extend = 'n',
clusterLabels = obj_sc[[clu_res]])
pal <- viridis::inferno(100,end = 0.95)
sl_name <- 'Slingshot'
pt <- slingPseudotime(scs)
head(pt)
#se clu_res non è un numero#####
# Assuming clu_res is a character vector with cluster assignments like "A", "B", "C", etc.
# Convert it to a factor
factor_clu_res <- as.factor(obj_sc[[clu_res]])
# Then convert the factor to numeric
# The levels will be assigned numbers in the order they appear in the factor
numeric_clu_res <- as.numeric(factor_clu_res)
# Now you can use numeric_clu_res to index into your colors
cell_colors_clust <- my_cols[numeric_clu_res]
#graphics
for (num_curve in seq(1, length(colnames(pt)))) {
lineagename <- paste('Curve',as.character(num_curve),sep='')
colors <- pal[cut(pt[,num_curve], breaks = 100)]
# Set the filename and create a PDF device
pdf(paste(out_dir, paste0(sl_name, "_branch_", num_curve, ".pdf"), sep = "/"), width = 1500/90, height = 750/90)
# Start a new plot
plot.new()
#grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 2))) # Adjusted layout grid to 1 row and 2 columns
# Pseudotime plot with adjusted margins
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
par(mar = c(5, 5, 5, 2) + 0.1) # Adjust margins here; default is c(5, 4, 4, 2) + 0.1
par(fig = gridFIG(), new = TRUE)
plot(reducedDim(scs, type = namex), col = colors,
pch = 16, cex = 1, main = "Pseudotime", cex.axis=2.0, cex.lab=2.0, cex.main=2.2) # Adjusted cex.lab for label size
lines(SlingshotDataSet(scs), type = 'lineages', show.constraints = TRUE, col = "black")
lines(slingCurves(scs)[[num_curve]], lwd = 2, col = 'red')
popViewport()
# Sample density plot with adjusted margins
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
par(mar = c(5, 5, 5, 2) + 0.1) # Adjust margins here; default is c(5, 4, 4, 2) + 0.1
par(fig = gridFIG(), new = TRUE)
ds <- list()
for (samp in unique(colData(scs)$sample_label)) {
ds[[samp]] <- density(pt[colData(scs)$sample_label == samp, num_curve], na.rm = TRUE)
}
# Setting the plot limits
x_min <- min(sapply(ds, function(x) min(x$x, na.rm = TRUE)))
x_max <- max(sapply(ds, function(x) max(x$x, na.rm = TRUE)))
y_min <- min(sapply(ds, function(x) min(x$y, na.rm = TRUE)))
y_max <- max(sapply(ds, function(x) max(x$y, na.rm = TRUE)))
xlim <- range(c(x_min, x_max))
ylim <- range(c(y_min, y_max))
plot(xlim, ylim, type = "n", xlab = "Pseudotime", ylab = "", main = paste0("Sample density on branch ", num_curve),
cex.axis=2.0, cex.lab=2.0, cex.main=2.2)
for (samp in names(ds)) {
polygon(c(min(ds[[samp]]$x), ds[[samp]]$x, max(ds[[samp]]$x)), c(0, ds[[samp]]$y, 0),
col = alpha(brewer.pal(length(names(ds)), "Set1")[which(names(ds) == samp)], alpha = .5))
}
legend("topright", legend = names(ds), fill = alpha(brewer.pal(length(names(ds)), "Set1"), alpha = .5), bty = "n", cex = 2.0)
popViewport()
# Close the graphical device
dev.off()
}
library(UCell)
library(Seurat)
library(ggplot2)
library(dplyr)
library(tidyr)
### load signatures
vexas_50 <- read.gmt("/DATA/31/molteni/vexas_bm/reference/custom_vexas_50.gmt")
vexas_100 <- read.gmt("/DATA/31/molteni/vexas_bm/reference/custom_vexas_100.gmt")
vexas_signature <- list(vexas_50 = vexas_50[[2]],
vexas_100 = vexas_100[[2]])
# load dataset
query <- readRDS("/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_anno.rds")
### UCell
# compute modules
names(vexas_signature) <- paste0("VEXAS_Xenograft_sig", c("50", "100"))
ncol <- ncol(query@meta.data)
query <- AddModuleScore_UCell(query, features = vexas_signature)
colnames(query@meta.data) <- c(colnames(query@meta.data)[seq_len(ncol)], names(vexas_signature))
# wilcoxon test
x <- melt(data = query@meta.data, id.vars = c("condition", "celltypes_annot"), measure.vars = c("VEXAS_Xenograft_sig50"))
t_predpop <- x %>% dplyr::group_by(variable, celltypes_annot) %>% dplyr::summarise(pvalue = wilcox.test(x = value[condition == "PZ"], y = value[condition == "HD"])$p.value)
t_predpop$bonferroni_adjpvalue <- p.adjust(p = t_predpop$pvalue)
t_predpop <- t_predpop[order(t_predpop$bonferroni_adjpvalue, decreasing = F),]
write.table(x = t_predpop, file = "VexasXenograftSignatures_SeuratAnnotatedClustersWilcoxonTest_VEXASvsHD_sig50.txt", sep = "\t", row.names = F, col.names = T, quote = F)
# violin plots: VEXAS vs HD
t_predpop$sig_label <- ifelse(t_predpop$bonferroni_adjpvalue > 0.05, "ns",
ifelse(t_predpop$bonferroni_adjpvalue < 0.05, "*",
ifelse(t_predpop$bonferroni_adjpvalue < 0.01, "**",
ifelse(t_predpop$bonferroni_adjpvalue < 0.001, "***", "***"))))
g <- query@meta.data %>%
ggplot() +
theme_classic() +
geom_violin(aes(x = celltypes_annot, y = VEXAS_Xenograft_sig50, fill = condition), scale = "width") +
geom_text(data = t_predpop, aes(x = celltypes_annot, y = 0.7, label = sig_label)) +
theme(plot.title = element_text(hjust = 0.5, size = 10),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "right") +
ylab("UCell Module score") + xlab("") +
ggtitle("VEXAS xenograft signature (n = 50)") +
theme(axis.text.x = element_text(angle = 45, , vjust = 1, hjust = 1)) +
scale_fill_manual(values = adjustcolor(col = c("#F8766D", "#02818a"), alpha.f = 0.8), name = "")
ggsave(g,
filename = "ViolinPlots_VexasXenograftSignatureTop50.pdf",
width = length(unique(query$celltypes_annot))*5*0.1,
height = 5,
limitsize = FALSE)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment