library(Seurat) library(ggplot2) library(harmony) library(RColorBrewer) library(plotly) library(htmlwidgets) library(ComplexHeatmap) library(openxlsx) library(dplyr) library(future) library(gridExtra) library(SeuratWrappers) ## Wu et al. public data (GSE196052) countsData_bm <- read.delim(file = "/DATA/31/molteni/vexas_sc/public/GSE196052_dataCount_BM.csv", header = TRUE, sep = ",", row.names = 1) sampleData <- read.delim(file = "/DATA/31/molteni/vexas_sc/public/BM_metaDatatSNECellType_ALiceManual.csv", header = TRUE, sep = ",") sampleData <- sampleData[,c(1:6)] obj_BM <- CreateSeuratObject(counts = countsData_bm) dataset_numbers <- sub(".*\\.", "", colnames(obj_BM)) padded_numbers <- ifelse(as.numeric(dataset_numbers) < 10, paste0("0", dataset_numbers), dataset_numbers) final_dataset_labels <- paste0("BM-PU-", padded_numbers) obj_BM$dataset <- final_dataset_labels obj_BM@meta.data$subject <- sampleData$subject obj_BM@meta.data$orig.ident <- obj_BM@meta.data$subject obj_BM@meta.data$sample_info <- "public" obj_BM@meta.data$subject <- NULL obj_BM@meta.data$dataset <- NULL ## our vexas data BM_minimal <- readRDS("/DATA/31/molteni/vexas_bm/results/Full/Full_minimal.rds") BM_minimal@meta.data$scDblFinder_class <- NULL BM_minimal@meta.data$scds_class <- NULL BM_minimal@meta.data$RNA_Source <- NULL BM_minimal@meta.data$sample_info <- "vexas" BM.combo <- merge(BM_minimal, y = obj_BM, add.cell.ids = c("vexas", "vexas_public")) # normalize data and find variable features # select only 2000 genes as variable varfeatures <- 2000 BM.combo <- NormalizeData(object = BM.combo) BM.combo <- FindVariableFeatures(BM.combo, selection.method = "vst", nfeatures = varfeatures, verbose = T) # scaling reg_vars = c("percent.mt", "nCount_RNA") BM.combo <- ScaleData(object = BM.combo, vars.to.regress = reg_vars, display.progress = T, features = rownames(BM.combo)) saveRDS(BM.combo, file = "combined_preIntegration_scaled.rds") BM.combo <- readRDS("/DATA/31/molteni/vexas_bm/results/integration/combined_preIntegration_scaled.rds") #PCA max_pca <- 70 BM.combo <- RunPCA(object = BM.combo, features = VariableFeatures(object = BM.combo), npcs = max_pca, reduction.name="pca", reduction.key="PC_") BM.combo@meta.data$condition <- NA BM.combo@meta.data$condition <- with(BM.combo@meta.data, ifelse(BM.combo@meta.data$orig.ident == "BM-01","PZ", ifelse(BM.combo@meta.data$orig.ident == "BM-02","PZ", ifelse(BM.combo@meta.data$orig.ident == "BM-03","PZ", ifelse(BM.combo@meta.data$orig.ident == "BM-04","PZ", ifelse(BM.combo@meta.data$orig.ident == "BM-08","PZ", ifelse(BM.combo@meta.data$orig.ident == "BM-09","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT1","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT2","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT3","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT4","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT5","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT6","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT7","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT8","PZ", ifelse(BM.combo@meta.data$orig.ident == "PT9","PZ", ifelse(BM.combo@meta.data$orig.ident == "HD1","HD", ifelse(BM.combo@meta.data$orig.ident == "HD2","HD", ifelse(BM.combo@meta.data$orig.ident == "HD3","HD", ifelse(BM.combo@meta.data$orig.ident == "HD4","HD", "NA")))))))))))))))))))) integration.var <- c("orig.ident", "sample_info", "condition") ## + condition BM.combo <- RunHarmony(object = BM.combo, group.by.vars = integration.var, max.iter.harmony = 30, plot_convergence = FALSE, reduction.save = "harmony") #PC = 15 setwd("/DATA/31/molteni/vexas_bm/results/integration/pc15") BM.combo <- RunUMAP(object = BM.combo, seed.use = 123, dims = 1:15, reduction = "harmony", reduction.name = "harmony_umap", reduction.key = "UMAPh_", return.model = TRUE) g <- DimPlot(BM.combo, split.by = "orig.ident", group.by = "sample_info", reduction = "harmony_umap", raster = F, ncol = 6) + ggtitle("UMAP post-harmony") ggsave(g, filename = 'UMAP_PostHarmony_donor_cond.png', width = 12, height = 6) # split by Idents(BM.combo) <- "sample_info" g <- DimPlot(BM.combo, split.by = "sample_info", reduction = "harmony_umap") + ggtitle("UMAP post-harmony") ggsave(g, filename = 'UMAP_PostHarmony_batch_cond2.png', width = 7, height = 6) Idents(BM.combo) <- "condition" g <- DimPlot(BM.combo, split.by = "condition", reduction = "harmony_umap") + ggtitle("UMAP post-harmony") ggsave(g, filename = 'UMAP_PostHarmony_cond_2.png', width = 7, height = 6) #PC=15 BM.combo <- FindNeighbors(object = BM.combo, dims = 1:15, force.recalc = T, reduction = "harmony", graph.name = c("RNA_nn_h.", "RNA_snn_h.")) BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.4) BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.6) BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.8) # SPLIT CLUSTER 20 OF RES=0.8 Idents(BM.combo) <- "RNA_snn_h._res.0.8" BM.combo <- FindSubCluster(BM.combo, cluster= "20", graph.name = "RNA_snn_h.", subcluster.name = "subcluster", resolution = 0.05) table(BM.combo$subcluster) Idents(BM.combo) <- "subcluster" mapping_vector <- c("20_0" = "20", "20_1" = "25") # Update the reference object for(old_ident in names(mapping_vector)) { BM.combo$subcluster[Idents(BM.combo) == old_ident] <- mapping_vector[old_ident] } BM.combo$RNA_snn_h._res.0.8 <- BM.combo$subcluster BM.combo@misc$markers <- FindAllMarkers(object = BM.combo, min.pct = 0.2, logfc.threshold = 0.25, only.pos = FALSE, min.cells.group = 10, test.use = "wilcox", return.thresh = 0.05, fc.name = "avg_logFC") markers_h <- list(BM.combo@misc$markers_h) names(markers_h) <- "RNA_snn_h._res.0.8" BM.combo@misc$markers_h <- markers_h saveRDS(BM.combo, file = "/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_final.rds") #custom cluster annotation Idents(BM.combo) <- "RNA_snn_h._res.0.8" # mapping vector new.cluster.ids <- c("CD8_T_cells","Early_erythroid","CD14+_monocytes_1","CD4_T_cells", "GP","Immature_Neutrophil","HSC_MPP","VEXAS_NK_cells","B_cells","MEP", "Erythroid_progenitors","Mid_erythroid_cells","Prolif_GP","VEXAS_GP", "CD14+_monocytes_2","Early_erythroid","Plasma_cells","Late_erythroid_cells", "CD14+_CD16_low_monocytes","Myeloid_dendritic_cells","Plasmacytoid_dentritic_cells", "CD8_T_cells","MLP","MEP_BASO_MAST_prog","Endothelial_cells","B_cells") cluster_mapping <- setNames(new.cluster.ids, c(1:length(new.cluster.ids))) BM.combo$celltypes_annot <- new.cluster.ids[as.numeric(as.character(BM.combo$RNA_snn_h._res.0.8)) + 1] Idents(BM.combo) <- "celltypes_annot" ggsave("UMAP_label.png", width = 10, height = 7) DimPlot(BM.combo, reduction = "harmony_umap", label = TRUE, label.box = TRUE, pt.size = 0.5, repel = TRUE) + NoLegend() saveRDS(BM.combo, file = "/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_anno.rds")