### upload on gitlab # 1) load R packages library(Seurat) library(GEOquery) library(limma) library(dplyr) library(data.table) library(clusterProfiler) library(ggplot2) library(RColorBrewer) library(qusage) library(UCell) # 2) define input and output paths # set path to the working directory path <- "" # 3) load scRNA-seq BM-object ## 3.1) restrict analysis to CD34+ clusters obj <- readRDS(file = paste0(path, "BM_20240222.rds")) CD34p_clusters <- as.character(c(2, 8, 14, 15, 16, 19, 20, 26, 27, 30, 33)) obj$seurat <- obj$RNA_snn_h.orig.ident_res.1.8 obj <- SetIdent(object = obj, value = "seurat") obj <- subset(obj, subset = (seurat %in% CD34p_clusters)) saveRDS(object = obj, file = paste0(path, "BM_CD34p.rds")) ## 3.2) free memory space l <- ls() l <- l[!l %in% "path"] rm(list = l); gc() # 4) load published scRNA-seq dataset ## 4.1) Ainciburu et al. ### 4.1.1) download tar file and metadata destfolder <- paste0(path, "GSE180298/"); dir.create(path = destfolder, showWarnings = F, recursive = T) destfile <- paste0(destfolder, "GSE180298_RAW.tar") download.file(url = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file", destfile = destfile) metadata_files <- c("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Felderly%5Fmetadata%2Etxt%2Egz", "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Fmds%5Fmetadata%2Etxt%2Egz", "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Fyoung%5Fmetadata%2Etxt%2Egz") download.file(url = metadata_files, destfile = paste0(destfolder, "GSE180298_", c("elderly", "mds", "young"), "_metadata.txt.gz")) ### 4.1.2) untar file untar(tarfile = destfile, exdir = paste0(destfolder, "GSE180298_RAW/")) files <- list.files(path = paste0(destfolder, "GSE180298_RAW/"), full.names = T) md <- list.files(path = destfolder, full.names = T) md_files <- md[grepl(md, pattern = "metadata")] ### 4.1.3) define seurat object metadata <- c() for(i in seq_len(length(md_files))){ gunzip(md_files[i], remove = FALSE, overwrite = TRUE) unzfile <- gsub(md_files[i], pattern = ".gz", replacement = "") temp <- read.delim(file = unzfile, header = T, sep = "\t") metadata <- rbind(metadata, temp) } metadata <- as.data.frame(metadata) metadata$orig.ident <- strsplit2(rownames(metadata), split = "\\_")[, 2] counts <- cell_counts <- samples <- c() for(i in seq_len(length(files))){ split_filename <- strsplit2(files[i], split = "/") sampleid <- strsplit2(split_filename[, ncol(split_filename)], split = "\\_")[, 2] print(paste0("Processing file ", sampleid, " (", i, " over ", length(files), ")")) # load h5 object temp <- Read10X_h5(filename = files[i], use.names = TRUE, unique.features = TRUE) coln <- paste0(strsplit2(colnames(temp), split = "-")[, 1], "_", sampleid) colnames(temp) <- coln # store temp_object <- CreateSeuratObject(counts = temp) counts_temp <- matrix(data = temp_object@assays$RNA@counts, ncol = ncol(temp_object)) rownames(counts_temp) <- rownames(temp_object) colnames(counts_temp) <- colnames(temp_object) # store info cell_counts <- c(cell_counts, ncol(counts_temp)) samples <- c(samples, sampleid) if(is.null(nrow(counts))){ counts <- counts_temp gs <- rownames(counts) }else{ gs <- intersect(gs, rownames(counts_temp)) counts <- cbind(counts[match(gs, rownames(counts)), ], counts_temp[match(gs, rownames(counts_temp)), ]) } } names(cell_counts) <- samples md <- metadata[rownames(metadata) %in% colnames(counts),]; dim(md) m <- match(rownames(md), colnames(counts)); table(is.na(m)) counts <- counts[, m] GSE180298 <- CreateSeuratObject(counts = counts, meta.data = metadata) ### 4.1.4) select only elderly donors GSE180298 <- SetIdent(object = GSE180298, value = "orig.ident") elderly_sampleid <- unique(GSE180298$orig.ident)[grepl(unique(GSE180298$orig.ident), pattern = "elderly")] GSE180298 <- subset(GSE180298, subset = (orig.ident %in% elderly_sampleid)) saveRDS(object = GSE180298, file = paste0(path, "GSE180298_elderly.rds")); gc() ## 4.1.5) free memory space l <- ls() l <- l[!l %in% "path"] rm(list = l); gc() ## 4.2) Wu et al. ### 4.2.1) download tar file destfolder <- paste0(path, "GSE196052/"); dir.create(path = destfolder, showWarnings = F, recursive = T) destfile <- paste0(destfolder, "GSE196052_RAW.tar") download.file(url = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE196052&format=file", destfile = destfile) metadata_files <- c("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE196052&format=file&file=GSE196052%5FdataCount%5FCD34%2Ecsv%2Egz") download.file(url = metadata_files, destfile = paste0(destfolder, "GSE196052_dataCount_CD34.csv.gz")) ### 4.2.2) untar file untar(tarfile = destfile, exdir = paste0(destfolder, "GSE196052_RAW/")) files_annotation <- list.files(path = paste0(destfolder, "GSE196052_RAW/"), full.names = T) files <- list.files(path = destfolder, full.names = T) ### 4.2.3) define seurat object md <- read.delim(file = files[grepl(files, pattern = "SraRun")], header = T, sep = ",") table(md$subject_id.status, md$tissue.cell_type) mdfiles <- files_annotation[grepl(files_annotation, pattern = "GSM")] metadata <- c() for(i in seq_len(length(mdfiles))){ x <- mdfiles[i] temp <- read.delim(gzfile(x), header = T, sep = ",") metadata <- rbind(metadata, temp) } f <- files[grepl(files, pattern = "CD34.csv.gz")] counts_cd34 <- fread(f) %>% as.data.frame() rn <- as.character(counts_cd34[, 1]) counts_cd34 <- counts_cd34[, -1] rownames(counts_cd34) <- rn cd34_cells <- colnames(counts_cd34) cd34_cells <- gsub(x = cd34_cells, pattern = "CD34_", replacement = "") wu_annot <- read.csv(file = paste0(destfolder, "CD34_metaDatatSNECellType_ALiceManual.csv"), header = T) m <- match(cd34_cells, wu_annot$orig.ident); table(is.na(m)) status <- wu_annot$group[m] sampleid <- wu_annot$subject[m] mdt <- data.frame(sampleid, status) rownames(mdt) <- colnames(counts_cd34) GSE196052 <- CreateSeuratObject(counts = counts_cd34, meta.data = mdt) GSE196052 <- SetIdent(object = GSE196052, value = "orig.ident") saveRDS(object = GSE196052, file = paste0(path, "GSE196052.rds")) ## 4.2.4) free memory space l <- ls() l <- l[!l %in% "path"] rm(list = l); gc() # 5) Define single seurat object ## 5.1) load datasets fiumara <- readRDS(paste0(path, "BM_CD34p.rds")) GSE180298 <- readRDS(paste0(path, "GSE180298_elderly.rds")) GSE196052 <- readRDS(paste0(path, "GSE196052.rds")) ## 5.2) define common features genes <- table(c(rownames(fiumara), rownames(GSE180298), rownames(GSE196052))) common_genes <- names(genes)[genes == 3]; length(common_genes) ## 5.3) add sampleinfo fiumara$source <- "fiumara" fiumara$sample_info <- paste0("fiumara_", fiumara$orig.ident) GSE180298$source <- "GSE180298" GSE180298$sample_info <- paste0("GSE180298_", GSE180298$orig.ident) GSE196052$source <- "GSE196052" GSE196052$sample_info <- paste0("GSE196052_", GSE196052$sampleid) ## 5.4) extract counts relative to these genes and define Seurat object counts <- cbind(fiumara@assays$RNA@counts[common_genes,], GSE180298@assays$RNA@counts[common_genes,], GSE196052@assays$RNA@counts[common_genes,]) vars <- c("source", "sample_info", "nCount_RNA", "nFeature_RNA") md <- rbind(fiumara@meta.data[, vars], GSE180298@meta.data[, vars], GSE196052@meta.data[, vars]) obj <- CreateSeuratObject(counts = counts, meta.data = md) obj$percent.mt <- (colSums(obj@assays$RNA@counts[grepl(rownames(obj), pattern = "^MT-"),])/colSums(obj@assays$RNA@counts))*100 obj$percent.rb <- (colSums(obj@assays$RNA@counts[grepl(rownames(obj), pattern = "^RPL"),])/colSums(obj@assays$RNA@counts))*100 saveRDS(object = obj, file = paste0(path, "combined.rds")); gc() ## 5.5) free memory space l <- ls() l <- l[!l %in% c("path", "obj")] rm(list = l); gc() # 6) removing low quality cells, normalization, scaling, integration and clustering ## 6.1) cell filtering obj$keep <- (obj$nFeature_RNA > 200) & (obj$percent.mt < 25) table(obj$keep, obj$source) obj <- subset(obj, subset = (keep %in% TRUE)) saveRDS(obj, file = paste0(path, "combined_filtered.rds")) ## 6.2) normalization varfeatures <- 1000 obj <- NormalizeData(object = obj) obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = varfeatures, verbose = T) ## 6.3) scaling reg_vars = c("percent.mt", "nCount_RNA") obj <- ScaleData(object = obj, vars.to.regress = reg_vars, display.progress = T, features = rownames(obj)) saveRDS(obj, file = paste0(path, "combined_normscaled.rds")) ## 6.4) dimensionality reduction max_pca <- 100 obj <- RunPCA(object = obj, features = VariableFeatures(object = obj), npcs = max_pca, reduction.name="pca", reduction.key="PC_") explvar <- ((obj@reductions$pca@stdev^2)/sum((obj@reductions$pca@stdev^2)))*100 delta <- explvar - c(explvar[-1], 0) opt_delta <- length(delta[delta > 1e-2]) opt_explvar <- min(which(cumsum(explvar) > 80)) opt <- min(opt_delta, opt_explvar) obj <- RunUMAP(object = obj, seed.use = 123, reduction = "pca", dims = 1:opt) ## 6.5) harmony dataset integration integration.var <- c("source", "sample_info") obj <- RunHarmony(object = obj, group.by.vars = integration.var, max.iter.harmony = 30, plot_convergence = FALSE, reduction.save = "harmony") obj <- RunUMAP(object = obj, seed.use = 123, dims = 1:opt, reduction = "harmony", reduction.name = "harmony_umap", reduction.key = "UMAPh_", return.model = TRUE) ## 6.6) find neighboring cells obj <- FindNeighbors(object = obj, dims = 1:opt, force.recalc = T, reduction = "harmony", graph.name = c("RNA_nn_h.iv", "RNA_snn_h.iv")) ## 6.7) cell clustering clu_res <- seq(0.1, 1, by = 0.1) for(res in clu_res){ obj <- FindClusters(object = obj, algorithm = 1, resolution = as.numeric(res), graph.name = "RNA_snn_h.iv") } ## 6.8) assign celltype to each cluster obj$seurat <- obj$RNA_snn_h.iv_res.0.5 obj$seurat_annotation <- NA obj$seurat_annotation[obj$seurat %in% "0"] <- "HSC/MPP" obj$seurat_annotation[obj$seurat %in% c("1", "6", "11")] <- "Mature Ery" obj$seurat_annotation[obj$seurat %in% "7"] <- "VEXAS Ery/CMP" obj$seurat_annotation[obj$seurat %in% c("8", "22")] <- "Immature Ery" obj$seurat_annotation[obj$seurat %in% "2"] <- "MPP/CMP" obj$seurat_annotation[obj$seurat %in% "12"] <- "PreBNK" obj$seurat_annotation[obj$seurat %in% "3"] <- "CMP/GMP" obj$seurat_annotation[obj$seurat %in% "4"] <- "GP" obj$seurat_annotation[obj$seurat %in% "13"] <- "MDP" obj$seurat_annotation[obj$seurat %in% "5"] <- "MyeloLympho/CMP" obj$seurat_annotation[obj$seurat %in% c("9", "21")] <- "VEXAS Immature Ery" obj$seurat_annotation[obj$seurat %in% "10"] <- "Undefined" obj$seurat_annotation[obj$seurat %in% "14"] <- "BEM" obj$seurat_annotation[obj$seurat %in% "16"] <- "Monocyte Progenitors" obj$seurat_annotation[obj$seurat %in% c("15", "19")] <- "MLP" obj$seurat_annotation[obj$seurat %in% "17"] <- "MEP" obj$seurat_annotation[obj$seurat %in% "18"] <- "VEXAS Mature Ery" obj$seurat_annotation[obj$seurat %in% "20"] <- "VEXAS MPP-Ery" obj$seurat_annotation[obj$seurat %in% "23"] <- "VEXAS Myelo/CMP" annotated_cols <- c("HSC/MPP" = '#FB0207', "MyeloLympho/CMP" = '#c6dbef', "MPP/CMP" = "#7fcdbb", "CMP/GMP" = '#9ecae1', "Monocyte Progenitors" = '#66CCFF', "MDP" = '#0F80FF', "GP" = "#08519c", "PreBNK" = '#118040', "MLP" = "#FECC66", "BEM" = "#bdbdbd", "MEP" = '#f768a1', "Immature Ery" = '#B17DFC', "Mature Ery" = "#800080", "VEXAS MPP-Ery" = '#fde0dd', "VEXAS Ery/CMP" = '#fcc5c0', "VEXAS Immature Ery" = '#fa9fb5', "VEXAS Mature Ery" = "#dd3497", "VEXAS Myelo/CMP" = "#ccebc5", "Undefined" = '#d9d9d9') levs <- names(annotated_cols) obj$seurat_annotation <- factor(obj$seurat_annotation, levels = levs) ## 6.9) define status and vexas-mutation variables obj$cell_barcode <- strsplit2(rownames(obj@meta.data), split = "\\_")[, 2] GSE196052_annot <- read.csv(file = paste0(path, "GSE196052/CD34_metaDatatSNECellType_ALiceManual.csv"), header = T) GSE196052_cases <- GSE196052_annot[GSE196052_annot$group %in% "PT",] ### 6.9.1) status obj$status <- "HD" obj$status[(obj$source %in% "GSE196052") & (obj$cell_barcode %in% GSE196052_cases$orig.ident)] <- "PT" obj$status[(obj$source %in% "fiumara")] <- "PT" table(obj$source, obj$status) ### 6.9.2) VEXAS mutation GSE196052_pt2upn <- paste0("GSE196052_PT", 1:9) names(GSE196052_pt2upn) <- paste0("GSE196052_UPN", c(6, 11, 1, 10, 13, 14, 15, 16, 17)) obj$sample_info_upn <- obj$sample_info for(i in seq_len(length(GSE196052_pt2upn))){ obj$sample_info_upn[obj$sample_info_upn %in% GSE196052_pt2upn[i]] <- names(GSE196052_pt2upn)[i] } table(obj$source, obj$sample_info_upn) table(obj$sample_info, obj$sample_info_upn) obj$vexas_mutation <- "HD" obj$vexas_mutation[obj$sample_info_upn %in% c(paste0("fiumara_BM-0", 1), paste0("GSE196052_UPN", c(1, 10, 11, 13, 16, 17)))] <- "THR" obj$vexas_mutation[obj$sample_info_upn %in% c(paste0("fiumara_BM-0", c(2, 3, 8)), paste0("GSE196052_UPN", c(6)))] <- "VAL" obj$vexas_mutation[obj$sample_info_upn %in% c(paste0("fiumara_BM-0", c(4, 9)), paste0("GSE196052_UPN", c(14, 15)))] <- "LEU" saveRDS(obj, file = paste0(path, "combined_annotated.rds")) table(obj$status, obj$vexas_mutation) table(obj$source, obj$vexas_mutation) table(obj$vexas_mutation, obj$sample_info_upn) ## 6.10) free memory space l <- ls() l <- l[!l %in% c("path", "obj")] rm(list = l); gc() # 7) Celltype-wise VEXAS vs HD (DE and GSEA) outpath <- paste0(path, "DE_GSEA/"); dir.create(path = outpath, showWarnings = F, recursive = T) ## 7.1) define function to run clusterProfiler GSEA gsea_run <- function(marks, gmt){ # load gmt file gmt.obj <- clusterProfiler::read.gmt(gmt) # order DE results by logFC genes <- marks$avg_log2FC names(genes) <- marks$gene_name genes <- genes[order(genes, decreasing = T)] genes <- genes[!duplicated(names(genes))] # run GSEA gsea <- GSEA(geneList = genes, TERM2GENE = gmt.obj, nPerm = 10000, pvalueCutoff = 1) return(gsea) } ## 7.2) download hallmarks gene sets hallmarks_gsea <- c("https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.4/h.all.v7.4.symbols.gmt") download.file(url = hallmarks_gsea, destfile = paste0(outpath, "h.all.v7.4.symbols.gmt")) ## 7.3) VEXAS vs HD ### 7.3.1) DE analysis annclusters <- names(annotated_cols) mincells <- 10 for(i in seq_len(length(annclusters))){ cl <- annclusters[i] cl_id <- gsub(x = cl, pattern = "/", replacement = "-") temp <- subset(obj, subset = (seurat_annotation %in% cl)) temp <- SetIdent(object = temp, value = "status") if(all(table(temp$status) >= mincells)){ de <- FindMarkers(temp, ident.1 = "PT", ident.2 = "HD", test.use = "wilcox", min.pct = 0.1, logfc.threshold = 0) marks <- de[order(de$p_val_adj, decreasing = F),] marks$gene_name <- rownames(marks) write.table(x = marks, file = paste0(outpath, "de_", cl_id, ".txt"), sep = '\t', row.names = F) } } ### 7.3.2) GSEA Hallmarks for(i in seq_len(length(annclusters))){ cl <- annclusters[i] cl_id <- gsub(x = cl, pattern = "/", replacement = "-") defile <- paste0(outpath, 'de_', cl_id, ".txt") if(file.exists(defile)){ marks <- read.table(file = defile, sep = "\t", header = T) gsea <- gsea_run(marks, gmt = hallmarks_gsea) write.table(x = gsea, file = paste0(outpath, 'gsea_', cl_id, ".txt"), sep = '\t', row.names = F) } } ## 7.4) VEXAS_MUT vs HD ### 7.4.1) DE analysis annclusters <- names(annotated_cols) mincells <- 10 mut <- c("LEU", "THR", "VAL") for(m in mut){ levs <- c(m, "HD") sub <- subset(obj, subset = (vexas_mutation %in% levs)) for(i in seq_len(length(annclusters))){ cl <- annclusters[i] cl_id <- gsub(x = cl, pattern = "/", replacement = "-") temp <- subset(sub, subset = (seurat_annotation %in% cl)) temp <- SetIdent(object = temp, value = "vexas_mutation") if(all(table(temp$vexas_mutation) >= mincells)){ de <- FindMarkers(temp, ident.1 = m, ident.2 = "HD", test.use = "wilcox", min.pct = 0.1, logfc.threshold = 0) marks <- de[order(de$p_val_adj, decreasing = F),] marks$gene_name <- rownames(marks) write.table(x = marks, file = paste0(outpath, "de_", cl_id, "_", m, "vHD.txt"), sep = '\t', row.names = F) } } } ### 7.4.2) GSEA Hallmarks for(m in mut){ for(i in seq_len(length(annclusters))){ cl <- annclusters[i] cl_id <- gsub(x = cl, pattern = "/", replacement = "-") defile <- paste0(outpath, "de_", cl_id, "_", m, "vHD.txt") if(file.exists(defile)){ marks <- read.table(file = defile, sep = "\t", header = T) gsea <- gsea_run(marks, gmt = hallmarks_gsea) write.table(x = gsea, file = paste0(outpath, 'gsea_', cl_id, "_", m, "vHD.txt"), sep = '\t', row.names = F) } } } ## 7.5) free memory space l <- ls() l <- l[!l %in% c("path", "obj")] rm(list = l); gc() # 8) UCell module scores and wilcoxon test ## 8.1) load marker gene set vexas_50 <- qusage::read.gmt(file = paste0(path, "xenograft_signatures/custom_vexas_50.gmt")) vexas_signature <- vexas_50[[1]] ## 8.2) compute UCell module scores names(vexas_signature) <- "VEXAS_Xenograft_sig50" ncol <- ncol(obj@meta.data) obj <- AddModuleScore_UCell(obj, features = vexas_signature) colnames(obj@meta.data) <- c(colnames(obj@meta.data)[seq_len(ncol)], names(vexas_signature)) ## 8.3) Celltype-wise wilcoxon test: VEXAS vs HD x <- melt(data = obj@meta.data, id.vars = c("status", "seurat_annotation"), measure.vars = c("VEXAS_Xenograft_sig50")) w <- x %>% dplyr::group_by(variable, seurat_annotation) %>% dplyr::summarise(pvalue = wilcox.test(x = value[status == "PT"], y = value[status == "HD"])$p.value) w$p.adjust <- p.adjust(p = w$pvalue) w <- w[order(w$p.adjust, decreasing = F),] write.table(x = w, file = paste0(path, "xenograft_signatures/wilcoxon.txt"), sep = "\t", row.names = F, col.names = T, quote = F) # 9) figures figpath <- paste0(path, "figures/"); dir.create(path = figpath, showWarnings = F, recursive = T) ## 9.1) figure 5d: Annotated UMAP obj <- SetIdent(object = obj, value = "seurat_annotation") g <- DimPlot(obj, reduction = "harmony_umap", label.box = T, label = T, label.color = T, label.size = 2) + scale_color_manual(values = annotated_cols, limits = levs) + scale_fill_manual(values = annotated_cols, limits = levs) + ggtitle("Cluster Annotation") + theme(plot.title = element_text(hjust = 0.5), legend.position = "right", legend.text = element_text(size=7)) ggsave(g, filename = paste0(figpath, "Fig5D_UMAP_AnnotatedClusters.png"), width = 10, height = 7, limitsize = FALSE) ## 9.2) figure 5e/5f: GSEA Hallmarks ### 9.2.1) load results res_VEXASvHD <- c() for(i in seq_len(length(annclusters))){ cl <- annclusters[i] cl_id <- gsub(x = cl, pattern = "/", replacement = "-") file <- paste0(outpath, 'gsea_', cl_id, ".txt") if(file.exists(file)){ x <- read.delim(file = file, header = T) x <- x[order(x$p.adjust, -x$NES),] res_VEXASvHD <- rbind(res_VEXASvHD, data.frame(x, celltype = cl, test = "VEXASvOLD")) } } res_MUTvHD <- c() for(m in mut){ for(i in seq_len(length(annclusters))){ cl <- annclusters[i] cl_id <- gsub(x = cl, pattern = "/", replacement = "-") file <- paste0(outpath, 'gsea_', cl_id, "_", m, "vHD.txt") if(file.exists(file)){ id <- paste0(m, "vHD") x <- read.delim(file = file, header = T) x <- x[order(x$p.adjust, -x$NES),] res_MUTvHD <- rbind(res_MUTvHD, data.frame(x, celltype = cl, test = id)) } } } res <- rbind(res_VEXASvHD, res_MUTvHD) res$significance_asterisk <- "" res$significance_asterisk[res$p.adjust < 0.05] <- "*" res$significance_asterisk[res$p.adjust < 0.01] <- "**" res$significance_asterisk[res$p.adjust < 0.001] <- "***" res <- res %>% tidyr::complete(ID, celltype, test) %>% as.data.frame() res$ID <- gsub(x = res$ID, pattern = "HALLMARK_", replacement = "") ### 9.2.2) plot g <- res %>% ggplot() + theme_bw() + facet_grid(. ~ test) + geom_tile(aes(x = celltype, y = ID, fill = NES)) + geom_text(aes(x = celltype, y = ID, label = significance_asterisk), size = 2) + theme(plot.title = element_text(hjust = 0.5, size = 10), axis.text.x = element_text(angle = 45, , vjust = 1, hjust = 1), legend.position = "top", strip.background =element_rect(fill="white")) + ylab("") + xlab("") + scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100), limits = c(-4, 4), na.value = "grey") ggsave(g, filename = paste0(figpath, "Fig5EF_GSEA_Celltype_Hallmarks.png"), width = length(unique(res$celltype))*0.3*length(unique(res_complete$test)), height = length(unique(res$ID))*0.2, limitsize = FALSE) ## 9.3) figure 5g: Monocyte xenograft signature CD34+ ### 9.3.1) load wilcoxon test results w <- read.table(file = paste0(path, "xenograft_signatures/wilcoxon.txt"), sep = "\t", header = T) w$significance_asterisk <- "" w$significance_asterisk[w$p.adjust < 0.05] <- "*" w$significance_asterisk[w$p.adjust < 0.01] <- "**" w$significance_asterisk[w$p.adjust < 0.001] <- "***" ### 9.3.2) plot g <- obj@meta.data %>% ggplot() + theme_classic() + geom_violin(aes(x = seurat_annotation, y = VEXAS_Xenograft_sig50, fill = status), scale = "width") + geom_text(data = w, aes(x = seurat_annotation, y = 0.7, label = significance_asterisk)) + theme(plot.title = element_text(hjust = 0.5, size = 10), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "right") + ylab("UCell Module score") + xlab("") + ggtitle("VEXAS xenograft signature (n = 50)") + theme(axis.text.x = element_text(angle = 45, , vjust = 1, hjust = 1)) + scale_fill_manual(values = adjustcolor(col = c("#F8766D", "#02818a"), alpha.f = 0.8), name = "") ggsave(g, filename = paste0(figpath, "Fig5G_UCell_MonocyteXenograft_WilcoxonTest.png"), width = length(unique(obj$seurat_annotation))*5*0.1, height = 5, limitsize = FALSE)