# Commit a988d531 authored by Ivan Merelli
# (GitLab page residue: "Browse files")
#
# Upload New File
#
# parent f4a3560c
### upload on gitlab
# 1) load R packages
library(Seurat)
library(GEOquery)
library(limma)
library(dplyr)
library(data.table)
library(clusterProfiler)
library(ggplot2)
library(RColorBrewer)
library(qusage)
library(UCell)
# 2) define input and output paths
# Prefix for every file read or written below; all paths are built with
# paste0(path, ...), so a non-empty value must end with "/".
path <- ""
# 3) load scRNA-seq BM-object
## 3.1) restrict analysis to CD34+ clusters
bm_file <- paste0(path, "BM_20240222.rds")
obj <- readRDS(file = bm_file)
# Cluster labels to retain, matched against the metadata column
# RNA_snn_h.orig.ident_res.1.8 (copied into `seurat` below).
CD34p_clusters <- as.character(c(2, 8, 14, 15, 16, 19, 20, 26, 27, 30, 33))
obj$seurat <- obj$RNA_snn_h.orig.ident_res.1.8
obj <- SetIdent(object = obj, value = "seurat")
obj <- subset(obj, subset = (seurat %in% CD34p_clusters))
saveRDS(object = obj, file = paste0(path, "BM_CD34p.rds"))
## 3.2) free memory space
# remove every object of the workspace except `path`
l <- setdiff(ls(), "path")
rm(list = l)
gc()
# 4) load published scRNA-seq dataset
## 4.1) Ainciburu et al.
### 4.1.1) download tar file and metadata
destfolder <- paste0(path, "GSE180298/")
dir.create(path = destfolder, showWarnings = FALSE, recursive = TRUE)
destfile <- paste0(destfolder, "GSE180298_RAW.tar")
# mode = "wb": the archive is binary and its URL carries no file extension
# from which download.file() could switch to binary mode by itself.
download.file(
  url = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file",
  destfile = destfile,
  mode = "wb"
)
metadata_files <- c(
  "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Felderly%5Fmetadata%2Etxt%2Egz",
  "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Fmds%5Fmetadata%2Etxt%2Egz",
  "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Fyoung%5Fmetadata%2Etxt%2Egz"
)
# destination names, in the same order as `metadata_files`
metadata_dest <- paste0(
  destfolder, "GSE180298_", c("elderly", "mds", "young"), "_metadata.txt.gz"
)
# download.file() accepts a vector of URLs only when method = "libcurl" is
# requested explicitly, so the files are fetched one at a time.
for (i in seq_along(metadata_files)) {
  download.file(
    url = metadata_files[i],
    destfile = metadata_dest[i],
    mode = "wb"
  )
}
### 4.1.2) untar file
rawfolder <- paste0(destfolder, "GSE180298_RAW/")
untar(tarfile = destfile, exdir = rawfolder)
files <- list.files(path = rawfolder, full.names = TRUE)
md <- list.files(path = destfolder, full.names = TRUE)
# keep only the gzipped metadata tables; copies decompressed by a previous
# run of this script also contain "metadata" in their name
md_files <- md[grepl(md, pattern = "metadata.*\\.gz$")]
### 4.1.3) define seurat object
# Decompress each gzipped metadata table and stack them into one data frame.
# Only *.gz inputs are processed, so files left over from a previous
# decompression are never handed to gunzip().
gz_files <- md_files[grepl(md_files, pattern = "\\.gz$")]
metadata <- do.call(rbind, lapply(gz_files, function(gz) {
  gunzip(gz, remove = FALSE, overwrite = TRUE)
  # strip only the trailing ".gz" extension (escaped and anchored)
  unzfile <- sub(gz, pattern = "\\.gz$", replacement = "")
  read.delim(file = unzfile, header = TRUE, sep = "\t")
}))
metadata <- as.data.frame(metadata)
metadata$orig.ident <- strsplit2(rownames(metadata), split = "\\_")[, 2]

# Read every 10x h5 file, keeping the count matrices sparse and collecting
# them in a list instead of growing one dense matrix inside the loop.
count_list <- vector("list", length(files))
samples <- character(length(files))
for (i in seq_along(files)) {
  # sample id = second "_"-separated field of the file name
  sampleid <- strsplit2(basename(files[i]), split = "\\_")[, 2]
  print(paste0("Processing file ", sampleid, " (", i, " over ", length(files), ")"))
  # load h5 object
  temp <- Read10X_h5(filename = files[i], use.names = TRUE, unique.features = TRUE)
  # cell names become "<barcode before the first '-'>_<sampleid>"
  colnames(temp) <- paste0(strsplit2(colnames(temp), split = "-")[, 1], "_", sampleid)
  # store
  temp_object <- CreateSeuratObject(counts = temp)
  count_list[[i]] <- temp_object@assays$RNA@counts
  samples[i] <- sampleid
}
# store info: number of cells per sample
cell_counts <- vapply(count_list, ncol, integer(1))
names(cell_counts) <- samples
# restrict to the genes shared by all samples, then bind the cells together
gs <- Reduce(intersect, lapply(count_list, rownames))
counts <- do.call(cbind, lapply(count_list, function(x) x[gs, , drop = FALSE]))

# keep the cells that have metadata and align the count columns to them
md <- metadata[rownames(metadata) %in% colnames(counts), ]; dim(md)
m <- match(rownames(md), colnames(counts)); table(is.na(m))
counts <- counts[, m]
# pass the filtered/aligned table `md` (not the unfiltered `metadata`)
GSE180298 <- CreateSeuratObject(counts = counts, meta.data = md)
### 4.1.4) select only elderly donors
GSE180298 <- SetIdent(object = GSE180298, value = "orig.ident")
sample_ids <- unique(GSE180298$orig.ident)
elderly_sampleid <- sample_ids[grepl(sample_ids, pattern = "elderly")]
GSE180298 <- subset(GSE180298, subset = (orig.ident %in% elderly_sampleid))
saveRDS(object = GSE180298, file = paste0(path, "GSE180298_elderly.rds")); gc()
## 4.1.5) free memory space
# remove every object of the workspace except `path`
l <- setdiff(ls(), "path")
rm(list = l); gc()
## 4.2) Wu et al.
### 4.2.1) download tar file
destfolder <- paste0(path, "GSE196052/")
dir.create(path = destfolder, showWarnings = FALSE, recursive = TRUE)
destfile <- paste0(destfolder, "GSE196052_RAW.tar")
# mode = "wb": binary archive whose URL has no file extension
download.file(
  url = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE196052&format=file",
  destfile = destfile,
  mode = "wb"
)
metadata_files <- c("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE196052&format=file&file=GSE196052%5FdataCount%5FCD34%2Ecsv%2Egz")
download.file(
  url = metadata_files,
  destfile = paste0(destfolder, "GSE196052_dataCount_CD34.csv.gz"),
  mode = "wb"
)
### 4.2.2) untar file
untar(tarfile = destfile, exdir = paste0(destfolder, "GSE196052_RAW/"))
files_annotation <- list.files(path = paste0(destfolder, "GSE196052_RAW/"), full.names = TRUE)
files <- list.files(path = destfolder, full.names = TRUE)
### 4.2.3) define seurat object
# NOTE(review): the "SraRun" table and the manual cell annotation csv are not
# downloaded anywhere in this script; presumably they have to be copied into
# `destfolder` by hand. Fail with a clear message when they are missing.
sra_file <- files[grepl(files, pattern = "SraRun")]
wu_annot_file <- paste0(destfolder, "CD34_metaDatatSNECellType_ALiceManual.csv")
if (length(sra_file) != 1L) {
  stop("Expected exactly one 'SraRun' table in ", destfolder,
       ", found ", length(sra_file), call. = FALSE)
}
if (!file.exists(wu_annot_file)) {
  stop("Missing annotation file: ", wu_annot_file, call. = FALSE)
}
md <- read.delim(file = sra_file, header = TRUE, sep = ",")
table(md$subject_id.status, md$tissue.cell_type)
# per-GSM tables from the archive, stacked; `metadata` is not used again
# before the workspace is cleared at the end of this section
mdfiles <- files_annotation[grepl(files_annotation, pattern = "GSM")]
metadata <- do.call(rbind, lapply(mdfiles, function(x) {
  read.delim(gzfile(x), header = TRUE, sep = ",")
}))
# CD34 count table: first column holds the feature names
f <- files[grepl(files, pattern = "CD34.csv.gz", fixed = TRUE)]
counts_cd34 <- fread(f) %>% as.data.frame()
rn <- as.character(counts_cd34[, 1])
counts_cd34 <- counts_cd34[, -1]
rownames(counts_cd34) <- rn
# cell ids without the "CD34_" tag, used to look cells up in the annotation
cd34_cells <- gsub(x = colnames(counts_cd34), pattern = "CD34_", replacement = "")
wu_annot <- read.csv(file = wu_annot_file, header = TRUE)
m <- match(cd34_cells, wu_annot$orig.ident); table(is.na(m))
mdt <- data.frame(sampleid = wu_annot$subject[m], status = wu_annot$group[m])
rownames(mdt) <- colnames(counts_cd34)
GSE196052 <- CreateSeuratObject(counts = counts_cd34, meta.data = mdt)
GSE196052 <- SetIdent(object = GSE196052, value = "orig.ident")
saveRDS(object = GSE196052, file = paste0(path, "GSE196052.rds"))
## 4.2.4) free memory space
# remove every object of the workspace except `path`
l <- setdiff(ls(), "path")
rm(list = l); gc()
# 5) Define single seurat object
## 5.1) load datasets
fiumara <- readRDS(paste0(path, "BM_CD34p.rds"))
GSE180298 <- readRDS(paste0(path, "GSE180298_elderly.rds"))
GSE196052 <- readRDS(paste0(path, "GSE196052.rds"))
## 5.2) define common features
# a gene is "common" when it is counted three times, i.e. once per dataset
genes <- table(c(rownames(fiumara), rownames(GSE180298), rownames(GSE196052)))
common_genes <- names(which(genes == 3)); length(common_genes)
## 5.3) add sampleinfo
fiumara$source <- "fiumara"
GSE180298$source <- "GSE180298"
GSE196052$source <- "GSE196052"
fiumara$sample_info <- paste0("fiumara_", fiumara$orig.ident)
GSE180298$sample_info <- paste0("GSE180298_", GSE180298$orig.ident)
GSE196052$sample_info <- paste0("GSE196052_", GSE196052$sampleid)
## 5.4) extract counts relative to these genes and define Seurat object
datasets <- list(fiumara, GSE180298, GSE196052)
vars <- c("source", "sample_info", "nCount_RNA", "nFeature_RNA")
# column-bind the counts and row-bind the metadata in the same dataset order
counts <- do.call(cbind, lapply(datasets, function(d) {
  d@assays$RNA@counts[common_genes, ]
}))
md <- do.call(rbind, lapply(datasets, function(d) {
  d@meta.data[, vars]
}))
obj <- CreateSeuratObject(counts = counts, meta.data = md)
# percentage of counts per cell falling on "^MT-" and on "^RPL" features
is_mt <- grepl(rownames(obj), pattern = "^MT-")
is_rb <- grepl(rownames(obj), pattern = "^RPL")
obj$percent.mt <- (colSums(obj@assays$RNA@counts[is_mt, ]) / colSums(obj@assays$RNA@counts)) * 100
obj$percent.rb <- (colSums(obj@assays$RNA@counts[is_rb, ]) / colSums(obj@assays$RNA@counts)) * 100
saveRDS(object = obj, file = paste0(path, "combined.rds")); gc()
## 5.5) free memory space
# remove every object of the workspace except `path` and `obj`
l <- setdiff(ls(), c("path", "obj"))
rm(list = l); gc()
# 6) removing low quality cells, normalization, scaling, integration and clustering
## 6.1) cell filtering
# keep cells with more than 200 detected features and less than 25% of
# mitochondrial counts
has_features <- obj$nFeature_RNA > 200
low_mt <- obj$percent.mt < 25
obj$keep <- has_features & low_mt
table(obj$keep, obj$source)
obj <- subset(obj, subset = (keep %in% TRUE))
saveRDS(obj, file = paste0(path, "combined_filtered.rds"))
## 6.2) normalization
varfeatures <- 1000
obj <- NormalizeData(object = obj)
obj <- FindVariableFeatures(
  obj,
  selection.method = "vst",
  nfeatures = varfeatures,
  verbose = TRUE
)
## 6.3) scaling
reg_vars <- c("percent.mt", "nCount_RNA")
obj <- ScaleData(
  object = obj,
  vars.to.regress = reg_vars,
  display.progress = TRUE,
  features = rownames(obj)
)
saveRDS(obj, file = paste0(path, "combined_normscaled.rds"))
## 6.4) dimensionality reduction
max_pca <- 100
obj <- RunPCA(
  object = obj,
  features = VariableFeatures(object = obj),
  npcs = max_pca,
  reduction.name = "pca",
  reduction.key = "PC_"
)
# share (in %) of the variance of the computed PCs carried by each PC
pc_var <- obj@reductions$pca@stdev^2
explvar <- (pc_var / sum(pc_var)) * 100
# drop in explained variance from each PC to the next one
delta <- explvar - c(explvar[-1], 0)
# number of PCs whose drop exceeds 0.01 percentage points
opt_delta <- sum(delta > 1e-2)
# first PC at which the cumulative explained variance passes 80%
opt_explvar <- min(which(cumsum(explvar) > 80))
# use the smaller of the two criteria as number of dimensions
opt <- min(opt_delta, opt_explvar)
obj <- RunUMAP(object = obj, seed.use = 123, reduction = "pca", dims = 1:opt)
## 6.5) harmony dataset integration
# Batch variables handed to harmony: dataset of origin and sample.
integration.var <- c("source", "sample_info")
# The harmony package is not attached by the library() calls at the top of
# this script, so RunHarmony() is called through its namespace.
obj <- harmony::RunHarmony(
  object = obj,
  group.by.vars = integration.var,
  max.iter.harmony = 30,
  plot_convergence = FALSE,
  reduction.save = "harmony"
)
# UMAP on the harmony embedding, stored separately from the PCA-based UMAP
obj <- RunUMAP(
  object = obj,
  seed.use = 123,
  dims = 1:opt,
  reduction = "harmony",
  reduction.name = "harmony_umap",
  reduction.key = "UMAPh_",
  return.model = TRUE
)
## 6.6) find neighboring cells
# neighbour graphs are built on the harmony embedding and stored under the
# names "RNA_nn_h.iv" / "RNA_snn_h.iv"
graph_names <- c("RNA_nn_h.iv", "RNA_snn_h.iv")
obj <- FindNeighbors(
  object = obj,
  dims = 1:opt,
  force.recalc = TRUE,
  reduction = "harmony",
  graph.name = graph_names
)
## 6.7) cell clustering
# one clustering per resolution from 0.1 to 1 (step 0.1) on the SNN graph
clu_res <- seq(0.1, 1, by = 0.1)
for (k in seq_along(clu_res)) {
  obj <- FindClusters(
    object = obj,
    algorithm = 1,
    resolution = as.numeric(clu_res[k]),
    graph.name = "RNA_snn_h.iv"
  )
}
## 6.8) assign celltype to each cluster
# clusters of the resolution-0.5 column are the ones being annotated
obj$seurat <- obj$RNA_snn_h.iv_res.0.5
# cell-type label -> cluster ids carrying that label
cluster_labels <- list(
  "HSC/MPP" = "0",
  "Mature Ery" = c("1", "6", "11"),
  "VEXAS Ery/CMP" = "7",
  "Immature Ery" = c("8", "22"),
  "MPP/CMP" = "2",
  "PreBNK" = "12",
  "CMP/GMP" = "3",
  "GP" = "4",
  "MDP" = "13",
  "MyeloLympho/CMP" = "5",
  "VEXAS Immature Ery" = c("9", "21"),
  "Undefined" = "10",
  "BEM" = "14",
  "Monocyte Progenitors" = "16",
  "MLP" = c("15", "19"),
  "MEP" = "17",
  "VEXAS Mature Ery" = "18",
  "VEXAS MPP-Ery" = "20",
  "VEXAS Myelo/CMP" = "23"
)
# cells of clusters not listed above keep NA
obj$seurat_annotation <- NA
for (label in names(cluster_labels)) {
  obj$seurat_annotation[obj$seurat %in% cluster_labels[[label]]] <- label
}
# colour of each cell type; the order of this vector also defines the order
# of the factor levels below
annotated_cols <- c(
  "HSC/MPP" = "#FB0207",
  "MyeloLympho/CMP" = "#c6dbef",
  "MPP/CMP" = "#7fcdbb",
  "CMP/GMP" = "#9ecae1",
  "Monocyte Progenitors" = "#66CCFF",
  "MDP" = "#0F80FF",
  "GP" = "#08519c",
  "PreBNK" = "#118040",
  "MLP" = "#FECC66",
  "BEM" = "#bdbdbd",
  "MEP" = "#f768a1",
  "Immature Ery" = "#B17DFC",
  "Mature Ery" = "#800080",
  "VEXAS MPP-Ery" = "#fde0dd",
  "VEXAS Ery/CMP" = "#fcc5c0",
  "VEXAS Immature Ery" = "#fa9fb5",
  "VEXAS Mature Ery" = "#dd3497",
  "VEXAS Myelo/CMP" = "#ccebc5",
  "Undefined" = "#d9d9d9"
)
levs <- names(annotated_cols)
obj$seurat_annotation <- factor(obj$seurat_annotation, levels = levs)
## 6.9) define status and vexas-mutation variables
# barcode = second "_"-separated field of the cell name
obj$cell_barcode <- strsplit2(rownames(obj@meta.data), split = "\\_")[, 2]
GSE196052_annot <- read.csv(file = paste0(path, "GSE196052/CD34_metaDatatSNECellType_ALiceManual.csv"), header = TRUE)
GSE196052_cases <- GSE196052_annot[GSE196052_annot$group %in% "PT", ]
### 6.9.1) status
# default is "HD"; GSE196052 cells annotated as "PT" and every fiumara cell
# are flagged "PT"
obj$status <- "HD"
obj$status[(obj$source %in% "GSE196052") & (obj$cell_barcode %in% GSE196052_cases$orig.ident)] <- "PT"
obj$status[(obj$source %in% "fiumara")] <- "PT"
table(obj$source, obj$status)
### 6.9.2) VEXAS mutation
# map the GSE196052 "PT<n>" sample labels onto their "UPN<n>" identifiers
GSE196052_pt2upn <- paste0("GSE196052_PT", 1:9)
names(GSE196052_pt2upn) <- paste0("GSE196052_UPN", c(6, 11, 1, 10, 13, 14, 15, 16, 17))
obj$sample_info_upn <- obj$sample_info
for (i in seq_along(GSE196052_pt2upn)) {
  obj$sample_info_upn[obj$sample_info_upn %in% GSE196052_pt2upn[i]] <- names(GSE196052_pt2upn)[i]
}
table(obj$source, obj$sample_info_upn)
table(obj$sample_info, obj$sample_info_upn)
# NOTE(review): samples not listed in any of the three groups below keep the
# default "HD", including any fiumara sample other than BM-01/02/03/04/08/09
# (all fiumara cells have status "PT") -- confirm this is intended.
obj$vexas_mutation <- "HD"
obj$vexas_mutation[obj$sample_info_upn %in% c(paste0("fiumara_BM-0", 1),
                                              paste0("GSE196052_UPN", c(1, 10, 11, 13, 16, 17)))] <- "THR"
obj$vexas_mutation[obj$sample_info_upn %in% c(paste0("fiumara_BM-0", c(2, 3, 8)),
                                              paste0("GSE196052_UPN", c(6)))] <- "VAL"
obj$vexas_mutation[obj$sample_info_upn %in% c(paste0("fiumara_BM-0", c(4, 9)),
                                              paste0("GSE196052_UPN", c(14, 15)))] <- "LEU"
saveRDS(obj, file = paste0(path, "combined_annotated.rds"))
table(obj$status, obj$vexas_mutation)
table(obj$source, obj$vexas_mutation)
table(obj$vexas_mutation, obj$sample_info_upn)
## 6.10) free memory space
# `annotated_cols` must survive the cleanup: the DE/GSEA section reads its
# names and the UMAP figure uses it as colour palette.
l <- setdiff(ls(), c("path", "obj", "annotated_cols"))
rm(list = l); gc()
# 7) Celltype-wise VEXAS vs HD (DE and GSEA)
# folder receiving the DE tables, the GSEA tables and the hallmark gene sets
outpath <- paste0(path, "DE_GSEA/")
dir.create(path = outpath, showWarnings = FALSE, recursive = TRUE)
## 7.1) define function to run clusterProfiler GSEA
# Run a pre-ranked GSEA on a table of differential-expression results.
#   marks: data frame with the columns `avg_log2FC` (ranking statistic) and
#          `gene_name` (gene identifiers)
#   gmt:   gene-set file passed to clusterProfiler::read.gmt()
# Returns the result of GSEA(); no p-value filtering is applied
# (pvalueCutoff = 1).
gsea_run <- function(marks, gmt){
  # gene sets as a TERM2GENE table
  term2gene <- clusterProfiler::read.gmt(gmt)
  # named vector of log fold changes, sorted from highest to lowest
  ranked <- marks$avg_log2FC
  names(ranked) <- marks$gene_name
  ranked <- ranked[order(ranked, decreasing = TRUE)]
  # for a repeated gene name only the first (highest) entry is kept
  first_occurrence <- !duplicated(names(ranked))
  ranked <- ranked[first_occurrence]
  # run GSEA
  GSEA(geneList = ranked, TERM2GENE = term2gene, nPerm = 10000, pvalueCutoff = 1)
}
## 7.2) download hallmarks gene sets
hallmarks_gsea <- c("https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.4/h.all.v7.4.symbols.gmt")
# local copy of the gene sets; GSEA below reads this file instead of
# fetching the URL again for every cell type
hallmarks_file <- paste0(outpath, "h.all.v7.4.symbols.gmt")
download.file(url = hallmarks_gsea, destfile = hallmarks_file)
## 7.3) VEXAS vs HD
### 7.3.1) DE analysis
# Cell types are read from the factor levels stored on the object, which
# were set from names(annotated_cols); this does not depend on helper
# variables that the workspace cleanups remove.
annclusters <- levels(obj$seurat_annotation)
# minimum number of cells required in BOTH groups to test a cell type
mincells <- 10
for (i in seq_along(annclusters)) {
  cl <- annclusters[i]
  # "/" is not allowed in file names
  cl_id <- gsub(x = cl, pattern = "/", replacement = "-")
  # Count PT and HD cells explicitly: a group that is absent must count as 0,
  # otherwise a cell type made of a single group would pass the size check
  # and FindMarkers() would fail on the empty group.
  in_cluster <- obj$seurat_annotation %in% cl
  n_per_status <- table(factor(obj$status[in_cluster], levels = c("PT", "HD")))
  if (any(n_per_status < mincells)) {
    next
  }
  temp <- subset(obj, subset = (seurat_annotation %in% cl))
  temp <- SetIdent(object = temp, value = "status")
  de <- FindMarkers(
    temp,
    ident.1 = "PT",
    ident.2 = "HD",
    test.use = "wilcox",
    min.pct = 0.1,
    logfc.threshold = 0
  )
  marks <- de[order(de$p_val_adj, decreasing = FALSE), ]
  marks$gene_name <- rownames(marks)
  write.table(x = marks, file = paste0(outpath, "de_", cl_id, ".txt"), sep = '\t', row.names = FALSE)
}
### 7.3.2) GSEA Hallmarks
# only cell types for which a DE table exists on disk are processed
for (i in seq_along(annclusters)) {
  cl <- annclusters[i]
  cl_id <- gsub(x = cl, pattern = "/", replacement = "-")
  defile <- paste0(outpath, 'de_', cl_id, ".txt")
  if (file.exists(defile)) {
    marks <- read.table(file = defile, sep = "\t", header = TRUE)
    gsea <- gsea_run(marks, gmt = hallmarks_file)
    write.table(x = gsea, file = paste0(outpath, 'gsea_', cl_id, ".txt"), sep = '\t', row.names = FALSE)
  }
}
## 7.4) VEXAS_MUT vs HD
### 7.4.1) DE analysis
# cell types = factor levels stored on the object (set from
# names(annotated_cols))
annclusters <- levels(obj$seurat_annotation)
# minimum number of cells required in BOTH groups to test a cell type
mincells <- 10
mut <- c("LEU", "THR", "VAL")
for (m in mut) {
  # the two groups compared for this mutation; kept in its own variable so
  # the cell-type levels `levs` used by the figures are not overwritten
  mut_groups <- c(m, "HD")
  obj_mut <- subset(obj, subset = (vexas_mutation %in% mut_groups))
  for (i in seq_along(annclusters)) {
    cl <- annclusters[i]
    # "/" is not allowed in file names
    cl_id <- gsub(x = cl, pattern = "/", replacement = "-")
    # count both groups explicitly so that an absent group counts as 0
    in_cluster <- obj_mut$seurat_annotation %in% cl
    n_per_group <- table(factor(obj_mut$vexas_mutation[in_cluster], levels = mut_groups))
    if (any(n_per_group < mincells)) {
      next
    }
    temp <- subset(obj_mut, subset = (seurat_annotation %in% cl))
    temp <- SetIdent(object = temp, value = "vexas_mutation")
    de <- FindMarkers(
      temp,
      ident.1 = m,
      ident.2 = "HD",
      test.use = "wilcox",
      min.pct = 0.1,
      logfc.threshold = 0
    )
    marks <- de[order(de$p_val_adj, decreasing = FALSE), ]
    marks$gene_name <- rownames(marks)
    write.table(x = marks, file = paste0(outpath, "de_", cl_id, "_", m, "vHD.txt"), sep = '\t', row.names = FALSE)
  }
}
### 7.4.2) GSEA Hallmarks
# local copy of the hallmark gene sets written by the download step
hallmarks_file <- paste0(outpath, "h.all.v7.4.symbols.gmt")
for (m in mut) {
  for (i in seq_along(annclusters)) {
    cl <- annclusters[i]
    cl_id <- gsub(x = cl, pattern = "/", replacement = "-")
    defile <- paste0(outpath, "de_", cl_id, "_", m, "vHD.txt")
    # only comparisons for which a DE table exists on disk are processed
    if (file.exists(defile)) {
      marks <- read.table(file = defile, sep = "\t", header = TRUE)
      gsea <- gsea_run(marks, gmt = hallmarks_file)
      write.table(x = gsea, file = paste0(outpath, 'gsea_', cl_id, "_", m, "vHD.txt"), sep = '\t', row.names = FALSE)
    }
  }
}
## 7.5) free memory space
# Objects still needed by the figure section are kept: `outpath`,
# `annclusters` and `mut` locate the GSEA tables, `annotated_cols` is the
# colour palette of the annotated UMAP.
keep_objects <- c("path", "obj", "outpath", "annclusters", "mut", "annotated_cols")
l <- setdiff(ls(), keep_objects)
rm(list = l); gc()
# 8) UCell module scores and wilcoxon test
## 8.1) load marker gene set
vexas_50 <- qusage::read.gmt(file = paste0(path, "xenograft_signatures/custom_vexas_50.gmt"))
## 8.2) compute UCell module scores
# The first gene set of the file is wrapped in a one-element *named list*:
# the whole gene vector is then a single signature called
# "VEXAS_Xenograft_sig50", and names(vexas_signature) has length one.
vexas_signature <- list(VEXAS_Xenograft_sig50 = vexas_50[[1]])
md_cols_before <- colnames(obj@meta.data)
obj <- AddModuleScore_UCell(obj, features = vexas_signature)
# rename the score column(s) that were actually added, identified by name
# rather than by assuming their position in the metadata
score_cols <- setdiff(colnames(obj@meta.data), md_cols_before)
stopifnot(length(score_cols) == length(vexas_signature))
colnames(obj@meta.data)[match(score_cols, colnames(obj@meta.data))] <- names(vexas_signature)
## 8.3) Celltype-wise wilcoxon test: VEXAS vs HD
# data.table::melt() has a method for data.tables only, so the metadata is
# converted first; the long table is handed to dplyr as a plain data frame
x <- melt(
  data = as.data.table(obj@meta.data),
  id.vars = c("status", "seurat_annotation"),
  measure.vars = c("VEXAS_Xenograft_sig50")
)
x <- as.data.frame(x)
# p-value of the PT vs HD test, or NA when one of the two groups has no
# cell in the cell type (wilcox.test() would stop with an error)
wilcox_p_or_na <- function(pt, hd) {
  if (length(pt) == 0L || length(hd) == 0L) {
    return(NA_real_)
  }
  wilcox.test(x = pt, y = hd)$p.value
}
w <- x %>%
  dplyr::group_by(variable, seurat_annotation) %>%
  dplyr::summarise(pvalue = wilcox_p_or_na(value[status == "PT"], value[status == "HD"])) %>%
  dplyr::ungroup()
# multiple-testing correction with the default method of p.adjust()
w$p.adjust <- p.adjust(p = w$pvalue)
w <- w[order(w$p.adjust, decreasing = FALSE), ]
write.table(x = w, file = paste0(path, "xenograft_signatures/wilcoxon.txt"), sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
# 9) figures
figpath <- paste0(path, "figures/")
dir.create(path = figpath, showWarnings = FALSE, recursive = TRUE)
## 9.1) figure 5d: Annotated UMAP
obj <- SetIdent(object = obj, value = "seurat_annotation")
# Legend order is taken directly from the palette names: the helper `levs`
# is reassigned by the per-mutation DE loop and dropped by the workspace
# cleanups, so it cannot be relied on here.
celltype_levels <- names(annotated_cols)
g <- DimPlot(obj, reduction = "harmony_umap",
             label.box = TRUE, label = TRUE, label.color = TRUE, label.size = 2) +
  scale_color_manual(values = annotated_cols, limits = celltype_levels) +
  scale_fill_manual(values = annotated_cols, limits = celltype_levels) +
  ggtitle("Cluster Annotation") +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "right",
        legend.text = element_text(size = 7))
ggsave(g, filename = paste0(figpath, "Fig5D_UMAP_AnnotatedClusters.png"),
       width = 10, height = 7, limitsize = FALSE)
## 9.2) figure 5e/5f: GSEA Hallmarks
### 9.2.1) load results
# Read one GSEA table, order it by adjusted p-value then decreasing NES and
# tag it with its cell type and comparison. Returns NULL when the file does
# not exist or holds no row, so that it is skipped by rbind().
read_gsea_table <- function(gsea_file, celltype, test) {
  if (!file.exists(gsea_file)) {
    return(NULL)
  }
  x <- read.delim(file = gsea_file, header = TRUE)
  if (nrow(x) == 0L) {
    return(NULL)
  }
  x <- x[order(x$p.adjust, -x$NES), ]
  data.frame(x, celltype = celltype, test = test)
}
# all-patients comparison, one table per cell type
res_VEXASvHD <- do.call(rbind, lapply(annclusters, function(cl) {
  cl_id <- gsub(x = cl, pattern = "/", replacement = "-")
  read_gsea_table(paste0(outpath, 'gsea_', cl_id, ".txt"),
                  celltype = cl, test = "VEXASvOLD")
}))
# per-mutation comparisons, one table per mutation and cell type
res_MUTvHD <- do.call(rbind, lapply(mut, function(m) {
  do.call(rbind, lapply(annclusters, function(cl) {
    cl_id <- gsub(x = cl, pattern = "/", replacement = "-")
    read_gsea_table(paste0(outpath, 'gsea_', cl_id, "_", m, "vHD.txt"),
                    celltype = cl, test = paste0(m, "vHD"))
  }))
}))
res <- rbind(res_VEXASvHD, res_MUTvHD)
if (is.null(res)) {
  stop("No GSEA result table found in ", outpath, call. = FALSE)
}
res$significance_asterisk <- ""
res$significance_asterisk[which(res$p.adjust < 0.05)] <- "*"
res$significance_asterisk[which(res$p.adjust < 0.01)] <- "**"
res$significance_asterisk[which(res$p.adjust < 0.001)] <- "***"
# add the missing gene set / cell type / comparison combinations so every
# tile of the grid exists; added rows get an empty label instead of NA
res <- res %>%
  tidyr::complete(ID, celltype, test, fill = list(significance_asterisk = "")) %>%
  as.data.frame()
res$ID <- gsub(x = res$ID, pattern = "HALLMARK_", replacement = "")
### 9.2.2) plot
g <- res %>%
  ggplot() +
  theme_bw() +
  facet_grid(. ~ test) +
  geom_tile(aes(x = celltype, y = ID, fill = NES)) +
  geom_text(aes(x = celltype, y = ID, label = significance_asterisk), size = 2) +
  theme(plot.title = element_text(hjust = 0.5, size = 10),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        legend.position = "top",
        strip.background = element_rect(fill = "white")) +
  ylab("") + xlab("") +
  scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11, "RdBu")))(100),
                       limits = c(-4, 4),
                       na.value = "grey")
# figure size scales with the number of cell types, comparisons and gene sets
ggsave(g, filename = paste0(figpath, "Fig5EF_GSEA_Celltype_Hallmarks.png"),
       width = length(unique(res$celltype)) * 0.3 * length(unique(res$test)),
       height = length(unique(res$ID)) * 0.2, limitsize = FALSE)
## 9.3) figure 5g: Monocyte xenograft signature CD34+
### 9.3.1) load wilcoxon test results
w <- read.table(file = paste0(path, "xenograft_signatures/wilcoxon.txt"), sep = "\t", header = TRUE)
# one more asterisk for each threshold the adjusted p-value falls below
w$significance_asterisk <- ""
cutoffs <- c(0.05, 0.01, 0.001)
for (k in seq_along(cutoffs)) {
  w$significance_asterisk[w$p.adjust < cutoffs[k]] <- strrep("*", k)
}
### 9.3.2) plot
# fill colours of the two status groups, slightly transparent
status_cols <- adjustcolor(col = c("#F8766D", "#02818a"), alpha.f = 0.8)
g <- ggplot(obj@meta.data) +
  theme_classic() +
  geom_violin(aes(x = seurat_annotation, y = VEXAS_Xenograft_sig50, fill = status),
              scale = "width") +
  # significance labels are drawn at a fixed height of 0.7
  geom_text(data = w, aes(x = seurat_annotation, y = 0.7, label = significance_asterisk)) +
  ylab("UCell Module score") + xlab("") +
  ggtitle("VEXAS xenograft signature (n = 50)") +
  theme(plot.title = element_text(hjust = 0.5, size = 10),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        legend.position = "right") +
  scale_fill_manual(values = status_cols, name = "")
# width grows with the number of annotated cell types
ggsave(
  g,
  filename = paste0(figpath, "Fig5G_UCell_MonocyteXenograft_WilcoxonTest.png"),
  width = length(unique(obj$seurat_annotation)) * 5 * 0.1,
  height = 5,
  limitsize = FALSE
)
# \ No newline at end of file
# (GitLab page residue below)
# Markdown is supported
# 0% or .
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or to comment