# ========================================================- # Title: GSVA analysis on scRNAseq and snRNAseq samples # Description: Code to generate SuppFig13 # Author: Monah Abou Alezz # Date: 2025-03-06 # ========================================================- suppressPackageStartupMessages(library(Seurat)) suppressPackageStartupMessages(library(tidyverse)) suppressPackageStartupMessages(library(openxlsx)) suppressPackageStartupMessages(library(patchwork)) suppressPackageStartupMessages(library(pheatmap)) suppressPackageStartupMessages(library(GSVA)) suppressPackageStartupMessages(library(purrr)) suppressPackageStartupMessages(library(presto)) suppressPackageStartupMessages(library(DESeq2)) suppressPackageStartupMessages(library(cogena)) #### loading the hallmark gmt file as a list using the gmt2list function from cogena gs_human <- gmt2list("~/databases/GSEA/GSEA_human_v7.2/h.all.v7.2.symbols.gmt") gs_mouse <- gmt2list("~/databases/GSEA/GSEA_mouse_v7.2/h.all.v7.2.symbols_mouse.gmt") ## loading datasets mouse.combined <- readRDS("data/mouse_striatum_snRNA_AAV9.rds") unique(mouse.combined$Anno) mouse.combined <- subset(mouse.combined, Anno == "Astrocytes" | Anno == "Mature_Oligos" | Anno == "Immature_Oligos" | Anno == "Excitatory" | Anno == "Inhibitory") mouse.combined@meta.data[['Anno']] <- recode_factor( mouse.combined@meta.data[["Anno"]], "Astrocytes" = "Astrocytes", "Mature_Oligos" = "Oligodendrocytes", "Immature_Oligos" = "Oligodendrocytes", "Excitatory" = "Neurons", "Inhibitory" = "Neurons") mouse.combined$Anno <- droplevels(mouse.combined$Anno) unique(mouse.combined$Anno) mouse.combined$CellType <- mouse.combined$Anno d4_1707 <- readRDS("data/mixed_D4_final.rds") d4_1707@meta.data[['orig.ident']] <- recode_factor( d4_1707@meta.data[["orig.ident"]], "Sample2" = "AAV2", "Sample4" = "d4_AAV9", "Sample6" = "Spk", "Sample8" = "d4_UT") d4_1707@meta.data[["RNA_snn_h.orig.ident_res.1.2"]] <- recode_factor( d4_1707@meta.data[["RNA_snn_h.orig.ident_res.1.2"]], "0" = "Undifferentiated", "1" = "d4_Oligodendrocytes", "2" = "d4_Neurons", "3" = "d4_Astrocytes", "4" = "d4_Neurons", "5" = "d4_Oligodendrocytes", "6" = "d4_Astrocytes", "7" = "d4_Oligodendrocytes", "8" = "d4_Oligodendrocytes", "9" = "d4_Neurons", "10" = "d4_Oligodendrocytes", "11" = "d4_Oligodendrocytes", "12" = "d4_Astrocytes", "13" = "d4_Astrocytes", "14" = "d4_Oligodendrocytes", "15" = "d4_Astrocytes", "16" = "d4_Astrocytes") Idents(d4_1707) <- "RNA_snn_h.orig.ident_res.1.2" d4_1707$RNA_snn_h.orig.ident_res.1.2 <- factor(d4_1707$RNA_snn_h.orig.ident_res.1.2, levels = c("d4_Astrocytes", "d4_Neurons", "d4_Oligodendrocytes", "Undifferentiated")) d4_1707 <- subset(d4_1707, RNA_snn_h.orig.ident_res.1.2 != "Undifferentiated") d4_1707$RNA_snn_h.orig.ident_res.1.2 <- droplevels(d4_1707$RNA_snn_h.orig.ident_res.1.2) unique(d4_1707$RNA_snn_h.orig.ident_res.1.2) d4_1707 <- subset(d4_1707, orig.ident == "d4_AAV9" | orig.ident == "d4_UT") d4_1707$orig.ident <- droplevels(d4_1707$orig.ident) d4_1707$CellType <- d4_1707$RNA_snn_h.orig.ident_res.1.2 d5_1743 <- readRDS("data/organoids_D5_final.rds") d5_1743@meta.data[['orig.ident']] <- recode_factor( d5_1743@meta.data[["orig.ident"]], "Sample2" = "AAV2", "Sample4" = "d5_AAV9", "Sample6" = "Spk", "Sample8" = "d5_UT") d5_1743@meta.data[["RNA_snn_h.orig.ident_res.0.6"]] <- recode_factor( d5_1743@meta.data[["RNA_snn_h.orig.ident_res.0.6"]], "0" = "d5_Astrocytes", "1" = "d5_Neurons", "2" = "Undifferentiated", "3" = "d5_Neurons", "4" = "d5_Oligodendrocytes", "5" = "Pericytes", "6" = "Pericytes", "7" = "d5_Astrocytes", "8" = "d5_Oligodendrocytes", "9" = "d5_Neurons", "10" = "d5_Astrocytes", "11" = "Pericytes", "12" = "d5_Astrocytes", "13" = "d5_Neurons") d5_1743 <- subset(d5_1743, RNA_snn_h.orig.ident_res.0.6 != "Pericytes") Idents(d5_1743) <- "RNA_snn_h.orig.ident_res.0.6" d5_1743$RNA_snn_h.orig.ident_res.0.6 <- factor(d5_1743$RNA_snn_h.orig.ident_res.0.6, levels = c("d5_Astrocytes", "d5_Neurons", "d5_Oligodendrocytes", "Undifferentiated")) d5_1743 <- subset(d5_1743, RNA_snn_h.orig.ident_res.0.6 != "Undifferentiated") d5_1743$RNA_snn_h.orig.ident_res.0.6 <- droplevels(d5_1743$RNA_snn_h.orig.ident_res.0.6) unique(d5_1743$RNA_snn_h.orig.ident_res.0.6) d5_1743 <- subset(d5_1743, orig.ident == "d5_AAV9" | orig.ident == "d5_UT") d5_1743$orig.ident <- droplevels(d5_1743$orig.ident) d5_1743$CellType <- d5_1743$RNA_snn_h.orig.ident_res.0.6 ## create a pseudobulk and normalize datasets_pseudo <- c("mouse.combined", "d4_1707", "d5_1743") for (i in datasets_pseudo) { print(paste0("Analyzing... ",i)) obj_i <- get(i) data_collapsed <- presto::collapse_counts(obj_i@assays$RNA@counts, obj_i@meta.data, c('CellType', 'orig.ident')) meta_data<- data_collapsed$meta_data mat <- data_collapsed$counts_mat colnames(mat)<- paste(meta_data$CellType, meta_data$orig.ident, sep="_") rownames(meta_data) <- paste(meta_data$CellType, meta_data$orig.ident, sep="_") pseudo_bulk_obj <- list(mat = mat, meta_data = meta_data) assign(paste0(i, "_mat"), mat) assign(paste0(i,"_pseduo_bulk_obj"), pseudo_bulk_obj) DESeq.ds <- DESeq2::DESeqDataSetFromMatrix(countData = pseudo_bulk_obj$mat, colData = pseudo_bulk_obj$meta_data, design = ~orig.ident) DESeq.ds <- DESeq.ds[rowSums(counts(DESeq.ds)) > 0, ] DESeq.ds <- DESeq(DESeq.ds) DESeq.rlog <- rlogTransformation(DESeq.ds, blind = TRUE) counts <- assay(DESeq.rlog) assign(paste0(i,"_counts"), counts) } ## converting matrices into expressionsets d4_1707_counts_es <- ExpressionSet(assayData = as.matrix(d4_1707_counts), annotation = "org.Hs.eg.db") d5_1743_counts_es <- ExpressionSet(assayData = as.matrix(d5_1743_counts), annotation = "org.Hs.eg.db") mouse_counts_es <- ExpressionSet(assayData = as.matrix(mouse.combined_counts), annotation = "org.Mm.eg.db") gsva_d4_1707_counts <- gsva(as.matrix(d4_1707_counts), gs_human, min.sz=5, max.sz=500) gsva_d5_1743_counts <- gsva(as.matrix(d5_1743_counts), gs_human, min.sz=5, max.sz=500) gsva_mouse.combined_counts <- gsva(as.matrix(mouse.combined_counts), gs_mouse, min.sz=5, max.sz=500) gsva_counts_combined <- cbind(gsva_d4_1707_counts, gsva_d5_1743_counts, gsva_mouse.combined_counts) p <- pheatmap(gsva_counts_combined)