# ========================================================- # Title: Seurat analysis of organoids scRNAseq # Description: Code to generate Fig4, SuppFig8, SuppFig9 # Author: Monah Abou Alezz # Date: 2025-03-06 # ========================================================- suppressPackageStartupMessages(library(Seurat)) suppressPackageStartupMessages(library(tidyverse)) suppressPackageStartupMessages(library(reshape2)) suppressPackageStartupMessages(library(ggrepel)) suppressPackageStartupMessages(library(clusterProfiler)) suppressPackageStartupMessages(library(RColorBrewer)) ## load seurat data tp <- c("h48","d5") for (t in tp) { sc_obj <- readRDS(paste0("data/organoids_",t,"_final.rds")) Idents(sc_obj) <- "Cell_Type" sc_obj$Cell_type <- factor( sc_obj$Cell_Type, levels = c("Astrocytes", "Neurons", "Oligodendrocytes", "Immature.Astrocytes", "Immature.Neurons", "OPC", "Undifferentiated")) assign(paste0(t,"_final"), sc_obj) } ggplotColours <- function(n = 6, h = c(0, 360) + 15){ if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65) } ## umap DimPlot(object = h48_final, reduction = "umap.harmony.orig.ident", group.by = "Cell_Type", label = T, repel = T, pt.size = 0.2, label.size = 6) + scale_color_manual(labels=c("Astrocytes", "Neurons", "Oligodendrocytes", "Immature Astrocytes", "Immature Neurons", "OPC", "Undifferentiated"), values = ggplotColours(n = 7)) ## donut plot ggplot_object_clusters_labels <- melt(table(h48_final$Cell_Type)) colnames(ggplot_object_clusters_labels) <- c("Cell_type", "Count") totalB <- ggplot_object_clusters_labels %>% mutate(percentage = Count/sum(Count)) totalB$percentage <- totalB$percentage*100 totalB$fraction = totalB$Count / sum(totalB$Count) totalB$ymax <- cumsum(totalB$fraction) totalB$ymin <- c(0, head(totalB$ymax, n=-1)) totalB$labelPosition <- (totalB$ymax + totalB$ymin) / 2 totalB$Samples <- c("Astrocytes", "Neurons", "Oligodendrocytes", "Immature\nAstrocytes", "Immature\nNeurons", "OPC", "Undifferentiated") totalB$label <- paste0(totalB$Cell_type, ":","\n",round(totalB$percentage, 2), "%") totalB$label2 <- c(paste0(totalB$Samples[1], ":","\n",round(totalB$percentage[1], 2), "%"), paste0(totalB$Samples[2], ":","\n",round(totalB$percentage[2], 2), "%"), paste0(totalB$Samples[3], ":","\n",round(totalB$percentage[3], 2), "%"), paste0(totalB$Samples[4], ": ",round(totalB$percentage[4], 2), "%"), paste0(totalB$Samples[5], ": ",round(totalB$percentage[5], 2), "%"), paste0(totalB$Samples[6], ":","\n",round(totalB$percentage[6], 2), "%"), paste0(totalB$Samples[7], ":","\n",round(totalB$percentage[7], 2), "%")) ggplot(totalB, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=Cell_type)) + geom_rect() + geom_label_repel(x=3.5, aes(y=labelPosition, label=label), size=10) + coord_polar(theta="y") + xlim(c(2, 4)) + theme_void() + theme(legend.position = "none") ## barplot PrctCellExpringGene <- function(object, genes, group.by = "all"){ if(group.by == "all"){ prct = unlist(lapply(genes,calc_helper, object=object)) result = data.frame(Markers = genes, Cell_proportion = prct) return(result) } else{ list = SplitObject(object, group.by) factors = names(list) results = lapply(list, PrctCellExpringGene, genes=genes) for(i in 1:length(factors)){ results[[i]]$Feature = factors[i] } combined = do.call("rbind", results) return(combined) } } calc_helper <- function(object,genes){ counts = object[['RNA']]@counts ncells = ncol(counts) if(genes %in% row.names(counts)){ sum(counts[genes,]>0)/ncells }else{return(NA)} } aav9 <- subset(d5_final, orig.ident=="AAV9") gfp_percentage_cell_type <- PrctCellExpringGene(aav9, genes ="GFP", group.by = "Cell_Type") gfp_percentage_cell_type$Percentage <- gfp_percentage_cell_type$Cell_proportion*100 gfp_percentage_cell_type <- gfp_percentage_cell_type[,c(4,3)] colnames(gfp_percentage_cell_type) <- c("Percentage", "Cell_Types") gfp_percentage_cell_type <- gfp_percentage_cell_type[c(7,1,6,4,5,3,2),] gfp_percentage_cell_type$Cell_Types <- factor(gfp_percentage_cell_type$Cell_Types, levels = c("Immature.Neurons", "Neurons", "Immature.Astrocytes", "Astrocytes", "OPC", "Oligodendrocytes", "Undifferentiated")) custom_colors <- c("Immature.Neurons" = "#00B6EB", "Neurons" = "#C49A00", "Immature.Astrocytes" = "#00C094", "Astrocytes" = "#F8766D", "OPC" = "#A58AFF", "Oligodendrocytes" = "#53B400", "Undifferentiated" = "#FB61D7") ggplot(data=gfp_percentage_cell_type, aes(x=Cell_Types, y=Percentage, fill = Cell_Types)) + geom_bar(stat="identity", width = 0.7, position=position_dodge()) + # width of bars scale_fill_manual(values = custom_colors) + ylab("% GFP mRNA positive") + theme_bw() + theme( text = element_text(family = "Arial"), axis.title.x = element_blank(), axis.title.y = element_text(size=20*96/72), axis.text.x = element_text(size=16*96/72, angle = 45, vjust = 0.9, hjust = 0.9), axis.text.y = element_text(size=18*96/72), legend.position = "none", aspect.ratio = 1.1) samples <- c("AAV9", "UT") tp <- c("h48","d5") for (t in tp) { obj_t <- get(paste0(t,"_final")) for (i in samples) { obj_i <- subset(obj_t, orig.ident == i) ggplot_object_clusters_labels <- melt(table(obj_i$Cell_Type)) colnames(ggplot_object_clusters_labels) <- c("Cell_type", "Count") totalB <- ggplot_object_clusters_labels %>% mutate(percentage = Count/sum(Count)) totalB$percentage <- totalB$percentage*100 totalB$fraction = totalB$Count / sum(totalB$Count) totalB$ymax <- cumsum(totalB$fraction) totalB$ymin <- c(0, head(totalB$ymax, n=-1)) totalB$labelPosition <- (totalB$ymax + totalB$ymin) / 2 totalB$Samples <- c("Astrocytes", "Neurons", "Oligodendrocytes", "Immature\nAstrocytes", "Immature\nNeurons", "OPC", "Undifferentiated") totalB$label <- paste0(totalB$Cell_type, ":","\n",round(totalB$percentage, 2), "%") totalB$label2 <- c(paste0(totalB$Samples[1], ":","\n",round(totalB$percentage[1], 2), "%"), paste0(totalB$Samples[2], ":","\n",round(totalB$percentage[2], 2), "%"), paste0(totalB$Samples[3], ":","\n",round(totalB$percentage[3], 2), "%"), paste0(totalB$Samples[4], ": ",round(totalB$percentage[4], 2), "%"), paste0(totalB$Samples[5], ": ",round(totalB$percentage[5], 2), "%"), paste0(totalB$Samples[6], ":","\n",round(totalB$percentage[6], 2), "%"), paste0(totalB$Samples[7], ":","\n",round(totalB$percentage[7], 2), "%")) donut_i <- ggplot(totalB, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=Cell_type)) + geom_rect() + geom_label_repel(x=3.5, aes(y=labelPosition, label=label), size=10) + coord_polar(theta="y") + xlim(c(2, 4)) + ggtitle(paste0(i)) + theme_void() + theme(legend.position = "none", plot.title = element_text(hjust = 0.5, size = 50, family = "Arial"), plot.title.position = "plot") assign(paste0("donut_",t,"_", i), donut_i) } } donut_final <- (donut_h48_UT | donut_h48_AAV9 | donut_d5_UT | donut_d5_AAV9) ## markers for (t in tp) { obj_t <- get(paste0(t,"_final")) neurons <- subset(obj_t, Cell_Type=="Neurons" | Cell_Type=="Immature.Neurons") astro <- subset(obj_t, Cell_Type=="Astrocytes" | Cell_Type=="Immature.Astrocytes") oligo <- subset(obj_t, Cell_Type=="Oligodendrocytes" | Cell_Type=="OPC") assign(paste0("neurons_",t,"_obj_x_markers"), neurons) assign(paste0("astro_",t,"_obj_x_markers"), astro) assign(paste0("oligo_",t,"_obj_x_markers"), oligo) } cell_types <- c("neurons_h48", "astro_h48", "oligo_h48", "neurons_d5", "astro_d5", "oligo_d5") samples_degs <- c("AAV9", "AAV2", "Spk") for (k in cell_types) { cells <- get(paste0(k,"_obj_x_markers")) for (j in samples_degs) { print(paste("Extracting differentially expressed genes of",j,"vs UT")) degs <- FindMarkers(cells, ident.1 = j, ident.2 = "UT", group.by = "orig.ident", only.pos = FALSE, min.pct = 0.1, test.use = "wilcox", logfc.threshold = 0.1) assign(paste(k,j,"vs_UT_degs",sep = "_"), degs, envir = .GlobalEnv) degs_all <- FindMarkers(object = cells, ident.1 = j, ident.2 = "UT", group.by = "orig.ident", only.pos = FALSE, min.pct = 0, test.use = "wilcox", logfc.threshold = 0, min.cells.group = 0) assign(paste(k,j,"vs_UT_degs_all",sep = "_"), degs_all, envir = .GlobalEnv) ## volcano plot print(paste("Making volcano plot for DEGs of...", j, " vs UT" )) logfc.thr <- 0 adj.pval.thr <- 0.05 data <- data.frame(gene = row.names(degs), pval = -log10(degs$p_val_adj), lfc = degs$avg_log2FC) data <- na.omit(data) data <- mutate(data,color=case_when(data$lfc>logfc.thr & data$pval> -log10(adj.pval.thr)~"Overexpressed", data$lfc< -logfc.thr & data$pval> -log10(adj.pval.thr)~"Underexpressed", abs(data$lfc) 0), 10) highl_down <- head(subset(data, color != "NonSignificant" & lfc < 0), 10) highl <- rbind(highl_up, highl_down) sig <- subset(degs, degs$p_val_adj<0.05) n_sig <- nrow(sig) vol <- ggplot(data, aes(x = lfc, y = pval, colour = color)) + geom_point(size = 3, alpha = 0.8, na.rm = T) + geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray") + geom_label_repel(data = highl, aes(label = gene), show.legend = FALSE, size = 6, family = "Arial") + xlab(expression(log[2]("Fold Change"))) + ylab(expression(-log[10]("adjusted p-value"))) + scale_color_manual(name = "Expression", values = c(Overexpressed = "red", Underexpressed = "royalblue3", NonSignificant = "black")) + ggtitle(paste("Significant genes = ", n_sig)) + theme_bw() assign(paste(k,j,"vs_UT_volcano",sep = "_"), vol, envir = .GlobalEnv) } } rm(degs_all) ## gsea h_gmt <- read.gmt("data/h.all.v7.2.symbols.gmt") for (g in ls()[grep("degs_all", ls())]) { gene_res <- get(g) logfc_symbol <- as.vector(gene_res$avg_log2FC) names(logfc_symbol) <- row.names(gene_res) logfc_symbol <- sort(logfc_symbol, decreasing = TRUE) contr.gsea <- GSEA(logfc_symbol, TERM2GENE = h_gmt, nPerm = 10000, pvalueCutoff = 1) contr.gsea.symb <- as.data.frame(contr.gsea) assign(paste0("GSEA_",g), contr.gsea.symb) } ## GSEA heatmaps type <- c("astro", "neurons", "oligo") samples_degs <- c("AAV9", "AAV2", "Spk") for (p in type) { for (j in samples_degs) { hallmark.full <- data.frame() for (ds in ls()[grep(paste0("GSEA_.*",p,".*",j), ls())]) { ds.t <- get(ds) ds.t.filt <- ds.t[,c("ID", "setSize", "enrichmentScore", "NES", "pvalue", "p.adjust", "qvalue"), drop = FALSE] ds.t.filt$Dataset <- ds if (nrow(hallmark.full) == 0) { hallmark.full <- ds.t.filt } else { hallmark.full <- rbind(hallmark.full, ds.t.filt) } } hallmark.full.filt <- hallmark.full[, c("ID", "NES", "Dataset", "qvalue")] hallmark.full.filt$ID <- gsub(x = hallmark.full.filt$ID, "HALLMARK_", "") hallmark.full.filt$sig <- ifelse( hallmark.full.filt$qvalue <= 0.05 & hallmark.full.filt$qvalue > 0.01, '*', ifelse(hallmark.full.filt$qvalue <= 0.01 & hallmark.full.filt$qvalue > 0.001, '**', ifelse(hallmark.full.filt$qvalue <= 0.001 & hallmark.full.filt$qvalue > 0.0001, '***', ifelse(hallmark.full.filt$qvalue <= 0.0001, '****', '')))) hallmark.order <- hallmark.full.filt %>% group_by(ID) %>% summarise(Pos = sum(NES)) hallmark.order.terms <- hallmark.order[order(hallmark.order$Pos, decreasing = FALSE), "ID", drop = FALSE] hallmark.full.filt.tt <- reshape2::melt(reshape2::dcast(hallmark.full.filt, ID ~ Dataset, value.var="NES"), id.vars = c("ID")) colnames(hallmark.full.filt.tt) <- c("ID", "Dataset", "NES") hallmark.full.filt.tt$ID <- factor(hallmark.full.filt.tt$ID, levels = hallmark.order.terms$ID) hallmark.full.filt.tt <- merge(hallmark.full.filt.tt, hallmark.full.filt[, c("ID", "Dataset", "sig")], by = c("ID", "Dataset"), all.x = TRUE) labels <- c("Day4", "Day2") p.hallmark <- ggplot(hallmark.full.filt.tt, aes(x = Dataset, y = ID)) + geom_tile(aes(fill = NES), colour = "white") + geom_text(aes(label = paste(sig)), size=4*96/72, fontface = "bold") + scale_fill_gradientn(colours = colorRampPalette(rev(brewer.pal(11,"RdBu")))(100), limits = c(-3.5, 3.5), na.value = "white") + ylab("") + xlab("") + coord_fixed(ratio = 0.6) + scale_x_discrete(labels = labels) + theme_bw(base_size = 12) + theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold", size = 18*96/72), axis.text.y = element_text(face = "bold", size = 12*96/72), legend.text = element_text(face = "bold", size = 14*96/72), legend.title = element_text(face = "bold", size = 12*96/72)) assign(paste("p.hallmark", p,j,sep = "_"), p.hallmark) } }