1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# ========================================================-
# Title: Seurat analysis of transgene scRNAseq
# Description: Code to generate Fig5
# Author: Monah Abou Alezz
# Date: 2025-03-06
# ========================================================-
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(ggrepel))
suppressPackageStartupMessages(library(clusterProfiler))
suppressPackageStartupMessages(library(RColorBrewer))
## load seurat data
full_final <- readRDS("data/full_transgenes.rds")
Idents(full_final) <- "Cell_Type"
FeaturePlot(full_final,
features = c("GAA-new","GFP","LUC"),
split.by = "orig.ident")
# Define transgene metadata
cell_types <- c("Neurons", "Immature.Neurons", "Astrocytes", "Immature.Astrocytes", "Oligodendrocytes", "OPC")
ut <- subset(full_final, orig.ident == "UT")
transgene_info <- list(
gfp = list(gene = "GFP", orig.ident = c("AAV9_CAG_GFP", "Spk_CAG_GFP", "Spk_hAAT_GFP")),
gaa = list(gene = "GAA-new", orig.ident = "AAV9_CAG_GAA"),
luc = list(gene = "LUC", orig.ident = "Spk_CAG_LUC")
)
deg_results <- list()
for (trans in names(transgene_info)) {
gene <- transgene_info[[trans]]$gene
samples <- transgene_info[[trans]]$orig.ident
obj <- subset(full_final, orig.ident %in% samples)
merged <- merge(ut, obj)
# Subset by lineage
neurons <- subset(merged, Cell_Type %in% c("Neurons", "Immature.Neurons"))
astrocytes <- subset(merged, Cell_Type %in% c("Astrocytes", "Immature.Astrocytes"))
oligo <- subset(merged, Cell_Type %in% c("Oligodendrocytes", "OPC"))
# DEG for each lineage
for (lineage in c("neurons", "astrocytes", "oligo")) {
lineage_obj <- get(paste0(lineage))
for (samp in samples) {
cat("Extracting DEGs in", lineage, "of", samp, "vs UT\n")
degs <- FindMarkers(lineage_obj,
ident.1 = samp,
ident.2 = "UT",
group.by = "orig.ident",
only.pos = FALSE,
min.pct = 0,
test.use = "wilcox",
logfc.threshold = 0,
min.cells.group = 0)
deg_results[[paste(trans, lineage, samp, "total", sep = "_")]] <- degs
}
}
# Select cells that express the transgene AND belong to selected cell types
expr_cells <- GetAssayData(obj, slot = "counts")[gene, ] > 0
selected_cells <- colnames(obj)[expr_cells & Idents(obj) %in% cell_types]
obj_pos <- subset(obj, cells = selected_cells)
# Merge with UT cells
merged_pos <- merge(ut, obj_pos)
# Subset by lineage
neurons_pos <- subset(merged_pos, Cell_Type %in% c("Neurons", "Immature.Neurons"))
astrocytes_pos <- subset(merged_pos, Cell_Type %in% c("Astrocytes", "Immature.Astrocytes"))
oligo_pos <- subset(merged_pos, Cell_Type %in% c("Oligodendrocytes", "OPC"))
# DEG for each lineage
for (lineage in c("neurons", "astrocytes", "oligo")) {
lineage_obj <- get(paste0(lineage, "_pos"))
for (samp in samples) {
cat("Extracting DEGs in", lineage, "of", samp, "vs UT (transgene-positive cells)\n")
degs <- FindMarkers(lineage_obj,
ident.1 = samp,
ident.2 = "UT",
group.by = "orig.ident",
only.pos = FALSE,
min.pct = 0,
test.use = "wilcox",
logfc.threshold = 0,
min.cells.group = 0)
deg_results[[paste(trans, lineage, samp, "pos", sep = "_")]] <- degs
}
}
# Select cells that DO NOT express the transgene AND belong to selected cell types
expr_cells <- GetAssayData(obj, slot = "counts")[gene, ] == 0
selected_cells <- colnames(obj)[expr_cells & Idents(obj) %in% cell_types]
obj_neg <- subset(obj, cells = selected_cells)
# Merge with UT cells
merged_neg <- merge(ut, obj_neg)
# Subset by lineage
neurons_neg <- subset(merged_neg, Cell_Type %in% c("Neurons", "Immature.Neurons"))
astrocytes_neg <- subset(merged_neg, Cell_Type %in% c("Astrocytes", "Immature.Astrocytes"))
oligo_neg <- subset(merged_neg, Cell_Type %in% c("Oligodendrocytes", "OPC"))
# DEG for each lineage
for (lineage in c("neurons", "astrocytes", "oligo")) {
lineage_obj <- get(paste0(lineage, "_neg"))
for (samp in samples) {
cat("Extracting DEGs in", lineage, "of", samp, "vs UT (transgene-negative cells)\n")
degs <- FindMarkers(lineage_obj,
ident.1 = samp,
ident.2 = "UT",
group.by = "orig.ident",
only.pos = FALSE,
min.pct = 0,
test.use = "wilcox",
logfc.threshold = 0,
min.cells.group = 0)
deg_results[[paste(trans, lineage, samp, "neg", sep = "_")]] <- degs
}
}
}
## GSEA
h_gmt <- read.gmt("data/h.all.v7.2.symbols.gmt")
gsea_results <- list()
for (deg_name in names(deg_results)) {
gene_res <- deg_results[[deg_name]]
if (!"avg_log2FC" %in% colnames(gene_res)) next
logfc_symbol <- as.vector(gene_res$avg_log2FC)
names(logfc_symbol) <- rownames(gene_res)
logfc_symbol <- sort(logfc_symbol, decreasing = TRUE)
contr.gsea <- tryCatch({
GSEA(logfc_symbol,
TERM2GENE = h_gmt,
nPerm = 10000,
pvalueCutoff = 1)
}, error = function(e) NULL)
if (!is.null(contr.gsea)) {
gsea_results[[paste0("GSEA_", deg_name)]] <- as.data.frame(contr.gsea)
}
}