integration_BM.R 9.04 KB
Newer Older
Ivan Merelli's avatar
Ivan Merelli committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
library(Seurat)
library(ggplot2)
library(harmony)
library(RColorBrewer)
library(plotly)
library(htmlwidgets)
library(ComplexHeatmap)
library(openxlsx)
library(dplyr)
library(future)
library(gridExtra)
library(SeuratWrappers)

## Wu et al. public data (GSE196052)
countsData_bm <- read.delim(file = "/DATA/31/molteni/vexas_sc/public/GSE196052_dataCount_BM.csv", header = TRUE, sep = ",", row.names = 1)

sampleData <- read.delim(file = "/DATA/31/molteni/vexas_sc/public/BM_metaDatatSNECellType_ALiceManual.csv", header = TRUE, sep = ",")
sampleData <- sampleData[,c(1:6)]


obj_BM <- CreateSeuratObject(counts = countsData_bm)
dataset_numbers <- sub(".*\\.", "", colnames(obj_BM))

padded_numbers <- ifelse(as.numeric(dataset_numbers) < 10, 
                         paste0("0", dataset_numbers),
                         dataset_numbers)

final_dataset_labels <- paste0("BM-PU-", padded_numbers)

obj_BM$dataset <- final_dataset_labels

obj_BM@meta.data$subject <- sampleData$subject
obj_BM@meta.data$orig.ident <- obj_BM@meta.data$subject
obj_BM@meta.data$sample_info <- "public"

obj_BM@meta.data$subject <- NULL
obj_BM@meta.data$dataset <- NULL

## our vexas data
BM_minimal <- readRDS("/DATA/31/molteni/vexas_bm/results/Full/Full_minimal.rds")
BM_minimal@meta.data$scDblFinder_class <- NULL
BM_minimal@meta.data$scds_class <- NULL
BM_minimal@meta.data$RNA_Source <- NULL
BM_minimal@meta.data$sample_info <- "vexas"

BM.combo <- merge(BM_minimal, y = obj_BM, add.cell.ids = c("vexas", "vexas_public"))


# normalize data and find variable features
# select only 2000 genes as variable
varfeatures <- 2000

BM.combo <- NormalizeData(object = BM.combo)
BM.combo <- FindVariableFeatures(BM.combo, selection.method = "vst",
                                 nfeatures = varfeatures,
                                 verbose = T)

# scaling
reg_vars = c("percent.mt", "nCount_RNA")
BM.combo <- ScaleData(object = BM.combo, vars.to.regress = reg_vars,  display.progress = T, features = rownames(BM.combo))
saveRDS(BM.combo, file = "combined_preIntegration_scaled.rds")

BM.combo <- readRDS("/DATA/31/molteni/vexas_bm/results/integration/combined_preIntegration_scaled.rds")

#PCA

max_pca <- 70

BM.combo <- RunPCA(object = BM.combo, features = VariableFeatures(object = BM.combo), npcs = max_pca, reduction.name="pca", reduction.key="PC_")
BM.combo@meta.data$condition <- NA


BM.combo@meta.data$condition <- with(BM.combo@meta.data,
                                     ifelse(BM.combo@meta.data$orig.ident == "BM-01","PZ", 
                                            ifelse(BM.combo@meta.data$orig.ident == "BM-02","PZ",
                                                   ifelse(BM.combo@meta.data$orig.ident == "BM-03","PZ",
                                                          ifelse(BM.combo@meta.data$orig.ident == "BM-04","PZ",
                                                                 ifelse(BM.combo@meta.data$orig.ident == "BM-08","PZ",
                                                                        ifelse(BM.combo@meta.data$orig.ident == "BM-09","PZ",
                                                                               ifelse(BM.combo@meta.data$orig.ident == "PT1","PZ",
                                                                                      ifelse(BM.combo@meta.data$orig.ident == "PT2","PZ",
                                                                                             ifelse(BM.combo@meta.data$orig.ident == "PT3","PZ",
                                                                                                    ifelse(BM.combo@meta.data$orig.ident == "PT4","PZ",
                                                                                                           ifelse(BM.combo@meta.data$orig.ident == "PT5","PZ",
                                                                                                                  ifelse(BM.combo@meta.data$orig.ident == "PT6","PZ",
                                                                                                                         ifelse(BM.combo@meta.data$orig.ident == "PT7","PZ",
                                                                                                                                ifelse(BM.combo@meta.data$orig.ident == "PT8","PZ",
                                                                                                                                       ifelse(BM.combo@meta.data$orig.ident == "PT9","PZ",
                                                                                                                                              ifelse(BM.combo@meta.data$orig.ident == "HD1","HD",
                                                                                                                                                     ifelse(BM.combo@meta.data$orig.ident == "HD2","HD",
                                                                                                                                                            ifelse(BM.combo@meta.data$orig.ident == "HD3","HD",
                                                                                                                                                                   ifelse(BM.combo@meta.data$orig.ident == "HD4","HD", "NA"))))))))))))))))))))

integration.var <- c("orig.ident", "sample_info", "condition") ## + condition 

BM.combo <- RunHarmony(object = BM.combo, 
                       group.by.vars = integration.var, 
                       max.iter.harmony = 30, plot_convergence = FALSE, reduction.save = "harmony")

#PC = 15

setwd("/DATA/31/molteni/vexas_bm/results/integration/pc15")
BM.combo <- RunUMAP(object = BM.combo, seed.use = 123, dims = 1:15, 
                    reduction = "harmony", reduction.name = "harmony_umap", reduction.key = "UMAPh_", return.model = TRUE)

g <- DimPlot(BM.combo, split.by = "orig.ident", group.by = "sample_info", reduction = "harmony_umap", raster = F, ncol = 6) + ggtitle("UMAP post-harmony")
ggsave(g, filename = 'UMAP_PostHarmony_donor_cond.png', width = 12, height = 6) 

# split by

Idents(BM.combo) <- "sample_info"
g <- DimPlot(BM.combo, split.by = "sample_info", reduction = "harmony_umap") + ggtitle("UMAP post-harmony")
ggsave(g, filename = 'UMAP_PostHarmony_batch_cond2.png', width = 7, height = 6)

Idents(BM.combo) <- "condition"
g <- DimPlot(BM.combo, split.by = "condition", reduction = "harmony_umap") + ggtitle("UMAP post-harmony")
ggsave(g, filename = 'UMAP_PostHarmony_cond_2.png', width = 7, height = 6)

#PC=15
BM.combo <- FindNeighbors(object = BM.combo,
                          dims = 1:15,
                          force.recalc = T,
                          reduction = "harmony",
                          graph.name = c("RNA_nn_h.", "RNA_snn_h."))


BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.4)
BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.6)
BM.combo <- FindClusters(object = BM.combo, graph.name = "RNA_snn_h.", resolution = 0.8)

# SPLIT CLUSTER 20 OF RES=0.8
Idents(BM.combo) <- "RNA_snn_h._res.0.8"

BM.combo <- FindSubCluster(BM.combo, cluster= "20", graph.name = "RNA_snn_h.", subcluster.name = "subcluster", resolution = 0.05)
table(BM.combo$subcluster)

Idents(BM.combo) <- "subcluster"

mapping_vector <- c("20_0" = "20", 
                    "20_1" = "25")

# Update the reference object 
for(old_ident in names(mapping_vector)) {
  BM.combo$subcluster[Idents(BM.combo) == old_ident] <- mapping_vector[old_ident]
}

BM.combo$RNA_snn_h._res.0.8 <- BM.combo$subcluster

BM.combo@misc$markers <- FindAllMarkers(object = BM.combo, min.pct = 0.2, logfc.threshold = 0.25, only.pos = FALSE,
                           min.cells.group = 10, test.use = "wilcox", return.thresh = 0.05, fc.name = "avg_logFC")


markers_h <- list(BM.combo@misc$markers_h)
names(markers_h) <- "RNA_snn_h._res.0.8"

BM.combo@misc$markers_h <- markers_h

saveRDS(BM.combo, file = "/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_final.rds")

#custom cluster annotation

Idents(BM.combo) <- "RNA_snn_h._res.0.8"

# mapping vector
new.cluster.ids <- c("CD8_T_cells","Early_erythroid","CD14+_monocytes_1","CD4_T_cells",
                     "GP","Immature_Neutrophil","HSC_MPP","VEXAS_NK_cells","B_cells","MEP",
                     "Erythroid_progenitors","Mid_erythroid_cells","Prolif_GP","VEXAS_GP",
                     "CD14+_monocytes_2","Early_erythroid","Plasma_cells","Late_erythroid_cells",
                     "CD14+_CD16_low_monocytes","Myeloid_dendritic_cells","Plasmacytoid_dentritic_cells",
                     "CD8_T_cells","MLP","MEP_BASO_MAST_prog","Endothelial_cells","B_cells")


cluster_mapping <- setNames(new.cluster.ids, c(1:length(new.cluster.ids)))

BM.combo$celltypes_annot <- new.cluster.ids[as.numeric(as.character(BM.combo$RNA_snn_h._res.0.8)) + 1]

Idents(BM.combo) <- "celltypes_annot"
ggsave("UMAP_label.png", width = 10, height = 7)
DimPlot(BM.combo, reduction = "harmony_umap", label = TRUE, label.box = TRUE,  pt.size = 0.5, repel = TRUE) + NoLegend()

saveRDS(BM.combo, file = "/DATA/31/molteni/vexas_bm/results/integration/pc15/BM.combo_anno.rds")