Commit 387c08bc authored by Stefano Beretta's avatar Stefano Beretta
Browse files

Upload New File

parent b855eb51
library(SeuratObject)
library(Seurat)
library(harmony)
library(DoubletFinder)
library(grid)
library(cowplot)
library(gridExtra)
library(RColorBrewer)
library(patchwork)
library(scales)
library(ggplot2)
library(openxlsx)
library(SingleR)
#########################
### Utility Functions ###
#########################
define_region <- function(row, col){
viewport(layout.pos.row = row, layout.pos.col = col)
}
########################
### General Settings ###
########################
# Working dir
wdir <- "squadrito_livertumor2022_scrnaseq"
# Raw data dir (download from GEO)
raw_dir <- "GEO_data"
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)
samples <- c("GG-11", "GG-18", "GG-22", "GG-23", "GG-25")
# cell-cycle scoring - NicheNet convertion of Seurat cc.genes.updated.2019 lists
s.genes <- c("Mcm5","Pcna","Tyms","Fen1","Mcm7","Mcm4","Rrm1","Ung","Gins2","Mcm6",
"Cdca7","Dtl","Prim1","Uhrf1","Cenpu","Hells","Rfc2","Polr1b","Nasp",
"Rad51ap1","Gmnn","Wdr76","Slbp","Ccne2","Ubr7","Pold3","Msh2","Atad2",
"Rad51","Rrm2","Cdc45","Cdc6","Exo1","Tipin","Dscc1","Blm","Casp8ap2",
"Usp1","Clspn","Pola1","Chaf1b","Mrpl36","E2f8")
g2m.genes <- c("Hmgb2","Cdk1","Nusap1","Ube2c","Birc5","Tpx2","Top2a","Ndc80",
"Cks2","Nuf2","Cks1b","Mki67","Tmpo","Cenpf","Tacc3","Pimreg",
"Smc4","Ccnb2","Ckap2l","Ckap2","Aurkb","Bub1","Kif11","Anp32e",
"Tubb4b","Gtse1","Kif20b","Hjurp","Cdca3","Jpt1","Cdc20","Ttk",
"Cdc25c","Kif2c","Rangap1","Ncapd2","Dlgap5","Cdca2","Cdca8","Ect2",
"Kif23","Hmmr","Aurka","Psrc1","Anln","Lbr","Ckap5","Cenpe","Ctcf",
"Nek2","G2e3","Gas2l3","Cbx5","Cenpa")
reg_vars <- c("CC.Difference", "percent.mt", "nCount_RNA")
#############################
### Create Seurat Objects ###
#############################
for (sample in samples) {
print(sample)
mtx_file <- paste(raw_dir, paste0(sample, "_matrix.mtx.gz"), sep = "/")
if (!file.exists(mtx_file)) {
stop(paste("Cannot find", sample, "MTX file:", mtx_file))
}
bcode_file <- paste(raw_dir, paste0(sample, "_barcodes.tsv.gz"), sep = "/")
if (!file.exists(bcode_file)) {
stop(paste("Cannot find", sample, "barcode file:", bcode_file))
}
feature_file <- paste(raw_dir, paste0(sample, "_features.tsv.gz"), sep = "/")
if (!file.exists(feature_file)) {
stop(paste("Cannot find", sample, "features file:", feature_file))
}
sample_mtx <- ReadMtx(mtx = mtx_file, cells = bcode_file, features = feature_file)
colnames(x = sample_mtx) <- paste(sample, colnames(x = sample_mtx), sep = '_') # add id tag to cellnames
sample_obj <- CreateSeuratObject(counts = sample_mtx, project = sample)
sample_dir <- paste(out_dir, sample, sep = "/")
dir.create(path = sample_dir, showWarnings = F)
saveRDS(object = sample_obj, file = paste(sample_dir, paste0(sample, "_final.rds"), sep = "/"))
}
####################################################
### Exploratory analysis for parameter selection ###
####################################################
# Perform DoubletFinder on each samples with different
# combinations of parameters
for (sample in samples) {
print(sample)
sample_dir <- paste(out_dir, sample, sep = "/")
plot_dir <- paste(sample_dir, "plots", sep = "/")
dir.create(path = plot_dir, showWarnings = F)
obj <- readRDS(paste(sample_dir, paste0(sample, "_final.rds"), sep = "/"))
obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^mt-")
obj <- CellCycleScoring(object = obj, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
obj@meta.data$CC.Difference <- obj@meta.data$S.Score - obj@meta.data$G2M.Score
Idents(obj) <- "orig.ident"
png(paste(plot_dir, paste0(sample, "_prefilter.png"), sep = "/"), width = 1200, height = 1000)
print(VlnPlot(obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1))
dev.off()
obj@misc$analysis_params$min.feature <- 1000
obj@misc$analysis_params$max.feature <- 6000
obj@misc$analysis_params$max.pc.mito <- 20
obj <- subset(obj, subset = nFeature_RNA > obj@misc$analysis_params$min.feature &
nFeature_RNA < obj@misc$analysis_params$max.feature &
percent.mt < obj@misc$analysis_params$max.pc.mito)
png(paste(plot_dir, paste0(sample, "_postfilter.png"), sep = "/"), width = 1200, height = 1000)
print(VlnPlot(obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1))
dev.off()
obj <- SCTransform(obj, vars.to.regress = c("CC.Difference", "percent.mt", "nCount_RNA"), verbose = FALSE)
DefaultAssay(object = obj) <- "SCT"
obj@misc$analysis_params$max_pca <- 50
obj <- RunPCA(object = obj, npcs = obj@misc$analysis_params$max_pca)
png(paste(plot_dir, paste0(sample, "_PCA_elbowplot_SCT.png"), sep = "/"), width = 1200, height = 1000)
print(ElbowPlot(object = obj, ndims = obj@misc$analysis_params$max_pca, reduction = "pca"))
dev.off()
obj@misc$analysis_params$num_pc <- 30
obj <- RunUMAP(object = obj, dims = 1:obj@misc$analysis_params$num_pc)
## pK Identification (no ground-truth) --------------------------------------------------
sweep.res.obj <- paramSweep_v3(obj, PCs = 1:obj@misc$analysis_params$num_pc, sct = TRUE)
sweep.stats_obj <- summarizeSweep(sweep.res.obj, GT = FALSE)
bcmvn_obj <- find.pK(sweep.stats_obj)
dev.off()
pK <- as.numeric(as.character(bcmvn_obj$pK))
BCmetric <- bcmvn_obj$BCmetric
pK_choose <- pK[which(BCmetric %in% max(BCmetric))]
png(paste(plot_dir, paste0(sample, "_DF_SCT_pK_selection.png"), sep = "/"), width = 1200, height = 1000)
par(mar = c(5,4,4,8)+1, cex.main = 1.2, font.main = 2)
plot(x = pK, y = BCmetric, pch = 16, type = "b", col = "blue", lty = 1)
abline(v = pK_choose, lwd = 2, col = 'red', lty = 2)
title("The BCmvn distributions")
text(pK_choose, max(BCmetric), as.character(pK_choose), pos = 4, col = "red")
dev.off()
for (pK_select in pK) {
for (doub_pct in c(0.07, 0.09)) {
nExp <- round(ncol(obj) * doub_pct) #expected % doublet - approx
obj <- doubletFinder_v3(obj, pN = 0.25, pK = pK_select, nExp = nExp,
PCs = 1:obj@misc$analysis_params$num_pc, sct = TRUE)
df_ident <- grep("DF.classification", colnames(obj@meta.data), value = T)[1]
Idents(obj) <- df_ident
new_cn <- paste0("DF_SCT_nExp", doub_pct, "_pK", pK_select)
colnames(obj@meta.data)[colnames(obj@meta.data) == df_ident] <- new_cn
str(obj@meta.data)
dp_df <- DimPlot(object = obj, reduction = "umap", pt.size = 1, group.by = new_cn, split.by = new_cn, ncol= 2) +
NoLegend()
dp_phase <- DimPlot(object = obj, reduction = "umap", pt.size = 1, group.by = "Phase",
split.by = new_cn, ncol= 3)
fp1 <- FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Cd19"),
split.by = new_cn, order = T, ncol= 2)
fp2 <- FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Cd3g"),
split.by = new_cn, order = T, ncol= 2)
fp3 <- FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Itgam"),
split.by = new_cn, order = T, ncol = 2)
fp4 <- FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Alas2"),
split.by = new_cn, order = T, ncol = 2)
png(paste(plot_dir, paste0("DF_SCT_nExp", doub_pct, "_pK", pK_select, ".png"), sep = "/"),
height = 1500, width = 1700)
plot.new()
grid.newpage()
pushViewport(viewport(layout = grid.layout(3, 2)))
print(dp_phase, vp = define_region(row = 1, col = 1))
print(dp_df, vp = define_region(row = 1, col = 2))
print(fp1, vp = define_region(row = 2, col = 1))
print(fp2, vp = define_region(row = 2, col = 2))
print(fp3, vp = define_region(row = 3, col = 1))
print(fp4, vp = define_region(row = 3, col = 2))
dev.off()
}
}
saveRDS(object = obj, paste(sample_dir, paste0(sample, "_afterDoubletFinder.rds"), sep = "/"))
}
# Prepare some plots on DoubletFinder results
for (sample in samples) {
print(sample)
sample_dir <- paste(out_dir, sample, sep = "/")
plot_dir <- paste(sample_dir, "plots-DBF", sep = "/")
dir.create(plot_dir, showWarnings = F)
obj = readRDS(paste(sample_dir, paste0(sample, "_afterDoubletFinder.rds"), sep = "/"))
for (pK_select in c(0.005, seq(0.01, 0.3, 0.01))) {
for (doub_pct in c(0.05, 0.07, 0.09)) {
nExp <- round(ncol(obj) * doub_pct)
new_cn <- paste0("DF_SCT_nExp", doub_pct, "_pK", pK_select)
subtitle <- list("Full" = "", "Cd19" = "", "Cd3g" = "", "Itgam" = "", "Alas2" = "")
num_title <- 0
for (md_val in names(table(obj[[new_cn]]))) {
if (num_title == 0) {
num_title <- 1
subtitle[["Full"]] <- paste(subtitle[["Full"]],
paste(md_val, table(obj[[new_cn]])[[md_val]], sep = " : "), sep = "\n")
for (gg in c("Cd19", "Cd3g", "Itgam", "Alas2")) {
gg_expr <- FetchData(object = obj, vars = gg)
obj_filt <- obj[, which(x = gg_expr > 0)]
vv <- ifelse(md_val %in% names(table(obj_filt[[new_cn]])), table(obj_filt[[new_cn]])[[md_val]], 0)
subtitle[[gg]] <- paste0(gg, ">0 :\t", subtitle[[gg]], paste(md_val, vv, sep = "= "))
}
} else {
subtitle[["Full"]] <- paste(subtitle[["Full"]], paste(md_val, table(obj[[new_cn]])[[md_val]], sep = " : "),
sep = "\t|\t")
for (gg in c("Cd19", "Cd3g", "Itgam", "Alas2")) {
gg_expr <- FetchData(object = obj, vars = gg)
obj_filt <- obj[, which(x = gg_expr > 0)]
vv <- ifelse(md_val %in% names(table(obj_filt[[new_cn]])), table(obj_filt[[new_cn]])[[md_val]], 0)
subtitle[[gg]] <- paste(subtitle[[gg]], paste(md_val, vv, sep = "= "), sep = "\t| ")
}
}
num_title <- num_title + 1
}
Idents(obj) <- new_cn
dp_df <- DimPlot(object = obj, reduction = "umap", pt.size = 1, group.by = new_cn, split.by = new_cn, ncol= 2) +
NoLegend() + ggtitle(subtitle[["Full"]])
dp_phase <- DimPlot(object = obj, reduction = "umap", pt.size = 1, group.by = "Phase", split.by = new_cn, ncol= 3)
fp1 <- plot_grid(ggdraw() + draw_label(subtitle[["Cd19"]]),
FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Cd19"), split.by = new_cn,
order = T, ncol= 2),
ncol = 1, rel_heights = c(0.1, 1))
fp2 <- plot_grid(ggdraw() + draw_label(subtitle[["Cd3g"]]),
FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Cd3g"), split.by = new_cn,
order = T, ncol= 2),
ncol = 1, rel_heights = c(0.1, 1))
fp3 <- plot_grid(ggdraw() + draw_label(subtitle[["Itgam"]]),
FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Itgam"),
split.by = new_cn, order = T, ncol = 2),
ncol = 1, rel_heights = c(0.1, 1))
fp4 <- plot_grid(ggdraw() + draw_label(subtitle[["Alas2"]]),
FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Alas2"),
split.by = new_cn, order = T, ncol = 2),
ncol = 1, rel_heights = c(0.1, 1))
png(paste(plot_dir, paste0("DF_SCT_nExp", doub_pct, "_pK", pK_select, ".png"), sep = "/"),
height = 1500, width = 1700)
plot.new()
grid.newpage()
pushViewport(viewport(layout = grid.layout(3, 2)))
print(dp_phase, vp = define_region(row = 1, col = 1))
print(dp_df, vp = define_region(row = 1, col = 2))
print(fp1, vp = define_region(row = 2, col = 1))
print(fp2, vp = define_region(row = 2, col = 2))
print(fp3, vp = define_region(row = 3, col = 1))
print(fp4, vp = define_region(row = 3, col = 2))
dev.off()
}
}
}
# Select the best combinations of parameters of DoubletFinder
# for each sample
samples <- list("GG-11" = c("nExp" = 0.05, "pK" = 0.05),
"GG-18" = c("nExp" = 0.07, "pK" = 0.2),
"GG-22" = c("nExp" = 0.09, "pK" = 0.005),
"GG-23" = c("nExp" = 0.07, "pK" = 0.03),
"GG-25" = c("nExp" = 0.07, "pK" = 0.03))
# Add metadata information on doublets in each sample
full_DoubFinder <- data.frame()
for (sample in names(samples)) {
print(sample)
sample_dir <- paste(out_dir, sample, sep = "/")
obj <- readRDS(paste(sample_dir, paste0(sample, "_afterDoubletFinder.rds"), sep = "/"))
nExp <- round(ncol(obj) * samples[[sample]][["nExp"]]) #expected % doublet - approx
obj@misc$analysis_params$doubletFinder <- list(pN = 0.25,
pK = samples[[sample]][["pK"]],
nExp = nExp,
nExpPerc = samples[[sample]][["nExp"]],
PCs = obj@misc$analysis_params$num_pc)
new_cn <- paste0("DF_SCT_nExp", obj@misc$analysis_params$doubletFinder$nExpPerc, "_pK",
obj@misc$analysis_params$doubletFinder$pK)
df_sel_classif <- obj@meta.data[, new_cn, drop = F]
print(head(df_sel_classif))
colnames(df_sel_classif) <- "DoubFinderClassif"
full_DoubFinder <- rbind(full_DoubFinder, df_sel_classif)
saveRDS(object = obj, paste(sample_dir, paste0(sample, "_afterDoubletFinder.rds"), sep = "/"))
}
############################################
### Create Full (Combined) Seurat Object ###
############################################
samples_obj <- list()
for (sample in names(samples)) {
print(sample)
sample_dir <- paste(out_dir, sample, sep = "/")
samples_obj[[sample]] <- readRDS(file = paste(sample_dir, paste0(sample, "_final.rds"), sep = "/"))
}
Full_obj <- merge(x = samples_obj[[1]], y = samples_obj[2:length(samples_obj)])
dir.create(path = paste(out_dir, "Full_DBR", sep = "/"))
saveRDS(object = Full_obj, paste(out_dir, "Full_DBR", "Full_DBR_final.rds", sep = "/"))
plot_dir <- paste(out_dir, "Full_DBR", "plots", sep = "/")
dir.create(path = plot_dir, showWarnings = F)
# Assign MT genes
Full_obj[["percent.mt"]] <- PercentageFeatureSet(Full_obj, pattern = "^mt-")
# Identify tresholds visualy and apply tresholds
Idents(Full_obj) <- "RNA_Condition"
png(filename = paste(plot_dir, "Full_Prefilter_Vlnplot.png", sep = "/"), width = 1500, height = 1000)
VlnPlot(Full_obj, features = reg_vars, ncol = 3, pt.size = 0)
dev.off()
png(filename = paste(plot_dir, "Full_Prefilter_ScatterMT.png", sep = "/"), width = 1200, height = 800)
FeatureScatter(Full_obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
dev.off()
png(filename = paste(plot_dir, "Full_Prefilter_ScatterFeatures.png", sep = "/"), width = 1200, height = 800)
FeatureScatter(Full_obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
dev.off()
Full_obj@misc$analysis_params$min.feature <- 1000
Full_obj@misc$analysis_params$max.feature <- 6000
Full_obj@misc$analysis_params$max.pc.mito <- 20
table(Full_obj$orig.ident)
dim(Full_obj)
Full_obj <- subset(Full_obj, subset = nFeature_RNA > Full_obj@misc$analysis_params$min.feature &
nFeature_RNA < Full_obj@misc$analysis_params$max.feature &
percent.mt < Full_obj@misc$analysis_params$max.pc.mito)
table(Full_obj$orig.ident)
dim(Full_obj)
png(filename = paste(plot_dir, "Full_Postfilter_Vlnplot.png", sep = "/"), width = 1500, height = 1000)
VlnPlot(Full_obj, features = reg_vars, ncol = 3, pt.size = 0)
dev.off()
png(filename = paste(plot_dir, "Full_Postfilter_ScatterMT.png", sep = "/"), width = 1200, height = 800)
FeatureScatter(Full_obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
dev.off()
png(filename = paste(plot_dir, "Full_Postfilter_ScatterFeatures.png", sep = "/"), width = 1200, height = 800)
FeatureScatter(Full_obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
dev.off()
## Import filtered object and process (SCTransform, runPCA)
# normalize, Scaledata are not computed, since we are not doing diff expression at this point
Full_obj <- SCTransform(Full_obj, vars.to.regress = reg_vars, verbose = TRUE)
DefaultAssay(object = Full_obj) <- "SCT"
Full_obj@misc$analysis_params$max_pca <- 50
Full_obj <- RunPCA(object = Full_obj, npcs = Full_obj@misc$analysis_params$max_pca)
png(filename = paste(plot_dir, "Full_ElbowPlot.png", sep = "/"), width = 1200, height = 800)
ElbowPlot(Full_obj, ndims = Full_obj@misc$analysis_params$max_pca, reduction = "pca")
dev.off()
Full_obj@misc$analysis_params$num_pc <- 25
## Import Doublets
full_DoubFinder_md <- full_DoubFinder$DoubFinderClassif
names(full_DoubFinder_md) <- rownames(full_DoubFinder)
Full_obj <- AddMetaData(object = Full_obj, metadata = full_DoubFinder_md, col.name = "DoubletFinder_classification")
colnames(Full_obj@meta.data)
#####################
## Remove Doublets ##
#####################
Idents(Full_obj) <- "DoubletFinder_classification"
table(Full_obj$orig.ident, Full_obj$DoubletFinder_classification)
dim(Full_obj)
Full_obj <- subset(Full_obj, idents = "Singlet")
table(Full_obj$orig.ident)
dim(Full_obj)
## Process (normalize, Scaledata, SCTransform, runPCA)
# Scale data will be used when commputing top marker (we think)
Full_obj <- NormalizeData(object = Full_obj)
Full_obj <- ScaleData(object = Full_obj, vars.to.regress = reg_vars,
display.progress = T, features = rownames(Full_obj))
Full_obj <- SCTransform(Full_obj, vars.to.regress = reg_vars, verbose = TRUE)
DefaultAssay(object = Full_obj) <- "SCT"
Full_obj <- RunPCA(object = Full_obj, npcs = Full_obj@misc$analysis_params$max_pca)
ElbowPlot(Full_obj, ndims = Full_obj@misc$analysis_params$max_pca, reduction = "pca")
Full_obj <- RunUMAP(Full_obj, dims = 1:Full_obj@misc$analysis_params$num_pc, seed.use = 1,reduction = "pca")
Full_obj <- FindNeighbors(Full_obj ,reduction = "pca", dims = 1:Full_obj@misc$analysis_params$num_pc,
force.recalc = T, n.trees = 5, annoy.metric = "euclidean")
# Perform clustering on SCT
for (res in c(0.4, 0.6, 1.2)) {
Full_obj <- FindClusters(Full_obj , resolution = res)
}
for (res in c("SCT_snn_res.0.4", "SCT_snn_res.0.6", "SCT_snn_res.1.2", "orig.ident", "Phase")) {
Idents(Full_obj) <- res
png(filename = paste(plot_dir, paste0("Full_DBR_", res, ".png"), sep = "/"), width = 1200, height = 800)
print(DimPlot(object = Full_obj, pt.size = .7, label = T))
dev.off()
}
# Markers
for (res in c("SCT_snn_res.0.4", "SCT_snn_res.0.6", "SCT_snn_res.1.2")) {
Idents(Full_obj) <- res
marks <- FindAllMarkers(object = Full_obj, logfc.threshold = 0.25, min.pct = 0.1, return.thresh = 0.01)
wb_markers <- createWorkbook()
for (clu in unique(marks$cluster)) {
df_markers_clu <- subset(marks, cluster == clu)
addWorksheet(wb_markers, clu)
writeData(wb_markers, sheet = clu, x = df_markers_clu)
}
saveWorkbook(wb = wb_markers, overwrite = T,
file = paste(out_dir, "Full_DBR", paste0("Full_DBR_", res, "_markers.xlsx"), sep = "/"))
mname <- paste0(res, "_markers")
Full_obj@misc[[manme]] <- marks
}
##########################
## Export Table results ##
##########################
# Counts
full_counts <- Full_obj@assays$RNA@counts
gz_out_counts <- gzfile(paste(out_dir, "Full_DBR", "Full_DBR_final_DBR_labeled_counts.csv.gz", sep = "/"), "w")
write.csv(full_counts, gz_out_counts)
close(gz_out_counts)
# Meta Data
full_md <- Full_obj@meta.data
gz_out_md <- gzfile(paste(out_dir, "Full_DBR", "Full_DBR_final_DBR_labeled_metadata.csv.gz", sep = "/"), "w")
write.csv(x = full_md, gz_out_md)
close(gz_out_md)
# UMAP Coordinates
full_umap <- Full_obj@reductions$umap@cell.embeddings
gz_out_umap <- gzfile(paste(out_dir, "Full_DBR", "Full_DBR_final_DBR_labeled_umap.csv.gz", sep = "/"), "w")
write.csv(x = full_umap, gz_out_umap)
close(gz_out_umap)
# Save final object
saveRDS(object = Full_obj, paste(out_dir, "Full_DBR", "Full_DBR_final_DBR_labeled.rds", sep = "/"))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment