Commit 6b545956 authored by Teresa Tavella's avatar Teresa Tavella
Browse files

Upload New File

parent c5f8183d
library(scales)
library(viridis)
library(Seurat)
library(stringr)
library(tibble)
library(dplyr)
library(patchwork)
library(ggplot2)
library(tidyr)
library(RColorBrewer)
library(openxlsx)
library(cowplot)
args <- commandArgs(trailingOnly = TRUE)
#########################################################################################
# KEEP ONLY BARCODE USED IN EXPERIMENT FROM FILE, NORMALIZE HTO AND DEMUX
#########################################################################################
filterbarcodes <- function(object_seurat,i,marg.norm) {
cat('## FILTER BACRODES')
counts <- GetAssayData(object_seurat, slot="counts", assay="HTO")
cat('number of cells for each HTO ')
cat(rowSums(data.frame(counts)))
hto.filter <- str_split(hto_conditions[hto_conditions$sample == sampleid, 'HTO'], ',')[[1]] #hto_conditions[[sampleid]]
counts.sub <- counts[hto.filter,]
##filter and update number of cells per sample
hto.assay <- CreateSeuratObject(counts=counts.sub)
object_seurat@assays$HTO <- NULL ### to use if you want to subset HTO
object_seurat@assays$HTO <- CreateAssayObject(counts = counts.sub) ### ### to use if you want to subset HTO
object_seurat@assays$HTO@key <- 'hto_'
cat('Normalize HTO data')
scrna.hashtag <- NormalizeData(object_seurat, assay = "HTO",normalization.method = "CLR", margin = marg.norm)
scrna.hashtag <- HTODemux(scrna.hashtag, assay = "HTO", positive.quantile = i, seed = 42)
## rerturn list of barcodes and obj demultiplexed
return(list(sampleid,
obj_demux=scrna.hashtag,
barcodes=rownames(scrna.hashtag@assays$HTO)))
}
#########################################################################################
# CHOOSE POSITIVE QUANTILE PARAMETER
#########################################################################################
eval_pq <- function(out_dir, obj.combined, sampleid, library_id, min_feature, max_feature, mito.prefix, max_pc_mito, marg.norm) {
eval_pq_out_list <- list()
#cat(sampleid)
eval_pq_out <- tibble()
#obj.combined = readRDS(paste(folder,sampleid,paste0(sampleid,'_minimal.rds'), sep='/'))
DefaultAssay(obj.combined) <- 'RNA'
print(DefaultAssay(obj.combined))
cat('filter cells', mito.prefix)
# Compute mito %
obj.combined[["percent.mt"]] <- PercentageFeatureSet(obj.combined, assay = 'RNA', pattern = mito.prefix)
obj <- obj.combined %>%
subset(
nFeature_RNA > min_feature & ### Remove cells with < 250 detected genes
nFeature_RNA < max_feature & ### Remove cells with > 2500 detected genes (could be doublets)
percent.mt < max_pc_mito ### Remove cells with > 0.15 mito/total reads
)
# filtering object based on hto count to zero
DefaultAssay(obj) <- 'HTO'
print(DefaultAssay(obj))
obj <- subset(obj,subset = nCount_HTO != 0 )
cat('find pq')
for (i in seq(from = 0.9, to = 0.999999999, by = 0.005)) {
cat(' demultiplexing cells based on HTO')
x <- filterbarcodes(obj, i, marg.norm)
object <- x$obj_demux
cat('save pq')
HTO_classification <- as.data.frame.matrix(table(
object@meta.data$HTO_maxID,
object@meta.data$HTO_classification.global
)) %>%
summarise(max.singlet = sum(Singlet)) %>%
mutate(threshold = i)
eval_pq_out <- bind_rows(eval_pq_out, HTO_classification) %>%
mutate(library = sampleid)
}
eval_pq_out_list <- rbind( eval_pq_out_list, eval_pq_out)
result_list <- list(
"eval_pq_out" = eval_pq_out_list
)
return(result_list)
}
obj_minimal <- args[1] # Sample minimal object
obj_combined <- readRDS(obj_minimal)
out_dir <- args[2] # Path to output result folder (results)
sampleid <- args[3] #Sample id
#min_cells <- as.numeric(args[4])
min_feature <- as.numeric(args[4])
max_feature <- as.numeric(args[5])
mito.prefix = args[6]
max_pc_mito <- as.numeric(args[7])
marg.norm <- as.numeric(args[8])
hto <- args[9]
cat('########## mito prefix #######')
cat(mito.prefix)
hto_conditions <- read.table(hto, header = TRUE, stringsAsFactors = FALSE) # Table with HTO list per sample
## Evaluate positive quantile parameter
eval_pq_lib1 <- eval_pq(out_dir, obj_combined, sampleid, hto_conditions, min_feature, max_feature, mito.prefix, max_pc_mito, marg.norm)
## Plot number of Singlet detected
pq <- eval_pq_lib1$eval_pq_out %>%
ggplot(aes(x = threshold, y = max.singlet, fill = library, color = library)) +
geom_line() +
geom_point() +
scale_color_brewer(palette = "Set1", direction = -1) +
labs(x = "Postive Quantile", y = "# Singlet cells", fill = "", color = "") +
theme_bw()
out_dir_sample <- paste(out_dir, sampleid, 'plots',sep = "/")
dir.create(out_dir_sample, showWarnings = FALSE)
png(filename = paste(out_dir_sample, paste0(sampleid,"_max_threshod_pq_margin_",as.character(marg.norm),'.png'), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(pq)
dev.off()
## save threshold
dx <- data.frame(eval_pq_lib1$eval_pq_out)
dx$library <- as.factor(dx$library)
max_threshold <- merge(aggregate(max.singlet ~ library, data=dx, max), dx, all.x=T)
write.table(max_threshold,paste(out_dir_sample, paste0(sampleid,"_max_threshod_pq_margin_",as.character(marg.norm),".txt"), sep = '/'))
# #####################################################################################
# # After computing best pq with higher number of singlets perform demux with max value
# #####################################################################################
cat(sampleid)
maxi <- max_threshold[max_threshold$library == sampleid, 'threshold']
# sample minimal object
obj <- readRDS(obj_minimal)
### Add mitochondrial percentage to meta.data table
DefaultAssay(obj) <- 'RNA'
print(DefaultAssay(obj))
# Compute mito %
obj[["percent.mt"]] <- PercentageFeatureSet(obj, assay = 'RNA', pattern = mito.prefix)
obj <- obj %>%
subset(
nFeature_RNA > min_feature & ### Remove cells with < 250 detected genes
nFeature_RNA < max_feature & ### Remove cells with > 2500 detected genes (could be doublets)
percent.mt < max_pc_mito ### Remove cells with > 0.15 mito/total reads
)
# filtering object based on hto count to zero
DefaultAssay(obj) <- 'HTO'
print(DefaultAssay(obj))
obj <- subset(obj,subset = nCount_HTO != 0 )
table(obj$orig.ident)
cat('Demultiplexing cells based on HTO')
x <- filterbarcodes(obj, maxi, marg.norm)
scrna.hashtag <- x$obj_demux
rowSums(data.frame(scrna.hashtag@assays$HTO@counts))
# Save demux Seurat object
#oiut_dir_sample <- paste(out_dir, sampleid, sep = "/")
#dir.create(out_dir_sample, showWarnings = FALSE)
DefaultAssay(scrna.hashtag) <- "RNA"
out_dir <- paste(out_dir, sampleid, sep = '/')
saveRDS(object = scrna.hashtag, file = paste(out_dir, paste(sampleid, "demux_minimal.rds", sep = "_"), sep = "/"))
cat(x$barcodes)
cat('visualize demultiplexing - Global classification results', sampleid)
class_hto <- list()
class_hto[[sampleid]] <- table(scrna.hashtag$HTO_classification)
class_hto[[sampleid]]
class_htoglobal <- list()
class_htoglobal[[sampleid]] <- table(scrna.hashtag$HTO_classification.global)
class_htoglobal[[sampleid]]
cat('Group cells based on the max HTO signal')
Idents(scrna.hashtag) <- "HTO_maxID"
DefaultAssay(scrna.hashtag) <- "HTO"
rdgplot <- scrna.hashtag %>%
RidgePlot(
assay = "HTO",
features = rownames(scrna.hashtag),
ncol = 2)
png(filename = paste(out_dir_sample, paste0(sampleid,"_ridgeplot.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(rdgplot)
dev.off()
### Calculate doublet rate
scrna.hashtag@meta.data %>%
group_by(hash.ID) %>%
summarize(fract = n() / nrow(.))
### Create bar graphs comparing cell count for each sample
HTO_bars_1 <- scrna.hashtag@meta.data %>%
rownames_to_column("cell_id") %>%
ggplot(aes(hash.ID, fill = hash.ID)) +
geom_bar() +
labs(y = "Cell Count") +
cowplot::theme_cowplot() +
theme(
legend.position = "none",
axis.title.x = element_blank(),
axis.text.x = element_text(hjust = 1, angle = 45)
)
### Create stacked bar graph showing fraction of cells
HTO_bars_2 <- scrna.hashtag@meta.data %>%
rownames_to_column("cell_id") %>%
group_by(hash.ID) %>%
summarize(hash_frac = n() / nrow(.)) %>%
ggplot(aes("scRNA-seq", hash_frac, fill = hash.ID)) +
geom_bar(stat = "identity", color = "white", size = 0.5) +
labs(y = "Fraction of Cells", x= sampleid) +
cowplot::theme_cowplot() +
theme(
legend.title = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
### Combine plots
### now add the title
# title <- ggdraw() +
# draw_label(
# sampleid,
# fontface = 'bold',
# x = 0,
# hjust = 0
# ) +
# theme(
# ### add margin on the left of the drawing canvas,
# ### so title is aligned with left edge of first plot
# plot.margin = margin(0, 0, 0, 7)
# )
x <- plot_grid(#title,
HTO_bars_1, HTO_bars_2,
nrow = 1,
rel_widths = c(0.5, 0.5),
rel_heights = c(1, 0.8)
)
png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_bars.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(x)
dev.off()
# cell count by hto classification
scrna.hashtag@meta.data %>%
group_by(HTO_classification) %>%
summarize(fract = n() / nrow(.))
### Create bar graphs comparing cell count by HTO classification
HTO_bars_3 <- scrna.hashtag@meta.data %>%
rownames_to_column("cell_id") %>%
ggplot(aes(HTO_classification, fill = HTO_classification)) +
geom_bar() +
labs(y = "Cell Count") +
cowplot::theme_cowplot() +
theme(
legend.position = "none",
axis.title.x = element_blank(),
axis.text.x = element_text(hjust = 1, angle = 45)
)
### Create stacked bar graph showing fraction of cells
HTO_bars_4 <- scrna.hashtag@meta.data %>%
rownames_to_column("cell_id") %>%
group_by(HTO_classification) %>%
summarize(hash_frac = n() / nrow(.)) %>%
ggplot(aes("scRNA-seq", hash_frac, fill = HTO_classification)) +
geom_bar(stat = "identity", color = "white", size = 0.5) +
labs(y = "Fraction of Cells", x= sampleid) +
cowplot::theme_cowplot() +
theme(
legend.title = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
### Combine plots
x <- plot_grid(#title,
HTO_bars_3, HTO_bars_4,
nrow = 1,
rel_widths = c(0.5, 0.5),
rel_heights = c(1, 0.8)
)
png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_bars_classification.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(x)
dev.off()
fs <- FeatureScatter(scrna.hashtag, feature1 = "hto_H1", feature2 = "hto_H2")
png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_1_2_featurescatter.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(fs)
dev.off()
Idents(scrna.hashtag) <- "HTO_classification.global"
vnlp <- VlnPlot(scrna.hashtag, features = "nCount_RNA", pt.size = 0.1, log = TRUE)
png(filename = paste(out_dir_sample, paste0(sampleid,"_ncount_RNA.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(vnlp)
dev.off()
cat('remove negative cells from the object')
scrna.hashtag.subset <- subset(scrna.hashtag, idents = "Negative", invert = TRUE)
cat('Calculate a tSNE embedding of the HTO data')
DefaultAssay(scrna.hashtag.subset) <- "HTO"
scrna.hashtag.subset <- ScaleData(scrna.hashtag.subset, features = rownames(scrna.hashtag.subset),
verbose = FALSE)
scrna.hashtag.subset <- RunPCA(scrna.hashtag.subset, features = rownames(scrna.hashtag.subset), approx = FALSE)
# Calculate a distance matrix using HTO
hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = scrna.hashtag.subset, assay = "HTO"))))
scrna.hashtag.subset <- RunTSNE(scrna.hashtag.subset, dims = 1:length( hto_conditions[[sampleid]]), perplexity = 100,distance.matrix = hto.dist.mtx,
check_duplicates = FALSE)
Idents(scrna.hashtag.subset) <- 'HTO_classification'
pdim_hto <- DimPlot(scrna.hashtag.subset,label=TRUE)
png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_dimplot.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(pdim_hto)
dev.off()
Idents(scrna.hashtag.subset) <- 'HTO_classification.global'
pdim_hto_cl <- DimPlot(scrna.hashtag.subset,label=TRUE)
png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_global_dimplot.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(pdim_hto_cl)
dev.off()
htohm <- HTOHeatmap(scrna.hashtag.subset, assay = "HTO", ncells = 5000)
png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_heatmap.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(htohm)
dev.off()
## count all barcodes
cat('count HTO reads')
count_HTO_reads <- list()
count_HTO_reads[[sampleid]] <- rowSums(data.frame(obj@assays$HTO@counts))
count_HTO_reads[[sampleid]]
###cat('Extract the singlets')
scrna.hashtag.singlet <- subset(scrna.hashtag.subset, idents = "Singlet")
count_HTO_reads_singlet <- list()
count_HTO_reads_singlet[[sampleid]] <- rowSums(data.frame(scrna.hashtag.singlet@assays$HTO@counts))
cat('count HTO singlet reads\n')
count_HTO_reads_singlet[[sampleid]]
Idents(scrna.hashtag.singlet) <- "HTO_classification"
count_HTO_reads_singlet_HTO <- list()
count_HTO_reads_singlet_HTO_mean <- list()
for (n in rownames(scrna.hashtag.singlet@assays$HTO@counts)){
x <- subset(scrna.hashtag.singlet, idents = n )
cat('hto', n)
print(rowSums(data.frame(x@assays$HTO@counts)))
## count of reads HTO in cell classified as specific HTO
count_HTO_reads_singlet_HTO[[sampleid]][n] <- rowSums(data.frame(x@assays$HTO@counts))[n]
## MEAN read count per hashatg over cell classified as specific-HTO
count_HTO_reads_singlet_HTO_mean[[sampleid]][n] <- unname(rowSums(data.frame(x@assays$HTO@counts))[n])/unname(class_hto[[sampleid]][n])
}
cat('count HTO reads, only in cell classified as singlet for specific HTO\n')
count_HTO_reads_singlet_HTO
cat('mean HTO reads, only in cell classified as singlet for specific HTO\n')
count_HTO_reads_singlet_HTO_mean
Idents(scrna.hashtag.subset) <- 'HTO_classification.global'
### Projecting singlet identities on TSNE visualization
dimpl_htocl <- DimPlot(scrna.hashtag.singlet, group.by = "HTO_classification",label=TRUE)
png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_singlets.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
dimpl_htocl
dev.off()
# dimpl_htoglobal <- DimPlot(scrna.hashtag.singlet, group.by = 'HTO_classification.global', label=TRUE)
# png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_global_singlets.png"), sep = '/'),
# width = 12, height = 9, units = "in", res = 96)
# print(dimpl_htoglobal)
# dev.off()
vnl_hto <- scrna.hashtag.singlet %>%
VlnPlot(
features = c( "nCount_HTO"),
ncol = 1,
pt.size = 0.0005,
log = T
)
png(filename = paste(out_dir_sample, paste0(sampleid,"_violin_hto_singlets.png"), sep = '/'),
width = 12, height = 9, units = "in", res = 96)
print(vnl_hto)
dev.off()
## Save Singlets cell and HTO classification in meta data
DefaultAssay(scrna.hashtag) <- "RNA"
scrna.hashtag[['HTO']] <- NULL # eliminate HTO assay
saveRDS(object = scrna.hashtag.singlet, file = paste(out_dir, paste(sampleid, "singlet_minimal.rds", sep = "_"), sep = "/"))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment