Upload New File

6b545956 · Teresa Tavella · c5f8183d · 6b545956
Commit 6b545956 authored Oct 14, 2024 by Teresa Tavella
--- a/HTO_demux.R
+++ b/HTO_demux.R
+library(scales)
+library(viridis)
+library(Seurat)
+library(stringr)
+library(tibble)
+library(dplyr)
+library(patchwork)
+library(ggplot2)
+library(tidyr)
+library(RColorBrewer)
+library(openxlsx)
+library(cowplot)
+args <- commandArgs(trailingOnly = TRUE)
+#########################################################################################
+# KEEP ONLY BARCODE USED IN EXPERIMENT FROM FILE, NORMALIZE HTO AND DEMUX
+#########################################################################################
+filterbarcodes <- function(object_seurat,i,marg.norm) {
+  cat('## FILTER BACRODES')
+  counts <- GetAssayData(object_seurat, slot="counts", assay="HTO")   
+  cat('number of cells for each HTO ')
+  cat(rowSums(data.frame(counts)))
+  hto.filter <-  str_split(hto_conditions[hto_conditions$sample == sampleid, 'HTO'], ',')[[1]] #hto_conditions[[sampleid]] 
+  counts.sub <- counts[hto.filter,]
+  ##filter and update number of cells per sample
+  hto.assay <- CreateSeuratObject(counts=counts.sub)
+  object_seurat@assays$HTO <- NULL  ### to use if you want to subset HTO
+  object_seurat@assays$HTO <- CreateAssayObject(counts =  counts.sub)  ### ### to use if you want to subset HTO
+  object_seurat@assays$HTO@key <- 'hto_'
+  cat('Normalize HTO data')
+  scrna.hashtag <- NormalizeData(object_seurat, assay = "HTO",normalization.method = "CLR", margin = marg.norm) 
+  scrna.hashtag <- HTODemux(scrna.hashtag, assay = "HTO", positive.quantile = i,  seed = 42)
+  ## rerturn list of barcodes and obj demultiplexed
+  return(list(sampleid,
+              obj_demux=scrna.hashtag,
+              barcodes=rownames(scrna.hashtag@assays$HTO)))
+}
+#########################################################################################
+# CHOOSE POSITIVE QUANTILE PARAMETER
+#########################################################################################
+eval_pq <- function(out_dir, obj.combined, sampleid, library_id, min_feature, max_feature, mito.prefix, max_pc_mito, marg.norm) {
+eval_pq_out_list <- list()
+  #cat(sampleid)
+  eval_pq_out <- tibble()
+  #obj.combined = readRDS(paste(folder,sampleid,paste0(sampleid,'_minimal.rds'), sep='/'))
+ DefaultAssay(obj.combined) <- 'RNA'
+ print(DefaultAssay(obj.combined))
+ cat('filter cells', mito.prefix)
+ # Compute mito % 
+ obj.combined[["percent.mt"]] <- PercentageFeatureSet(obj.combined, assay = 'RNA', pattern = mito.prefix)
+ obj  <- obj.combined %>%
+  subset(
+    nFeature_RNA > min_feature &    ### Remove cells with < 250 detected genes
+      nFeature_RNA < max_feature &   ### Remove cells with > 2500 detected genes (could be doublets)
+      percent.mt < max_pc_mito  ### Remove cells with > 0.15 mito/total reads
+  )
+# filtering object based on hto count to zero
+DefaultAssay(obj) <- 'HTO'
+print(DefaultAssay(obj))
+obj <- subset(obj,subset = nCount_HTO != 0 )
+cat('find pq')
+for (i in seq(from = 0.9, to = 0.999999999, by = 0.005)) {
+  cat(' demultiplexing cells based on HTO')
+  x <- filterbarcodes(obj, i, marg.norm)
+  object <- x$obj_demux
+  cat('save pq')
+  HTO_classification <- as.data.frame.matrix(table(
+    object@meta.data$HTO_maxID,
+    object@meta.data$HTO_classification.global
+  )) %>%
+    summarise(max.singlet = sum(Singlet)) %>%
+    mutate(threshold = i)
+  eval_pq_out <- bind_rows(eval_pq_out, HTO_classification) %>%
+    mutate(library = sampleid)
+}
+eval_pq_out_list <- rbind( eval_pq_out_list, eval_pq_out)
+result_list <- list(
+"eval_pq_out" =  eval_pq_out_list
+)
+return(result_list)
+}
+obj_minimal <- args[1] # Sample minimal object
+obj_combined <- readRDS(obj_minimal)
+out_dir <- args[2]      # Path to output result folder (results)
+sampleid <- args[3]     #Sample id
+#min_cells <- as.numeric(args[4])
+min_feature <- as.numeric(args[4]) 
+max_feature <- as.numeric(args[5]) 
+mito.prefix = args[6]
+max_pc_mito <-  as.numeric(args[7])
+marg.norm <- as.numeric(args[8])
+hto <- args[9]
+cat('########## mito prefix  #######')
+cat(mito.prefix)
+hto_conditions <- read.table(hto, header = TRUE, stringsAsFactors = FALSE) # Table with HTO list per sample
+## Evaluate positive quantile parameter
+eval_pq_lib1 <- eval_pq(out_dir, obj_combined, sampleid, hto_conditions, min_feature, max_feature, mito.prefix, max_pc_mito, marg.norm)
+## Plot number of Singlet detected 
+pq <- eval_pq_lib1$eval_pq_out %>%
+  ggplot(aes(x = threshold, y = max.singlet, fill = library, color = library)) +
+  geom_line() +
+  geom_point() +
+  scale_color_brewer(palette = "Set1", direction = -1) +
+  labs(x = "Postive Quantile", y = "# Singlet cells", fill = "", color = "") +
+  theme_bw()
+out_dir_sample <- paste(out_dir, sampleid, 'plots',sep = "/")
+dir.create(out_dir_sample, showWarnings = FALSE)
+png(filename = paste(out_dir_sample, paste0(sampleid,"_max_threshod_pq_margin_",as.character(marg.norm),'.png'), sep = '/'),
+    width = 12, height = 9, units = "in", res = 96)
+print(pq)
+dev.off()
+## save threshold
+dx <- data.frame(eval_pq_lib1$eval_pq_out)
+dx$library <- as.factor(dx$library)
+max_threshold <- merge(aggregate(max.singlet ~ library, data=dx, max), dx, all.x=T)
+write.table(max_threshold,paste(out_dir_sample, paste0(sampleid,"_max_threshod_pq_margin_",as.character(marg.norm),".txt"), sep = '/'))
+# #####################################################################################
+# # After computing best pq with higher number of singlets perform demux with max value
+# #####################################################################################
+cat(sampleid)
+maxi <- max_threshold[max_threshold$library == sampleid, 'threshold']
+# sample minimal object
+obj <- readRDS(obj_minimal)
+### Add mitochondrial percentage to meta.data table
+DefaultAssay(obj) <- 'RNA'
+print(DefaultAssay(obj))
+# Compute mito %
+obj[["percent.mt"]] <- PercentageFeatureSet(obj, assay = 'RNA', pattern = mito.prefix)
+obj  <- obj %>%
+  subset(
+    nFeature_RNA > min_feature &    ### Remove cells with < 250 detected genes
+      nFeature_RNA < max_feature &   ### Remove cells with > 2500 detected genes (could be doublets)
+      percent.mt < max_pc_mito  ### Remove cells with > 0.15 mito/total reads
+  )
+# filtering object based on hto count to zero
+DefaultAssay(obj) <- 'HTO'
+print(DefaultAssay(obj))
+obj <- subset(obj,subset = nCount_HTO != 0 )
+table(obj$orig.ident)
+cat('Demultiplexing cells based on HTO')
+x <- filterbarcodes(obj, maxi, marg.norm)
+scrna.hashtag <- x$obj_demux
+rowSums(data.frame(scrna.hashtag@assays$HTO@counts))
+# Save demux Seurat object
+#oiut_dir_sample <- paste(out_dir, sampleid, sep = "/")
+#dir.create(out_dir_sample, showWarnings = FALSE)
+DefaultAssay(scrna.hashtag) <- "RNA"
+out_dir <- paste(out_dir, sampleid, sep = '/')
+saveRDS(object = scrna.hashtag, file = paste(out_dir, paste(sampleid, "demux_minimal.rds", sep = "_"), sep = "/"))
+cat(x$barcodes)
+cat('visualize demultiplexing - Global classification results', sampleid)
+class_hto <- list()
+class_hto[[sampleid]] <- table(scrna.hashtag$HTO_classification)
+class_hto[[sampleid]]
+class_htoglobal <- list()
+class_htoglobal[[sampleid]] <- table(scrna.hashtag$HTO_classification.global)
+class_htoglobal[[sampleid]]
+cat('Group cells based on the max HTO signal')
+Idents(scrna.hashtag) <- "HTO_maxID"
+DefaultAssay(scrna.hashtag) <- "HTO"
+rdgplot <- scrna.hashtag %>%
+    RidgePlot(
+      assay    = "HTO",
+      features = rownames(scrna.hashtag),
+      ncol     = 2)
+png(filename = paste(out_dir_sample, paste0(sampleid,"_ridgeplot.png"), sep = '/'),
+    width = 12, height = 9, units = "in", res = 96)
+print(rdgplot)
+dev.off()
+### Calculate doublet rate
+scrna.hashtag@meta.data %>%
+    group_by(hash.ID) %>%
+    summarize(fract = n() / nrow(.))
+### Create bar graphs comparing cell count for each sample
+HTO_bars_1 <- scrna.hashtag@meta.data %>%
+    rownames_to_column("cell_id") %>%
+    ggplot(aes(hash.ID, fill = hash.ID)) +
+    geom_bar() +
+    labs(y = "Cell Count") +
+    cowplot::theme_cowplot() +
+    theme(
+      legend.position = "none",
+      axis.title.x    = element_blank(),
+      axis.text.x     = element_text(hjust = 1, angle = 45)
+    )
+### Create stacked bar graph showing fraction of cells
+HTO_bars_2 <- scrna.hashtag@meta.data %>%
+    rownames_to_column("cell_id") %>%
+    group_by(hash.ID) %>%
+    summarize(hash_frac = n() / nrow(.)) %>%
+    ggplot(aes("scRNA-seq", hash_frac, fill = hash.ID)) +
+    geom_bar(stat = "identity", color = "white", size = 0.5) +
+    labs(y = "Fraction of Cells", x= sampleid) +
+    cowplot::theme_cowplot() +
+    theme(
+      legend.title = element_blank(),
+      axis.title.x = element_blank(),
+      axis.text.x  = element_blank(),
+      axis.ticks.x = element_blank()
+    )
+  ### Combine plots
+  ### now add the title
+  # title <- ggdraw() +
+  #   draw_label(
+  #     sampleid,
+  #     fontface = 'bold',
+  #     x = 0,
+  #     hjust = 0
+  #   ) +
+  #   theme(
+  #     ### add margin on the left of the drawing canvas,
+  #     ### so title is aligned with left edge of first plot
+  #     plot.margin = margin(0, 0, 0, 7)
+  #   )
+x <- plot_grid(#title,
+    HTO_bars_1, HTO_bars_2,
+    nrow        = 1,
+    rel_widths  = c(0.5, 0.5),
+    rel_heights = c(1, 0.8)
+  )
+png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_bars.png"), sep = '/'),
+    width = 12, height = 9, units = "in", res = 96)
+print(x)
+dev.off()
+# cell count by hto classification
+scrna.hashtag@meta.data %>%
+    group_by(HTO_classification) %>%
+    summarize(fract = n() / nrow(.))
+### Create bar graphs comparing cell count by HTO classification
+HTO_bars_3 <- scrna.hashtag@meta.data %>%
+    rownames_to_column("cell_id") %>%
+    ggplot(aes(HTO_classification, fill = HTO_classification)) +
+    geom_bar() +
+    labs(y = "Cell Count") +
+    cowplot::theme_cowplot() +
+    theme(
+      legend.position = "none",
+      axis.title.x    = element_blank(),
+      axis.text.x     = element_text(hjust = 1, angle = 45)
+    )
+### Create stacked bar graph showing fraction of cells
+HTO_bars_4 <- scrna.hashtag@meta.data %>%
+    rownames_to_column("cell_id") %>%
+    group_by(HTO_classification) %>%
+    summarize(hash_frac = n() / nrow(.)) %>%
+    ggplot(aes("scRNA-seq", hash_frac, fill = HTO_classification)) +
+    geom_bar(stat = "identity", color = "white", size = 0.5) +
+    labs(y = "Fraction of Cells", x= sampleid) +
+    cowplot::theme_cowplot() +
+    theme(
+      legend.title = element_blank(),
+      axis.title.x = element_blank(),
+      axis.text.x  = element_blank(),
+      axis.ticks.x = element_blank()
+    )
+### Combine plots
+x <- plot_grid(#title,
+    HTO_bars_3, HTO_bars_4,
+    nrow        = 1,
+    rel_widths  = c(0.5, 0.5),
+    rel_heights = c(1, 0.8)
+  )
+png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_bars_classification.png"), sep = '/'),
+      width = 12, height = 9, units = "in", res = 96)
+print(x)
+dev.off()
+fs <- FeatureScatter(scrna.hashtag, feature1 = "hto_H1", feature2 = "hto_H2")
+png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_1_2_featurescatter.png"), sep = '/'),
+    width = 12, height = 9, units = "in", res = 96)
+print(fs)
+dev.off()
+Idents(scrna.hashtag) <- "HTO_classification.global"
+vnlp <- VlnPlot(scrna.hashtag, features = "nCount_RNA", pt.size = 0.1, log = TRUE)
+png(filename = paste(out_dir_sample, paste0(sampleid,"_ncount_RNA.png"), sep = '/'),
+      width = 12, height = 9, units = "in", res = 96)
+print(vnlp)
+dev.off()
+cat('remove negative cells from the object')
+scrna.hashtag.subset <- subset(scrna.hashtag, idents = "Negative", invert = TRUE)
+cat('Calculate a tSNE embedding of the HTO data')
+DefaultAssay(scrna.hashtag.subset) <- "HTO"
+scrna.hashtag.subset <- ScaleData(scrna.hashtag.subset, features = rownames(scrna.hashtag.subset),
+                                    verbose = FALSE)
+scrna.hashtag.subset <- RunPCA(scrna.hashtag.subset, features = rownames(scrna.hashtag.subset), approx = FALSE)
+# Calculate a distance matrix using HTO
+hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = scrna.hashtag.subset, assay = "HTO"))))
+scrna.hashtag.subset <- RunTSNE(scrna.hashtag.subset, dims = 1:length( hto_conditions[[sampleid]]), perplexity = 100,distance.matrix = hto.dist.mtx,
+                                check_duplicates = FALSE)
+Idents(scrna.hashtag.subset) <- 'HTO_classification'
+pdim_hto <- DimPlot(scrna.hashtag.subset,label=TRUE)
+png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_dimplot.png"), sep = '/'),
+      width = 12, height = 9, units = "in", res = 96)
+print(pdim_hto)
+dev.off()
+Idents(scrna.hashtag.subset) <- 'HTO_classification.global'
+pdim_hto_cl <- DimPlot(scrna.hashtag.subset,label=TRUE)
+png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_global_dimplot.png"), sep = '/'),
+      width = 12, height = 9, units = "in", res = 96)
+print(pdim_hto_cl)
+dev.off()
+htohm <- HTOHeatmap(scrna.hashtag.subset, assay = "HTO", ncells = 5000)
+png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_heatmap.png"), sep = '/'),
+      width = 12, height = 9, units = "in", res = 96)
+print(htohm)
+dev.off()
+## count all barcodes
+cat('count HTO reads')
+count_HTO_reads <- list()
+count_HTO_reads[[sampleid]] <- rowSums(data.frame(obj@assays$HTO@counts))
+count_HTO_reads[[sampleid]] 
+###cat('Extract the singlets')
+scrna.hashtag.singlet <- subset(scrna.hashtag.subset, idents = "Singlet")
+count_HTO_reads_singlet <- list()
+count_HTO_reads_singlet[[sampleid]] <- rowSums(data.frame(scrna.hashtag.singlet@assays$HTO@counts))
+cat('count HTO singlet reads\n')
+count_HTO_reads_singlet[[sampleid]]
+Idents(scrna.hashtag.singlet) <- "HTO_classification"
+count_HTO_reads_singlet_HTO <- list()
+count_HTO_reads_singlet_HTO_mean <- list()
+for (n in rownames(scrna.hashtag.singlet@assays$HTO@counts)){
+    x <- subset(scrna.hashtag.singlet, idents = n  )
+    cat('hto', n)
+    print(rowSums(data.frame(x@assays$HTO@counts)))
+    ## count of reads HTO in cell classified as specific HTO
+    count_HTO_reads_singlet_HTO[[sampleid]][n] <- rowSums(data.frame(x@assays$HTO@counts))[n]
+    ## MEAN read count per hashatg over cell classified as specific-HTO
+    count_HTO_reads_singlet_HTO_mean[[sampleid]][n] <- unname(rowSums(data.frame(x@assays$HTO@counts))[n])/unname(class_hto[[sampleid]][n])
+  }
+cat('count HTO reads, only in cell classified as singlet for specific HTO\n')
+count_HTO_reads_singlet_HTO
+cat('mean HTO reads, only in cell classified as singlet for specific HTO\n')
+count_HTO_reads_singlet_HTO_mean
+Idents(scrna.hashtag.subset) <- 'HTO_classification.global'
+### Projecting singlet identities on TSNE visualization
+dimpl_htocl <- DimPlot(scrna.hashtag.singlet, group.by = "HTO_classification",label=TRUE)
+png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_singlets.png"), sep = '/'),
+      width = 12, height = 9, units = "in", res = 96)
+dimpl_htocl
+dev.off()
+# dimpl_htoglobal <- DimPlot(scrna.hashtag.singlet, group.by = 'HTO_classification.global', label=TRUE)
+# png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_global_singlets.png"), sep = '/'),
+#       width = 12, height = 9, units = "in", res = 96)
+# print(dimpl_htoglobal)
+# dev.off()
+vnl_hto <- scrna.hashtag.singlet %>%
+    VlnPlot(
+      features = c( "nCount_HTO"),
+      ncol    = 1,
+      pt.size = 0.0005,
+      log     = T
+    )
+png(filename = paste(out_dir_sample, paste0(sampleid,"_violin_hto_singlets.png"), sep = '/'),
+      width = 12, height = 9, units = "in", res = 96)
+print(vnl_hto)
+dev.off()
+## Save Singlets cell and HTO classification in meta data
+DefaultAssay(scrna.hashtag) <- "RNA"
+scrna.hashtag[['HTO']] <- NULL # eliminate HTO assay
+saveRDS(object =  scrna.hashtag.singlet, file = paste(out_dir, paste(sampleid, "singlet_minimal.rds", sep = "_"), sep = "/"))