Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
DellaVolpe_GeneEditing
DellaVolpe_scRNA-seq
Commits
6b545956
Commit
6b545956
authored
Oct 14, 2024
by
Teresa Tavella
Browse files
Upload New File
parent
c5f8183d
Changes
1
Hide whitespace changes
Inline
Side-by-side
HTO_demux.R
0 → 100644
View file @
6b545956
library
(
scales
)
library
(
viridis
)
library
(
Seurat
)
library
(
stringr
)
library
(
tibble
)
library
(
dplyr
)
library
(
patchwork
)
library
(
ggplot2
)
library
(
tidyr
)
library
(
RColorBrewer
)
library
(
openxlsx
)
library
(
cowplot
)
args
<-
commandArgs
(
trailingOnly
=
TRUE
)
#########################################################################################
# KEEP ONLY BARCODE USED IN EXPERIMENT FROM FILE, NORMALIZE HTO AND DEMUX
#########################################################################################
filterbarcodes
<-
function
(
object_seurat
,
i
,
marg.norm
)
{
cat
(
'## FILTER BACRODES'
)
counts
<-
GetAssayData
(
object_seurat
,
slot
=
"counts"
,
assay
=
"HTO"
)
cat
(
'number of cells for each HTO '
)
cat
(
rowSums
(
data.frame
(
counts
)))
hto.filter
<-
str_split
(
hto_conditions
[
hto_conditions
$
sample
==
sampleid
,
'HTO'
],
','
)[[
1
]]
#hto_conditions[[sampleid]]
counts.sub
<-
counts
[
hto.filter
,]
##filter and update number of cells per sample
hto.assay
<-
CreateSeuratObject
(
counts
=
counts.sub
)
object_seurat
@
assays
$
HTO
<-
NULL
### to use if you want to subset HTO
object_seurat
@
assays
$
HTO
<-
CreateAssayObject
(
counts
=
counts.sub
)
### ### to use if you want to subset HTO
object_seurat
@
assays
$
HTO
@
key
<-
'hto_'
cat
(
'Normalize HTO data'
)
scrna.hashtag
<-
NormalizeData
(
object_seurat
,
assay
=
"HTO"
,
normalization.method
=
"CLR"
,
margin
=
marg.norm
)
scrna.hashtag
<-
HTODemux
(
scrna.hashtag
,
assay
=
"HTO"
,
positive.quantile
=
i
,
seed
=
42
)
## rerturn list of barcodes and obj demultiplexed
return
(
list
(
sampleid
,
obj_demux
=
scrna.hashtag
,
barcodes
=
rownames
(
scrna.hashtag
@
assays
$
HTO
)))
}
#########################################################################################
# CHOOSE POSITIVE QUANTILE PARAMETER
#########################################################################################
eval_pq
<-
function
(
out_dir
,
obj.combined
,
sampleid
,
library_id
,
min_feature
,
max_feature
,
mito.prefix
,
max_pc_mito
,
marg.norm
)
{
eval_pq_out_list
<-
list
()
#cat(sampleid)
eval_pq_out
<-
tibble
()
#obj.combined = readRDS(paste(folder,sampleid,paste0(sampleid,'_minimal.rds'), sep='/'))
DefaultAssay
(
obj.combined
)
<-
'RNA'
print
(
DefaultAssay
(
obj.combined
))
cat
(
'filter cells'
,
mito.prefix
)
# Compute mito %
obj.combined
[[
"percent.mt"
]]
<-
PercentageFeatureSet
(
obj.combined
,
assay
=
'RNA'
,
pattern
=
mito.prefix
)
obj
<-
obj.combined
%>%
subset
(
nFeature_RNA
>
min_feature
&
### Remove cells with < 250 detected genes
nFeature_RNA
<
max_feature
&
### Remove cells with > 2500 detected genes (could be doublets)
percent.mt
<
max_pc_mito
### Remove cells with > 0.15 mito/total reads
)
# filtering object based on hto count to zero
DefaultAssay
(
obj
)
<-
'HTO'
print
(
DefaultAssay
(
obj
))
obj
<-
subset
(
obj
,
subset
=
nCount_HTO
!=
0
)
cat
(
'find pq'
)
for
(
i
in
seq
(
from
=
0.9
,
to
=
0.999999999
,
by
=
0.005
))
{
cat
(
' demultiplexing cells based on HTO'
)
x
<-
filterbarcodes
(
obj
,
i
,
marg.norm
)
object
<-
x
$
obj_demux
cat
(
'save pq'
)
HTO_classification
<-
as.data.frame.matrix
(
table
(
object
@
meta.data
$
HTO_maxID
,
object
@
meta.data
$
HTO_classification.global
))
%>%
summarise
(
max.singlet
=
sum
(
Singlet
))
%>%
mutate
(
threshold
=
i
)
eval_pq_out
<-
bind_rows
(
eval_pq_out
,
HTO_classification
)
%>%
mutate
(
library
=
sampleid
)
}
eval_pq_out_list
<-
rbind
(
eval_pq_out_list
,
eval_pq_out
)
result_list
<-
list
(
"eval_pq_out"
=
eval_pq_out_list
)
return
(
result_list
)
}
obj_minimal
<-
args
[
1
]
# Sample minimal object
obj_combined
<-
readRDS
(
obj_minimal
)
out_dir
<-
args
[
2
]
# Path to output result folder (results)
sampleid
<-
args
[
3
]
#Sample id
#min_cells <- as.numeric(args[4])
min_feature
<-
as.numeric
(
args
[
4
])
max_feature
<-
as.numeric
(
args
[
5
])
mito.prefix
=
args
[
6
]
max_pc_mito
<-
as.numeric
(
args
[
7
])
marg.norm
<-
as.numeric
(
args
[
8
])
hto
<-
args
[
9
]
cat
(
'########## mito prefix #######'
)
cat
(
mito.prefix
)
hto_conditions
<-
read.table
(
hto
,
header
=
TRUE
,
stringsAsFactors
=
FALSE
)
# Table with HTO list per sample
## Evaluate positive quantile parameter
eval_pq_lib1
<-
eval_pq
(
out_dir
,
obj_combined
,
sampleid
,
hto_conditions
,
min_feature
,
max_feature
,
mito.prefix
,
max_pc_mito
,
marg.norm
)
## Plot number of Singlet detected
pq
<-
eval_pq_lib1
$
eval_pq_out
%>%
ggplot
(
aes
(
x
=
threshold
,
y
=
max.singlet
,
fill
=
library
,
color
=
library
))
+
geom_line
()
+
geom_point
()
+
scale_color_brewer
(
palette
=
"Set1"
,
direction
=
-1
)
+
labs
(
x
=
"Postive Quantile"
,
y
=
"# Singlet cells"
,
fill
=
""
,
color
=
""
)
+
theme_bw
()
out_dir_sample
<-
paste
(
out_dir
,
sampleid
,
'plots'
,
sep
=
"/"
)
dir.create
(
out_dir_sample
,
showWarnings
=
FALSE
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_max_threshod_pq_margin_"
,
as.character
(
marg.norm
),
'.png'
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
pq
)
dev.off
()
## save threshold
dx
<-
data.frame
(
eval_pq_lib1
$
eval_pq_out
)
dx
$
library
<-
as.factor
(
dx
$
library
)
max_threshold
<-
merge
(
aggregate
(
max.singlet
~
library
,
data
=
dx
,
max
),
dx
,
all.x
=
T
)
write.table
(
max_threshold
,
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_max_threshod_pq_margin_"
,
as.character
(
marg.norm
),
".txt"
),
sep
=
'/'
))
# #####################################################################################
# # After computing best pq with higher number of singlets perform demux with max value
# #####################################################################################
cat
(
sampleid
)
maxi
<-
max_threshold
[
max_threshold
$
library
==
sampleid
,
'threshold'
]
# sample minimal object
obj
<-
readRDS
(
obj_minimal
)
### Add mitochondrial percentage to meta.data table
DefaultAssay
(
obj
)
<-
'RNA'
print
(
DefaultAssay
(
obj
))
# Compute mito %
obj
[[
"percent.mt"
]]
<-
PercentageFeatureSet
(
obj
,
assay
=
'RNA'
,
pattern
=
mito.prefix
)
obj
<-
obj
%>%
subset
(
nFeature_RNA
>
min_feature
&
### Remove cells with < 250 detected genes
nFeature_RNA
<
max_feature
&
### Remove cells with > 2500 detected genes (could be doublets)
percent.mt
<
max_pc_mito
### Remove cells with > 0.15 mito/total reads
)
# filtering object based on hto count to zero
DefaultAssay
(
obj
)
<-
'HTO'
print
(
DefaultAssay
(
obj
))
obj
<-
subset
(
obj
,
subset
=
nCount_HTO
!=
0
)
table
(
obj
$
orig.ident
)
cat
(
'Demultiplexing cells based on HTO'
)
x
<-
filterbarcodes
(
obj
,
maxi
,
marg.norm
)
scrna.hashtag
<-
x
$
obj_demux
rowSums
(
data.frame
(
scrna.hashtag
@
assays
$
HTO
@
counts
))
# Save demux Seurat object
#oiut_dir_sample <- paste(out_dir, sampleid, sep = "/")
#dir.create(out_dir_sample, showWarnings = FALSE)
DefaultAssay
(
scrna.hashtag
)
<-
"RNA"
out_dir
<-
paste
(
out_dir
,
sampleid
,
sep
=
'/'
)
saveRDS
(
object
=
scrna.hashtag
,
file
=
paste
(
out_dir
,
paste
(
sampleid
,
"demux_minimal.rds"
,
sep
=
"_"
),
sep
=
"/"
))
cat
(
x
$
barcodes
)
cat
(
'visualize demultiplexing - Global classification results'
,
sampleid
)
class_hto
<-
list
()
class_hto
[[
sampleid
]]
<-
table
(
scrna.hashtag
$
HTO_classification
)
class_hto
[[
sampleid
]]
class_htoglobal
<-
list
()
class_htoglobal
[[
sampleid
]]
<-
table
(
scrna.hashtag
$
HTO_classification.global
)
class_htoglobal
[[
sampleid
]]
cat
(
'Group cells based on the max HTO signal'
)
Idents
(
scrna.hashtag
)
<-
"HTO_maxID"
DefaultAssay
(
scrna.hashtag
)
<-
"HTO"
rdgplot
<-
scrna.hashtag
%>%
RidgePlot
(
assay
=
"HTO"
,
features
=
rownames
(
scrna.hashtag
),
ncol
=
2
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_ridgeplot.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
rdgplot
)
dev.off
()
### Calculate doublet rate
scrna.hashtag
@
meta.data
%>%
group_by
(
hash.ID
)
%>%
summarize
(
fract
=
n
()
/
nrow
(
.
))
### Create bar graphs comparing cell count for each sample
HTO_bars_1
<-
scrna.hashtag
@
meta.data
%>%
rownames_to_column
(
"cell_id"
)
%>%
ggplot
(
aes
(
hash.ID
,
fill
=
hash.ID
))
+
geom_bar
()
+
labs
(
y
=
"Cell Count"
)
+
cowplot
::
theme_cowplot
()
+
theme
(
legend.position
=
"none"
,
axis.title.x
=
element_blank
(),
axis.text.x
=
element_text
(
hjust
=
1
,
angle
=
45
)
)
### Create stacked bar graph showing fraction of cells
HTO_bars_2
<-
scrna.hashtag
@
meta.data
%>%
rownames_to_column
(
"cell_id"
)
%>%
group_by
(
hash.ID
)
%>%
summarize
(
hash_frac
=
n
()
/
nrow
(
.
))
%>%
ggplot
(
aes
(
"scRNA-seq"
,
hash_frac
,
fill
=
hash.ID
))
+
geom_bar
(
stat
=
"identity"
,
color
=
"white"
,
size
=
0.5
)
+
labs
(
y
=
"Fraction of Cells"
,
x
=
sampleid
)
+
cowplot
::
theme_cowplot
()
+
theme
(
legend.title
=
element_blank
(),
axis.title.x
=
element_blank
(),
axis.text.x
=
element_blank
(),
axis.ticks.x
=
element_blank
()
)
### Combine plots
### now add the title
# title <- ggdraw() +
# draw_label(
# sampleid,
# fontface = 'bold',
# x = 0,
# hjust = 0
# ) +
# theme(
# ### add margin on the left of the drawing canvas,
# ### so title is aligned with left edge of first plot
# plot.margin = margin(0, 0, 0, 7)
# )
x
<-
plot_grid
(
#title,
HTO_bars_1
,
HTO_bars_2
,
nrow
=
1
,
rel_widths
=
c
(
0.5
,
0.5
),
rel_heights
=
c
(
1
,
0.8
)
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_HTO_bars.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
x
)
dev.off
()
# cell count by hto classification
scrna.hashtag
@
meta.data
%>%
group_by
(
HTO_classification
)
%>%
summarize
(
fract
=
n
()
/
nrow
(
.
))
### Create bar graphs comparing cell count by HTO classification
HTO_bars_3
<-
scrna.hashtag
@
meta.data
%>%
rownames_to_column
(
"cell_id"
)
%>%
ggplot
(
aes
(
HTO_classification
,
fill
=
HTO_classification
))
+
geom_bar
()
+
labs
(
y
=
"Cell Count"
)
+
cowplot
::
theme_cowplot
()
+
theme
(
legend.position
=
"none"
,
axis.title.x
=
element_blank
(),
axis.text.x
=
element_text
(
hjust
=
1
,
angle
=
45
)
)
### Create stacked bar graph showing fraction of cells
HTO_bars_4
<-
scrna.hashtag
@
meta.data
%>%
rownames_to_column
(
"cell_id"
)
%>%
group_by
(
HTO_classification
)
%>%
summarize
(
hash_frac
=
n
()
/
nrow
(
.
))
%>%
ggplot
(
aes
(
"scRNA-seq"
,
hash_frac
,
fill
=
HTO_classification
))
+
geom_bar
(
stat
=
"identity"
,
color
=
"white"
,
size
=
0.5
)
+
labs
(
y
=
"Fraction of Cells"
,
x
=
sampleid
)
+
cowplot
::
theme_cowplot
()
+
theme
(
legend.title
=
element_blank
(),
axis.title.x
=
element_blank
(),
axis.text.x
=
element_blank
(),
axis.ticks.x
=
element_blank
()
)
### Combine plots
x
<-
plot_grid
(
#title,
HTO_bars_3
,
HTO_bars_4
,
nrow
=
1
,
rel_widths
=
c
(
0.5
,
0.5
),
rel_heights
=
c
(
1
,
0.8
)
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_HTO_bars_classification.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
x
)
dev.off
()
fs
<-
FeatureScatter
(
scrna.hashtag
,
feature1
=
"hto_H1"
,
feature2
=
"hto_H2"
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_HTO_1_2_featurescatter.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
fs
)
dev.off
()
Idents
(
scrna.hashtag
)
<-
"HTO_classification.global"
vnlp
<-
VlnPlot
(
scrna.hashtag
,
features
=
"nCount_RNA"
,
pt.size
=
0.1
,
log
=
TRUE
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_ncount_RNA.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
vnlp
)
dev.off
()
cat
(
'remove negative cells from the object'
)
scrna.hashtag.subset
<-
subset
(
scrna.hashtag
,
idents
=
"Negative"
,
invert
=
TRUE
)
cat
(
'Calculate a tSNE embedding of the HTO data'
)
DefaultAssay
(
scrna.hashtag.subset
)
<-
"HTO"
scrna.hashtag.subset
<-
ScaleData
(
scrna.hashtag.subset
,
features
=
rownames
(
scrna.hashtag.subset
),
verbose
=
FALSE
)
scrna.hashtag.subset
<-
RunPCA
(
scrna.hashtag.subset
,
features
=
rownames
(
scrna.hashtag.subset
),
approx
=
FALSE
)
# Calculate a distance matrix using HTO
hto.dist.mtx
<-
as.matrix
(
dist
(
t
(
GetAssayData
(
object
=
scrna.hashtag.subset
,
assay
=
"HTO"
))))
scrna.hashtag.subset
<-
RunTSNE
(
scrna.hashtag.subset
,
dims
=
1
:
length
(
hto_conditions
[[
sampleid
]]),
perplexity
=
100
,
distance.matrix
=
hto.dist.mtx
,
check_duplicates
=
FALSE
)
Idents
(
scrna.hashtag.subset
)
<-
'HTO_classification'
pdim_hto
<-
DimPlot
(
scrna.hashtag.subset
,
label
=
TRUE
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_HTO_classification_dimplot.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
pdim_hto
)
dev.off
()
Idents
(
scrna.hashtag.subset
)
<-
'HTO_classification.global'
pdim_hto_cl
<-
DimPlot
(
scrna.hashtag.subset
,
label
=
TRUE
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_HTO_classification_global_dimplot.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
pdim_hto_cl
)
dev.off
()
htohm
<-
HTOHeatmap
(
scrna.hashtag.subset
,
assay
=
"HTO"
,
ncells
=
5000
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_HTO_heatmap.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
htohm
)
dev.off
()
## count all barcodes
cat
(
'count HTO reads'
)
count_HTO_reads
<-
list
()
count_HTO_reads
[[
sampleid
]]
<-
rowSums
(
data.frame
(
obj
@
assays
$
HTO
@
counts
))
count_HTO_reads
[[
sampleid
]]
###cat('Extract the singlets')
scrna.hashtag.singlet
<-
subset
(
scrna.hashtag.subset
,
idents
=
"Singlet"
)
count_HTO_reads_singlet
<-
list
()
count_HTO_reads_singlet
[[
sampleid
]]
<-
rowSums
(
data.frame
(
scrna.hashtag.singlet
@
assays
$
HTO
@
counts
))
cat
(
'count HTO singlet reads\n'
)
count_HTO_reads_singlet
[[
sampleid
]]
Idents
(
scrna.hashtag.singlet
)
<-
"HTO_classification"
count_HTO_reads_singlet_HTO
<-
list
()
count_HTO_reads_singlet_HTO_mean
<-
list
()
for
(
n
in
rownames
(
scrna.hashtag.singlet
@
assays
$
HTO
@
counts
)){
x
<-
subset
(
scrna.hashtag.singlet
,
idents
=
n
)
cat
(
'hto'
,
n
)
print
(
rowSums
(
data.frame
(
x
@
assays
$
HTO
@
counts
)))
## count of reads HTO in cell classified as specific HTO
count_HTO_reads_singlet_HTO
[[
sampleid
]][
n
]
<-
rowSums
(
data.frame
(
x
@
assays
$
HTO
@
counts
))[
n
]
## MEAN read count per hashatg over cell classified as specific-HTO
count_HTO_reads_singlet_HTO_mean
[[
sampleid
]][
n
]
<-
unname
(
rowSums
(
data.frame
(
x
@
assays
$
HTO
@
counts
))[
n
])
/
unname
(
class_hto
[[
sampleid
]][
n
])
}
cat
(
'count HTO reads, only in cell classified as singlet for specific HTO\n'
)
count_HTO_reads_singlet_HTO
cat
(
'mean HTO reads, only in cell classified as singlet for specific HTO\n'
)
count_HTO_reads_singlet_HTO_mean
Idents
(
scrna.hashtag.subset
)
<-
'HTO_classification.global'
### Projecting singlet identities on TSNE visualization
dimpl_htocl
<-
DimPlot
(
scrna.hashtag.singlet
,
group.by
=
"HTO_classification"
,
label
=
TRUE
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_HTO_classification_singlets.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
dimpl_htocl
dev.off
()
# dimpl_htoglobal <- DimPlot(scrna.hashtag.singlet, group.by = 'HTO_classification.global', label=TRUE)
# png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_global_singlets.png"), sep = '/'),
# width = 12, height = 9, units = "in", res = 96)
# print(dimpl_htoglobal)
# dev.off()
vnl_hto
<-
scrna.hashtag.singlet
%>%
VlnPlot
(
features
=
c
(
"nCount_HTO"
),
ncol
=
1
,
pt.size
=
0.0005
,
log
=
T
)
png
(
filename
=
paste
(
out_dir_sample
,
paste0
(
sampleid
,
"_violin_hto_singlets.png"
),
sep
=
'/'
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
96
)
print
(
vnl_hto
)
dev.off
()
## Save Singlets cell and HTO classification in meta data
DefaultAssay
(
scrna.hashtag
)
<-
"RNA"
scrna.hashtag
[[
'HTO'
]]
<-
NULL
# eliminate HTO assay
saveRDS
(
object
=
scrna.hashtag.singlet
,
file
=
paste
(
out_dir
,
paste
(
sampleid
,
"singlet_minimal.rds"
,
sep
=
"_"
),
sep
=
"/"
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment