Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Squadrito_LiverTumor2022
Squadrito_LiverTumor2022_scRNAseqTRC
Commits
387c08bc
Commit
387c08bc
authored
Jul 26, 2023
by
Stefano Beretta
Browse files
Upload New File
parent
b855eb51
Changes
1
Hide whitespace changes
Inline
Side-by-side
scripts/0_PreProcessData.R
0 → 100644
View file @
387c08bc
library
(
SeuratObject
)
library
(
Seurat
)
library
(
harmony
)
library
(
DoubletFinder
)
library
(
grid
)
library
(
cowplot
)
library
(
gridExtra
)
library
(
RColorBrewer
)
library
(
patchwork
)
library
(
scales
)
library
(
ggplot2
)
library
(
openxlsx
)
library
(
SingleR
)
#########################
### Utility Functions ###
#########################
define_region
<-
function
(
row
,
col
){
viewport
(
layout.pos.row
=
row
,
layout.pos.col
=
col
)
}
########################
### General Settings ###
########################
# Working dir
wdir
<-
"squadrito_livertumor2022_scrnaseq"
# Raw data dir (download from GEO)
raw_dir
<-
"GEO_data"
# Results dir
out_dir
<-
paste
(
wdir
,
"results"
,
sep
=
"/"
)
dir.create
(
path
=
out_dir
,
showWarnings
=
F
)
samples
<-
c
(
"GG-11"
,
"GG-18"
,
"GG-22"
,
"GG-23"
,
"GG-25"
)
# cell-cycle scoring - NicheNet convertion of Seurat cc.genes.updated.2019 lists
s.genes
<-
c
(
"Mcm5"
,
"Pcna"
,
"Tyms"
,
"Fen1"
,
"Mcm7"
,
"Mcm4"
,
"Rrm1"
,
"Ung"
,
"Gins2"
,
"Mcm6"
,
"Cdca7"
,
"Dtl"
,
"Prim1"
,
"Uhrf1"
,
"Cenpu"
,
"Hells"
,
"Rfc2"
,
"Polr1b"
,
"Nasp"
,
"Rad51ap1"
,
"Gmnn"
,
"Wdr76"
,
"Slbp"
,
"Ccne2"
,
"Ubr7"
,
"Pold3"
,
"Msh2"
,
"Atad2"
,
"Rad51"
,
"Rrm2"
,
"Cdc45"
,
"Cdc6"
,
"Exo1"
,
"Tipin"
,
"Dscc1"
,
"Blm"
,
"Casp8ap2"
,
"Usp1"
,
"Clspn"
,
"Pola1"
,
"Chaf1b"
,
"Mrpl36"
,
"E2f8"
)
g2m.genes
<-
c
(
"Hmgb2"
,
"Cdk1"
,
"Nusap1"
,
"Ube2c"
,
"Birc5"
,
"Tpx2"
,
"Top2a"
,
"Ndc80"
,
"Cks2"
,
"Nuf2"
,
"Cks1b"
,
"Mki67"
,
"Tmpo"
,
"Cenpf"
,
"Tacc3"
,
"Pimreg"
,
"Smc4"
,
"Ccnb2"
,
"Ckap2l"
,
"Ckap2"
,
"Aurkb"
,
"Bub1"
,
"Kif11"
,
"Anp32e"
,
"Tubb4b"
,
"Gtse1"
,
"Kif20b"
,
"Hjurp"
,
"Cdca3"
,
"Jpt1"
,
"Cdc20"
,
"Ttk"
,
"Cdc25c"
,
"Kif2c"
,
"Rangap1"
,
"Ncapd2"
,
"Dlgap5"
,
"Cdca2"
,
"Cdca8"
,
"Ect2"
,
"Kif23"
,
"Hmmr"
,
"Aurka"
,
"Psrc1"
,
"Anln"
,
"Lbr"
,
"Ckap5"
,
"Cenpe"
,
"Ctcf"
,
"Nek2"
,
"G2e3"
,
"Gas2l3"
,
"Cbx5"
,
"Cenpa"
)
reg_vars
<-
c
(
"CC.Difference"
,
"percent.mt"
,
"nCount_RNA"
)
#############################
### Create Seurat Objects ###
#############################
for
(
sample
in
samples
)
{
print
(
sample
)
mtx_file
<-
paste
(
raw_dir
,
paste0
(
sample
,
"_matrix.mtx.gz"
),
sep
=
"/"
)
if
(
!
file.exists
(
mtx_file
))
{
stop
(
paste
(
"Cannot find"
,
sample
,
"MTX file:"
,
mtx_file
))
}
bcode_file
<-
paste
(
raw_dir
,
paste0
(
sample
,
"_barcodes.tsv.gz"
),
sep
=
"/"
)
if
(
!
file.exists
(
bcode_file
))
{
stop
(
paste
(
"Cannot find"
,
sample
,
"barcode file:"
,
bcode_file
))
}
feature_file
<-
paste
(
raw_dir
,
paste0
(
sample
,
"_features.tsv.gz"
),
sep
=
"/"
)
if
(
!
file.exists
(
feature_file
))
{
stop
(
paste
(
"Cannot find"
,
sample
,
"features file:"
,
feature_file
))
}
sample_mtx
<-
ReadMtx
(
mtx
=
mtx_file
,
cells
=
bcode_file
,
features
=
feature_file
)
colnames
(
x
=
sample_mtx
)
<-
paste
(
sample
,
colnames
(
x
=
sample_mtx
),
sep
=
'_'
)
# add id tag to cellnames
sample_obj
<-
CreateSeuratObject
(
counts
=
sample_mtx
,
project
=
sample
)
sample_dir
<-
paste
(
out_dir
,
sample
,
sep
=
"/"
)
dir.create
(
path
=
sample_dir
,
showWarnings
=
F
)
saveRDS
(
object
=
sample_obj
,
file
=
paste
(
sample_dir
,
paste0
(
sample
,
"_final.rds"
),
sep
=
"/"
))
}
####################################################
### Exploratory analysis for parameter selection ###
####################################################
# Perform DoubletFinder on each samples with different
# combinations of parameters
for
(
sample
in
samples
)
{
print
(
sample
)
sample_dir
<-
paste
(
out_dir
,
sample
,
sep
=
"/"
)
plot_dir
<-
paste
(
sample_dir
,
"plots"
,
sep
=
"/"
)
dir.create
(
path
=
plot_dir
,
showWarnings
=
F
)
obj
<-
readRDS
(
paste
(
sample_dir
,
paste0
(
sample
,
"_final.rds"
),
sep
=
"/"
))
obj
[[
"percent.mt"
]]
<-
PercentageFeatureSet
(
obj
,
pattern
=
"^mt-"
)
obj
<-
CellCycleScoring
(
object
=
obj
,
s.features
=
s.genes
,
g2m.features
=
g2m.genes
,
set.ident
=
TRUE
)
obj
@
meta.data
$
CC.Difference
<-
obj
@
meta.data
$
S.Score
-
obj
@
meta.data
$
G2M.Score
Idents
(
obj
)
<-
"orig.ident"
png
(
paste
(
plot_dir
,
paste0
(
sample
,
"_prefilter.png"
),
sep
=
"/"
),
width
=
1200
,
height
=
1000
)
print
(
VlnPlot
(
obj
,
features
=
c
(
"nFeature_RNA"
,
"nCount_RNA"
,
"percent.mt"
),
ncol
=
3
,
pt.size
=
0.1
))
dev.off
()
obj
@
misc
$
analysis_params
$
min.feature
<-
1000
obj
@
misc
$
analysis_params
$
max.feature
<-
6000
obj
@
misc
$
analysis_params
$
max.pc.mito
<-
20
obj
<-
subset
(
obj
,
subset
=
nFeature_RNA
>
obj
@
misc
$
analysis_params
$
min.feature
&
nFeature_RNA
<
obj
@
misc
$
analysis_params
$
max.feature
&
percent.mt
<
obj
@
misc
$
analysis_params
$
max.pc.mito
)
png
(
paste
(
plot_dir
,
paste0
(
sample
,
"_postfilter.png"
),
sep
=
"/"
),
width
=
1200
,
height
=
1000
)
print
(
VlnPlot
(
obj
,
features
=
c
(
"nFeature_RNA"
,
"nCount_RNA"
,
"percent.mt"
),
ncol
=
3
,
pt.size
=
0.1
))
dev.off
()
obj
<-
SCTransform
(
obj
,
vars.to.regress
=
c
(
"CC.Difference"
,
"percent.mt"
,
"nCount_RNA"
),
verbose
=
FALSE
)
DefaultAssay
(
object
=
obj
)
<-
"SCT"
obj
@
misc
$
analysis_params
$
max_pca
<-
50
obj
<-
RunPCA
(
object
=
obj
,
npcs
=
obj
@
misc
$
analysis_params
$
max_pca
)
png
(
paste
(
plot_dir
,
paste0
(
sample
,
"_PCA_elbowplot_SCT.png"
),
sep
=
"/"
),
width
=
1200
,
height
=
1000
)
print
(
ElbowPlot
(
object
=
obj
,
ndims
=
obj
@
misc
$
analysis_params
$
max_pca
,
reduction
=
"pca"
))
dev.off
()
obj
@
misc
$
analysis_params
$
num_pc
<-
30
obj
<-
RunUMAP
(
object
=
obj
,
dims
=
1
:
obj
@
misc
$
analysis_params
$
num_pc
)
## pK Identification (no ground-truth) --------------------------------------------------
sweep.res.obj
<-
paramSweep_v3
(
obj
,
PCs
=
1
:
obj
@
misc
$
analysis_params
$
num_pc
,
sct
=
TRUE
)
sweep.stats_obj
<-
summarizeSweep
(
sweep.res.obj
,
GT
=
FALSE
)
bcmvn_obj
<-
find.pK
(
sweep.stats_obj
)
dev.off
()
pK
<-
as.numeric
(
as.character
(
bcmvn_obj
$
pK
))
BCmetric
<-
bcmvn_obj
$
BCmetric
pK_choose
<-
pK
[
which
(
BCmetric
%in%
max
(
BCmetric
))]
png
(
paste
(
plot_dir
,
paste0
(
sample
,
"_DF_SCT_pK_selection.png"
),
sep
=
"/"
),
width
=
1200
,
height
=
1000
)
par
(
mar
=
c
(
5
,
4
,
4
,
8
)
+1
,
cex.main
=
1.2
,
font.main
=
2
)
plot
(
x
=
pK
,
y
=
BCmetric
,
pch
=
16
,
type
=
"b"
,
col
=
"blue"
,
lty
=
1
)
abline
(
v
=
pK_choose
,
lwd
=
2
,
col
=
'red'
,
lty
=
2
)
title
(
"The BCmvn distributions"
)
text
(
pK_choose
,
max
(
BCmetric
),
as.character
(
pK_choose
),
pos
=
4
,
col
=
"red"
)
dev.off
()
for
(
pK_select
in
pK
)
{
for
(
doub_pct
in
c
(
0.07
,
0.09
))
{
nExp
<-
round
(
ncol
(
obj
)
*
doub_pct
)
#expected % doublet - approx
obj
<-
doubletFinder_v3
(
obj
,
pN
=
0.25
,
pK
=
pK_select
,
nExp
=
nExp
,
PCs
=
1
:
obj
@
misc
$
analysis_params
$
num_pc
,
sct
=
TRUE
)
df_ident
<-
grep
(
"DF.classification"
,
colnames
(
obj
@
meta.data
),
value
=
T
)[
1
]
Idents
(
obj
)
<-
df_ident
new_cn
<-
paste0
(
"DF_SCT_nExp"
,
doub_pct
,
"_pK"
,
pK_select
)
colnames
(
obj
@
meta.data
)[
colnames
(
obj
@
meta.data
)
==
df_ident
]
<-
new_cn
str
(
obj
@
meta.data
)
dp_df
<-
DimPlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
group.by
=
new_cn
,
split.by
=
new_cn
,
ncol
=
2
)
+
NoLegend
()
dp_phase
<-
DimPlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
group.by
=
"Phase"
,
split.by
=
new_cn
,
ncol
=
3
)
fp1
<-
FeaturePlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
features
=
c
(
"Cd19"
),
split.by
=
new_cn
,
order
=
T
,
ncol
=
2
)
fp2
<-
FeaturePlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
features
=
c
(
"Cd3g"
),
split.by
=
new_cn
,
order
=
T
,
ncol
=
2
)
fp3
<-
FeaturePlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
features
=
c
(
"Itgam"
),
split.by
=
new_cn
,
order
=
T
,
ncol
=
2
)
fp4
<-
FeaturePlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
features
=
c
(
"Alas2"
),
split.by
=
new_cn
,
order
=
T
,
ncol
=
2
)
png
(
paste
(
plot_dir
,
paste0
(
"DF_SCT_nExp"
,
doub_pct
,
"_pK"
,
pK_select
,
".png"
),
sep
=
"/"
),
height
=
1500
,
width
=
1700
)
plot.new
()
grid.newpage
()
pushViewport
(
viewport
(
layout
=
grid.layout
(
3
,
2
)))
print
(
dp_phase
,
vp
=
define_region
(
row
=
1
,
col
=
1
))
print
(
dp_df
,
vp
=
define_region
(
row
=
1
,
col
=
2
))
print
(
fp1
,
vp
=
define_region
(
row
=
2
,
col
=
1
))
print
(
fp2
,
vp
=
define_region
(
row
=
2
,
col
=
2
))
print
(
fp3
,
vp
=
define_region
(
row
=
3
,
col
=
1
))
print
(
fp4
,
vp
=
define_region
(
row
=
3
,
col
=
2
))
dev.off
()
}
}
saveRDS
(
object
=
obj
,
paste
(
sample_dir
,
paste0
(
sample
,
"_afterDoubletFinder.rds"
),
sep
=
"/"
))
}
# Prepare some plots on DoubletFinder results
for
(
sample
in
samples
)
{
print
(
sample
)
sample_dir
<-
paste
(
out_dir
,
sample
,
sep
=
"/"
)
plot_dir
<-
paste
(
sample_dir
,
"plots-DBF"
,
sep
=
"/"
)
dir.create
(
plot_dir
,
showWarnings
=
F
)
obj
=
readRDS
(
paste
(
sample_dir
,
paste0
(
sample
,
"_afterDoubletFinder.rds"
),
sep
=
"/"
))
for
(
pK_select
in
c
(
0.005
,
seq
(
0.01
,
0.3
,
0.01
)))
{
for
(
doub_pct
in
c
(
0.05
,
0.07
,
0.09
))
{
nExp
<-
round
(
ncol
(
obj
)
*
doub_pct
)
new_cn
<-
paste0
(
"DF_SCT_nExp"
,
doub_pct
,
"_pK"
,
pK_select
)
subtitle
<-
list
(
"Full"
=
""
,
"Cd19"
=
""
,
"Cd3g"
=
""
,
"Itgam"
=
""
,
"Alas2"
=
""
)
num_title
<-
0
for
(
md_val
in
names
(
table
(
obj
[[
new_cn
]])))
{
if
(
num_title
==
0
)
{
num_title
<-
1
subtitle
[[
"Full"
]]
<-
paste
(
subtitle
[[
"Full"
]],
paste
(
md_val
,
table
(
obj
[[
new_cn
]])[[
md_val
]],
sep
=
" : "
),
sep
=
"\n"
)
for
(
gg
in
c
(
"Cd19"
,
"Cd3g"
,
"Itgam"
,
"Alas2"
))
{
gg_expr
<-
FetchData
(
object
=
obj
,
vars
=
gg
)
obj_filt
<-
obj
[,
which
(
x
=
gg_expr
>
0
)]
vv
<-
ifelse
(
md_val
%in%
names
(
table
(
obj_filt
[[
new_cn
]])),
table
(
obj_filt
[[
new_cn
]])[[
md_val
]],
0
)
subtitle
[[
gg
]]
<-
paste0
(
gg
,
">0 :\t"
,
subtitle
[[
gg
]],
paste
(
md_val
,
vv
,
sep
=
"= "
))
}
}
else
{
subtitle
[[
"Full"
]]
<-
paste
(
subtitle
[[
"Full"
]],
paste
(
md_val
,
table
(
obj
[[
new_cn
]])[[
md_val
]],
sep
=
" : "
),
sep
=
"\t|\t"
)
for
(
gg
in
c
(
"Cd19"
,
"Cd3g"
,
"Itgam"
,
"Alas2"
))
{
gg_expr
<-
FetchData
(
object
=
obj
,
vars
=
gg
)
obj_filt
<-
obj
[,
which
(
x
=
gg_expr
>
0
)]
vv
<-
ifelse
(
md_val
%in%
names
(
table
(
obj_filt
[[
new_cn
]])),
table
(
obj_filt
[[
new_cn
]])[[
md_val
]],
0
)
subtitle
[[
gg
]]
<-
paste
(
subtitle
[[
gg
]],
paste
(
md_val
,
vv
,
sep
=
"= "
),
sep
=
"\t| "
)
}
}
num_title
<-
num_title
+
1
}
Idents
(
obj
)
<-
new_cn
dp_df
<-
DimPlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
group.by
=
new_cn
,
split.by
=
new_cn
,
ncol
=
2
)
+
NoLegend
()
+
ggtitle
(
subtitle
[[
"Full"
]])
dp_phase
<-
DimPlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
group.by
=
"Phase"
,
split.by
=
new_cn
,
ncol
=
3
)
fp1
<-
plot_grid
(
ggdraw
()
+
draw_label
(
subtitle
[[
"Cd19"
]]),
FeaturePlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
features
=
c
(
"Cd19"
),
split.by
=
new_cn
,
order
=
T
,
ncol
=
2
),
ncol
=
1
,
rel_heights
=
c
(
0.1
,
1
))
fp2
<-
plot_grid
(
ggdraw
()
+
draw_label
(
subtitle
[[
"Cd3g"
]]),
FeaturePlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
features
=
c
(
"Cd3g"
),
split.by
=
new_cn
,
order
=
T
,
ncol
=
2
),
ncol
=
1
,
rel_heights
=
c
(
0.1
,
1
))
fp3
<-
plot_grid
(
ggdraw
()
+
draw_label
(
subtitle
[[
"Itgam"
]]),
FeaturePlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
features
=
c
(
"Itgam"
),
split.by
=
new_cn
,
order
=
T
,
ncol
=
2
),
ncol
=
1
,
rel_heights
=
c
(
0.1
,
1
))
fp4
<-
plot_grid
(
ggdraw
()
+
draw_label
(
subtitle
[[
"Alas2"
]]),
FeaturePlot
(
object
=
obj
,
reduction
=
"umap"
,
pt.size
=
1
,
features
=
c
(
"Alas2"
),
split.by
=
new_cn
,
order
=
T
,
ncol
=
2
),
ncol
=
1
,
rel_heights
=
c
(
0.1
,
1
))
png
(
paste
(
plot_dir
,
paste0
(
"DF_SCT_nExp"
,
doub_pct
,
"_pK"
,
pK_select
,
".png"
),
sep
=
"/"
),
height
=
1500
,
width
=
1700
)
plot.new
()
grid.newpage
()
pushViewport
(
viewport
(
layout
=
grid.layout
(
3
,
2
)))
print
(
dp_phase
,
vp
=
define_region
(
row
=
1
,
col
=
1
))
print
(
dp_df
,
vp
=
define_region
(
row
=
1
,
col
=
2
))
print
(
fp1
,
vp
=
define_region
(
row
=
2
,
col
=
1
))
print
(
fp2
,
vp
=
define_region
(
row
=
2
,
col
=
2
))
print
(
fp3
,
vp
=
define_region
(
row
=
3
,
col
=
1
))
print
(
fp4
,
vp
=
define_region
(
row
=
3
,
col
=
2
))
dev.off
()
}
}
}
# Select the best combinations of parameters of DoubletFinder
# for each sample
samples
<-
list
(
"GG-11"
=
c
(
"nExp"
=
0.05
,
"pK"
=
0.05
),
"GG-18"
=
c
(
"nExp"
=
0.07
,
"pK"
=
0.2
),
"GG-22"
=
c
(
"nExp"
=
0.09
,
"pK"
=
0.005
),
"GG-23"
=
c
(
"nExp"
=
0.07
,
"pK"
=
0.03
),
"GG-25"
=
c
(
"nExp"
=
0.07
,
"pK"
=
0.03
))
# Add metadata information on doublets in each sample
full_DoubFinder
<-
data.frame
()
for
(
sample
in
names
(
samples
))
{
print
(
sample
)
sample_dir
<-
paste
(
out_dir
,
sample
,
sep
=
"/"
)
obj
<-
readRDS
(
paste
(
sample_dir
,
paste0
(
sample
,
"_afterDoubletFinder.rds"
),
sep
=
"/"
))
nExp
<-
round
(
ncol
(
obj
)
*
samples
[[
sample
]][[
"nExp"
]])
#expected % doublet - approx
obj
@
misc
$
analysis_params
$
doubletFinder
<-
list
(
pN
=
0.25
,
pK
=
samples
[[
sample
]][[
"pK"
]],
nExp
=
nExp
,
nExpPerc
=
samples
[[
sample
]][[
"nExp"
]],
PCs
=
obj
@
misc
$
analysis_params
$
num_pc
)
new_cn
<-
paste0
(
"DF_SCT_nExp"
,
obj
@
misc
$
analysis_params
$
doubletFinder
$
nExpPerc
,
"_pK"
,
obj
@
misc
$
analysis_params
$
doubletFinder
$
pK
)
df_sel_classif
<-
obj
@
meta.data
[,
new_cn
,
drop
=
F
]
print
(
head
(
df_sel_classif
))
colnames
(
df_sel_classif
)
<-
"DoubFinderClassif"
full_DoubFinder
<-
rbind
(
full_DoubFinder
,
df_sel_classif
)
saveRDS
(
object
=
obj
,
paste
(
sample_dir
,
paste0
(
sample
,
"_afterDoubletFinder.rds"
),
sep
=
"/"
))
}
############################################
### Create Full (Combined) Seurat Object ###
############################################
samples_obj
<-
list
()
for
(
sample
in
names
(
samples
))
{
print
(
sample
)
sample_dir
<-
paste
(
out_dir
,
sample
,
sep
=
"/"
)
samples_obj
[[
sample
]]
<-
readRDS
(
file
=
paste
(
sample_dir
,
paste0
(
sample
,
"_final.rds"
),
sep
=
"/"
))
}
Full_obj
<-
merge
(
x
=
samples_obj
[[
1
]],
y
=
samples_obj
[
2
:
length
(
samples_obj
)])
dir.create
(
path
=
paste
(
out_dir
,
"Full_DBR"
,
sep
=
"/"
))
saveRDS
(
object
=
Full_obj
,
paste
(
out_dir
,
"Full_DBR"
,
"Full_DBR_final.rds"
,
sep
=
"/"
))
plot_dir
<-
paste
(
out_dir
,
"Full_DBR"
,
"plots"
,
sep
=
"/"
)
dir.create
(
path
=
plot_dir
,
showWarnings
=
F
)
# Assign MT genes
Full_obj
[[
"percent.mt"
]]
<-
PercentageFeatureSet
(
Full_obj
,
pattern
=
"^mt-"
)
# Identify tresholds visualy and apply tresholds
Idents
(
Full_obj
)
<-
"RNA_Condition"
png
(
filename
=
paste
(
plot_dir
,
"Full_Prefilter_Vlnplot.png"
,
sep
=
"/"
),
width
=
1500
,
height
=
1000
)
VlnPlot
(
Full_obj
,
features
=
reg_vars
,
ncol
=
3
,
pt.size
=
0
)
dev.off
()
png
(
filename
=
paste
(
plot_dir
,
"Full_Prefilter_ScatterMT.png"
,
sep
=
"/"
),
width
=
1200
,
height
=
800
)
FeatureScatter
(
Full_obj
,
feature1
=
"nCount_RNA"
,
feature2
=
"percent.mt"
)
dev.off
()
png
(
filename
=
paste
(
plot_dir
,
"Full_Prefilter_ScatterFeatures.png"
,
sep
=
"/"
),
width
=
1200
,
height
=
800
)
FeatureScatter
(
Full_obj
,
feature1
=
"nCount_RNA"
,
feature2
=
"nFeature_RNA"
)
dev.off
()
Full_obj
@
misc
$
analysis_params
$
min.feature
<-
1000
Full_obj
@
misc
$
analysis_params
$
max.feature
<-
6000
Full_obj
@
misc
$
analysis_params
$
max.pc.mito
<-
20
table
(
Full_obj
$
orig.ident
)
dim
(
Full_obj
)
Full_obj
<-
subset
(
Full_obj
,
subset
=
nFeature_RNA
>
Full_obj
@
misc
$
analysis_params
$
min.feature
&
nFeature_RNA
<
Full_obj
@
misc
$
analysis_params
$
max.feature
&
percent.mt
<
Full_obj
@
misc
$
analysis_params
$
max.pc.mito
)
table
(
Full_obj
$
orig.ident
)
dim
(
Full_obj
)
png
(
filename
=
paste
(
plot_dir
,
"Full_Postfilter_Vlnplot.png"
,
sep
=
"/"
),
width
=
1500
,
height
=
1000
)
VlnPlot
(
Full_obj
,
features
=
reg_vars
,
ncol
=
3
,
pt.size
=
0
)
dev.off
()
png
(
filename
=
paste
(
plot_dir
,
"Full_Postfilter_ScatterMT.png"
,
sep
=
"/"
),
width
=
1200
,
height
=
800
)
FeatureScatter
(
Full_obj
,
feature1
=
"nCount_RNA"
,
feature2
=
"percent.mt"
)
dev.off
()
png
(
filename
=
paste
(
plot_dir
,
"Full_Postfilter_ScatterFeatures.png"
,
sep
=
"/"
),
width
=
1200
,
height
=
800
)
FeatureScatter
(
Full_obj
,
feature1
=
"nCount_RNA"
,
feature2
=
"nFeature_RNA"
)
dev.off
()
## Import filtered object and process (SCTransform, runPCA)
# normalize, Scaledata are not computed, since we are not doing diff expression at this point
Full_obj
<-
SCTransform
(
Full_obj
,
vars.to.regress
=
reg_vars
,
verbose
=
TRUE
)
DefaultAssay
(
object
=
Full_obj
)
<-
"SCT"
Full_obj
@
misc
$
analysis_params
$
max_pca
<-
50
Full_obj
<-
RunPCA
(
object
=
Full_obj
,
npcs
=
Full_obj
@
misc
$
analysis_params
$
max_pca
)
png
(
filename
=
paste
(
plot_dir
,
"Full_ElbowPlot.png"
,
sep
=
"/"
),
width
=
1200
,
height
=
800
)
ElbowPlot
(
Full_obj
,
ndims
=
Full_obj
@
misc
$
analysis_params
$
max_pca
,
reduction
=
"pca"
)
dev.off
()
Full_obj
@
misc
$
analysis_params
$
num_pc
<-
25
## Import Doublets
full_DoubFinder_md
<-
full_DoubFinder
$
DoubFinderClassif
names
(
full_DoubFinder_md
)
<-
rownames
(
full_DoubFinder
)
Full_obj
<-
AddMetaData
(
object
=
Full_obj
,
metadata
=
full_DoubFinder_md
,
col.name
=
"DoubletFinder_classification"
)
colnames
(
Full_obj
@
meta.data
)
#####################
## Remove Doublets ##
#####################
Idents
(
Full_obj
)
<-
"DoubletFinder_classification"
table
(
Full_obj
$
orig.ident
,
Full_obj
$
DoubletFinder_classification
)
dim
(
Full_obj
)
Full_obj
<-
subset
(
Full_obj
,
idents
=
"Singlet"
)
table
(
Full_obj
$
orig.ident
)
dim
(
Full_obj
)
## Process (normalize, Scaledata, SCTransform, runPCA)
# Scale data will be used when commputing top marker (we think)
Full_obj
<-
NormalizeData
(
object
=
Full_obj
)
Full_obj
<-
ScaleData
(
object
=
Full_obj
,
vars.to.regress
=
reg_vars
,
display.progress
=
T
,
features
=
rownames
(
Full_obj
))
Full_obj
<-
SCTransform
(
Full_obj
,
vars.to.regress
=
reg_vars
,
verbose
=
TRUE
)
DefaultAssay
(
object
=
Full_obj
)
<-
"SCT"
Full_obj
<-
RunPCA
(
object
=
Full_obj
,
npcs
=
Full_obj
@
misc
$
analysis_params
$
max_pca
)
ElbowPlot
(
Full_obj
,
ndims
=
Full_obj
@
misc
$
analysis_params
$
max_pca
,
reduction
=
"pca"
)
Full_obj
<-
RunUMAP
(
Full_obj
,
dims
=
1
:
Full_obj
@
misc
$
analysis_params
$
num_pc
,
seed.use
=
1
,
reduction
=
"pca"
)
Full_obj
<-
FindNeighbors
(
Full_obj
,
reduction
=
"pca"
,
dims
=
1
:
Full_obj
@
misc
$
analysis_params
$
num_pc
,
force.recalc
=
T
,
n.trees
=
5
,
annoy.metric
=
"euclidean"
)
# Perform clustering on SCT
for
(
res
in
c
(
0.4
,
0.6
,
1.2
))
{
Full_obj
<-
FindClusters
(
Full_obj
,
resolution
=
res
)
}
for
(
res
in
c
(
"SCT_snn_res.0.4"
,
"SCT_snn_res.0.6"
,
"SCT_snn_res.1.2"
,
"orig.ident"
,
"Phase"
))
{
Idents
(
Full_obj
)
<-
res
png
(
filename
=
paste
(
plot_dir
,
paste0
(
"Full_DBR_"
,
res
,
".png"
),
sep
=
"/"
),
width
=
1200
,
height
=
800
)
print
(
DimPlot
(
object
=
Full_obj
,
pt.size
=
.7
,
label
=
T
))
dev.off
()
}
# Markers
for
(
res
in
c
(
"SCT_snn_res.0.4"
,
"SCT_snn_res.0.6"
,
"SCT_snn_res.1.2"
))
{
Idents
(
Full_obj
)
<-
res
marks
<-
FindAllMarkers
(
object
=
Full_obj
,
logfc.threshold
=
0.25
,
min.pct
=
0.1
,
return.thresh
=
0.01
)
wb_markers
<-
createWorkbook
()
for
(
clu
in
unique
(
marks
$
cluster
))
{
df_markers_clu
<-
subset
(
marks
,
cluster
==
clu
)
addWorksheet
(
wb_markers
,
clu
)
writeData
(
wb_markers
,
sheet
=
clu
,
x
=
df_markers_clu
)
}
saveWorkbook
(
wb
=
wb_markers
,
overwrite
=
T
,
file
=
paste
(
out_dir
,
"Full_DBR"
,
paste0
(
"Full_DBR_"
,
res
,
"_markers.xlsx"
),
sep
=
"/"
))
mname
<-
paste0
(
res
,
"_markers"
)
Full_obj
@
misc
[[
manme
]]
<-
marks
}
##########################
## Export Table results ##
##########################
# Counts
full_counts
<-
Full_obj
@
assays
$
RNA
@
counts
gz_out_counts
<-
gzfile
(
paste
(
out_dir
,
"Full_DBR"
,
"Full_DBR_final_DBR_labeled_counts.csv.gz"
,
sep
=
"/"
),
"w"
)
write.csv
(
full_counts
,
gz_out_counts
)
close
(
gz_out_counts
)
# Meta Data
full_md
<-
Full_obj
@
meta.data
gz_out_md
<-
gzfile
(
paste
(
out_dir
,
"Full_DBR"
,
"Full_DBR_final_DBR_labeled_metadata.csv.gz"
,
sep
=
"/"
),
"w"
)
write.csv
(
x
=
full_md
,
gz_out_md
)
close
(
gz_out_md
)
# UMAP Coordinates
full_umap
<-
Full_obj
@
reductions
$
umap
@
cell.embeddings
gz_out_umap
<-
gzfile
(
paste
(
out_dir
,
"Full_DBR"
,
"Full_DBR_final_DBR_labeled_umap.csv.gz"
,
sep
=
"/"
),
"w"
)
write.csv
(
x
=
full_umap
,
gz_out_umap
)
close
(
gz_out_umap
)
# Save final object
saveRDS
(
object
=
Full_obj
,
paste
(
out_dir
,
"Full_DBR"
,
"Full_DBR_final_DBR_labeled.rds"
,
sep
=
"/"
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment