Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Molteni NatMed2024
Commits
a988d531
Commit
a988d531
authored
Nov 22, 2024
by
Ivan Merelli
Browse files
Upload New File
parent
f4a3560c
Changes
1
Show whitespace changes
Inline
Side-by-side
fiumara_20241115_UploadScript.R
0 → 100644
View file @
a988d531
### upload on gitlab
# 1) load R packages
library
(
Seurat
)
library
(
GEOquery
)
library
(
limma
)
library
(
dplyr
)
library
(
data.table
)
library
(
clusterProfiler
)
library
(
ggplot2
)
library
(
RColorBrewer
)
library
(
qusage
)
library
(
UCell
)
# 2) define input and output paths
# set path to the working directory
path
<-
""
# 3) load scRNA-seq BM-object
## 3.1) restrict analysis to CD34+ clusters
obj
<-
readRDS
(
file
=
paste0
(
path
,
"BM_20240222.rds"
))
CD34p_clusters
<-
as.character
(
c
(
2
,
8
,
14
,
15
,
16
,
19
,
20
,
26
,
27
,
30
,
33
))
obj
$
seurat
<-
obj
$
RNA_snn_h.orig.ident_res.1.8
obj
<-
SetIdent
(
object
=
obj
,
value
=
"seurat"
)
obj
<-
subset
(
obj
,
subset
=
(
seurat
%in%
CD34p_clusters
))
saveRDS
(
object
=
obj
,
file
=
paste0
(
path
,
"BM_CD34p.rds"
))
## 3.2) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
"path"
]
rm
(
list
=
l
);
gc
()
# 4) load published scRNA-seq dataset
## 4.1) Ainciburu et al.
### 4.1.1) download tar file and metadata
destfolder
<-
paste0
(
path
,
"GSE180298/"
);
dir.create
(
path
=
destfolder
,
showWarnings
=
F
,
recursive
=
T
)
destfile
<-
paste0
(
destfolder
,
"GSE180298_RAW.tar"
)
download.file
(
url
=
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file"
,
destfile
=
destfile
)
metadata_files
<-
c
(
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Felderly%5Fmetadata%2Etxt%2Egz"
,
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Fmds%5Fmetadata%2Etxt%2Egz"
,
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Fyoung%5Fmetadata%2Etxt%2Egz"
)
download.file
(
url
=
metadata_files
,
destfile
=
paste0
(
destfolder
,
"GSE180298_"
,
c
(
"elderly"
,
"mds"
,
"young"
),
"_metadata.txt.gz"
))
### 4.1.2) untar file
untar
(
tarfile
=
destfile
,
exdir
=
paste0
(
destfolder
,
"GSE180298_RAW/"
))
files
<-
list.files
(
path
=
paste0
(
destfolder
,
"GSE180298_RAW/"
),
full.names
=
T
)
md
<-
list.files
(
path
=
destfolder
,
full.names
=
T
)
md_files
<-
md
[
grepl
(
md
,
pattern
=
"metadata"
)]
### 4.1.3) define seurat object
metadata
<-
c
()
for
(
i
in
seq_len
(
length
(
md_files
))){
gunzip
(
md_files
[
i
],
remove
=
FALSE
,
overwrite
=
TRUE
)
unzfile
<-
gsub
(
md_files
[
i
],
pattern
=
".gz"
,
replacement
=
""
)
temp
<-
read.delim
(
file
=
unzfile
,
header
=
T
,
sep
=
"\t"
)
metadata
<-
rbind
(
metadata
,
temp
)
}
metadata
<-
as.data.frame
(
metadata
)
metadata
$
orig.ident
<-
strsplit2
(
rownames
(
metadata
),
split
=
"\\_"
)[,
2
]
counts
<-
cell_counts
<-
samples
<-
c
()
for
(
i
in
seq_len
(
length
(
files
))){
split_filename
<-
strsplit2
(
files
[
i
],
split
=
"/"
)
sampleid
<-
strsplit2
(
split_filename
[,
ncol
(
split_filename
)],
split
=
"\\_"
)[,
2
]
print
(
paste0
(
"Processing file "
,
sampleid
,
" ("
,
i
,
" over "
,
length
(
files
),
")"
))
# load h5 object
temp
<-
Read10X_h5
(
filename
=
files
[
i
],
use.names
=
TRUE
,
unique.features
=
TRUE
)
coln
<-
paste0
(
strsplit2
(
colnames
(
temp
),
split
=
"-"
)[,
1
],
"_"
,
sampleid
)
colnames
(
temp
)
<-
coln
# store
temp_object
<-
CreateSeuratObject
(
counts
=
temp
)
counts_temp
<-
matrix
(
data
=
temp_object
@
assays
$
RNA
@
counts
,
ncol
=
ncol
(
temp_object
))
rownames
(
counts_temp
)
<-
rownames
(
temp_object
)
colnames
(
counts_temp
)
<-
colnames
(
temp_object
)
# store info
cell_counts
<-
c
(
cell_counts
,
ncol
(
counts_temp
))
samples
<-
c
(
samples
,
sampleid
)
if
(
is.null
(
nrow
(
counts
))){
counts
<-
counts_temp
gs
<-
rownames
(
counts
)
}
else
{
gs
<-
intersect
(
gs
,
rownames
(
counts_temp
))
counts
<-
cbind
(
counts
[
match
(
gs
,
rownames
(
counts
)),
],
counts_temp
[
match
(
gs
,
rownames
(
counts_temp
)),
])
}
}
names
(
cell_counts
)
<-
samples
md
<-
metadata
[
rownames
(
metadata
)
%in%
colnames
(
counts
),];
dim
(
md
)
m
<-
match
(
rownames
(
md
),
colnames
(
counts
));
table
(
is.na
(
m
))
counts
<-
counts
[,
m
]
GSE180298
<-
CreateSeuratObject
(
counts
=
counts
,
meta.data
=
metadata
)
### 4.1.4) select only elderly donors
GSE180298
<-
SetIdent
(
object
=
GSE180298
,
value
=
"orig.ident"
)
elderly_sampleid
<-
unique
(
GSE180298
$
orig.ident
)[
grepl
(
unique
(
GSE180298
$
orig.ident
),
pattern
=
"elderly"
)]
GSE180298
<-
subset
(
GSE180298
,
subset
=
(
orig.ident
%in%
elderly_sampleid
))
saveRDS
(
object
=
GSE180298
,
file
=
paste0
(
path
,
"GSE180298_elderly.rds"
));
gc
()
## 4.1.5) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
"path"
]
rm
(
list
=
l
);
gc
()
## 4.2) Wu et al.
### 4.2.1) download tar file
destfolder
<-
paste0
(
path
,
"GSE196052/"
);
dir.create
(
path
=
destfolder
,
showWarnings
=
F
,
recursive
=
T
)
destfile
<-
paste0
(
destfolder
,
"GSE196052_RAW.tar"
)
download.file
(
url
=
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE196052&format=file"
,
destfile
=
destfile
)
metadata_files
<-
c
(
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE196052&format=file&file=GSE196052%5FdataCount%5FCD34%2Ecsv%2Egz"
)
download.file
(
url
=
metadata_files
,
destfile
=
paste0
(
destfolder
,
"GSE196052_dataCount_CD34.csv.gz"
))
### 4.2.2) untar file
untar
(
tarfile
=
destfile
,
exdir
=
paste0
(
destfolder
,
"GSE196052_RAW/"
))
files_annotation
<-
list.files
(
path
=
paste0
(
destfolder
,
"GSE196052_RAW/"
),
full.names
=
T
)
files
<-
list.files
(
path
=
destfolder
,
full.names
=
T
)
### 4.2.3) define seurat object
md
<-
read.delim
(
file
=
files
[
grepl
(
files
,
pattern
=
"SraRun"
)],
header
=
T
,
sep
=
","
)
table
(
md
$
subject_id.status
,
md
$
tissue.cell_type
)
mdfiles
<-
files_annotation
[
grepl
(
files_annotation
,
pattern
=
"GSM"
)]
metadata
<-
c
()
for
(
i
in
seq_len
(
length
(
mdfiles
))){
x
<-
mdfiles
[
i
]
temp
<-
read.delim
(
gzfile
(
x
),
header
=
T
,
sep
=
","
)
metadata
<-
rbind
(
metadata
,
temp
)
}
f
<-
files
[
grepl
(
files
,
pattern
=
"CD34.csv.gz"
)]
counts_cd34
<-
fread
(
f
)
%>%
as.data.frame
()
rn
<-
as.character
(
counts_cd34
[,
1
])
counts_cd34
<-
counts_cd34
[,
-1
]
rownames
(
counts_cd34
)
<-
rn
cd34_cells
<-
colnames
(
counts_cd34
)
cd34_cells
<-
gsub
(
x
=
cd34_cells
,
pattern
=
"CD34_"
,
replacement
=
""
)
wu_annot
<-
read.csv
(
file
=
paste0
(
destfolder
,
"CD34_metaDatatSNECellType_ALiceManual.csv"
),
header
=
T
)
m
<-
match
(
cd34_cells
,
wu_annot
$
orig.ident
);
table
(
is.na
(
m
))
status
<-
wu_annot
$
group
[
m
]
sampleid
<-
wu_annot
$
subject
[
m
]
mdt
<-
data.frame
(
sampleid
,
status
)
rownames
(
mdt
)
<-
colnames
(
counts_cd34
)
GSE196052
<-
CreateSeuratObject
(
counts
=
counts_cd34
,
meta.data
=
mdt
)
GSE196052
<-
SetIdent
(
object
=
GSE196052
,
value
=
"orig.ident"
)
saveRDS
(
object
=
GSE196052
,
file
=
paste0
(
path
,
"GSE196052.rds"
))
## 4.2.4) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
"path"
]
rm
(
list
=
l
);
gc
()
# 5) Define single seurat object
## 5.1) load datasets
fiumara
<-
readRDS
(
paste0
(
path
,
"BM_CD34p.rds"
))
GSE180298
<-
readRDS
(
paste0
(
path
,
"GSE180298_elderly.rds"
))
GSE196052
<-
readRDS
(
paste0
(
path
,
"GSE196052.rds"
))
## 5.2) define common features
genes
<-
table
(
c
(
rownames
(
fiumara
),
rownames
(
GSE180298
),
rownames
(
GSE196052
)))
common_genes
<-
names
(
genes
)[
genes
==
3
];
length
(
common_genes
)
## 5.3) add sampleinfo
fiumara
$
source
<-
"fiumara"
fiumara
$
sample_info
<-
paste0
(
"fiumara_"
,
fiumara
$
orig.ident
)
GSE180298
$
source
<-
"GSE180298"
GSE180298
$
sample_info
<-
paste0
(
"GSE180298_"
,
GSE180298
$
orig.ident
)
GSE196052
$
source
<-
"GSE196052"
GSE196052
$
sample_info
<-
paste0
(
"GSE196052_"
,
GSE196052
$
sampleid
)
## 5.4) extract counts relative to these genes and define Seurat object
counts
<-
cbind
(
fiumara
@
assays
$
RNA
@
counts
[
common_genes
,],
GSE180298
@
assays
$
RNA
@
counts
[
common_genes
,],
GSE196052
@
assays
$
RNA
@
counts
[
common_genes
,])
vars
<-
c
(
"source"
,
"sample_info"
,
"nCount_RNA"
,
"nFeature_RNA"
)
md
<-
rbind
(
fiumara
@
meta.data
[,
vars
],
GSE180298
@
meta.data
[,
vars
],
GSE196052
@
meta.data
[,
vars
])
obj
<-
CreateSeuratObject
(
counts
=
counts
,
meta.data
=
md
)
obj
$
percent.mt
<-
(
colSums
(
obj
@
assays
$
RNA
@
counts
[
grepl
(
rownames
(
obj
),
pattern
=
"^MT-"
),])
/
colSums
(
obj
@
assays
$
RNA
@
counts
))
*
100
obj
$
percent.rb
<-
(
colSums
(
obj
@
assays
$
RNA
@
counts
[
grepl
(
rownames
(
obj
),
pattern
=
"^RPL"
),])
/
colSums
(
obj
@
assays
$
RNA
@
counts
))
*
100
saveRDS
(
object
=
obj
,
file
=
paste0
(
path
,
"combined.rds"
));
gc
()
## 5.5) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
c
(
"path"
,
"obj"
)]
rm
(
list
=
l
);
gc
()
# 6) removing low quality cells, normalization, scaling, integration and clustering
## 6.1) cell filtering
obj
$
keep
<-
(
obj
$
nFeature_RNA
>
200
)
&
(
obj
$
percent.mt
<
25
)
table
(
obj
$
keep
,
obj
$
source
)
obj
<-
subset
(
obj
,
subset
=
(
keep
%in%
TRUE
))
saveRDS
(
obj
,
file
=
paste0
(
path
,
"combined_filtered.rds"
))
## 6.2) normalization
varfeatures
<-
1000
obj
<-
NormalizeData
(
object
=
obj
)
obj
<-
FindVariableFeatures
(
obj
,
selection.method
=
"vst"
,
nfeatures
=
varfeatures
,
verbose
=
T
)
## 6.3) scaling
reg_vars
=
c
(
"percent.mt"
,
"nCount_RNA"
)
obj
<-
ScaleData
(
object
=
obj
,
vars.to.regress
=
reg_vars
,
display.progress
=
T
,
features
=
rownames
(
obj
))
saveRDS
(
obj
,
file
=
paste0
(
path
,
"combined_normscaled.rds"
))
## 6.4) dimensionality reduction
max_pca
<-
100
obj
<-
RunPCA
(
object
=
obj
,
features
=
VariableFeatures
(
object
=
obj
),
npcs
=
max_pca
,
reduction.name
=
"pca"
,
reduction.key
=
"PC_"
)
explvar
<-
((
obj
@
reductions
$
pca
@
stdev
^
2
)
/
sum
((
obj
@
reductions
$
pca
@
stdev
^
2
)))
*
100
delta
<-
explvar
-
c
(
explvar
[
-1
],
0
)
opt_delta
<-
length
(
delta
[
delta
>
1e-2
])
opt_explvar
<-
min
(
which
(
cumsum
(
explvar
)
>
80
))
opt
<-
min
(
opt_delta
,
opt_explvar
)
obj
<-
RunUMAP
(
object
=
obj
,
seed.use
=
123
,
reduction
=
"pca"
,
dims
=
1
:
opt
)
## 6.5) harmony dataset integration
integration.var
<-
c
(
"source"
,
"sample_info"
)
obj
<-
RunHarmony
(
object
=
obj
,
group.by.vars
=
integration.var
,
max.iter.harmony
=
30
,
plot_convergence
=
FALSE
,
reduction.save
=
"harmony"
)
obj
<-
RunUMAP
(
object
=
obj
,
seed.use
=
123
,
dims
=
1
:
opt
,
reduction
=
"harmony"
,
reduction.name
=
"harmony_umap"
,
reduction.key
=
"UMAPh_"
,
return.model
=
TRUE
)
## 6.6) find neighboring cells
obj
<-
FindNeighbors
(
object
=
obj
,
dims
=
1
:
opt
,
force.recalc
=
T
,
reduction
=
"harmony"
,
graph.name
=
c
(
"RNA_nn_h.iv"
,
"RNA_snn_h.iv"
))
## 6.7) cell clustering
clu_res
<-
seq
(
0.1
,
1
,
by
=
0.1
)
for
(
res
in
clu_res
){
obj
<-
FindClusters
(
object
=
obj
,
algorithm
=
1
,
resolution
=
as.numeric
(
res
),
graph.name
=
"RNA_snn_h.iv"
)
}
## 6.8) assign celltype to each cluster
obj
$
seurat
<-
obj
$
RNA_snn_h.iv_res.0.5
obj
$
seurat_annotation
<-
NA
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"0"
]
<-
"HSC/MPP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
c
(
"1"
,
"6"
,
"11"
)]
<-
"Mature Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"7"
]
<-
"VEXAS Ery/CMP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
c
(
"8"
,
"22"
)]
<-
"Immature Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"2"
]
<-
"MPP/CMP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"12"
]
<-
"PreBNK"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"3"
]
<-
"CMP/GMP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"4"
]
<-
"GP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"13"
]
<-
"MDP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"5"
]
<-
"MyeloLympho/CMP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
c
(
"9"
,
"21"
)]
<-
"VEXAS Immature Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"10"
]
<-
"Undefined"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"14"
]
<-
"BEM"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"16"
]
<-
"Monocyte Progenitors"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
c
(
"15"
,
"19"
)]
<-
"MLP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"17"
]
<-
"MEP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"18"
]
<-
"VEXAS Mature Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"20"
]
<-
"VEXAS MPP-Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"23"
]
<-
"VEXAS Myelo/CMP"
annotated_cols
<-
c
(
"HSC/MPP"
=
'#FB0207'
,
"MyeloLympho/CMP"
=
'#c6dbef'
,
"MPP/CMP"
=
"#7fcdbb"
,
"CMP/GMP"
=
'#9ecae1'
,
"Monocyte Progenitors"
=
'#66CCFF'
,
"MDP"
=
'#0F80FF'
,
"GP"
=
"#08519c"
,
"PreBNK"
=
'#118040'
,
"MLP"
=
"#FECC66"
,
"BEM"
=
"#bdbdbd"
,
"MEP"
=
'#f768a1'
,
"Immature Ery"
=
'#B17DFC'
,
"Mature Ery"
=
"#800080"
,
"VEXAS MPP-Ery"
=
'#fde0dd'
,
"VEXAS Ery/CMP"
=
'#fcc5c0'
,
"VEXAS Immature Ery"
=
'#fa9fb5'
,
"VEXAS Mature Ery"
=
"#dd3497"
,
"VEXAS Myelo/CMP"
=
"#ccebc5"
,
"Undefined"
=
'#d9d9d9'
)
levs
<-
names
(
annotated_cols
)
obj
$
seurat_annotation
<-
factor
(
obj
$
seurat_annotation
,
levels
=
levs
)
## 6.9) define status and vexas-mutation variables
obj
$
cell_barcode
<-
strsplit2
(
rownames
(
obj
@
meta.data
),
split
=
"\\_"
)[,
2
]
GSE196052_annot
<-
read.csv
(
file
=
paste0
(
path
,
"GSE196052/CD34_metaDatatSNECellType_ALiceManual.csv"
),
header
=
T
)
GSE196052_cases
<-
GSE196052_annot
[
GSE196052_annot
$
group
%in%
"PT"
,]
### 6.9.1) status
obj
$
status
<-
"HD"
obj
$
status
[(
obj
$
source
%in%
"GSE196052"
)
&
(
obj
$
cell_barcode
%in%
GSE196052_cases
$
orig.ident
)]
<-
"PT"
obj
$
status
[(
obj
$
source
%in%
"fiumara"
)]
<-
"PT"
table
(
obj
$
source
,
obj
$
status
)
### 6.9.2) VEXAS mutation
GSE196052_pt2upn
<-
paste0
(
"GSE196052_PT"
,
1
:
9
)
names
(
GSE196052_pt2upn
)
<-
paste0
(
"GSE196052_UPN"
,
c
(
6
,
11
,
1
,
10
,
13
,
14
,
15
,
16
,
17
))
obj
$
sample_info_upn
<-
obj
$
sample_info
for
(
i
in
seq_len
(
length
(
GSE196052_pt2upn
))){
obj
$
sample_info_upn
[
obj
$
sample_info_upn
%in%
GSE196052_pt2upn
[
i
]]
<-
names
(
GSE196052_pt2upn
)[
i
]
}
table
(
obj
$
source
,
obj
$
sample_info_upn
)
table
(
obj
$
sample_info
,
obj
$
sample_info_upn
)
obj
$
vexas_mutation
<-
"HD"
obj
$
vexas_mutation
[
obj
$
sample_info_upn
%in%
c
(
paste0
(
"fiumara_BM-0"
,
1
),
paste0
(
"GSE196052_UPN"
,
c
(
1
,
10
,
11
,
13
,
16
,
17
)))]
<-
"THR"
obj
$
vexas_mutation
[
obj
$
sample_info_upn
%in%
c
(
paste0
(
"fiumara_BM-0"
,
c
(
2
,
3
,
8
)),
paste0
(
"GSE196052_UPN"
,
c
(
6
)))]
<-
"VAL"
obj
$
vexas_mutation
[
obj
$
sample_info_upn
%in%
c
(
paste0
(
"fiumara_BM-0"
,
c
(
4
,
9
)),
paste0
(
"GSE196052_UPN"
,
c
(
14
,
15
)))]
<-
"LEU"
saveRDS
(
obj
,
file
=
paste0
(
path
,
"combined_annotated.rds"
))
table
(
obj
$
status
,
obj
$
vexas_mutation
)
table
(
obj
$
source
,
obj
$
vexas_mutation
)
table
(
obj
$
vexas_mutation
,
obj
$
sample_info_upn
)
## 6.10) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
c
(
"path"
,
"obj"
)]
rm
(
list
=
l
);
gc
()
# 7) Celltype-wise VEXAS vs HD (DE and GSEA)
outpath
<-
paste0
(
path
,
"DE_GSEA/"
);
dir.create
(
path
=
outpath
,
showWarnings
=
F
,
recursive
=
T
)
## 7.1) define function to run clusterProfiler GSEA
gsea_run
<-
function
(
marks
,
gmt
){
# load gmt file
gmt.obj
<-
clusterProfiler
::
read.gmt
(
gmt
)
# order DE results by logFC
genes
<-
marks
$
avg_log2FC
names
(
genes
)
<-
marks
$
gene_name
genes
<-
genes
[
order
(
genes
,
decreasing
=
T
)]
genes
<-
genes
[
!
duplicated
(
names
(
genes
))]
# run GSEA
gsea
<-
GSEA
(
geneList
=
genes
,
TERM2GENE
=
gmt.obj
,
nPerm
=
10000
,
pvalueCutoff
=
1
)
return
(
gsea
)
}
## 7.2) download hallmarks gene sets
hallmarks_gsea
<-
c
(
"https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.4/h.all.v7.4.symbols.gmt"
)
download.file
(
url
=
hallmarks_gsea
,
destfile
=
paste0
(
outpath
,
"h.all.v7.4.symbols.gmt"
))
## 7.3) VEXAS vs HD
### 7.3.1) DE analysis
annclusters
<-
names
(
annotated_cols
)
mincells
<-
10
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
temp
<-
subset
(
obj
,
subset
=
(
seurat_annotation
%in%
cl
))
temp
<-
SetIdent
(
object
=
temp
,
value
=
"status"
)
if
(
all
(
table
(
temp
$
status
)
>=
mincells
)){
de
<-
FindMarkers
(
temp
,
ident.1
=
"PT"
,
ident.2
=
"HD"
,
test.use
=
"wilcox"
,
min.pct
=
0.1
,
logfc.threshold
=
0
)
marks
<-
de
[
order
(
de
$
p_val_adj
,
decreasing
=
F
),]
marks
$
gene_name
<-
rownames
(
marks
)
write.table
(
x
=
marks
,
file
=
paste0
(
outpath
,
"de_"
,
cl_id
,
".txt"
),
sep
=
'\t'
,
row.names
=
F
)
}
}
### 7.3.2) GSEA Hallmarks
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
defile
<-
paste0
(
outpath
,
'de_'
,
cl_id
,
".txt"
)
if
(
file.exists
(
defile
)){
marks
<-
read.table
(
file
=
defile
,
sep
=
"\t"
,
header
=
T
)
gsea
<-
gsea_run
(
marks
,
gmt
=
hallmarks_gsea
)
write.table
(
x
=
gsea
,
file
=
paste0
(
outpath
,
'gsea_'
,
cl_id
,
".txt"
),
sep
=
'\t'
,
row.names
=
F
)
}
}
## 7.4) VEXAS_MUT vs HD
### 7.4.1) DE analysis
annclusters
<-
names
(
annotated_cols
)
mincells
<-
10
mut
<-
c
(
"LEU"
,
"THR"
,
"VAL"
)
for
(
m
in
mut
){
levs
<-
c
(
m
,
"HD"
)
sub
<-
subset
(
obj
,
subset
=
(
vexas_mutation
%in%
levs
))
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
temp
<-
subset
(
sub
,
subset
=
(
seurat_annotation
%in%
cl
))
temp
<-
SetIdent
(
object
=
temp
,
value
=
"vexas_mutation"
)
if
(
all
(
table
(
temp
$
vexas_mutation
)
>=
mincells
)){
de
<-
FindMarkers
(
temp
,
ident.1
=
m
,
ident.2
=
"HD"
,
test.use
=
"wilcox"
,
min.pct
=
0.1
,
logfc.threshold
=
0
)
marks
<-
de
[
order
(
de
$
p_val_adj
,
decreasing
=
F
),]
marks
$
gene_name
<-
rownames
(
marks
)
write.table
(
x
=
marks
,
file
=
paste0
(
outpath
,
"de_"
,
cl_id
,
"_"
,
m
,
"vHD.txt"
),
sep
=
'\t'
,
row.names
=
F
)
}
}
}
### 7.4.2) GSEA Hallmarks
for
(
m
in
mut
){
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
defile
<-
paste0
(
outpath
,
"de_"
,
cl_id
,
"_"
,
m
,
"vHD.txt"
)
if
(
file.exists
(
defile
)){
marks
<-
read.table
(
file
=
defile
,
sep
=
"\t"
,
header
=
T
)
gsea
<-
gsea_run
(
marks
,
gmt
=
hallmarks_gsea
)
write.table
(
x
=
gsea
,
file
=
paste0
(
outpath
,
'gsea_'
,
cl_id
,
"_"
,
m
,
"vHD.txt"
),
sep
=
'\t'
,
row.names
=
F
)
}
}
}
## 7.5) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
c
(
"path"
,
"obj"
)]
rm
(
list
=
l
);
gc
()
# 8) UCell module scores and wilcoxon test
## 8.1) load marker gene set
vexas_50
<-
qusage
::
read.gmt
(
file
=
paste0
(
path
,
"xenograft_signatures/custom_vexas_50.gmt"
))
vexas_signature
<-
vexas_50
[[
1
]]
## 8.2) compute UCell module scores
names
(
vexas_signature
)
<-
"VEXAS_Xenograft_sig50"
ncol
<-
ncol
(
obj
@
meta.data
)
obj
<-
AddModuleScore_UCell
(
obj
,
features
=
vexas_signature
)
colnames
(
obj
@
meta.data
)
<-
c
(
colnames
(
obj
@
meta.data
)[
seq_len
(
ncol
)],
names
(
vexas_signature
))
## 8.3) Celltype-wise wilcoxon test: VEXAS vs HD
x
<-
melt
(
data
=
obj
@
meta.data
,
id.vars
=
c
(
"status"
,
"seurat_annotation"
),
measure.vars
=
c
(
"VEXAS_Xenograft_sig50"
))
w
<-
x
%>%
dplyr
::
group_by
(
variable
,
seurat_annotation
)
%>%
dplyr
::
summarise
(
pvalue
=
wilcox.test
(
x
=
value
[
status
==
"PT"
],
y
=
value
[
status
==
"HD"
])
$
p.value
)
w
$
p.adjust
<-
p.adjust
(
p
=
w
$
pvalue
)
w
<-
w
[
order
(
w
$
p.adjust
,
decreasing
=
F
),]
write.table
(
x
=
w
,
file
=
paste0
(
path
,
"xenograft_signatures/wilcoxon.txt"
),
sep
=
"\t"
,
row.names
=
F
,
col.names
=
T
,
quote
=
F
)
# 9) figures
figpath
<-
paste0
(
path
,
"figures/"
);
dir.create
(
path
=
figpath
,
showWarnings
=
F
,
recursive
=
T
)
## 9.1) figure 5d: Annotated UMAP
obj
<-
SetIdent
(
object
=
obj
,
value
=
"seurat_annotation"
)
g
<-
DimPlot
(
obj
,
reduction
=
"harmony_umap"
,
label.box
=
T
,
label
=
T
,
label.color
=
T
,
label.size
=
2
)
+
scale_color_manual
(
values
=
annotated_cols
,
limits
=
levs
)
+
scale_fill_manual
(
values
=
annotated_cols
,
limits
=
levs
)
+
ggtitle
(
"Cluster Annotation"
)
+
theme
(
plot.title
=
element_text
(
hjust
=
0.5
),
legend.position
=
"right"
,
legend.text
=
element_text
(
size
=
7
))
ggsave
(
g
,
filename
=
paste0
(
figpath
,
"Fig5D_UMAP_AnnotatedClusters.png"
),
width
=
10
,
height
=
7
,
limitsize
=
FALSE
)
## 9.2) figure 5e/5f: GSEA Hallmarks
### 9.2.1) load results
res_VEXASvHD
<-
c
()
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
file
<-
paste0
(
outpath
,
'gsea_'
,
cl_id
,
".txt"
)
if
(
file.exists
(
file
)){
x
<-
read.delim
(
file
=
file
,
header
=
T
)
x
<-
x
[
order
(
x
$
p.adjust
,
-
x
$
NES
),]
res_VEXASvHD
<-
rbind
(
res_VEXASvHD
,
data.frame
(
x
,
celltype
=
cl
,
test
=
"VEXASvOLD"
))
}
}
res_MUTvHD
<-
c
()
for
(
m
in
mut
){
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
file
<-
paste0
(
outpath
,
'gsea_'
,
cl_id
,
"_"
,
m
,
"vHD.txt"
)
if
(
file.exists
(
file
)){
id
<-
paste0
(
m
,
"vHD"
)
x
<-
read.delim
(
file
=
file
,
header
=
T
)
x
<-
x
[
order
(
x
$
p.adjust
,
-
x
$
NES
),]
res_MUTvHD
<-
rbind
(
res_MUTvHD
,
data.frame
(
x
,
celltype
=
cl
,
test
=
id
))
}
}
}
res
<-
rbind
(
res_VEXASvHD
,
res_MUTvHD
)
res
$
significance_asterisk
<-
""
res
$
significance_asterisk
[
res
$
p.adjust
<
0.05
]
<-
"*"
res
$
significance_asterisk
[
res
$
p.adjust
<
0.01
]
<-
"**"
res
$
significance_asterisk
[
res
$
p.adjust
<
0.001
]
<-
"***"
res
<-
res
%>%
tidyr
::
complete
(
ID
,
celltype
,
test
)
%>%
as.data.frame
()
res
$
ID
<-
gsub
(
x
=
res
$
ID
,
pattern
=
"HALLMARK_"
,
replacement
=
""
)
### 9.2.2) plot
g
<-
res
%>%
ggplot
()
+
theme_bw
()
+
facet_grid
(
.
~
test
)
+
geom_tile
(
aes
(
x
=
celltype
,
y
=
ID
,
fill
=
NES
))
+
geom_text
(
aes
(
x
=
celltype
,
y
=
ID
,
label
=
significance_asterisk
),
size
=
2
)
+
theme
(
plot.title
=
element_text
(
hjust
=
0.5
,
size
=
10
),
axis.text.x
=
element_text
(
angle
=
45
,
,
vjust
=
1
,
hjust
=
1
),
legend.position
=
"top"
,
strip.background
=
element_rect
(
fill
=
"white"
))
+
ylab
(
""
)
+
xlab
(
""
)
+
scale_fill_gradientn
(
colours
=
colorRampPalette
(
rev
(
brewer.pal
(
11
,
"RdBu"
)))(
100
),
limits
=
c
(
-4
,
4
),
na.value
=
"grey"
)
ggsave
(
g
,
filename
=
paste0
(
figpath
,
"Fig5EF_GSEA_Celltype_Hallmarks.png"
),
width
=
length
(
unique
(
res
$
celltype
))
*
0.3
*
length
(
unique
(
res_complete
$
test
)),
height
=
length
(
unique
(
res
$
ID
))
*
0.2
,
limitsize
=
FALSE
)
## 9.3) figure 5g: Monocyte xenograft signature CD34+
### 9.3.1) load wilcoxon test results
w
<-
read.table
(
file
=
paste0
(
path
,
"xenograft_signatures/wilcoxon.txt"
),
sep
=
"\t"
,
header
=
T
)
w
$
significance_asterisk
<-
""
w
$
significance_asterisk
[
w
$
p.adjust
<
0.05
]
<-
"*"
w
$
significance_asterisk
[
w
$
p.adjust
<
0.01
]
<-
"**"
w
$
significance_asterisk
[
w
$
p.adjust
<
0.001
]
<-
"***"
### 9.3.2) plot
g
<-
obj
@
meta.data
%>%
ggplot
()
+
theme_classic
()
+
geom_violin
(
aes
(
x
=
seurat_annotation
,
y
=
VEXAS_Xenograft_sig50
,
fill
=
status
),
scale
=
"width"
)
+
geom_text
(
data
=
w
,
aes
(
x
=
seurat_annotation
,
y
=
0.7
,
label
=
significance_asterisk
))
+
theme
(
plot.title
=
element_text
(
hjust
=
0.5
,
size
=
10
),
axis.text.x
=
element_text
(
angle
=
90
,
vjust
=
0.5
,
hjust
=
1
),
legend.position
=
"right"
)
+
ylab
(
"UCell Module score"
)
+
xlab
(
""
)
+
ggtitle
(
"VEXAS xenograft signature (n = 50)"
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
,
vjust
=
1
,
hjust
=
1
))
+
scale_fill_manual
(
values
=
adjustcolor
(
col
=
c
(
"#F8766D"
,
"#02818a"
),
alpha.f
=
0.8
),
name
=
""
)
ggsave
(
g
,
filename
=
paste0
(
figpath
,
"Fig5G_UCell_MonocyteXenograft_WilcoxonTest.png"
),
width
=
length
(
unique
(
obj
$
seurat_annotation
))
*
5
*
0.1
,
height
=
5
,
limitsize
=
FALSE
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment