Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Scarfo HEC2023
Commits
f0c3cffa
Commit
f0c3cffa
authored
Dec 12, 2023
by
Monah Abou Alezz
Browse files
revisions_update_v1
parent
7dcd8057
Changes
3
Show whitespace changes
Inline
Side-by-side
BulkRNAseq_2/ED_Figure7A_plots.R
0 → 100644
View file @
f0c3cffa
suppressPackageStartupMessages
(
library
(
DESeq2
))
suppressPackageStartupMessages
(
library
(
stringr
))
suppressPackageStartupMessages
(
library
(
tidyverse
))
suppressPackageStartupMessages
(
library
(
dplyr
))
suppressPackageStartupMessages
(
library
(
pheatmap
))
suppressPackageStartupMessages
(
library
(
RColorBrewer
))
## Input featureCounts file
fcount.file
<-
"Scarfo_HEC2023/BulkRNAseq_2/featureCounts_results_corrected.txt"
## Contrasts to perform
contr
<-
"DLL4_plus:CD32_plus"
contr.list
<-
unlist
(
strsplit
(
contr
,
","
))
## Design Formula
formula.str
<-
"condition"
design.str
<-
gsub
(
","
,
"+"
,
formula.str
)
num_cond
<-
str_count
(
formula.str
,
","
)
+
1
design.formula
<-
as.formula
(
paste
(
"~"
,
design.str
,
sep
=
""
))
## Read featureCounts results
if
(
!
file.exists
(
fcount.file
))
{
stop
(
"Error: featureCounts file not found."
)
}
else
{
read.counts
<-
read.table
(
fcount.file
,
header
=
TRUE
,
check.names
=
FALSE
)
%>%
column_to_rownames
(
var
=
"Geneid"
)
%>%
select
(
-
c
(
1
:
5
))
}
orig.names
<-
names
(
read.counts
)
ds
<-
list
()
cond
<-
list
()
for
(
i
in
seq
(
orig.names
[
1
:
length
(
orig.names
)]))
{
ds
[
i
]
<-
unlist
(
strsplit
(
orig.names
[
i
],
":"
))[
2
]
cond
[
i
]
<-
unlist
(
strsplit
(
orig.names
[
i
],
":"
))[
3
]
}
ds
<-
unlist
(
ds
)
cond
<-
unlist
(
cond
)
## Samples - Conditions
sample_info
<-
as.data.frame
(
str_split_fixed
(
cond
,
","
,
num_cond
))
colnames
(
sample_info
)
<-
str_split_fixed
(
formula.str
,
","
,
num_cond
)
rownames
(
sample_info
)
<-
ds
colnames
(
read.counts
)
<-
ds
sample_info
$
condition
<-
factor
(
sample_info
$
condition
)
last_condition
<-
str_split_fixed
(
formula.str
,
","
,
num_cond
)[
num_cond
]
## thresholds
adj.pval.thr
<-
0.05
logfc.thr
<-
0
## DESeq2
DESeq.ds
<-
DESeqDataSetFromMatrix
(
countData
=
read.counts
,
colData
=
sample_info
,
design
=
design.formula
)
## Remove genes without any counts
DESeq.ds
<-
DESeq.ds
[
rowSums
(
counts
(
DESeq.ds
))
>
0
,
]
## RunDGE
DESeq.ds
<-
DESeq
(
DESeq.ds
)
## obtain regularized log-transformed values
DESeq.rlog
<-
rlogTransformation
(
DESeq.ds
,
blind
=
TRUE
)
ctrs
<-
unlist
(
strsplit
(
contr.list
[
1
],
":"
))
c1
<-
ctrs
[
1
]
c2
<-
ctrs
[
2
]
DGE.results
<-
results
(
DESeq.ds
,
pAdjustMethod
=
"BH"
,
independentFiltering
=
TRUE
,
contrast
=
c
(
last_condition
,
c1
,
c2
),
alpha
=
adj.pval.thr
)
# Change format
DGE.results.annot
<-
as.data.frame
(
DGE.results
)
%>%
dplyr
::
select
(
log2FoldChange
,
lfcSE
,
baseMean
,
pvalue
,
padj
)
colnames
(
DGE.results.annot
)
<-
c
(
"logFC"
,
"lfcSE"
,
"baseMean"
,
"PValue"
,
"FDR"
)
## sort the results according to the adjusted p-value
DGE.results.sorted
<-
DGE.results.annot
[
order
(
DGE.results.annot
$
FDR
),
]
## Subset for only significant genes
DGE.results.sig
<-
subset
(
DGE.results.sorted
,
FDR
<
adj.pval.thr
&
abs
(
logFC
)
>
logfc.thr
)
DGEgenes
<-
rownames
(
DGE.results.sig
)
# ED_Fig7A -----------------------------------------------------------------
gmt.obj
<-
read.gmt
(
"Scarfo_HEC2023/BulkRNAseq_1/c5.all.v7.2.symbols.gmt"
)
organism.db
<-
org.Hs.eg.db
# Remove genes with no FDR
DGE.results.annot
<-
DGE.results.annot
[
!
is.na
(
DGE.results.annot
$
FDR
),
]
gene_univ
<-
row.names
(
DGE.results.annot
)
# get positive and negative gene names
gene.res.top
<-
subset
(
DGE.results.annot
,
FDR
<
adj.pval.thr
&
abs
(
logFC
)
>
logfc.thr
)
gene.res.top.pos
<-
subset
(
gene.res.top
,
logFC
>
0
)
gene.res.top.neg
<-
subset
(
gene.res.top
,
logFC
<
0
)
contr.enricher.neg
<-
enricher
(
row.names
(
gene.res.top.neg
),
universe
=
gene_univ
,
pvalueCutoff
=
adj.pval.thr
,
TERM2GENE
=
gmt.obj
)
contr.enricher.res
<-
as.data.frame
(
contr.enricher.neg
)
toi
<-
c
(
"GO_RESPONSE_TO_BMP"
,
"GO_REGULATION_OF_BMP_SIGNALING_PATHWAY"
)
GO_terms_int
<-
contr.enricher.res
[
contr.enricher.res
$
Description
%in%
toi
,
]
# reformatting the ID column
tmp
<-
as.data.frame
(
str_split_fixed
(
GO_terms_int
$
ID
,
"GO_"
,
2
))
tmp2
<-
as.data.frame
(
str_replace_all
(
tmp
$
V2
,
"_"
,
" "
))
colnames
(
tmp2
)
<-
"id"
tmp2
$
Terms
<-
str_to_title
(
tmp2
$
id
)
GO_terms_final
<-
cbind
(
GO_terms_int
,
tmp2
)
GO_terms_final
<-
GO_terms_final
[,
c
(
11
,
9
,
6
)]
colnames
(
GO_terms_final
)[
3
]
<-
"Adjusted p-value"
ggplot
(
data
=
GO_terms_final
,
aes
(
x
=
Terms
,
y
=
Count
,
fill
=
`Adjusted p-value`
))
+
geom_bar
(
stat
=
"identity"
,
width
=
0.7
)
+
scale_x_discrete
(
limits
=
GO_terms_final
$
Terms
)
+
scale_fill_gradient
(
low
=
"red"
,
high
=
"blue"
)
+
coord_flip
()
+
ggtitle
(
"Downregulated in DLL4+ vs CD32+"
)
+
theme_bw
()
+
theme
(
aspect.ratio
=
0.8
)
README.md
View file @
f0c3cffa
...
@@ -65,7 +65,9 @@ scRNAseq analysis was performed with [Seurat](https://satijalab.org/seurat/). Be
...
@@ -65,7 +65,9 @@ scRNAseq analysis was performed with [Seurat](https://satijalab.org/seurat/). Be
-
[6.1] Clusters related markers
-
[6.1] Clusters related markers
-
[6.2] Intracluster differential expression analysis according to comparison of interest
-
[6.2] Intracluster differential expression analysis according to comparison of interest
Pseudotime analysis was performed using
[
Monocle3
](
http://cole-trapnell-lab.github.io/monocle3/
)
Pseudotime analysis was performed using
[
Monocle3
](
http://cole-trapnell-lab.github.io/monocle3/
)
and PAGA-tree from
[
dynverse
](
https://dynverse.org/
)
In-silico perturbation analysis was performed using
[
CellOracle
](
https://github.com/morris-lab/CellOracle
)
Input files for scRNAseq analysis are available in the following
[
link
](
https://www.dropbox.com/sh/83dxrxqer8cl081/AAC8zALRuYRGh1mEe4lZuaHZa?dl=0
)
Input files for scRNAseq analysis are available in the following
[
link
](
https://www.dropbox.com/sh/83dxrxqer8cl081/AAC8zALRuYRGh1mEe4lZuaHZa?dl=0
)
...
...
scRNAseq/ED_Figure7F_plots.R
0 → 100644
View file @
f0c3cffa
suppressPackageStartupMessages
(
library
(
Seurat
))
suppressPackageStartupMessages
(
library
(
dplyr
))
suppressPackageStartupMessages
(
library
(
reshape2
))
suppressPackageStartupMessages
(
library
(
ggplot2
))
suppressPackageStartupMessages
(
library
(
clusterProfiler
))
## load Seurat data
sample
<-
readRDS
(
"Scarfo_HEC2023/scRNAseq/WNTd_HE.rds"
)
## Seurat markers
markers
<-
FindAllMarkers
(
sample
,
only.pos
=
T
,
min.pct
=
0.1
,
logfc.threshold
=
0.25
)
## define RUNX1 clusters
runx1.clusters
<-
subset
(
markers
,
markers
$
gene
==
"RUNX1"
)
runx1.clusters
<-
droplevels
(
runx1.clusters
$
cluster
)
## subset RUNX1 clusters into new object
runx1
<-
subset
(
sample
,
idents
=
runx1.clusters
)
## differential markers
degs_clu11_vs_clu0_1_2
<-
FindMarkers
(
object
=
runx1
,
ident.1
=
11
,
ident.2
=
c
(
0
,
1
,
2
),
only.pos
=
FALSE
,
min.pct
=
0
,
test.use
=
"wilcox"
,
logfc.threshold
=
0
,
min.cells.group
=
0
)
## thresholds
adj.pval.thr
<-
0.05
logfc.thr
<-
0
# Remove genes with no FDR
markers
<-
degs_clu11_vs_clu0_1_2
[
!
is.na
(
degs_clu11_vs_clu0_1_2
$
p_val_adj
),
]
gene_univ
<-
row.names
(
markers
)
# get positive and negative gene names
gene.res.top
<-
subset
(
markers
,
p_val_adj
<
adj.pval.thr
&
abs
(
avg_logFC
)
>
logfc.thr
)
gene.res.top.pos
<-
subset
(
gene.res.top
,
avg_logFC
>
0
)
gene.res.top.neg
<-
subset
(
gene.res.top
,
avg_logFC
<
0
)
organism.db
<-
org.Hs.eg.db
contr.enrich_path
<-
enrichPathway
(
gene
=
gene.res.top.neg
,
organism
=
organism.db
,
universe
=
gene_univ
,
pvalueCutoff
=
adj.pval.thr
)
contr.enrich_path.symb
<-
setReadable
(
contr.enrich_path
,
org_db
,
keyType
=
"ENTREZID"
)
toi
<-
c
(
"RHO GTPases activate IQGAPs"
,
"RHO GTPases Activate ROCKs"
)
GO_terms_int
<-
contr.enrich_path.symb
[
contr.enrich_path.symb
$
Description
%in%
toi
,
]
colnames
(
GO_terms_int
)[
2
]
<-
"Terms"
GO_terms_int
$
`Adjusted p-value`
<-
GO_terms_int
$
qvalue
ggplot
(
data
=
GO_terms_int
,
aes
(
x
=
Terms
,
y
=
Count
,
fill
=
`Adjusted p-value`
))
+
geom_bar
(
stat
=
"identity"
,
width
=
0.7
)
+
scale_fill_gradient
(
low
=
"red"
,
high
=
"blue"
)
+
coord_flip
()
+
xlab
(
"Terms\n"
)
+
ggtitle
(
"Downregulated in Cluster 11 vs 0,1,2"
)
+
theme_bw
()
+
theme
(
aspect.ratio
=
0.8
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment