Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Vavassori_LNP2022
Vavassori_LNP2022_Proteomics
Commits
1f7cc998
Commit
1f7cc998
authored
Oct 21, 2022
by
Stefano Beretta
Browse files
Added paper plot script.
parent
c0ca1c22
Changes
1
Hide whitespace changes
Inline
Side-by-side
paper_plots.R
0 → 100644
View file @
1f7cc998
library
(
DEP
)
library
(
dplyr
)
library
(
openxlsx
)
library
(
ReactomePA
)
library
(
org.Hs.eg.db
)
library
(
ggfortify
)
library
(
stringr
)
library
(
reshape2
)
library
(
RColorBrewer
)
library
(
clusterProfiler
)
wdir
<-
"vavassori_lnp2022_proteomics"
out_dir
<-
paste
(
wdir
,
"paper_plots"
,
sep
=
"/"
)
dir.create
(
out_dir
,
showWarnings
=
F
)
data_dir
<-
paste
(
wdir
,
"paper_data"
,
sep
=
"/"
)
design
<-
read.xlsx
(
xlsxFile
=
paste
(
data_dir
,
"ProteomicsDesign.xlsx"
,
sep
=
"/"
))
str
(
design
)
# Day 4
d4_design
<-
design
[
4
:
18
,]
d4_design
$
label
dd
<-
read.xlsx
(
xlsxFile
=
paste
(
data_dir
,
"proteinGroups_SR-TIGET_200112.xlsx"
,
sep
=
"/"
))
colnames
(
dd
)
for
(
col
in
d4_design
$
label
)
{
dd
[[
col
]]
<-
as.numeric
(
dd
[[
col
]])
dd
[[
col
]]
<-
2
^
dd
[[
col
]]
dd
[[
col
]][
is.na
(
dd
[[
col
]])]
<-
0
}
head
(
dd
)
dd_unique
<-
make_unique
(
dd
,
"Gene.names"
,
"Protein.IDs"
,
delim
=
";"
)
colnames
(
dd_unique
)
str
(
dd_unique
)
samp_id
<-
match
(
d4_design
$
label
,
colnames
(
dd_unique
))
print
(
samp_id
)
data_dd
<-
make_se
(
proteins_unique
=
dd_unique
,
columns
=
samp_id
,
expdesign
=
d4_design
)
# Filter for proteins that are identified in all replicates of at least one condition
data_dd_filt
<-
filter_missval
(
data_dd
,
thr
=
0
)
# Normalize the data
data_dd_norm
<-
normalize_vsn
(
se
=
data_dd_filt
)
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
data_dd_imp
<-
impute
(
data_dd_norm
,
fun
=
"MinProb"
,
q
=
0.01
)
n_top_genes
<-
100
mat
<-
as.data.frame
(
data_dd_imp
@
assays
@
data
@
listData
)
mat_var
<-
sort
(
apply
(
mat
,
1
,
var
),
decreasing
=
T
)
mat_filt
<-
mat
[
names
(
mat_var
[
1
:
n_top_genes
]),]
ss
<-
limma
::
removeBatchEffect
(
x
=
as.data.frame
(
data_dd_imp
@
assays
@
data
@
listData
),
batch
=
data_dd_imp
$
replicate
)
ss_var
<-
sort
(
apply
(
ss
,
1
,
var
),
decreasing
=
T
)
ss_filt
<-
ss
[
names
(
ss_var
[
1
:
n_top_genes
]),]
sinfo
<-
data.frame
(
Condition
=
data_dd_imp
$
condition
,
Replicate
=
data_dd_imp
$
replicate
)
rownames
(
sinfo
)
<-
data_dd_imp
$
ID
sinfo
<-
mutate
(
sinfo
,
Cond
=
case_when
(
sinfo
$
Condition
==
"UT_D4"
~
"UT"
,
sinfo
$
Condition
==
"UT_electro_D4"
~
"Mock Electro"
,
sinfo
$
Condition
==
"RNP_D4"
~
"RNP"
,
sinfo
$
Condition
==
"RNP_AAV6_D4"
~
"RNP+AAV6"
,
sinfo
$
Condition
==
"AAV6_D4"
~
"AAV6 only"
))
sinfo
$
Condition
<-
sinfo
$
Cond
pca2
<-
prcomp
(
x
=
t
(
ss_filt
),
center
=
TRUE
,
scale.
=
T
)
pdf
(
file
=
paste
(
out_dir
,
"DEP-PCA-Day4-top100-batchcorr.pdf"
,
sep
=
"/"
),
width
=
9
,
height
=
7
)
autoplot
(
object
=
pca2
,
data
=
sinfo
,
label
=
FALSE
,
colour
=
'Condition'
,
size
=
6
)
+
ggtitle
(
label
=
"Principal Component Analysis (PCA)"
,
subtitle
=
paste
(
"Top"
,
n_top_genes
,
"proteins (with batch correction)"
,
sep
=
" "
))
+
theme_bw
(
base_size
=
20
)
dev.off
()
con
=
c
(
"UT_electro_D4_vs_UT_D4"
,
"RNP_D4_vs_UT_electro_D4"
,
"RNP_D4_vs_UT_D4"
,
"AAV6_D4_vs_RNP_D4"
,
"AAV6_D4_vs_UT_electro_D4"
,
"AAV6_D4_vs_UT_D4"
,
"RNP_AAV6_D4_vs_AAV6_D4"
,
"RNP_AAV6_D4_vs_RNP_D4"
,
"RNP_AAV6_D4_vs_UT_electro_D4"
,
"RNP_AAV6_D4_vs_UT_D4"
)
for
(
gsea_val
in
c
(
"_GSEApval"
))
{
for
(
gsea_db
in
c
(
"gsea_HALLMARK"
))
{
hallmark_ds_order
<-
c
(
"UT_electro_D4_vs_UT_D4"
,
# UT electro vs UT
"RNP_D4_vs_UT_D4"
,
# RNP vs UT
"RNP_AAV6_D4_vs_UT_D4"
,
# RNP+AAV6 vs UT
"UT_electro_D4_vs_AAV6_D4"
,
# old "AAV6_D4-UT_electro_D4", # Utelectro vs AAV6 !!!
"RNP_D4_vs_AAV6_D4"
,
# old "AAV6_D4-RNP_D4", # RNP vs AAV6 !!!
"RNP_AAV6_D4_vs_AAV6_D4"
,
# RNP+AAV6 vs AAV6
"RNP_D4_vs_UT_electro_D4"
,
# RNP vs UT electro
"RNP_AAV6_D4_vs_UT_electro_D4"
,
# RNP+AAV6 vs Utelectro
"RNP_AAV6_D4_vs_RNP_D4"
,
# RNP+AAV6 vs RNP
"AAV6_D4_vs_UT_D4"
# AAV6 vs UT
)
hallmark.full
<-
data.frame
()
for
(
contr
in
con
)
{
print
(
contr
)
ds.t
<-
read.xlsx
(
xlsxFile
=
paste
(
wdir
,
"results_DEP"
,
paste0
(
contr
,
paste0
(
gsea_val
,
".xlsx"
)),
sep
=
"/"
),
sheet
=
gsea_db
)
if
(
nrow
(
ds.t
)
==
0
)
{
next
}
ds.t.filt
<-
ds.t
[,
c
(
"Description"
,
"setSize"
,
"enrichmentScore"
,
"NES"
,
"pvalue"
,
"p.adjust"
,
"qvalues"
),
drop
=
FALSE
]
ds.t.filt
$
Dataset
<-
contr
if
(
nrow
(
hallmark.full
)
==
0
)
{
hallmark.full
<-
ds.t.filt
}
else
{
hallmark.full
<-
rbind
(
hallmark.full
,
ds.t.filt
)
}
}
hallmark.full.filt
<-
hallmark.full
[,
c
(
"Description"
,
"NES"
,
"Dataset"
)]
hallmark.full.filt
$
Description
<-
gsub
(
x
=
hallmark.full.filt
$
Description
,
"HALLMARK_"
,
""
)
for
(
dd
in
hallmark_ds_order
)
{
if
(
!
dd
%in%
unique
(
hallmark.full.filt
$
Dataset
))
{
newdd
<-
paste
(
as.character
(
str_split_fixed
(
dd
,
"_vs_"
,
2
)[,
2
]),
as.character
(
str_split_fixed
(
dd
,
"_vs_"
,
2
)[,
1
]),
sep
=
"_vs_"
)
if
(
newdd
%in%
unique
(
hallmark.full.filt
$
Dataset
))
{
newvals
<-
-1
*
hallmark.full.filt
[
which
(
hallmark.full.filt
$
Dataset
==
newdd
),
"NES"
]
hallmark.full.filt
[
which
(
hallmark.full.filt
$
Dataset
==
newdd
),
"NES"
]
<-
newvals
hallmark.full.filt
[
which
(
hallmark.full.filt
$
Dataset
==
newdd
),
"Dataset"
]
<-
dd
}
}
}
head
(
hallmark.full.filt
)
hallmark.order
<-
hallmark.full.filt
%>%
group_by
(
Description
)
%>%
summarise
(
Pos
=
sum
(
NES
))
hallmark.order.terms
<-
hallmark.order
[
order
(
hallmark.order
$
Pos
,
decreasing
=
FALSE
),
"Description"
,
drop
=
FALSE
]
hallmark.full.filt.tt
<-
melt
(
reshape2
::
dcast
(
hallmark.full.filt
,
Description
~
Dataset
,
value.var
=
"NES"
),
id.vars
=
c
(
"Description"
))
colnames
(
hallmark.full.filt.tt
)
<-
c
(
"Description"
,
"Dataset"
,
"NES"
)
hallmark.full.filt.tt
$
Description
<-
factor
(
hallmark.full.filt.tt
$
Description
,
levels
=
hallmark.order.terms
$
Description
)
hallmark.full.filt.tt
$
Dataset
<-
gsub
(
"UT_D4"
,
"UT"
,
hallmark.full.filt.tt
$
Dataset
)
hallmark.full.filt.tt
$
Dataset
<-
gsub
(
"UT_electro_D4"
,
"Mock Electro"
,
hallmark.full.filt.tt
$
Dataset
)
hallmark.full.filt.tt
$
Dataset
<-
gsub
(
"RNP_D4"
,
"RNP"
,
hallmark.full.filt.tt
$
Dataset
)
hallmark.full.filt.tt
$
Dataset
<-
gsub
(
"RNP_AAV6_D4"
,
"RNP+AAV6"
,
hallmark.full.filt.tt
$
Dataset
)
hallmark.full.filt.tt
$
Dataset
<-
gsub
(
"AAV6_D4"
,
"AAV6 only"
,
hallmark.full.filt.tt
$
Dataset
)
hallmark.full.filt.tt
$
Dataset
<-
gsub
(
"_vs_"
,
" - "
,
hallmark.full.filt.tt
$
Dataset
)
hallmark_ds_order
<-
gsub
(
"UT_D4"
,
"UT"
,
hallmark_ds_order
)
hallmark_ds_order
<-
gsub
(
"UT_electro_D4"
,
"Mock Electro"
,
hallmark_ds_order
)
hallmark_ds_order
<-
gsub
(
"RNP_D4"
,
"RNP"
,
hallmark_ds_order
)
hallmark_ds_order
<-
gsub
(
"RNP_AAV6_D4"
,
"RNP+AAV6"
,
hallmark_ds_order
)
hallmark_ds_order
<-
gsub
(
"AAV6_D4"
,
"AAV6 only"
,
hallmark_ds_order
)
hallmark_ds_order
<-
gsub
(
"_vs_"
,
" - "
,
hallmark_ds_order
)
hallmark.full.filt.tt
$
Dataset
<-
factor
(
hallmark.full.filt.tt
$
Dataset
,
levels
=
hallmark_ds_order
)
p.hallmark
<-
ggplot
(
hallmark.full.filt.tt
,
aes
(
x
=
Dataset
,
y
=
Description
))
+
theme_bw
(
base_size
=
20
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
+
geom_tile
(
aes
(
fill
=
NES
),
colour
=
"white"
)
+
scale_fill_gradientn
(
colours
=
colorRampPalette
(
rev
(
brewer.pal
(
11
,
"RdBu"
)))(
100
),
na.value
=
"grey"
)
+
ylab
(
""
)
+
xlab
(
""
)
plot_height
<-
ifelse
(
length
(
levels
(
hallmark.full.filt.tt
$
Description
))
<
50
,
7
,
15
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
"HM_D4_"
,
gsea_db
,
gsea_val
,
".pdf"
),
sep
=
"/"
),
plot
=
p.hallmark
,
width
=
10
,
height
=
plot_height
,
dpi
=
150
)
}
}
# Day 12
d12_design
<-
design
[
19
:
33
,]
d12_design
$
label
dd
<-
read.xlsx
(
xlsxFile
=
paste
(
wdir
,
"proteinGroups_SR-TIGET_200112.xlsx"
,
sep
=
"/"
))
colnames
(
dd
)
for
(
col
in
d12_design
$
label
)
{
dd
[[
col
]]
<-
as.numeric
(
dd
[[
col
]])
dd
[[
col
]]
<-
2
^
dd
[[
col
]]
dd
[[
col
]][
is.na
(
dd
[[
col
]])]
<-
0
}
head
(
dd
)
dd_unique
<-
make_unique
(
dd
,
"Gene.names"
,
"Protein.IDs"
,
delim
=
";"
)
colnames
(
dd_unique
)
str
(
dd_unique
)
samp_id
<-
match
(
d12_design
$
label
,
colnames
(
dd_unique
))
print
(
samp_id
)
data_dd
<-
make_se
(
proteins_unique
=
dd_unique
,
columns
=
samp_id
,
expdesign
=
d12_design
)
# Filter for proteins that are identified in all replicates of at least one condition
data_dd_filt
<-
filter_missval
(
data_dd
,
thr
=
0
)
# Normalize the data
data_dd_norm
<-
normalize_vsn
(
se
=
data_dd_filt
)
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
data_dd_imp
<-
impute
(
data_dd_norm
,
fun
=
"MinProb"
,
q
=
0.01
)
mat
<-
as.data.frame
(
data_dd_imp
@
assays
@
data
@
listData
)
mat_var
<-
sort
(
apply
(
mat
,
1
,
var
),
decreasing
=
T
)
mat_filt
<-
mat
[
names
(
mat_var
[
1
:
n_top_genes
]),]
ss
<-
limma
::
removeBatchEffect
(
x
=
as.data.frame
(
data_dd_imp
@
assays
@
data
@
listData
),
batch
=
data_dd_imp
$
replicate
)
ss_var
<-
sort
(
apply
(
ss
,
1
,
var
),
decreasing
=
T
)
ss_filt
<-
ss
[
names
(
ss_var
[
1
:
n_top_genes
]),]
sinfo
<-
data.frame
(
Condition
=
data_dd_imp
$
condition
,
Replicate
=
data_dd_imp
$
replicate
)
rownames
(
sinfo
)
<-
data_dd_imp
$
ID
sinfo
<-
mutate
(
sinfo
,
Cond
=
case_when
(
sinfo
$
Condition
==
"UT_D12"
~
"UT"
,
sinfo
$
Condition
==
"UT_electro_D12"
~
"Mock Electro"
,
sinfo
$
Condition
==
"RNP_D12"
~
"RNP"
,
sinfo
$
Condition
==
"RNP_AAV6_D12"
~
"RNP+AAV6"
,
sinfo
$
Condition
==
"AAV6_D12"
~
"AAV6 only"
))
sinfo
$
Condition
<-
sinfo
$
Cond
pca2
<-
prcomp
(
x
=
t
(
ss_filt
),
center
=
TRUE
,
scale.
=
T
)
pdf
(
file
=
paste
(
out_dir
,
paste0
(
"DEP-PCA-Day12-top"
,
n_top_genes
,
"-batchcorr.pdf"
),
sep
=
"/"
),
width
=
9
,
height
=
7
)
autoplot
(
object
=
pca2
,
data
=
sinfo
,
label
=
FALSE
,
colour
=
'Condition'
,
size
=
6
)
+
ggtitle
(
label
=
"Principal Component Analysis (PCA)"
,
subtitle
=
paste
(
"Top"
,
n_top_genes
,
"proteins (with batch correction)"
,
sep
=
" "
))
+
theme_bw
(
base_size
=
20
)
dev.off
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment