Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
casertanucera_leukemia2023
Commits
8891fc61
Commit
8891fc61
authored
Jul 25, 2023
by
Matteo Barcella
Browse files
Adding scripts for differential methylation analysis and heatmap in manuscript
parent
0c684c53
Changes
4
Hide whitespace changes
Inline
Side-by-side
Methylation/CpG_Heatmaps_Paper.R
0 → 100644
View file @
8891fc61
#CpGs from position 136669244 to 13666449 (the entire fragment of the intron 4 putative transcription starting site),
#CpGs from position 136670325 to 136670386 (5’ of the intron 7) and
#CpGs from position 136670827 to 136670945 (3’ of the intron 7).
# load data:
source
(
"MethylAnalysis.R"
)
MethylAnalysis
(
methcall.folder
=
"input/"
,
outfolder
=
"Adults_vs_Prog"
,
samplesheet
=
"SampleSheets_BisulphiteExp_Sets.xlsx"
,
sheetnum
=
2
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
idcol
=
"SampleID"
,
design.var
=
"TestVar"
,
case.var
=
"Case"
,
control.var
=
"Controls"
,
mincoverage
=
10
,
idstoremove
=
c
(
"S150310"
),
assem
=
"hg38"
,
pipeline.meth
=
"bismarkCoverage"
,
proj.id
=
"Adults_vs_Prog_Case_vs_Control"
,
plot.covariates
=
c
(
"Condition"
,
"MRD"
,
"Cytogenetics"
,
"TestVar"
),
lowperc
=
5
,
lowcount
=
10
,
delta.meth
=
10
,
plot.height
=
12
,
plot.width
=
18
,
plot.categorical.vars
=
c
(
"Condition"
,
"Cytogenetics"
),
plot.continuous.vars
=
c
(
"miR-126"
),
cellw
=
14
,
cellh
=
8
,
fontnumsize
=
7
,
fontsize
=
7
)
library
(
openxlsx
)
library
(
pheatmap
)
library
(
RColorBrewer
)
# load data pct methylation
pctmeth_matrix
<-
readRDS
(
file
=
"Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_CpG_percent_methylation_matrix_pheatmap.rds"
)
# fixing colors
annotations
<-
readRDS
(
"Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_annotations_matrix_pheatmap.rds"
)
annotations2
<-
annotations
annotations2
$
anncolors
$
Condition
<-
c
(
"#006600"
,
"#929292"
,
"#c9c9c9"
,
"#b30101"
)
# #006600: verdescuro, #b30101: rosso scuro, grigio-scuro: #929292, grigiochiaro: #c9c9c9
names
(
annotations2
$
anncolors
$
Condition
)
<-
names
(
annotations
$
anncolors
$
Condition
)
annotations2
$
anncolors
$
Cytogenetics
<-
c
(
"#0080cc"
,
"#f6a145"
,
"white"
)
# https://www.color-hex.com/color-palette/96440
names
(
annotations2
$
anncolors
$
Cytogenetics
)
<-
names
(
annotations
$
anncolors
$
Cytogenetics
)
png
(
filename
=
"Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_CpG_percent_methylation_matrix_pheatmap_CUSTOM.png"
,
width
=
15
,
height
=
7
,
units
=
"in"
,
res
=
300
)
pheatmap
(
mat
=
pctmeth_matrix
,
width
=
15
,
height
=
7
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annotations2
$
annrow
,
cellwidth
=
14
,
cellheight
=
13
,
display_numbers
=
T
,
fontsize
=
10
,
fontsize_number
=
7
,
number_format
=
"%.0f"
,
border_color
=
FALSE
,
annotation_col
=
annotations2
$
anncol
,
legend
=
F
,
annotation_legend
=
TRUE
,
annotation_names_col
=
T
,
#labels_row = lrow,
#labels_col = lcol,
# gaps_row = c(23),
gaps_col
=
c
(
9
,
43
),
annotation_colors
=
annotations2
$
anncolors
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
dev.off
()
#################
# Disease 12-27 #
#################
source
(
"MethylAnalysis_bulk.R"
)
MethylAnalysis_bulk
(
methcall.folder
=
"input/"
,
outfolder
=
"S120167_Case_vs_Controls"
,
samplesheet
=
"SampleSheets_BisulphiteExp_Sets.xlsx"
,
sheetnum
=
4
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
idcol
=
"SampleID"
,
design.var
=
"TestVar"
,
case.var
=
"POP"
,
control.var
=
"DGN"
,
mincoverage
=
10
,
idstoremove
=
NULL
,
assem
=
"hg38"
,
pipeline.meth
=
"bismarkCoverage"
,
proj.id
=
"S120167_POP_vs_DGN"
,
plot.covariates
=
c
(
"Condition"
,
"TestVar"
),
lowperc
=
5
,
lowcount
=
10
,
delta.meth
=
10
,
plot.height
=
12
,
plot.width
=
18
,
plot.categorical.vars
=
c
(
"Condition"
),
plot.continuous.vars
=
c
(
"miR-126"
),
cellw
=
14
,
cellh
=
8
,
fontnumsize
=
7
,
fontsize
=
7
)
# RUNNING GFPLOW VS GFPHIGH to get diff meth results and stats
source
(
"MethylAnalysis_bulk_v2.R"
)
MethylAnalysis_bulk_v2
(
methcall.folder
=
"input/"
,
outfolder
=
"S120167_GFP_L_vs_GFP_H"
,
samplesheet
=
"SampleSheets_BisulphiteExp_Sets.xlsx"
,
sheetnum
=
4
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
idcol
=
"SampleID"
,
design.var
=
"Condition"
,
case.var
=
"GFP_L"
,
control.var
=
"GFP_H"
,
mincoverage
=
10
,
idstoremove
=
NULL
,
assem
=
"hg38"
,
pipeline.meth
=
"bismarkCoverage"
,
proj.id
=
"S120167_GFP_L_vs_GFP_H"
,
plot.covariates
=
c
(
"Condition"
),
lowperc
=
5
,
lowcount
=
10
,
delta.meth
=
10
,
plot.height
=
12
,
plot.width
=
18
,
plot.categorical.vars
=
c
(
"Condition"
),
plot.continuous.vars
=
c
(
"miR-126"
),
cellw
=
14
,
cellh
=
8
,
fontnumsize
=
7
,
fontsize
=
7
)
# FIXING THE IMAGE
pctmeth_matrix
<-
readRDS
(
file
=
"S120167_Case_vs_Controls/S120167_POP_vs_DGN_CpG_percent_methylation_matrix_pheatmap.rds"
)
annotations
<-
readRDS
(
"S120167_Case_vs_Controls/S120167_POP_vs_DGN_annotations_matrix_pheatmap.rds"
)
annotations_GFP_L_vs_GFP_H
<-
readRDS
(
"S120167_GFP_L_vs_GFP_H/S120167_GFP_L_vs_GFP_H_annotations_matrix_pheatmap.rds"
)
# replacing diff meth and GFP_L vs GFP_H qvalue_r in POP vs DGN chart
annotations
$
anncol
$
qvalue_r
<-
annotations_GFP_L_vs_GFP_H
$
anncol
$
qvalue_r
annotations
$
anncol
$
meth.diff
<-
annotations_GFP_L_vs_GFP_H
$
anncol
$
meth.diff
annotations2
<-
annotations
annotations2
$
anncolors
$
Condition
<-
c
(
"#656161"
,
"#0b64a1"
,
"#df4041"
)
# #006600: verdescuro, #b30101: rosso scuro, grigio-scuro: #929292, grigiochiaro: #c9c9c9
names
(
annotations2
$
anncolors
$
Condition
)
<-
names
(
annotations
$
anncolors
$
Condition
)
annotations2
$
anncolors
$
qvalue_r
<-
annotations_GFP_L_vs_GFP_H
$
anncolors
$
qvalue_r
annotations2
$
anncolors
$
meth.diff
<-
annotations_GFP_L_vs_GFP_H
$
anncolors
$
meth.diff
annotations2
$
annrow
$
`miR-126`
<-
NULL
# plot heatmap custom
png
(
filename
=
"S120167_Case_vs_Controls/S120167_GFP_Lvs_GFP_H_CpG_percent_methylation_matrix_pheatmap_CUSTOM_CpG_main.png"
,
width
=
15
,
height
=
5
,
units
=
"in"
,
res
=
300
)
pheatmap
(
mat
=
pctmeth_matrix
[,
pctmeth_matrix_ADULTs_CpG
],
#filename = "S10272_Case_vs_Controls/S10272_GFP_Lvs_GFP_H_CpG_percent_methylation_matrix_pheatmap_CUSTOM.png",
width
=
15
,
height
=
7
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annotations2
$
annrow
,
cellwidth
=
14
,
cellheight
=
13
,
display_numbers
=
T
,
fontsize
=
10
,
fontsize_number
=
7
,
number_format
=
"%.0f"
,
border_color
=
FALSE
,
annotation_col
=
annotations2
$
anncol
,
legend
=
F
,
annotation_legend
=
TRUE
,
annotation_names_col
=
T
,
#labels_row = lrow,
#labels_col = lcol,
# gaps_row = c(23),
gaps_col
=
c
(
9
,
43
),
annotation_colors
=
annotations2
$
anncolors
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
dev.off
()
Methylation/MethylAnalysis.R
0 → 100644
View file @
8891fc61
MethylAnalysis
<-
function
(
methcall.folder
=
NULL
,
# input folder with .cov files (bismark)
outfolder
=
NULL
,
# output folder (if not exist it will be created)
proj.id
=
"myproject"
,
# project id to prefix in putput files
samplesheet
=
NULL
,
sheetnum
=
1
,
design.var
=
"Disease"
,
# variable to subset samplesheet
case.var
=
"Case"
,
# 1 in treatment vector
control.var
=
"Control"
,
# 0 in treatment vector
idcol
=
"SampleID"
,
mincoverage
=
10
,
lowperc
=
5
,
lowcount
=
10
,
assem
=
"hg38"
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
pipeline.meth
=
"bismarkCoverage"
,
plot.covariates
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
,
"Tissue"
),
idstoremove
=
NULL
,
delta.meth
=
10
,
plot.categorical.vars
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
),
plot.continuous.vars
=
c
(
"miR-126"
,
"egfl7"
),
plot.height
=
9
,
plot.width
=
12
,
cellw
=
15
,
cellh
=
15
,
fontnumsize
=
5
,
fontsize
=
8
){
# load libraries
require
(
methylKit
)
require
(
ggplot2
)
require
(
reshape2
)
require
(
plyr
)
require
(
ggrepel
)
library
(
GenomicRanges
)
library
(
openxlsx
)
library
(
pheatmap
)
# check vars
if
(
is.null
(
methcall.folder
)){
stop
(
"Please set methcall.folder"
)
}
if
(
is.null
(
outfolder
)){
stop
(
"Please set outfolder"
)
}
if
(
is.null
(
samplesheet
)){
stop
(
"Please set samplesheet"
)
}
# Setup variables
infolder
=
paste0
(
methcall.folder
,
"/"
)
dir.create
(
infolder
,
showWarnings
=
F
)
outfolder
=
paste0
(
outfolder
,
"/"
)
dir.create
(
outfolder
,
showWarnings
=
F
)
qcfolder
<-
paste0
(
outfolder
,
"QC/"
)
dir.create
(
qcfolder
,
showWarnings
=
F
)
# reading sample sheet with metadata
ssheet
<-
read.xlsx
(
samplesheet
,
sheet
=
sheetnum
)
if
(
!
is.null
(
idstoremove
)){
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
!
ssheet
[[
idcol
]]
%in%
idstoremove
)
}
# defining design vector according to variable
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
ssheet
[[
design.var
]]
%in%
c
(
case.var
,
control.var
))
d.vec
<-
ssheet
[[
design.var
]]
d.vector
<-
ifelse
(
d.vec
==
case.var
,
1
,
0
)
# initialzing coverage .cov files list
covs
<-
list
()
sampleids
<-
as.list
(
ssheet
[[
idcol
]])
names
(
sampleids
)
<-
ssheet
[[
idcol
]]
for
(
i
in
ssheet
[[
idcol
]])
{
covs
[[
i
]]
<-
paste0
(
infolder
,
"/"
,
i
,
".bismark.cov"
)
}
saveRDS
(
object
=
covs
,
file
=
"covs_object.rds"
)
# creating object
myobj
<-
methRead
(
location
=
covs
,
sample.id
=
sampleids
,
assembly
=
assem
,
pipeline
=
pipeline.meth
,
treatment
=
d.vector
,
context
=
"CpG"
,
mincov
=
mincoverage
)
saveRDS
(
myobj
,
file
=
"Myobject.rds"
)
names
(
myobj
)
<-
ssheet
[[
idcol
]]
saveRDS
(
myobj
,
file
=
"Myobject_with_names.rds"
)
# subsetting if declared
if
(
isTRUE
(
do.subset
)){
my.win
=
GRanges
(
seqnames
=
chr.subset
,
ranges
=
IRanges
(
start
=
start.subset
,
end
=
end.subset
))
myobj
<-
selectByOverlap
(
myobj
,
my.win
)
}
saveRDS
(
myobj
,
file
=
"Myobject_with_names_aftersubset.rds"
)
# filtering on minimum coverage
myobj
<-
filterByCoverage
(
methylObj
=
myobj
,
lo.count
=
lowcount
,
lo.perc
=
lowperc
)
# Normalization
myobj
<-
normalizeCoverage
(
obj
=
myobj
)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder
<-
paste0
(
qcfolder
,
"Metrics/"
)
dir.create
(
path
=
metricsfolder
,
showWarnings
=
F
)
for
(
id
in
ssheet
[[
idcol
]])
{
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_CpG_pct_methylation_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getMethylationStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_Coverage_stats_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCoverageStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth
<-
unite
(
object
=
myobj
,
destrand
=
FALSE
)
# for debugging
saveRDS
(
meth
,
"savemeth.tmp.rds"
)
# Perform correlation
sink
(
paste0
(
qcfolder
,
proj.id
,
"_Correlations.txt"
))
getCorrelation
(
meth
,
plot
=
FALSE
)
sink
()
if
(
length
(
ssheet
[[
idcol
]])
<
15
){
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Correlations_pearson_pairwise.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCorrelation
(
meth
,
plot
=
TRUE
))
dev.off
()
}
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Clustering.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
clusterSamples
(
meth
,
dist
=
"euclidean"
,
plot
=
TRUE
,
method
=
"ward.D2"
)
dev.off
()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt
<-
PCASamples
(
meth
,
obj.return
=
T
,
screeplot
=
F
)
# extract PCs components
pcafolder
<-
paste0
(
qcfolder
,
"PCA/"
)
dir.create
(
path
=
pcafolder
,
showWarnings
=
F
)
pca_pc1_2
<-
as.data.frame
(
x
=
pca_compt
$
x
[,
1
:
2
])
for
(
myvars
in
plot.covariates
){
pca_pc1_2
$
condition
<-
as.factor
(
ssheet
[[
myvars
]])
png
(
filename
=
paste0
(
pcafolder
,
proj.id
,
"_PCA_"
,
myvars
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
ggplot
(
data
=
pca_pc1_2
,
mapping
=
aes
(
x
=
PC1
,
y
=
PC2
,
col
=
condition
,
label
=
rownames
(
pca_pc1_2
)))
+
geom_point
(
size
=
3
)
+
geom_text_repel
(
size
=
3
)
+
ggtitle
(
label
=
"Principal component analysis"
,
subtitle
=
myvars
)
+
theme
(
plot.title
=
element_text
(
size
=
16
,
face
=
"bold"
,
hjust
=
0.5
))
+
theme
(
plot.subtitle
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"italic"
,
color
=
"black"
))
+
theme
(
axis.title
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"bold"
,
color
=
"black"
))
+
theme
(
legend.text
=
element_text
(
size
=
8
,
hjust
=
0.5
))
+
theme
(
legend.title
=
element_blank
())
+
theme
(
axis.text
=
element_text
(
size
=
12
,
hjust
=
0.5
,
color
=
"black"
)))
dev.off
()
}
# retrieve and store % of methylation
perc.meth
<-
percMethylation
(
meth
)
saveRDS
(
perc.meth
,
"pctmethly.rds"
)
base
::
rownames
(
perc.meth
)
<-
paste0
(
meth
$
chr
,
"_"
,
meth
$
start
)
# Perform diff methylation
myDiff
=
calculateDiffMeth
(
meth
)
write.table
(
myDiff
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
difftest
<-
read.table
(
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
header
=
T
)
difftest
$
comparison
<-
proj.id
difftest
$
qvalue_r
<-
as.character
(
cut
(
x
=
difftest
$
qvalue
,
breaks
=
c
(
-1
,
1e-100
,
1e-10
,
1e-02
,
1
),
labels
=
c
(
"***"
,
"**"
,
"*"
,
"ns"
)))
myindex
<-
abs
(
difftest
$
meth.diff
)
<
delta.meth
difftest
$
meth.diff
<-
abs
(
difftest
$
meth.diff
)
difftest
$
qvalue_r
[
myindex
]
<-
"ns"
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
# Adding color list and annotations
library
(
RColorBrewer
)
color_list
<-
list
()
annrows
<-
NULL
anncols
<-
subset.data.frame
(
difftest
,
select
=
c
(
"qvalue_r"
,
"meth.diff"
))
base
::
rownames
(
anncols
)
<-
difftest
$
start
rows.annot.vars.cat
=
plot.categorical.vars
rows.annot.vars.con
=
plot.continuous.vars
if
(
!
is.null
(
rows.annot.vars.cat
)
|
!
is.null
(
rows.annot.vars.con
)){
rownames
(
ssheet
)
<-
ssheet
$
SampleID
annrows
<-
subset.data.frame
(
x
=
ssheet
,
select
=
c
(
rows.annot.vars.cat
,
rows.annot.vars.con
))
concolors
<-
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set1"
)
catcolors
<-
NULL
for
(
varcon
in
1
:
length
(
rows.annot.vars.con
))
{
color_list
[[
rows.annot.vars.con
[
varcon
]]]
<-
colorRampPalette
(
c
(
"lightgrey"
,
concolors
[
varcon
]))(
10
)
}
for
(
varcat
in
1
:
length
(
rows.annot.vars.cat
))
{
ncolor
<-
1
for
(
val
in
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]])[
order
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]]))]){
if
(
length
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]]))
>
9
){
catcolors
<-
colorRampPalette
(
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set3"
))(
length
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]])))
}
else
{
catcolors
<-
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set3"
)
}
color_list
[[
rows.annot.vars.cat
[
varcat
]]][[
val
]]
<-
catcolors
[
ncolor
]
ncolor
<-
ncolor
+
1
}
}
}
color_list
[[
"qvalue_r"
]]
=
c
(
"*"
=
"#6497b1"
,
"**"
=
"#03396c"
,
"***"
=
"#011f4b"
,
ns
=
"#c4cacf"
)
color_list
[[
"meth.diff"
]]
=
colorRampPalette
(
brewer.pal
(
n
=
11
,
"Reds"
))(
100
)
color_list
[[
"miR-126"
]]
<-
brewer.pal
(
n
=
9
,
"PuRd"
)
# Heatmap
pctmeth_matrix
<-
t
(
perc.meth
)
base
::
colnames
(
pctmeth_matrix
)
<-
gsub
(
x
=
base
::
colnames
(
pctmeth_matrix
),
pattern
=
"chr[0-9]+_"
,
replacement
=
""
)
pheatmap
(
mat
=
pctmeth_matrix
,
main
=
gsub
(
x
=
proj.id
,
pattern
=
"_"
,
replacement
=
" "
),
filename
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.pdf"
),
width
=
plot.width
,
height
=
plot.height
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annrows
,
cellwidth
=
cellw
,
cellheight
=
cellh
,
display_numbers
=
T
,
fontsize
=
fontsize
,
fontsize_number
=
fontnumsize
,
number_format
=
"%.0f"
,
#border_color = "#CBBEB5",
annotation_col
=
anncols
,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col
=
c
(
8
),
annotation_colors
=
color_list
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
saveRDS
(
object
=
pctmeth_matrix
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.rds"
))
saveRDS
(
object
=
list
(
anncol
=
anncols
,
annrow
=
annrows
,
anncolors
=
color_list
),
paste0
(
outfolder
,
proj.id
,
"_annotations_matrix_pheatmap.rds"
))
saveRDS
(
object
=
difftest
,
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.rds"
))
saveRDS
(
object
=
perc.meth
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.rds"
))
write.table
(
x
=
perc.meth
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.txt"
))
write.table
(
x
=
difftest
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.txt"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit.rds"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit_meth.rds"
))
}
\ No newline at end of file
Methylation/MethylAnalysis_bulk.R
0 → 100644
View file @
8891fc61
MethylAnalysis_bulk
<-
function
(
methcall.folder
=
NULL
,
# input folder with .cov files (bismark)
outfolder
=
NULL
,
# output folder (if not exist it will be created)
proj.id
=
"myproject"
,
# project id to prefix in putput files
samplesheet
=
NULL
,
sheetnum
=
1
,
design.var
=
"Disease"
,
# variable to subset samplesheet
case.var
=
"Case"
,
# 1 in treatment vector
control.var
=
"Control"
,
# 0 in treatment vector
idcol
=
"SampleID"
,
mincoverage
=
10
,
lowperc
=
5
,
lowcount
=
10
,
assem
=
"hg38"
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
pipeline.meth
=
"bismarkCoverage"
,
plot.covariates
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
,
"Tissue"
),
idstoremove
=
NULL
,
delta.meth
=
10
,
plot.categorical.vars
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
),
plot.continuous.vars
=
c
(
"miR-126"
,
"egfl7"
),
plot.height
=
9
,
plot.width
=
12
,
cellw
=
15
,
cellh
=
15
,
fontnumsize
=
5
,
fontsize
=
8
){
# load libraries
require
(
methylKit
)
require
(
ggplot2
)
require
(
reshape2
)
require
(
plyr
)
require
(
ggrepel
)
library
(
GenomicRanges
)
library
(
openxlsx
)
library
(
pheatmap
)
# check vars
if
(
is.null
(
methcall.folder
)){
stop
(
"Please set methcall.folder"
)
}
if
(
is.null
(
outfolder
)){
stop
(
"Please set outfolder"
)
}
if
(
is.null
(
samplesheet
)){
stop
(
"Please set samplesheet"
)
}
# Setup variables
infolder
=
paste0
(
methcall.folder
,
"/"
)
dir.create
(
infolder
,
showWarnings
=
F
)
outfolder
=
paste0
(
outfolder
,
"/"
)
dir.create
(
outfolder
,
showWarnings
=
F
)
qcfolder
<-
paste0
(
outfolder
,
"QC/"
)
dir.create
(
qcfolder
,
showWarnings
=
F
)
# reading sample sheet with metadata
ssheet
<-
read.xlsx
(
samplesheet
,
sheet
=
sheetnum
)
if
(
!
is.null
(
idstoremove
)){
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
!
ssheet
[[
idcol
]]
%in%
idstoremove
)
}
# defining design vector according to variable
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
ssheet
[[
design.var
]]
%in%
c
(
case.var
,
control.var
))
d.vec
<-
ssheet
[[
design.var
]]
d.vector
<-
ifelse
(
d.vec
==
case.var
,
1
,
0
)
# initialzing coverage .cov files list
covs
<-
list
()
sampleids
<-
as.list
(
ssheet
[[
idcol
]])
names
(
sampleids
)
<-
ssheet
[[
idcol
]]
for
(
i
in
ssheet
[[
idcol
]])
{
covs
[[
i
]]
<-
paste0
(
infolder
,
"/"
,
i
,
".bismark.cov"
)
}
saveRDS
(
object
=
covs
,
file
=
"covs_object.rds"
)
# creating object
myobj
<-
methRead
(
location
=
covs
,
sample.id
=
sampleids
,
assembly
=
assem
,
pipeline
=
pipeline.meth
,
treatment
=
d.vector
,
context
=
"CpG"
,
mincov
=
mincoverage
)
saveRDS
(
myobj
,
file
=
"Myobject.rds"
)
names
(
myobj
)
<-
ssheet
[[
idcol
]]
saveRDS
(
myobj
,
file
=
"Myobject_with_names.rds"
)
# subsetting if declared
if
(
isTRUE
(
do.subset
)){
my.win
=
GRanges
(
seqnames
=
chr.subset
,
ranges
=
IRanges
(
start
=
start.subset
,
end
=
end.subset
))
myobj
<-
selectByOverlap
(
myobj
,
my.win
)
}
saveRDS
(
myobj
,
file
=
"Myobject_with_names_aftersubset.rds"
)
# filtering on minimum coverage
myobj
<-
filterByCoverage
(
methylObj
=
myobj
,
lo.count
=
lowcount
,
lo.perc
=
lowperc
)
# Normalization
myobj
<-
normalizeCoverage
(
obj
=
myobj
)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder
<-
paste0
(
qcfolder
,
"Metrics/"
)
dir.create
(
path
=
metricsfolder
,
showWarnings
=
F
)
for
(
id
in
ssheet
[[
idcol
]])
{
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_CpG_pct_methylation_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getMethylationStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_Coverage_stats_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCoverageStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth
<-
unite
(
object
=
myobj
,
destrand
=
FALSE
)
# for debugging
saveRDS
(
meth
,
"savemeth.tmp.rds"
)
# Perform correlation
sink
(
paste0
(
qcfolder
,
proj.id
,
"_Correlations.txt"
))
getCorrelation
(
meth
,
plot
=
FALSE
)
sink
()
if
(
length
(
ssheet
[[
idcol
]])
<
15
){
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Correlations_pearson_pairwise.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCorrelation
(
meth
,
plot
=
TRUE
))
dev.off
()
}
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Clustering.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
clusterSamples
(
meth
,
dist
=
"euclidean"
,
plot
=
TRUE
,
method
=
"ward.D2"
)
dev.off
()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt
<-
PCASamples
(
meth
,
obj.return
=
T
,
screeplot
=
F
)
# extract PCs components
pcafolder
<-
paste0
(
qcfolder
,
"PCA/"
)
dir.create
(
path
=
pcafolder
,
showWarnings
=
F
)
pca_pc1_2
<-
as.data.frame
(
x
=
pca_compt
$
x
[,
1
:
2
])
for
(
myvars
in
plot.covariates
){
pca_pc1_2
$
condition
<-
as.factor
(
ssheet
[[
myvars
]])
png
(
filename
=
paste0
(
pcafolder
,
proj.id
,
"_PCA_"
,
myvars
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
ggplot
(
data
=
pca_pc1_2
,
mapping
=
aes
(
x
=
PC1
,
y
=
PC2
,
col
=
condition
,
label
=
rownames
(
pca_pc1_2
)))
+
geom_point
(
size
=
3
)
+
geom_text_repel
(
size
=
3
)
+
ggtitle
(
label
=
"Principal component analysis"
,
subtitle
=
myvars
)
+
theme
(
plot.title
=
element_text
(
size
=
16
,
face
=
"bold"
,
hjust
=
0.5
))
+
theme
(
plot.subtitle
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"italic"
,
color
=
"black"
))
+
theme
(
axis.title
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"bold"
,
color
=
"black"
))
+
theme
(
legend.text
=
element_text
(
size
=
8
,
hjust
=
0.5
))
+
theme
(
legend.title
=
element_blank
())
+
theme
(
axis.text
=
element_text
(
size
=
12
,
hjust
=
0.5
,
color
=
"black"
)))
dev.off
()
}
# retrieve and store % of methylation
perc.meth
<-
percMethylation
(
meth
)
saveRDS
(
perc.meth
,
"pctmethly.rds"
)
base
::
rownames
(
perc.meth
)
<-
paste0
(
meth
$
chr
,
"_"
,
meth
$
start
)
# Perform diff methylation
myDiff
=
calculateDiffMeth
(
meth
)
write.table
(
myDiff
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
difftest
<-
read.table
(
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
header
=
T
)
difftest
$
comparison
<-
proj.id
difftest
$
qvalue_r
<-
as.character
(
cut
(
x
=
difftest
$
qvalue
,
breaks
=
c
(
-1
,
1e-100
,
1e-10
,
1e-02
,
1
),
labels
=
c
(
"***"
,
"**"
,
"*"
,
"ns"
)))
myindex
<-
abs
(
difftest
$
meth.diff
)
<
delta.meth
difftest
$
meth.diff
<-
abs
(
difftest
$
meth.diff
)
difftest
$
qvalue_r
[
myindex
]
<-
"ns"
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
# Adding color list and annotations
library
(
RColorBrewer
)
color_list
<-
list
()
annrows
<-
NULL
anncols
<-
subset.data.frame
(
difftest
,
select
=
c
(
"qvalue_r"
,
"meth.diff"
))
base
::
rownames
(
anncols
)
<-
difftest
$
start
rows.annot.vars.cat
=
plot.categorical.vars
rows.annot.vars.con
=
plot.continuous.vars
if
(
!
is.null
(
rows.annot.vars.cat
)
|
!
is.null
(
rows.annot.vars.con
)){
rownames
(
ssheet
)
<-
ssheet
$
SampleID
annrows
<-
subset.data.frame
(
x
=
ssheet
,
select
=
c
(
rows.annot.vars.cat
,
rows.annot.vars.con
))
concolors
<-
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set1"
)
catcolors
<-
NULL
for
(
varcon
in
1
:
length
(
rows.annot.vars.con
))
{
color_list
[[
rows.annot.vars.con
[
varcon
]]]
<-
colorRampPalette
(
c
(
"lightgrey"
,
concolors
[
varcon
]))(
10
)
}
for
(
varcat
in
1
:
length
(
rows.annot.vars.cat
))
{
ncolor
<-
1
for
(
val
in
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]])[
order
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]]))]){
if
(
length
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]]))
>
9
){
catcolors
<-
colorRampPalette
(
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set3"
))(
length
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]])))
}
else
{
catcolors
<-
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set3"
)
}
color_list
[[
rows.annot.vars.cat
[
varcat
]]][[
val
]]
<-
catcolors
[
ncolor
]
ncolor
<-
ncolor
+
1
}
}
}
color_list
[[
"qvalue_r"
]]
=
c
(
"*"
=
"#6497b1"
,
"**"
=
"#03396c"
,
"***"
=
"#011f4b"
,
ns
=
"#c4cacf"
)
color_list
[[
"meth.diff"
]]
=
colorRampPalette
(
brewer.pal
(
n
=
11
,
"Reds"
))(
100
)
color_list
[[
"miR-126"
]]
<-
brewer.pal
(
n
=
9
,
"PuRd"
)
# Heatmap
pctmeth_matrix
<-
t
(
perc.meth
)
base
::
colnames
(
pctmeth_matrix
)
<-
gsub
(
x
=
base
::
colnames
(
pctmeth_matrix
),
pattern
=
"chr[0-9]+_"
,
replacement
=
""
)
pheatmap
(
mat
=
pctmeth_matrix
,
main
=
gsub
(
x
=
proj.id
,
pattern
=
"_"
,
replacement
=
" "
),
filename
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.pdf"
),
width
=
plot.width
,
height
=
plot.height
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annrows
,
cellwidth
=
cellw
,
cellheight
=
cellh
,
display_numbers
=
T
,
fontsize
=
fontsize
,
fontsize_number
=
fontnumsize
,
number_format
=
"%.0f"
,
#border_color = "#CBBEB5",
annotation_col
=
anncols
,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col
=
c
(
8
),
annotation_colors
=
color_list
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
saveRDS
(
object
=
pctmeth_matrix
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.rds"
))
saveRDS
(
object
=
list
(
anncol
=
anncols
,
annrow
=
annrows
,
anncolors
=
color_list
),
paste0
(
outfolder
,
proj.id
,
"_annotations_matrix_pheatmap.rds"
))
saveRDS
(
object
=
difftest
,
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.rds"
))
saveRDS
(
object
=
perc.meth
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.rds"
))
write.table
(
x
=
perc.meth
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.txt"
))
write.table
(
x
=
difftest
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.txt"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit.rds"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit_meth.rds"
))
}
\ No newline at end of file
Methylation/MethylAnalysis_bulk_v2.R
0 → 100644
View file @
8891fc61
MethylAnalysis_bulk_v2
<-
function
(
methcall.folder
=
NULL
,
# input folder with .cov files (bismark)
outfolder
=
NULL
,
# output folder (if not exist it will be created)
proj.id
=
"myproject"
,
# project id to prefix in putput files
samplesheet
=
NULL
,
sheetnum
=
1
,
design.var
=
"Disease"
,
# variable to subset samplesheet
case.var
=
"Case"
,
# 1 in treatment vector
control.var
=
"Control"
,
# 0 in treatment vector
idcol
=
"SampleID"
,
mincoverage
=
10
,
lowperc
=
5
,
lowcount
=
10
,
assem
=
"hg38"
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
pipeline.meth
=
"bismarkCoverage"
,
plot.covariates
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
,
"Tissue"
),
idstoremove
=
NULL
,
delta.meth
=
10
,
plot.categorical.vars
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
),
plot.continuous.vars
=
c
(
"miR-126"
,
"egfl7"
),
plot.height
=
9
,
plot.width
=
12
,
cellw
=
15
,
cellh
=
15
,
fontnumsize
=
5
,
fontsize
=
8
){
# load libraries
require
(
methylKit
)
require
(
ggplot2
)
require
(
reshape2
)
require
(
plyr
)
require
(
ggrepel
)
library
(
GenomicRanges
)
library
(
openxlsx
)
library
(
pheatmap
)
# check vars
if
(
is.null
(
methcall.folder
)){
stop
(
"Please set methcall.folder"
)
}
if
(
is.null
(
outfolder
)){
stop
(
"Please set outfolder"
)
}
if
(
is.null
(
samplesheet
)){
stop
(
"Please set samplesheet"
)
}
# Setup variables
infolder
=
paste0
(
methcall.folder
,
"/"
)
dir.create
(
infolder
,
showWarnings
=
F
)
outfolder
=
paste0
(
outfolder
,
"/"
)
dir.create
(
outfolder
,
showWarnings
=
F
)
qcfolder
<-
paste0
(
outfolder
,
"QC/"
)
dir.create
(
qcfolder
,
showWarnings
=
F
)
# reading sample sheet with metadata
ssheet
<-
read.xlsx
(
samplesheet
,
sheet
=
sheetnum
)
if
(
!
is.null
(
idstoremove
)){
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
!
ssheet
[[
idcol
]]
%in%
idstoremove
)
}
# defining design vector according to variable
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
ssheet
[[
design.var
]]
%in%
c
(
case.var
,
control.var
))
d.vec
<-
ssheet
[[
design.var
]]
d.vector
<-
ifelse
(
d.vec
==
case.var
,
1
,
0
)
# initialzing coverage .cov files list
covs
<-
list
()
sampleids
<-
as.list
(
ssheet
[[
idcol
]])
names
(
sampleids
)
<-
ssheet
[[
idcol
]]
for
(
i
in
ssheet
[[
idcol
]])
{
covs
[[
i
]]
<-
paste0
(
infolder
,
"/"
,
i
,
".bismark.cov"
)
}
saveRDS
(
object
=
covs
,
file
=
"covs_object.rds"
)
# creating object
myobj
<-
methRead
(
location
=
covs
,
sample.id
=
sampleids
,
assembly
=
assem
,
pipeline
=
pipeline.meth
,
treatment
=
d.vector
,
context
=
"CpG"
,
mincov
=
mincoverage
)
saveRDS
(
myobj
,
file
=
"Myobject.rds"
)
names
(
myobj
)
<-
ssheet
[[
idcol
]]
saveRDS
(
myobj
,
file
=
"Myobject_with_names.rds"
)
# subsetting if declared
if
(
isTRUE
(
do.subset
)){
my.win
=
GRanges
(
seqnames
=
chr.subset
,
ranges
=
IRanges
(
start
=
start.subset
,
end
=
end.subset
))
myobj
<-
selectByOverlap
(
myobj
,
my.win
)
}
saveRDS
(
myobj
,
file
=
"Myobject_with_names_aftersubset.rds"
)
# filtering on minimum coverage
myobj
<-
filterByCoverage
(
methylObj
=
myobj
,
lo.count
=
lowcount
,
lo.perc
=
lowperc
)
# Normalization
myobj
<-
normalizeCoverage
(
obj
=
myobj
)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder
<-
paste0
(
qcfolder
,
"Metrics/"
)
dir.create
(
path
=
metricsfolder
,
showWarnings
=
F
)
for
(
id
in
ssheet
[[
idcol
]])
{
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_CpG_pct_methylation_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getMethylationStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_Coverage_stats_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCoverageStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth
<-
unite
(
object
=
myobj
,
destrand
=
FALSE
)
# for debugging
saveRDS
(
meth
,
"savemeth.tmp.rds"
)
# Perform correlation
sink
(
paste0
(
qcfolder
,
proj.id
,
"_Correlations.txt"
))
getCorrelation
(
meth
,
plot
=
FALSE
)
sink
()
if
(
length
(
ssheet
[[
idcol
]])
<
15
){
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Correlations_pearson_pairwise.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCorrelation
(
meth
,
plot
=
TRUE
))
dev.off
()
}
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Clustering.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
clusterSamples
(
meth
,
dist
=
"euclidean"
,
plot
=
TRUE
,
method
=
"ward.D2"
)
dev.off
()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt
<-
PCASamples
(
meth
,
obj.return
=
T
,
screeplot
=
F
)
# extract PCs components
pcafolder
<-
paste0
(
qcfolder
,
"PCA/"
)
dir.create
(
path
=
pcafolder
,
showWarnings
=
F
)
pca_pc1_2
<-
as.data.frame
(
x
=
pca_compt
$
x
[,
1
:
2
])
for
(
myvars
in
plot.covariates
){
pca_pc1_2
$
condition
<-
as.factor
(
ssheet
[[
myvars
]])
png
(
filename
=
paste0
(
pcafolder
,
proj.id
,
"_PCA_"
,
myvars
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
ggplot
(
data
=
pca_pc1_2
,
mapping
=
aes
(
x
=
PC1
,
y
=
PC2
,
col
=
condition
,
label
=
rownames
(
pca_pc1_2
)))
+
geom_point
(
size
=
3
)
+
geom_text_repel
(
size
=
3
)
+
ggtitle
(
label
=
"Principal component analysis"
,
subtitle
=
myvars
)
+
theme
(
plot.title
=
element_text
(
size
=
16
,
face
=
"bold"
,
hjust
=
0.5
))
+
theme
(
plot.subtitle
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"italic"
,
color
=
"black"
))
+
theme
(
axis.title
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"bold"
,
color
=
"black"
))
+
theme
(
legend.text
=
element_text
(
size
=
8
,
hjust
=
0.5
))
+
theme
(
legend.title
=
element_blank
())
+
theme
(
axis.text
=
element_text
(
size
=
12
,
hjust
=
0.5
,
color
=
"black"
)))
dev.off
()
}
# retrieve and store % of methylation
perc.meth
<-
percMethylation
(
meth
)
saveRDS
(
perc.meth
,
"pctmethly.rds"
)
base
::
rownames
(
perc.meth
)
<-
paste0
(
meth
$
chr
,
"_"
,
meth
$
start
)
# Perform diff methylation
myDiff
=
calculateDiffMeth
(
meth
)
write.table
(
myDiff
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
difftest
<-
read.table
(
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
header
=
T
)
difftest
$
comparison
<-
proj.id
difftest
$
qvalue_r
<-
as.character
(
cut
(
x
=
difftest
$
qvalue
,
breaks
=
c
(
-1
,
1e-100
,
1e-10
,
1e-02
,
1
),
labels
=
c
(
"***"
,
"**"
,
"*"
,
"ns"
)))
myindex
<-
abs
(
difftest
$
meth.diff
)
<
delta.meth
difftest
$
meth.diff
<-
abs
(
difftest
$
meth.diff
)
difftest
$
qvalue_r
[
myindex
]
<-
"ns"
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
# Adding color list and annotations
library
(
RColorBrewer
)
color_list
<-
list
()
annrows
<-
NULL
anncols
<-
subset.data.frame
(
difftest
,
select
=
c
(
"qvalue_r"
,
"meth.diff"
))
base
::
rownames
(
anncols
)
<-
difftest
$
start
rows.annot.vars.cat
=
plot.categorical.vars
rows.annot.vars.con
=
plot.continuous.vars
# if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
# rownames(ssheet) <- ssheet$SampleID
# annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
# concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
# catcolors <- NULL
# for (varcon in 1:length(rows.annot.vars.con)) {
# color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
# }
# for (varcat in 1:length(rows.annot.vars.cat)) {
# ncolor <- 1
# for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
# if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
# catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
# }
# else{
# catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
# }
# color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
# ncolor <- ncolor + 1
# }
# }
# }
color_list
[[
"qvalue_r"
]]
=
c
(
"*"
=
"#6497b1"
,
"**"
=
"#03396c"
,
"***"
=
"#011f4b"
,
ns
=
"#c4cacf"
)
color_list
[[
"meth.diff"
]]
=
colorRampPalette
(
brewer.pal
(
n
=
11
,
"Reds"
))(
100
)
color_list
[[
"miR-126"
]]
<-
brewer.pal
(
n
=
9
,
"PuRd"
)
# Heatmap
pctmeth_matrix
<-
t
(
perc.meth
)
base
::
colnames
(
pctmeth_matrix
)
<-
gsub
(
x
=
base
::
colnames
(
pctmeth_matrix
),
pattern
=
"chr[0-9]+_"
,
replacement
=
""
)
pheatmap
(
mat
=
pctmeth_matrix
,
main
=
gsub
(
x
=
proj.id
,
pattern
=
"_"
,
replacement
=
" "
),
filename
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.pdf"
),
width
=
plot.width
,
height
=
plot.height
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annrows
,
cellwidth
=
cellw
,
cellheight
=
cellh
,
display_numbers
=
T
,
fontsize
=
fontsize
,
fontsize_number
=
fontnumsize
,
number_format
=
"%.0f"
,
#border_color = "#CBBEB5",
annotation_col
=
anncols
,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col
=
c
(
8
),
annotation_colors
=
color_list
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
saveRDS
(
object
=
pctmeth_matrix
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.rds"
))
saveRDS
(
object
=
list
(
anncol
=
anncols
,
annrow
=
annrows
,
anncolors
=
color_list
),
paste0
(
outfolder
,
proj.id
,
"_annotations_matrix_pheatmap.rds"
))
saveRDS
(
object
=
difftest
,
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.rds"
))
saveRDS
(
object
=
perc.meth
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.rds"
))
write.table
(
x
=
perc.meth
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.txt"
))
write.table
(
x
=
difftest
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.txt"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit.rds"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit_meth.rds"
))
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment