Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
casertanucera_leukemia2023
Commits
8891fc61
Commit
8891fc61
authored
1 year ago
by
Matteo Barcella
Browse files
Options
Download
Email Patches
Plain Diff
Adding scripts for differential methylation analysis and heatmap in manuscript
parent
0c684c53
master
No related merge requests found
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
Methylation/CpG_Heatmaps_Paper.R
+199
-0
Methylation/CpG_Heatmaps_Paper.R
Methylation/MethylAnalysis.R
+305
-0
Methylation/MethylAnalysis.R
Methylation/MethylAnalysis_bulk.R
+305
-0
Methylation/MethylAnalysis_bulk.R
Methylation/MethylAnalysis_bulk_v2.R
+304
-0
Methylation/MethylAnalysis_bulk_v2.R
with
1113 additions
and
0 deletions
+1113
-0
Methylation/CpG_Heatmaps_Paper.R
0 → 100644
View file @
8891fc61
#CpGs from position 136669244 to 13666449 (the entire fragment of the intron 4 putative transcription starting site),
#CpGs from position 136670325 to 136670386 (5’ of the intron 7) and
#CpGs from position 136670827 to 136670945 (3’ of the intron 7).
# load data:
source
(
"MethylAnalysis.R"
)
MethylAnalysis
(
methcall.folder
=
"input/"
,
outfolder
=
"Adults_vs_Prog"
,
samplesheet
=
"SampleSheets_BisulphiteExp_Sets.xlsx"
,
sheetnum
=
2
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
idcol
=
"SampleID"
,
design.var
=
"TestVar"
,
case.var
=
"Case"
,
control.var
=
"Controls"
,
mincoverage
=
10
,
idstoremove
=
c
(
"S150310"
),
assem
=
"hg38"
,
pipeline.meth
=
"bismarkCoverage"
,
proj.id
=
"Adults_vs_Prog_Case_vs_Control"
,
plot.covariates
=
c
(
"Condition"
,
"MRD"
,
"Cytogenetics"
,
"TestVar"
),
lowperc
=
5
,
lowcount
=
10
,
delta.meth
=
10
,
plot.height
=
12
,
plot.width
=
18
,
plot.categorical.vars
=
c
(
"Condition"
,
"Cytogenetics"
),
plot.continuous.vars
=
c
(
"miR-126"
),
cellw
=
14
,
cellh
=
8
,
fontnumsize
=
7
,
fontsize
=
7
)
library
(
openxlsx
)
library
(
pheatmap
)
library
(
RColorBrewer
)
# load data pct methylation
pctmeth_matrix
<-
readRDS
(
file
=
"Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_CpG_percent_methylation_matrix_pheatmap.rds"
)
# fixing colors
annotations
<-
readRDS
(
"Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_annotations_matrix_pheatmap.rds"
)
annotations2
<-
annotations
annotations2
$
anncolors
$
Condition
<-
c
(
"#006600"
,
"#929292"
,
"#c9c9c9"
,
"#b30101"
)
# #006600: verdescuro, #b30101: rosso scuro, grigio-scuro: #929292, grigiochiaro: #c9c9c9
names
(
annotations2
$
anncolors
$
Condition
)
<-
names
(
annotations
$
anncolors
$
Condition
)
annotations2
$
anncolors
$
Cytogenetics
<-
c
(
"#0080cc"
,
"#f6a145"
,
"white"
)
# https://www.color-hex.com/color-palette/96440
names
(
annotations2
$
anncolors
$
Cytogenetics
)
<-
names
(
annotations
$
anncolors
$
Cytogenetics
)
png
(
filename
=
"Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_CpG_percent_methylation_matrix_pheatmap_CUSTOM.png"
,
width
=
15
,
height
=
7
,
units
=
"in"
,
res
=
300
)
pheatmap
(
mat
=
pctmeth_matrix
,
width
=
15
,
height
=
7
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annotations2
$
annrow
,
cellwidth
=
14
,
cellheight
=
13
,
display_numbers
=
T
,
fontsize
=
10
,
fontsize_number
=
7
,
number_format
=
"%.0f"
,
border_color
=
FALSE
,
annotation_col
=
annotations2
$
anncol
,
legend
=
F
,
annotation_legend
=
TRUE
,
annotation_names_col
=
T
,
#labels_row = lrow,
#labels_col = lcol,
# gaps_row = c(23),
gaps_col
=
c
(
9
,
43
),
annotation_colors
=
annotations2
$
anncolors
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
dev.off
()
#################
# Disease 12-27 #
#################
source
(
"MethylAnalysis_bulk.R"
)
MethylAnalysis_bulk
(
methcall.folder
=
"input/"
,
outfolder
=
"S120167_Case_vs_Controls"
,
samplesheet
=
"SampleSheets_BisulphiteExp_Sets.xlsx"
,
sheetnum
=
4
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
idcol
=
"SampleID"
,
design.var
=
"TestVar"
,
case.var
=
"POP"
,
control.var
=
"DGN"
,
mincoverage
=
10
,
idstoremove
=
NULL
,
assem
=
"hg38"
,
pipeline.meth
=
"bismarkCoverage"
,
proj.id
=
"S120167_POP_vs_DGN"
,
plot.covariates
=
c
(
"Condition"
,
"TestVar"
),
lowperc
=
5
,
lowcount
=
10
,
delta.meth
=
10
,
plot.height
=
12
,
plot.width
=
18
,
plot.categorical.vars
=
c
(
"Condition"
),
plot.continuous.vars
=
c
(
"miR-126"
),
cellw
=
14
,
cellh
=
8
,
fontnumsize
=
7
,
fontsize
=
7
)
# RUNNING GFPLOW VS GFPHIGH to get diff meth results and stats
source
(
"MethylAnalysis_bulk_v2.R"
)
MethylAnalysis_bulk_v2
(
methcall.folder
=
"input/"
,
outfolder
=
"S120167_GFP_L_vs_GFP_H"
,
samplesheet
=
"SampleSheets_BisulphiteExp_Sets.xlsx"
,
sheetnum
=
4
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
idcol
=
"SampleID"
,
design.var
=
"Condition"
,
case.var
=
"GFP_L"
,
control.var
=
"GFP_H"
,
mincoverage
=
10
,
idstoremove
=
NULL
,
assem
=
"hg38"
,
pipeline.meth
=
"bismarkCoverage"
,
proj.id
=
"S120167_GFP_L_vs_GFP_H"
,
plot.covariates
=
c
(
"Condition"
),
lowperc
=
5
,
lowcount
=
10
,
delta.meth
=
10
,
plot.height
=
12
,
plot.width
=
18
,
plot.categorical.vars
=
c
(
"Condition"
),
plot.continuous.vars
=
c
(
"miR-126"
),
cellw
=
14
,
cellh
=
8
,
fontnumsize
=
7
,
fontsize
=
7
)
# FIXING THE IMAGE
pctmeth_matrix
<-
readRDS
(
file
=
"S120167_Case_vs_Controls/S120167_POP_vs_DGN_CpG_percent_methylation_matrix_pheatmap.rds"
)
annotations
<-
readRDS
(
"S120167_Case_vs_Controls/S120167_POP_vs_DGN_annotations_matrix_pheatmap.rds"
)
annotations_GFP_L_vs_GFP_H
<-
readRDS
(
"S120167_GFP_L_vs_GFP_H/S120167_GFP_L_vs_GFP_H_annotations_matrix_pheatmap.rds"
)
# replacing diff meth and GFP_L vs GFP_H qvalue_r in POP vs DGN chart
annotations
$
anncol
$
qvalue_r
<-
annotations_GFP_L_vs_GFP_H
$
anncol
$
qvalue_r
annotations
$
anncol
$
meth.diff
<-
annotations_GFP_L_vs_GFP_H
$
anncol
$
meth.diff
annotations2
<-
annotations
annotations2
$
anncolors
$
Condition
<-
c
(
"#656161"
,
"#0b64a1"
,
"#df4041"
)
# #006600: verdescuro, #b30101: rosso scuro, grigio-scuro: #929292, grigiochiaro: #c9c9c9
names
(
annotations2
$
anncolors
$
Condition
)
<-
names
(
annotations
$
anncolors
$
Condition
)
annotations2
$
anncolors
$
qvalue_r
<-
annotations_GFP_L_vs_GFP_H
$
anncolors
$
qvalue_r
annotations2
$
anncolors
$
meth.diff
<-
annotations_GFP_L_vs_GFP_H
$
anncolors
$
meth.diff
annotations2
$
annrow
$
`miR-126`
<-
NULL
# plot heatmap custom
png
(
filename
=
"S120167_Case_vs_Controls/S120167_GFP_Lvs_GFP_H_CpG_percent_methylation_matrix_pheatmap_CUSTOM_CpG_main.png"
,
width
=
15
,
height
=
5
,
units
=
"in"
,
res
=
300
)
pheatmap
(
mat
=
pctmeth_matrix
[,
pctmeth_matrix_ADULTs_CpG
],
#filename = "S10272_Case_vs_Controls/S10272_GFP_Lvs_GFP_H_CpG_percent_methylation_matrix_pheatmap_CUSTOM.png",
width
=
15
,
height
=
7
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annotations2
$
annrow
,
cellwidth
=
14
,
cellheight
=
13
,
display_numbers
=
T
,
fontsize
=
10
,
fontsize_number
=
7
,
number_format
=
"%.0f"
,
border_color
=
FALSE
,
annotation_col
=
annotations2
$
anncol
,
legend
=
F
,
annotation_legend
=
TRUE
,
annotation_names_col
=
T
,
#labels_row = lrow,
#labels_col = lcol,
# gaps_row = c(23),
gaps_col
=
c
(
9
,
43
),
annotation_colors
=
annotations2
$
anncolors
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
dev.off
()
This diff is collapsed.
Click to expand it.
Methylation/MethylAnalysis.R
0 → 100644
View file @
8891fc61
MethylAnalysis
<-
function
(
methcall.folder
=
NULL
,
# input folder with .cov files (bismark)
outfolder
=
NULL
,
# output folder (if not exist it will be created)
proj.id
=
"myproject"
,
# project id to prefix in putput files
samplesheet
=
NULL
,
sheetnum
=
1
,
design.var
=
"Disease"
,
# variable to subset samplesheet
case.var
=
"Case"
,
# 1 in treatment vector
control.var
=
"Control"
,
# 0 in treatment vector
idcol
=
"SampleID"
,
mincoverage
=
10
,
lowperc
=
5
,
lowcount
=
10
,
assem
=
"hg38"
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
pipeline.meth
=
"bismarkCoverage"
,
plot.covariates
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
,
"Tissue"
),
idstoremove
=
NULL
,
delta.meth
=
10
,
plot.categorical.vars
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
),
plot.continuous.vars
=
c
(
"miR-126"
,
"egfl7"
),
plot.height
=
9
,
plot.width
=
12
,
cellw
=
15
,
cellh
=
15
,
fontnumsize
=
5
,
fontsize
=
8
){
# load libraries
require
(
methylKit
)
require
(
ggplot2
)
require
(
reshape2
)
require
(
plyr
)
require
(
ggrepel
)
library
(
GenomicRanges
)
library
(
openxlsx
)
library
(
pheatmap
)
# check vars
if
(
is.null
(
methcall.folder
)){
stop
(
"Please set methcall.folder"
)
}
if
(
is.null
(
outfolder
)){
stop
(
"Please set outfolder"
)
}
if
(
is.null
(
samplesheet
)){
stop
(
"Please set samplesheet"
)
}
# Setup variables
infolder
=
paste0
(
methcall.folder
,
"/"
)
dir.create
(
infolder
,
showWarnings
=
F
)
outfolder
=
paste0
(
outfolder
,
"/"
)
dir.create
(
outfolder
,
showWarnings
=
F
)
qcfolder
<-
paste0
(
outfolder
,
"QC/"
)
dir.create
(
qcfolder
,
showWarnings
=
F
)
# reading sample sheet with metadata
ssheet
<-
read.xlsx
(
samplesheet
,
sheet
=
sheetnum
)
if
(
!
is.null
(
idstoremove
)){
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
!
ssheet
[[
idcol
]]
%in%
idstoremove
)
}
# defining design vector according to variable
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
ssheet
[[
design.var
]]
%in%
c
(
case.var
,
control.var
))
d.vec
<-
ssheet
[[
design.var
]]
d.vector
<-
ifelse
(
d.vec
==
case.var
,
1
,
0
)
# initialzing coverage .cov files list
covs
<-
list
()
sampleids
<-
as.list
(
ssheet
[[
idcol
]])
names
(
sampleids
)
<-
ssheet
[[
idcol
]]
for
(
i
in
ssheet
[[
idcol
]])
{
covs
[[
i
]]
<-
paste0
(
infolder
,
"/"
,
i
,
".bismark.cov"
)
}
saveRDS
(
object
=
covs
,
file
=
"covs_object.rds"
)
# creating object
myobj
<-
methRead
(
location
=
covs
,
sample.id
=
sampleids
,
assembly
=
assem
,
pipeline
=
pipeline.meth
,
treatment
=
d.vector
,
context
=
"CpG"
,
mincov
=
mincoverage
)
saveRDS
(
myobj
,
file
=
"Myobject.rds"
)
names
(
myobj
)
<-
ssheet
[[
idcol
]]
saveRDS
(
myobj
,
file
=
"Myobject_with_names.rds"
)
# subsetting if declared
if
(
isTRUE
(
do.subset
)){
my.win
=
GRanges
(
seqnames
=
chr.subset
,
ranges
=
IRanges
(
start
=
start.subset
,
end
=
end.subset
))
myobj
<-
selectByOverlap
(
myobj
,
my.win
)
}
saveRDS
(
myobj
,
file
=
"Myobject_with_names_aftersubset.rds"
)
# filtering on minimum coverage
myobj
<-
filterByCoverage
(
methylObj
=
myobj
,
lo.count
=
lowcount
,
lo.perc
=
lowperc
)
# Normalization
myobj
<-
normalizeCoverage
(
obj
=
myobj
)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder
<-
paste0
(
qcfolder
,
"Metrics/"
)
dir.create
(
path
=
metricsfolder
,
showWarnings
=
F
)
for
(
id
in
ssheet
[[
idcol
]])
{
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_CpG_pct_methylation_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getMethylationStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_Coverage_stats_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCoverageStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth
<-
unite
(
object
=
myobj
,
destrand
=
FALSE
)
# for debugging
saveRDS
(
meth
,
"savemeth.tmp.rds"
)
# Perform correlation
sink
(
paste0
(
qcfolder
,
proj.id
,
"_Correlations.txt"
))
getCorrelation
(
meth
,
plot
=
FALSE
)
sink
()
if
(
length
(
ssheet
[[
idcol
]])
<
15
){
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Correlations_pearson_pairwise.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCorrelation
(
meth
,
plot
=
TRUE
))
dev.off
()
}
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Clustering.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
clusterSamples
(
meth
,
dist
=
"euclidean"
,
plot
=
TRUE
,
method
=
"ward.D2"
)
dev.off
()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt
<-
PCASamples
(
meth
,
obj.return
=
T
,
screeplot
=
F
)
# extract PCs components
pcafolder
<-
paste0
(
qcfolder
,
"PCA/"
)
dir.create
(
path
=
pcafolder
,
showWarnings
=
F
)
pca_pc1_2
<-
as.data.frame
(
x
=
pca_compt
$
x
[,
1
:
2
])
for
(
myvars
in
plot.covariates
){
pca_pc1_2
$
condition
<-
as.factor
(
ssheet
[[
myvars
]])
png
(
filename
=
paste0
(
pcafolder
,
proj.id
,
"_PCA_"
,
myvars
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
ggplot
(
data
=
pca_pc1_2
,
mapping
=
aes
(
x
=
PC1
,
y
=
PC2
,
col
=
condition
,
label
=
rownames
(
pca_pc1_2
)))
+
geom_point
(
size
=
3
)
+
geom_text_repel
(
size
=
3
)
+
ggtitle
(
label
=
"Principal component analysis"
,
subtitle
=
myvars
)
+
theme
(
plot.title
=
element_text
(
size
=
16
,
face
=
"bold"
,
hjust
=
0.5
))
+
theme
(
plot.subtitle
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"italic"
,
color
=
"black"
))
+
theme
(
axis.title
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"bold"
,
color
=
"black"
))
+
theme
(
legend.text
=
element_text
(
size
=
8
,
hjust
=
0.5
))
+
theme
(
legend.title
=
element_blank
())
+
theme
(
axis.text
=
element_text
(
size
=
12
,
hjust
=
0.5
,
color
=
"black"
)))
dev.off
()
}
# retrieve and store % of methylation
perc.meth
<-
percMethylation
(
meth
)
saveRDS
(
perc.meth
,
"pctmethly.rds"
)
base
::
rownames
(
perc.meth
)
<-
paste0
(
meth
$
chr
,
"_"
,
meth
$
start
)
# Perform diff methylation
myDiff
=
calculateDiffMeth
(
meth
)
write.table
(
myDiff
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
difftest
<-
read.table
(
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
header
=
T
)
difftest
$
comparison
<-
proj.id
difftest
$
qvalue_r
<-
as.character
(
cut
(
x
=
difftest
$
qvalue
,
breaks
=
c
(
-1
,
1e-100
,
1e-10
,
1e-02
,
1
),
labels
=
c
(
"***"
,
"**"
,
"*"
,
"ns"
)))
myindex
<-
abs
(
difftest
$
meth.diff
)
<
delta.meth
difftest
$
meth.diff
<-
abs
(
difftest
$
meth.diff
)
difftest
$
qvalue_r
[
myindex
]
<-
"ns"
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
# Adding color list and annotations
library
(
RColorBrewer
)
color_list
<-
list
()
annrows
<-
NULL
anncols
<-
subset.data.frame
(
difftest
,
select
=
c
(
"qvalue_r"
,
"meth.diff"
))
base
::
rownames
(
anncols
)
<-
difftest
$
start
rows.annot.vars.cat
=
plot.categorical.vars
rows.annot.vars.con
=
plot.continuous.vars
if
(
!
is.null
(
rows.annot.vars.cat
)
|
!
is.null
(
rows.annot.vars.con
)){
rownames
(
ssheet
)
<-
ssheet
$
SampleID
annrows
<-
subset.data.frame
(
x
=
ssheet
,
select
=
c
(
rows.annot.vars.cat
,
rows.annot.vars.con
))
concolors
<-
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set1"
)
catcolors
<-
NULL
for
(
varcon
in
1
:
length
(
rows.annot.vars.con
))
{
color_list
[[
rows.annot.vars.con
[
varcon
]]]
<-
colorRampPalette
(
c
(
"lightgrey"
,
concolors
[
varcon
]))(
10
)
}
for
(
varcat
in
1
:
length
(
rows.annot.vars.cat
))
{
ncolor
<-
1
for
(
val
in
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]])[
order
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]]))]){
if
(
length
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]]))
>
9
){
catcolors
<-
colorRampPalette
(
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set3"
))(
length
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]])))
}
else
{
catcolors
<-
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set3"
)
}
color_list
[[
rows.annot.vars.cat
[
varcat
]]][[
val
]]
<-
catcolors
[
ncolor
]
ncolor
<-
ncolor
+
1
}
}
}
color_list
[[
"qvalue_r"
]]
=
c
(
"*"
=
"#6497b1"
,
"**"
=
"#03396c"
,
"***"
=
"#011f4b"
,
ns
=
"#c4cacf"
)
color_list
[[
"meth.diff"
]]
=
colorRampPalette
(
brewer.pal
(
n
=
11
,
"Reds"
))(
100
)
color_list
[[
"miR-126"
]]
<-
brewer.pal
(
n
=
9
,
"PuRd"
)
# Heatmap
pctmeth_matrix
<-
t
(
perc.meth
)
base
::
colnames
(
pctmeth_matrix
)
<-
gsub
(
x
=
base
::
colnames
(
pctmeth_matrix
),
pattern
=
"chr[0-9]+_"
,
replacement
=
""
)
pheatmap
(
mat
=
pctmeth_matrix
,
main
=
gsub
(
x
=
proj.id
,
pattern
=
"_"
,
replacement
=
" "
),
filename
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.pdf"
),
width
=
plot.width
,
height
=
plot.height
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annrows
,
cellwidth
=
cellw
,
cellheight
=
cellh
,
display_numbers
=
T
,
fontsize
=
fontsize
,
fontsize_number
=
fontnumsize
,
number_format
=
"%.0f"
,
#border_color = "#CBBEB5",
annotation_col
=
anncols
,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col
=
c
(
8
),
annotation_colors
=
color_list
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
saveRDS
(
object
=
pctmeth_matrix
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.rds"
))
saveRDS
(
object
=
list
(
anncol
=
anncols
,
annrow
=
annrows
,
anncolors
=
color_list
),
paste0
(
outfolder
,
proj.id
,
"_annotations_matrix_pheatmap.rds"
))
saveRDS
(
object
=
difftest
,
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.rds"
))
saveRDS
(
object
=
perc.meth
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.rds"
))
write.table
(
x
=
perc.meth
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.txt"
))
write.table
(
x
=
difftest
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.txt"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit.rds"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit_meth.rds"
))
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Methylation/MethylAnalysis_bulk.R
0 → 100644
View file @
8891fc61
MethylAnalysis_bulk
<-
function
(
methcall.folder
=
NULL
,
# input folder with .cov files (bismark)
outfolder
=
NULL
,
# output folder (if not exist it will be created)
proj.id
=
"myproject"
,
# project id to prefix in putput files
samplesheet
=
NULL
,
sheetnum
=
1
,
design.var
=
"Disease"
,
# variable to subset samplesheet
case.var
=
"Case"
,
# 1 in treatment vector
control.var
=
"Control"
,
# 0 in treatment vector
idcol
=
"SampleID"
,
mincoverage
=
10
,
lowperc
=
5
,
lowcount
=
10
,
assem
=
"hg38"
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
pipeline.meth
=
"bismarkCoverage"
,
plot.covariates
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
,
"Tissue"
),
idstoremove
=
NULL
,
delta.meth
=
10
,
plot.categorical.vars
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
),
plot.continuous.vars
=
c
(
"miR-126"
,
"egfl7"
),
plot.height
=
9
,
plot.width
=
12
,
cellw
=
15
,
cellh
=
15
,
fontnumsize
=
5
,
fontsize
=
8
){
# load libraries
require
(
methylKit
)
require
(
ggplot2
)
require
(
reshape2
)
require
(
plyr
)
require
(
ggrepel
)
library
(
GenomicRanges
)
library
(
openxlsx
)
library
(
pheatmap
)
# check vars
if
(
is.null
(
methcall.folder
)){
stop
(
"Please set methcall.folder"
)
}
if
(
is.null
(
outfolder
)){
stop
(
"Please set outfolder"
)
}
if
(
is.null
(
samplesheet
)){
stop
(
"Please set samplesheet"
)
}
# Setup variables
infolder
=
paste0
(
methcall.folder
,
"/"
)
dir.create
(
infolder
,
showWarnings
=
F
)
outfolder
=
paste0
(
outfolder
,
"/"
)
dir.create
(
outfolder
,
showWarnings
=
F
)
qcfolder
<-
paste0
(
outfolder
,
"QC/"
)
dir.create
(
qcfolder
,
showWarnings
=
F
)
# reading sample sheet with metadata
ssheet
<-
read.xlsx
(
samplesheet
,
sheet
=
sheetnum
)
if
(
!
is.null
(
idstoremove
)){
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
!
ssheet
[[
idcol
]]
%in%
idstoremove
)
}
# defining design vector according to variable
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
ssheet
[[
design.var
]]
%in%
c
(
case.var
,
control.var
))
d.vec
<-
ssheet
[[
design.var
]]
d.vector
<-
ifelse
(
d.vec
==
case.var
,
1
,
0
)
# initialzing coverage .cov files list
covs
<-
list
()
sampleids
<-
as.list
(
ssheet
[[
idcol
]])
names
(
sampleids
)
<-
ssheet
[[
idcol
]]
for
(
i
in
ssheet
[[
idcol
]])
{
covs
[[
i
]]
<-
paste0
(
infolder
,
"/"
,
i
,
".bismark.cov"
)
}
saveRDS
(
object
=
covs
,
file
=
"covs_object.rds"
)
# creating object
myobj
<-
methRead
(
location
=
covs
,
sample.id
=
sampleids
,
assembly
=
assem
,
pipeline
=
pipeline.meth
,
treatment
=
d.vector
,
context
=
"CpG"
,
mincov
=
mincoverage
)
saveRDS
(
myobj
,
file
=
"Myobject.rds"
)
names
(
myobj
)
<-
ssheet
[[
idcol
]]
saveRDS
(
myobj
,
file
=
"Myobject_with_names.rds"
)
# subsetting if declared
if
(
isTRUE
(
do.subset
)){
my.win
=
GRanges
(
seqnames
=
chr.subset
,
ranges
=
IRanges
(
start
=
start.subset
,
end
=
end.subset
))
myobj
<-
selectByOverlap
(
myobj
,
my.win
)
}
saveRDS
(
myobj
,
file
=
"Myobject_with_names_aftersubset.rds"
)
# filtering on minimum coverage
myobj
<-
filterByCoverage
(
methylObj
=
myobj
,
lo.count
=
lowcount
,
lo.perc
=
lowperc
)
# Normalization
myobj
<-
normalizeCoverage
(
obj
=
myobj
)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder
<-
paste0
(
qcfolder
,
"Metrics/"
)
dir.create
(
path
=
metricsfolder
,
showWarnings
=
F
)
for
(
id
in
ssheet
[[
idcol
]])
{
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_CpG_pct_methylation_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getMethylationStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_Coverage_stats_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCoverageStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth
<-
unite
(
object
=
myobj
,
destrand
=
FALSE
)
# for debugging
saveRDS
(
meth
,
"savemeth.tmp.rds"
)
# Perform correlation
sink
(
paste0
(
qcfolder
,
proj.id
,
"_Correlations.txt"
))
getCorrelation
(
meth
,
plot
=
FALSE
)
sink
()
if
(
length
(
ssheet
[[
idcol
]])
<
15
){
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Correlations_pearson_pairwise.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCorrelation
(
meth
,
plot
=
TRUE
))
dev.off
()
}
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Clustering.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
clusterSamples
(
meth
,
dist
=
"euclidean"
,
plot
=
TRUE
,
method
=
"ward.D2"
)
dev.off
()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt
<-
PCASamples
(
meth
,
obj.return
=
T
,
screeplot
=
F
)
# extract PCs components
pcafolder
<-
paste0
(
qcfolder
,
"PCA/"
)
dir.create
(
path
=
pcafolder
,
showWarnings
=
F
)
pca_pc1_2
<-
as.data.frame
(
x
=
pca_compt
$
x
[,
1
:
2
])
for
(
myvars
in
plot.covariates
){
pca_pc1_2
$
condition
<-
as.factor
(
ssheet
[[
myvars
]])
png
(
filename
=
paste0
(
pcafolder
,
proj.id
,
"_PCA_"
,
myvars
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
ggplot
(
data
=
pca_pc1_2
,
mapping
=
aes
(
x
=
PC1
,
y
=
PC2
,
col
=
condition
,
label
=
rownames
(
pca_pc1_2
)))
+
geom_point
(
size
=
3
)
+
geom_text_repel
(
size
=
3
)
+
ggtitle
(
label
=
"Principal component analysis"
,
subtitle
=
myvars
)
+
theme
(
plot.title
=
element_text
(
size
=
16
,
face
=
"bold"
,
hjust
=
0.5
))
+
theme
(
plot.subtitle
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"italic"
,
color
=
"black"
))
+
theme
(
axis.title
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"bold"
,
color
=
"black"
))
+
theme
(
legend.text
=
element_text
(
size
=
8
,
hjust
=
0.5
))
+
theme
(
legend.title
=
element_blank
())
+
theme
(
axis.text
=
element_text
(
size
=
12
,
hjust
=
0.5
,
color
=
"black"
)))
dev.off
()
}
# retrieve and store % of methylation
perc.meth
<-
percMethylation
(
meth
)
saveRDS
(
perc.meth
,
"pctmethly.rds"
)
base
::
rownames
(
perc.meth
)
<-
paste0
(
meth
$
chr
,
"_"
,
meth
$
start
)
# Perform diff methylation
myDiff
=
calculateDiffMeth
(
meth
)
write.table
(
myDiff
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
difftest
<-
read.table
(
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
header
=
T
)
difftest
$
comparison
<-
proj.id
difftest
$
qvalue_r
<-
as.character
(
cut
(
x
=
difftest
$
qvalue
,
breaks
=
c
(
-1
,
1e-100
,
1e-10
,
1e-02
,
1
),
labels
=
c
(
"***"
,
"**"
,
"*"
,
"ns"
)))
myindex
<-
abs
(
difftest
$
meth.diff
)
<
delta.meth
difftest
$
meth.diff
<-
abs
(
difftest
$
meth.diff
)
difftest
$
qvalue_r
[
myindex
]
<-
"ns"
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
# Adding color list and annotations
library
(
RColorBrewer
)
color_list
<-
list
()
annrows
<-
NULL
anncols
<-
subset.data.frame
(
difftest
,
select
=
c
(
"qvalue_r"
,
"meth.diff"
))
base
::
rownames
(
anncols
)
<-
difftest
$
start
rows.annot.vars.cat
=
plot.categorical.vars
rows.annot.vars.con
=
plot.continuous.vars
if
(
!
is.null
(
rows.annot.vars.cat
)
|
!
is.null
(
rows.annot.vars.con
)){
rownames
(
ssheet
)
<-
ssheet
$
SampleID
annrows
<-
subset.data.frame
(
x
=
ssheet
,
select
=
c
(
rows.annot.vars.cat
,
rows.annot.vars.con
))
concolors
<-
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set1"
)
catcolors
<-
NULL
for
(
varcon
in
1
:
length
(
rows.annot.vars.con
))
{
color_list
[[
rows.annot.vars.con
[
varcon
]]]
<-
colorRampPalette
(
c
(
"lightgrey"
,
concolors
[
varcon
]))(
10
)
}
for
(
varcat
in
1
:
length
(
rows.annot.vars.cat
))
{
ncolor
<-
1
for
(
val
in
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]])[
order
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]]))]){
if
(
length
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]]))
>
9
){
catcolors
<-
colorRampPalette
(
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set3"
))(
length
(
unique
(
ssheet
[[
rows.annot.vars.cat
[
varcat
]]])))
}
else
{
catcolors
<-
RColorBrewer
::
brewer.pal
(
n
=
9
,
name
=
"Set3"
)
}
color_list
[[
rows.annot.vars.cat
[
varcat
]]][[
val
]]
<-
catcolors
[
ncolor
]
ncolor
<-
ncolor
+
1
}
}
}
color_list
[[
"qvalue_r"
]]
=
c
(
"*"
=
"#6497b1"
,
"**"
=
"#03396c"
,
"***"
=
"#011f4b"
,
ns
=
"#c4cacf"
)
color_list
[[
"meth.diff"
]]
=
colorRampPalette
(
brewer.pal
(
n
=
11
,
"Reds"
))(
100
)
color_list
[[
"miR-126"
]]
<-
brewer.pal
(
n
=
9
,
"PuRd"
)
# Heatmap
pctmeth_matrix
<-
t
(
perc.meth
)
base
::
colnames
(
pctmeth_matrix
)
<-
gsub
(
x
=
base
::
colnames
(
pctmeth_matrix
),
pattern
=
"chr[0-9]+_"
,
replacement
=
""
)
pheatmap
(
mat
=
pctmeth_matrix
,
main
=
gsub
(
x
=
proj.id
,
pattern
=
"_"
,
replacement
=
" "
),
filename
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.pdf"
),
width
=
plot.width
,
height
=
plot.height
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annrows
,
cellwidth
=
cellw
,
cellheight
=
cellh
,
display_numbers
=
T
,
fontsize
=
fontsize
,
fontsize_number
=
fontnumsize
,
number_format
=
"%.0f"
,
#border_color = "#CBBEB5",
annotation_col
=
anncols
,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col
=
c
(
8
),
annotation_colors
=
color_list
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
saveRDS
(
object
=
pctmeth_matrix
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.rds"
))
saveRDS
(
object
=
list
(
anncol
=
anncols
,
annrow
=
annrows
,
anncolors
=
color_list
),
paste0
(
outfolder
,
proj.id
,
"_annotations_matrix_pheatmap.rds"
))
saveRDS
(
object
=
difftest
,
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.rds"
))
saveRDS
(
object
=
perc.meth
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.rds"
))
write.table
(
x
=
perc.meth
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.txt"
))
write.table
(
x
=
difftest
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.txt"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit.rds"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit_meth.rds"
))
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Methylation/MethylAnalysis_bulk_v2.R
0 → 100644
View file @
8891fc61
MethylAnalysis_bulk_v2
<-
function
(
methcall.folder
=
NULL
,
# input folder with .cov files (bismark)
outfolder
=
NULL
,
# output folder (if not exist it will be created)
proj.id
=
"myproject"
,
# project id to prefix in putput files
samplesheet
=
NULL
,
sheetnum
=
1
,
design.var
=
"Disease"
,
# variable to subset samplesheet
case.var
=
"Case"
,
# 1 in treatment vector
control.var
=
"Control"
,
# 0 in treatment vector
idcol
=
"SampleID"
,
mincoverage
=
10
,
lowperc
=
5
,
lowcount
=
10
,
assem
=
"hg38"
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
pipeline.meth
=
"bismarkCoverage"
,
plot.covariates
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
,
"Tissue"
),
idstoremove
=
NULL
,
delta.meth
=
10
,
plot.categorical.vars
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
),
plot.continuous.vars
=
c
(
"miR-126"
,
"egfl7"
),
plot.height
=
9
,
plot.width
=
12
,
cellw
=
15
,
cellh
=
15
,
fontnumsize
=
5
,
fontsize
=
8
){
# load libraries
require
(
methylKit
)
require
(
ggplot2
)
require
(
reshape2
)
require
(
plyr
)
require
(
ggrepel
)
library
(
GenomicRanges
)
library
(
openxlsx
)
library
(
pheatmap
)
# check vars
if
(
is.null
(
methcall.folder
)){
stop
(
"Please set methcall.folder"
)
}
if
(
is.null
(
outfolder
)){
stop
(
"Please set outfolder"
)
}
if
(
is.null
(
samplesheet
)){
stop
(
"Please set samplesheet"
)
}
# Setup variables
infolder
=
paste0
(
methcall.folder
,
"/"
)
dir.create
(
infolder
,
showWarnings
=
F
)
outfolder
=
paste0
(
outfolder
,
"/"
)
dir.create
(
outfolder
,
showWarnings
=
F
)
qcfolder
<-
paste0
(
outfolder
,
"QC/"
)
dir.create
(
qcfolder
,
showWarnings
=
F
)
# reading sample sheet with metadata
ssheet
<-
read.xlsx
(
samplesheet
,
sheet
=
sheetnum
)
if
(
!
is.null
(
idstoremove
)){
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
!
ssheet
[[
idcol
]]
%in%
idstoremove
)
}
# defining design vector according to variable
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
ssheet
[[
design.var
]]
%in%
c
(
case.var
,
control.var
))
d.vec
<-
ssheet
[[
design.var
]]
d.vector
<-
ifelse
(
d.vec
==
case.var
,
1
,
0
)
# initialzing coverage .cov files list
covs
<-
list
()
sampleids
<-
as.list
(
ssheet
[[
idcol
]])
names
(
sampleids
)
<-
ssheet
[[
idcol
]]
for
(
i
in
ssheet
[[
idcol
]])
{
covs
[[
i
]]
<-
paste0
(
infolder
,
"/"
,
i
,
".bismark.cov"
)
}
saveRDS
(
object
=
covs
,
file
=
"covs_object.rds"
)
# creating object
myobj
<-
methRead
(
location
=
covs
,
sample.id
=
sampleids
,
assembly
=
assem
,
pipeline
=
pipeline.meth
,
treatment
=
d.vector
,
context
=
"CpG"
,
mincov
=
mincoverage
)
saveRDS
(
myobj
,
file
=
"Myobject.rds"
)
names
(
myobj
)
<-
ssheet
[[
idcol
]]
saveRDS
(
myobj
,
file
=
"Myobject_with_names.rds"
)
# subsetting if declared
if
(
isTRUE
(
do.subset
)){
my.win
=
GRanges
(
seqnames
=
chr.subset
,
ranges
=
IRanges
(
start
=
start.subset
,
end
=
end.subset
))
myobj
<-
selectByOverlap
(
myobj
,
my.win
)
}
saveRDS
(
myobj
,
file
=
"Myobject_with_names_aftersubset.rds"
)
# filtering on minimum coverage
myobj
<-
filterByCoverage
(
methylObj
=
myobj
,
lo.count
=
lowcount
,
lo.perc
=
lowperc
)
# Normalization
myobj
<-
normalizeCoverage
(
obj
=
myobj
)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder
<-
paste0
(
qcfolder
,
"Metrics/"
)
dir.create
(
path
=
metricsfolder
,
showWarnings
=
F
)
for
(
id
in
ssheet
[[
idcol
]])
{
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_CpG_pct_methylation_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getMethylationStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_Coverage_stats_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCoverageStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth
<-
unite
(
object
=
myobj
,
destrand
=
FALSE
)
# for debugging
saveRDS
(
meth
,
"savemeth.tmp.rds"
)
# Perform correlation
sink
(
paste0
(
qcfolder
,
proj.id
,
"_Correlations.txt"
))
getCorrelation
(
meth
,
plot
=
FALSE
)
sink
()
if
(
length
(
ssheet
[[
idcol
]])
<
15
){
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Correlations_pearson_pairwise.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCorrelation
(
meth
,
plot
=
TRUE
))
dev.off
()
}
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Clustering.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
clusterSamples
(
meth
,
dist
=
"euclidean"
,
plot
=
TRUE
,
method
=
"ward.D2"
)
dev.off
()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt
<-
PCASamples
(
meth
,
obj.return
=
T
,
screeplot
=
F
)
# extract PCs components
pcafolder
<-
paste0
(
qcfolder
,
"PCA/"
)
dir.create
(
path
=
pcafolder
,
showWarnings
=
F
)
pca_pc1_2
<-
as.data.frame
(
x
=
pca_compt
$
x
[,
1
:
2
])
for
(
myvars
in
plot.covariates
){
pca_pc1_2
$
condition
<-
as.factor
(
ssheet
[[
myvars
]])
png
(
filename
=
paste0
(
pcafolder
,
proj.id
,
"_PCA_"
,
myvars
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
ggplot
(
data
=
pca_pc1_2
,
mapping
=
aes
(
x
=
PC1
,
y
=
PC2
,
col
=
condition
,
label
=
rownames
(
pca_pc1_2
)))
+
geom_point
(
size
=
3
)
+
geom_text_repel
(
size
=
3
)
+
ggtitle
(
label
=
"Principal component analysis"
,
subtitle
=
myvars
)
+
theme
(
plot.title
=
element_text
(
size
=
16
,
face
=
"bold"
,
hjust
=
0.5
))
+
theme
(
plot.subtitle
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"italic"
,
color
=
"black"
))
+
theme
(
axis.title
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"bold"
,
color
=
"black"
))
+
theme
(
legend.text
=
element_text
(
size
=
8
,
hjust
=
0.5
))
+
theme
(
legend.title
=
element_blank
())
+
theme
(
axis.text
=
element_text
(
size
=
12
,
hjust
=
0.5
,
color
=
"black"
)))
dev.off
()
}
# retrieve and store % of methylation
perc.meth
<-
percMethylation
(
meth
)
saveRDS
(
perc.meth
,
"pctmethly.rds"
)
base
::
rownames
(
perc.meth
)
<-
paste0
(
meth
$
chr
,
"_"
,
meth
$
start
)
# Perform diff methylation
myDiff
=
calculateDiffMeth
(
meth
)
write.table
(
myDiff
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
difftest
<-
read.table
(
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
header
=
T
)
difftest
$
comparison
<-
proj.id
difftest
$
qvalue_r
<-
as.character
(
cut
(
x
=
difftest
$
qvalue
,
breaks
=
c
(
-1
,
1e-100
,
1e-10
,
1e-02
,
1
),
labels
=
c
(
"***"
,
"**"
,
"*"
,
"ns"
)))
myindex
<-
abs
(
difftest
$
meth.diff
)
<
delta.meth
difftest
$
meth.diff
<-
abs
(
difftest
$
meth.diff
)
difftest
$
qvalue_r
[
myindex
]
<-
"ns"
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
# Adding color list and annotations
library
(
RColorBrewer
)
color_list
<-
list
()
annrows
<-
NULL
anncols
<-
subset.data.frame
(
difftest
,
select
=
c
(
"qvalue_r"
,
"meth.diff"
))
base
::
rownames
(
anncols
)
<-
difftest
$
start
rows.annot.vars.cat
=
plot.categorical.vars
rows.annot.vars.con
=
plot.continuous.vars
# if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
# rownames(ssheet) <- ssheet$SampleID
# annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
# concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
# catcolors <- NULL
# for (varcon in 1:length(rows.annot.vars.con)) {
# color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
# }
# for (varcat in 1:length(rows.annot.vars.cat)) {
# ncolor <- 1
# for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
# if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
# catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
# }
# else{
# catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
# }
# color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
# ncolor <- ncolor + 1
# }
# }
# }
color_list
[[
"qvalue_r"
]]
=
c
(
"*"
=
"#6497b1"
,
"**"
=
"#03396c"
,
"***"
=
"#011f4b"
,
ns
=
"#c4cacf"
)
color_list
[[
"meth.diff"
]]
=
colorRampPalette
(
brewer.pal
(
n
=
11
,
"Reds"
))(
100
)
color_list
[[
"miR-126"
]]
<-
brewer.pal
(
n
=
9
,
"PuRd"
)
# Heatmap
pctmeth_matrix
<-
t
(
perc.meth
)
base
::
colnames
(
pctmeth_matrix
)
<-
gsub
(
x
=
base
::
colnames
(
pctmeth_matrix
),
pattern
=
"chr[0-9]+_"
,
replacement
=
""
)
pheatmap
(
mat
=
pctmeth_matrix
,
main
=
gsub
(
x
=
proj.id
,
pattern
=
"_"
,
replacement
=
" "
),
filename
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.pdf"
),
width
=
plot.width
,
height
=
plot.height
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annrows
,
cellwidth
=
cellw
,
cellheight
=
cellh
,
display_numbers
=
T
,
fontsize
=
fontsize
,
fontsize_number
=
fontnumsize
,
number_format
=
"%.0f"
,
#border_color = "#CBBEB5",
annotation_col
=
anncols
,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col
=
c
(
8
),
annotation_colors
=
color_list
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
saveRDS
(
object
=
pctmeth_matrix
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.rds"
))
saveRDS
(
object
=
list
(
anncol
=
anncols
,
annrow
=
annrows
,
anncolors
=
color_list
),
paste0
(
outfolder
,
proj.id
,
"_annotations_matrix_pheatmap.rds"
))
saveRDS
(
object
=
difftest
,
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.rds"
))
saveRDS
(
object
=
perc.meth
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.rds"
))
write.table
(
x
=
perc.meth
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.txt"
))
write.table
(
x
=
difftest
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.txt"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit.rds"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit_meth.rds"
))
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment
Menu
Projects
Groups
Snippets
Help