Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
casertanucera_leukemia2023
Commits
c987301f
Commit
c987301f
authored
Aug 04, 2023
by
Matteo Barcella
Browse files
remoing methylation script - wrong place
parent
7dd55d1e
Changes
1
Hide whitespace changes
Inline
Side-by-side
WES/MethylAnalysis_bulk_v3.R
deleted
100644 → 0
View file @
7dd55d1e
MethylAnalysis_bulk_v3
<-
function
(
methcall.folder
=
NULL
,
# input folder with .cov files (bismark)
outfolder
=
NULL
,
# output folder (if not exist it will be created)
proj.id
=
"myproject"
,
# project id to prefix in putput files
samplesheet
=
NULL
,
sheetnum
=
1
,
design.var
=
"Disease"
,
# variable to subset samplesheet
case.var
=
"Case"
,
# 1 in treatment vector
control.var
=
"Control"
,
# 0 in treatment vector
idcol
=
"SampleID"
,
mincoverage
=
10
,
lowperc
=
5
,
lowcount
=
10
,
assem
=
"hg38"
,
do.subset
=
T
,
chr.subset
=
"chr9"
,
start.subset
=
136668000
,
end.subset
=
136671000
,
pipeline.meth
=
"bismarkCoverage"
,
plot.covariates
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
,
"Tissue"
),
idstoremove
=
NULL
,
delta.meth
=
10
,
plot.categorical.vars
=
c
(
"Condition"
,
"Batch"
,
"MRD"
,
"Torelapse"
,
"Chemorefractory"
,
"Sex"
,
"CytoA"
),
plot.continuous.vars
=
c
(
"miR-126"
,
"egfl7"
),
plot.height
=
9
,
plot.width
=
12
,
cellw
=
15
,
cellh
=
15
,
fontnumsize
=
5
,
fontsize
=
8
){
# load libraries
require
(
methylKit
)
require
(
ggplot2
)
require
(
reshape2
)
require
(
plyr
)
require
(
ggrepel
)
library
(
GenomicRanges
)
library
(
openxlsx
)
library
(
pheatmap
)
# check vars
if
(
is.null
(
methcall.folder
)){
stop
(
"Please set methcall.folder"
)
}
if
(
is.null
(
outfolder
)){
stop
(
"Please set outfolder"
)
}
if
(
is.null
(
samplesheet
)){
stop
(
"Please set samplesheet"
)
}
# Setup variables
infolder
=
paste0
(
methcall.folder
,
"/"
)
dir.create
(
infolder
,
showWarnings
=
F
)
outfolder
=
paste0
(
outfolder
,
"/"
)
dir.create
(
outfolder
,
showWarnings
=
F
)
qcfolder
<-
paste0
(
outfolder
,
"QC/"
)
dir.create
(
qcfolder
,
showWarnings
=
F
)
# reading sample sheet with metadata
ssheet
<-
read.xlsx
(
samplesheet
,
sheet
=
sheetnum
)
if
(
!
is.null
(
idstoremove
)){
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
!
ssheet
[[
idcol
]]
%in%
idstoremove
)
}
# defining design vector according to variable
ssheet
<-
subset.data.frame
(
x
=
ssheet
,
subset
=
ssheet
[[
design.var
]]
%in%
c
(
case.var
,
control.var
))
d.vec
<-
ssheet
[[
design.var
]]
d.vector
<-
ifelse
(
d.vec
==
case.var
,
1
,
0
)
# initialzing coverage .cov files list
covs
<-
list
()
sampleids
<-
as.list
(
ssheet
[[
idcol
]])
names
(
sampleids
)
<-
ssheet
[[
idcol
]]
for
(
i
in
ssheet
[[
idcol
]])
{
covs
[[
i
]]
<-
paste0
(
infolder
,
"/"
,
i
,
".bismark.cov"
)
}
saveRDS
(
object
=
covs
,
file
=
"covs_object.rds"
)
# creating object
myobj
<-
methRead
(
location
=
covs
,
sample.id
=
sampleids
,
assembly
=
assem
,
pipeline
=
pipeline.meth
,
treatment
=
d.vector
,
context
=
"CpG"
,
mincov
=
mincoverage
)
saveRDS
(
myobj
,
file
=
"Myobject.rds"
)
names
(
myobj
)
<-
ssheet
[[
idcol
]]
saveRDS
(
myobj
,
file
=
"Myobject_with_names.rds"
)
# subsetting if declared
if
(
isTRUE
(
do.subset
)){
my.win
=
GRanges
(
seqnames
=
chr.subset
,
ranges
=
IRanges
(
start
=
start.subset
,
end
=
end.subset
))
myobj
<-
selectByOverlap
(
myobj
,
my.win
)
}
saveRDS
(
myobj
,
file
=
"Myobject_with_names_aftersubset.rds"
)
# filtering on minimum coverage
myobj
<-
filterByCoverage
(
methylObj
=
myobj
,
lo.count
=
lowcount
,
lo.perc
=
lowperc
)
# Normalization
myobj
<-
normalizeCoverage
(
obj
=
myobj
)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder
<-
paste0
(
qcfolder
,
"Metrics/"
)
dir.create
(
path
=
metricsfolder
,
showWarnings
=
F
)
for
(
id
in
ssheet
[[
idcol
]])
{
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_CpG_pct_methylation_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getMethylationStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
png
(
filename
=
paste0
(
metricsfolder
,
proj.id
,
"_Coverage_stats_sample_"
,
id
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCoverageStats
(
myobj
[[
id
]],
plot
=
TRUE
,
both.strands
=
FALSE
))
dev.off
()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth
<-
unite
(
object
=
myobj
,
destrand
=
FALSE
)
# for debugging
saveRDS
(
meth
,
"savemeth.tmp.rds"
)
# Perform correlation
sink
(
paste0
(
qcfolder
,
proj.id
,
"_Correlations.txt"
))
getCorrelation
(
meth
,
plot
=
FALSE
)
sink
()
if
(
length
(
ssheet
[[
idcol
]])
<
15
){
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Correlations_pearson_pairwise.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
getCorrelation
(
meth
,
plot
=
TRUE
))
dev.off
()
}
png
(
filename
=
paste0
(
qcfolder
,
proj.id
,
"_Clustering.png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
clusterSamples
(
meth
,
dist
=
"euclidean"
,
plot
=
TRUE
,
method
=
"ward.D2"
)
dev.off
()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt
<-
PCASamples
(
meth
,
obj.return
=
T
,
screeplot
=
F
)
# extract PCs components
pcafolder
<-
paste0
(
qcfolder
,
"PCA/"
)
dir.create
(
path
=
pcafolder
,
showWarnings
=
F
)
pca_pc1_2
<-
as.data.frame
(
x
=
pca_compt
$
x
[,
1
:
2
])
for
(
myvars
in
plot.covariates
){
pca_pc1_2
$
condition
<-
as.factor
(
ssheet
[[
myvars
]])
png
(
filename
=
paste0
(
pcafolder
,
proj.id
,
"_PCA_"
,
myvars
,
".png"
),
width
=
9
,
height
=
6
,
units
=
"in"
,
res
=
96
)
print
(
ggplot
(
data
=
pca_pc1_2
,
mapping
=
aes
(
x
=
PC1
,
y
=
PC2
,
col
=
condition
,
label
=
rownames
(
pca_pc1_2
)))
+
geom_point
(
size
=
3
)
+
geom_text_repel
(
size
=
3
)
+
ggtitle
(
label
=
"Principal component analysis"
,
subtitle
=
myvars
)
+
theme
(
plot.title
=
element_text
(
size
=
16
,
face
=
"bold"
,
hjust
=
0.5
))
+
theme
(
plot.subtitle
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"italic"
,
color
=
"black"
))
+
theme
(
axis.title
=
element_text
(
size
=
12
,
hjust
=
0.5
,
face
=
"bold"
,
color
=
"black"
))
+
theme
(
legend.text
=
element_text
(
size
=
8
,
hjust
=
0.5
))
+
theme
(
legend.title
=
element_blank
())
+
theme
(
axis.text
=
element_text
(
size
=
12
,
hjust
=
0.5
,
color
=
"black"
)))
dev.off
()
}
# retrieve and store % of methylation
perc.meth
<-
percMethylation
(
meth
)
saveRDS
(
perc.meth
,
"pctmethly.rds"
)
base
::
rownames
(
perc.meth
)
<-
paste0
(
meth
$
chr
,
"_"
,
meth
$
start
)
# Perform diff methylation
myDiff
=
calculateDiffMeth
(
meth
)
write.table
(
myDiff
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
difftest
<-
read.table
(
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
header
=
T
)
difftest
$
comparison
<-
proj.id
difftest
$
qvalue_r
<-
as.character
(
cut
(
x
=
difftest
$
qvalue
,
breaks
=
c
(
-1
,
1e-100
,
1e-10
,
1e-02
,
1
),
labels
=
c
(
"***"
,
"**"
,
"*"
,
"ns"
)))
myindex
<-
abs
(
difftest
$
meth.diff
)
<
delta.meth
difftest
$
meth.diff
<-
abs
(
difftest
$
meth.diff
)
difftest
$
qvalue_r
[
myindex
]
<-
"ns"
write.table
(
difftest
,
paste0
(
outfolder
,
proj.id
,
"_DiffMeth_single_CpG.txt"
),
row.names
=
F
)
# Adding color list and annotations
library
(
RColorBrewer
)
color_list
<-
list
()
annrows
<-
NULL
anncols
<-
subset.data.frame
(
difftest
,
select
=
c
(
"qvalue_r"
,
"meth.diff"
))
base
::
rownames
(
anncols
)
<-
difftest
$
start
rows.annot.vars.cat
=
plot.categorical.vars
rows.annot.vars.con
=
plot.continuous.vars
# if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
# rownames(ssheet) <- ssheet$SampleID
# annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
# concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
# catcolors <- NULL
# for (varcon in 1:length(rows.annot.vars.con)) {
# color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
# }
# for (varcat in 1:length(rows.annot.vars.cat)) {
# ncolor <- 1
# for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
# if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
# catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
# }
# else{
# catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
# }
# color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
# ncolor <- ncolor + 1
# }
# }
# }
color_list
[[
"qvalue_r"
]]
=
c
(
"*"
=
"#6497b1"
,
"**"
=
"#03396c"
,
"***"
=
"#011f4b"
,
ns
=
"#c4cacf"
)
color_list
[[
"meth.diff"
]]
=
colorRampPalette
(
brewer.pal
(
n
=
11
,
"Reds"
))(
100
)
color_list
[[
"miR-126"
]]
<-
brewer.pal
(
n
=
9
,
"PuRd"
)
# Heatmap
pctmeth_matrix
<-
t
(
perc.meth
)
base
::
colnames
(
pctmeth_matrix
)
<-
gsub
(
x
=
base
::
colnames
(
pctmeth_matrix
),
pattern
=
"chr[0-9]+_"
,
replacement
=
""
)
pheatmap
(
mat
=
pctmeth_matrix
,
main
=
gsub
(
x
=
proj.id
,
pattern
=
"_"
,
replacement
=
" "
),
filename
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.pdf"
),
width
=
plot.width
,
height
=
plot.height
,
na_col
=
"black"
,
cluster_cols
=
FALSE
,
cluster_rows
=
TRUE
,
annotation_row
=
annrows
,
cellwidth
=
cellw
,
cellheight
=
cellh
,
display_numbers
=
T
,
fontsize
=
fontsize
,
fontsize_number
=
fontnumsize
,
number_format
=
"%.0f"
,
#border_color = "#CBBEB5",
annotation_col
=
anncols
,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col
=
c
(
8
),
annotation_colors
=
color_list
,
color
=
c
(
"#F5F5F5"
,
"#EEEEEE"
,
"#CCCCCC"
,
"#999999"
,
"#666666"
,
"#333333"
,
"#000000"
),
breaks
=
c
(
0
,
10
,
20
,
30
,
50
,
70
,
90
,
100
)
)
saveRDS
(
object
=
pctmeth_matrix
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation_matrix_pheatmap.rds"
))
saveRDS
(
object
=
list
(
anncol
=
anncols
,
annrow
=
annrows
,
anncolors
=
color_list
),
paste0
(
outfolder
,
proj.id
,
"_annotations_matrix_pheatmap.rds"
))
saveRDS
(
object
=
difftest
,
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.rds"
))
saveRDS
(
object
=
perc.meth
,
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.rds"
))
write.table
(
x
=
perc.meth
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_percent_methylation.txt"
))
write.table
(
x
=
difftest
,
file
=
paste0
(
outfolder
,
proj.id
,
"_CpG_differential_methylation.txt"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit.rds"
))
saveRDS
(
myobj
,
file
=
paste0
(
outfolder
,
proj.id
,
"_methylkit_meth.rds"
))
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment