Commit 8891fc61 authored by Matteo Barcella's avatar Matteo Barcella
Browse files

Adding scripts for differential methylation analysis and heatmap in manuscript

parent 0c684c53
No related merge requests found
Showing with 1113 additions and 0 deletions
+1113 -0
#CpGs from position 136669244 to 13666449 (the entire fragment of the intron 4 putative transcription starting site),
#CpGs from position 136670325 to 136670386 (5’ of the intron 7) and
#CpGs from position 136670827 to 136670945 (3’ of the intron 7).
# load data:
source("MethylAnalysis.R")
MethylAnalysis(methcall.folder = "input/",
outfolder = "Adults_vs_Prog",
samplesheet = "SampleSheets_BisulphiteExp_Sets.xlsx",
sheetnum = 2,
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
idcol = "SampleID",
design.var = "TestVar",
case.var = "Case",
control.var = "Controls",
mincoverage = 10,
idstoremove = c("S150310"),
assem = "hg38",
pipeline.meth = "bismarkCoverage",
proj.id = "Adults_vs_Prog_Case_vs_Control",
plot.covariates = c("Condition","MRD","Cytogenetics","TestVar"),
lowperc = 5,
lowcount = 10,
delta.meth = 10,
plot.height = 12, plot.width = 18,
plot.categorical.vars = c("Condition","Cytogenetics"),
plot.continuous.vars = c("miR-126"),cellw = 14, cellh = 8, fontnumsize = 7, fontsize = 7
)
library(openxlsx)
library(pheatmap)
library(RColorBrewer)
# load data pct methylation
pctmeth_matrix <- readRDS(file = "Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_CpG_percent_methylation_matrix_pheatmap.rds")
# fixing colors
annotations <- readRDS("Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_annotations_matrix_pheatmap.rds")
annotations2 <- annotations
annotations2$anncolors$Condition <- c("#006600","#929292","#c9c9c9","#b30101") # #006600: verdescuro, #b30101: rosso scuro, grigio-scuro: #929292, grigiochiaro: #c9c9c9
names(annotations2$anncolors$Condition) <- names(annotations$anncolors$Condition)
annotations2$anncolors$Cytogenetics <- c("#0080cc","#f6a145","white") # https://www.color-hex.com/color-palette/96440
names(annotations2$anncolors$Cytogenetics) <- names(annotations$anncolors$Cytogenetics)
png(filename = "Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_CpG_percent_methylation_matrix_pheatmap_CUSTOM.png",
width = 15, height = 7, units = "in", res = 300)
pheatmap(mat = pctmeth_matrix,
width = 15, height = 7,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annotations2$annrow,
cellwidth = 14,
cellheight = 13,
display_numbers = T,
fontsize = 10,
fontsize_number = 7,
number_format = "%.0f",
border_color = FALSE,
annotation_col = annotations2$anncol,
legend = F,
annotation_legend = TRUE,
annotation_names_col = T,
#labels_row = lrow,
#labels_col = lcol,
# gaps_row = c(23),
gaps_col = c(9,43),
annotation_colors = annotations2$anncolors,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
dev.off()
#################
# Disease 12-27 #
#################
source("MethylAnalysis_bulk.R")
MethylAnalysis_bulk(methcall.folder = "input/",
outfolder = "S120167_Case_vs_Controls",
samplesheet = "SampleSheets_BisulphiteExp_Sets.xlsx",
sheetnum = 4,
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
idcol = "SampleID",
design.var = "TestVar",
case.var = "POP",
control.var = "DGN",
mincoverage = 10,
idstoremove = NULL,
assem = "hg38",
pipeline.meth = "bismarkCoverage",
proj.id = "S120167_POP_vs_DGN",
plot.covariates = c("Condition","TestVar"),
lowperc = 5,
lowcount = 10,
delta.meth = 10,
plot.height = 12, plot.width = 18,
plot.categorical.vars = c("Condition"),
plot.continuous.vars = c("miR-126"),cellw = 14, cellh = 8, fontnumsize = 7, fontsize = 7
)
# RUNNING GFPLOW VS GFPHIGH to get diff meth results and stats
source("MethylAnalysis_bulk_v2.R")
MethylAnalysis_bulk_v2(methcall.folder = "input/",
outfolder = "S120167_GFP_L_vs_GFP_H",
samplesheet = "SampleSheets_BisulphiteExp_Sets.xlsx",
sheetnum = 4,
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
idcol = "SampleID",
design.var = "Condition",
case.var = "GFP_L",
control.var = "GFP_H",
mincoverage = 10,
idstoremove = NULL,
assem = "hg38",
pipeline.meth = "bismarkCoverage",
proj.id = "S120167_GFP_L_vs_GFP_H",
plot.covariates = c("Condition"),
lowperc = 5,
lowcount = 10,
delta.meth = 10,
plot.height = 12, plot.width = 18,
plot.categorical.vars = c("Condition"),
plot.continuous.vars = c("miR-126"),
cellw = 14,
cellh = 8,
fontnumsize = 7,
fontsize = 7
)
# FIXING THE IMAGE
pctmeth_matrix <- readRDS(file = "S120167_Case_vs_Controls/S120167_POP_vs_DGN_CpG_percent_methylation_matrix_pheatmap.rds")
annotations <- readRDS("S120167_Case_vs_Controls/S120167_POP_vs_DGN_annotations_matrix_pheatmap.rds")
annotations_GFP_L_vs_GFP_H <- readRDS("S120167_GFP_L_vs_GFP_H/S120167_GFP_L_vs_GFP_H_annotations_matrix_pheatmap.rds")
# replacing diff meth and GFP_L vs GFP_H qvalue_r in POP vs DGN chart
annotations$anncol$qvalue_r <- annotations_GFP_L_vs_GFP_H$anncol$qvalue_r
annotations$anncol$meth.diff <- annotations_GFP_L_vs_GFP_H$anncol$meth.diff
annotations2 <- annotations
annotations2$anncolors$Condition <- c("#656161","#0b64a1","#df4041") # #006600: verdescuro, #b30101: rosso scuro, grigio-scuro: #929292, grigiochiaro: #c9c9c9
names(annotations2$anncolors$Condition) <- names(annotations$anncolors$Condition)
annotations2$anncolors$qvalue_r <- annotations_GFP_L_vs_GFP_H$anncolors$qvalue_r
annotations2$anncolors$meth.diff <- annotations_GFP_L_vs_GFP_H$anncolors$meth.diff
annotations2$annrow$`miR-126` <- NULL
# plot heatmap custom
png(filename = "S120167_Case_vs_Controls/S120167_GFP_Lvs_GFP_H_CpG_percent_methylation_matrix_pheatmap_CUSTOM_CpG_main.png",
width = 15, height = 5, units = "in", res = 300)
pheatmap(mat = pctmeth_matrix[,pctmeth_matrix_ADULTs_CpG],
#filename = "S10272_Case_vs_Controls/S10272_GFP_Lvs_GFP_H_CpG_percent_methylation_matrix_pheatmap_CUSTOM.png",
width = 15, height = 7,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annotations2$annrow,
cellwidth = 14,
cellheight = 13,
display_numbers = T,
fontsize = 10,
fontsize_number = 7,
number_format = "%.0f",
border_color = FALSE,
annotation_col = annotations2$anncol,
legend = F,
annotation_legend = TRUE,
annotation_names_col = T,
#labels_row = lrow,
#labels_col = lcol,
# gaps_row = c(23),
gaps_col = c(9,43),
annotation_colors = annotations2$anncolors,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
dev.off()
MethylAnalysis <- function(methcall.folder = NULL, # input folder with .cov files (bismark)
outfolder = NULL, # output folder (if not exist it will be created)
proj.id = "myproject", # project id to prefix in putput files
samplesheet = NULL,
sheetnum = 1,
design.var = "Disease", # variable to subset samplesheet
case.var = "Case", # 1 in treatment vector
control.var = "Control", # 0 in treatment vector
idcol = "SampleID",
mincoverage = 10,
lowperc = 5,
lowcount = 10,
assem="hg38",
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
pipeline.meth = "bismarkCoverage",
plot.covariates = c("Condition","Batch","MRD",
"Torelapse","Chemorefractory",
"Sex","CytoA","Tissue"),
idstoremove = NULL,
delta.meth = 10,
plot.categorical.vars = c("Condition","Batch","MRD","Torelapse","Chemorefractory","Sex","CytoA"),
plot.continuous.vars = c("miR-126","egfl7"),
plot.height = 9,
plot.width = 12,
cellw = 15,
cellh = 15,
fontnumsize = 5,
fontsize = 8
){
# load libraries
require(methylKit)
require(ggplot2)
require(reshape2)
require(plyr)
require(ggrepel)
library(GenomicRanges)
library(openxlsx)
library(pheatmap)
# check vars
if(is.null(methcall.folder)){
stop("Please set methcall.folder")
}
if(is.null(outfolder)){
stop("Please set outfolder")
}
if(is.null(samplesheet)){
stop("Please set samplesheet")
}
# Setup variables
infolder = paste0(methcall.folder, "/")
dir.create(infolder, showWarnings = F)
outfolder = paste0(outfolder, "/")
dir.create(outfolder, showWarnings = F)
qcfolder <- paste0(outfolder, "QC/")
dir.create(qcfolder, showWarnings = F)
# reading sample sheet with metadata
ssheet <- read.xlsx(samplesheet, sheet = sheetnum)
if(!is.null(idstoremove)){
ssheet <- subset.data.frame(x = ssheet, subset = !ssheet[[idcol]] %in% idstoremove)
}
# defining design vector according to variable
ssheet <- subset.data.frame(x = ssheet, subset = ssheet[[design.var]] %in% c(case.var, control.var))
d.vec <- ssheet[[design.var]]
d.vector <- ifelse(d.vec == case.var, 1, 0)
# initialzing coverage .cov files list
covs <- list()
sampleids <- as.list(ssheet[[idcol]])
names(sampleids) <- ssheet[[idcol]]
for (i in ssheet[[idcol]]) {
covs[[i]] <- paste0(infolder, "/", i, ".bismark.cov")
}
saveRDS(object = covs, file = "covs_object.rds")
# creating object
myobj <- methRead(location = covs,
sample.id = sampleids,
assembly = assem,
pipeline = pipeline.meth,
treatment = d.vector,
context="CpG",
mincov = mincoverage)
saveRDS(myobj, file = "Myobject.rds")
names(myobj) <- ssheet[[idcol]]
saveRDS(myobj, file = "Myobject_with_names.rds")
# subsetting if declared
if(isTRUE(do.subset)){
my.win = GRanges(seqnames = chr.subset, ranges = IRanges(start = start.subset, end = end.subset))
myobj <- selectByOverlap(myobj,my.win)
}
saveRDS(myobj, file = "Myobject_with_names_aftersubset.rds")
# filtering on minimum coverage
myobj <- filterByCoverage(methylObj = myobj, lo.count=lowcount, lo.perc = lowperc)
# Normalization
myobj <- normalizeCoverage(obj = myobj)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder <- paste0(qcfolder, "Metrics/")
dir.create(path = metricsfolder, showWarnings = F)
for (id in ssheet[[idcol]]) {
png(filename = paste0(metricsfolder,proj.id,"_CpG_pct_methylation_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getMethylationStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
png(filename = paste0(metricsfolder, proj.id,"_Coverage_stats_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getCoverageStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth <- unite(object = myobj, destrand=FALSE) # for debugging
saveRDS(meth, "savemeth.tmp.rds")
# Perform correlation
sink(paste0(qcfolder, proj.id, "_Correlations.txt"))
getCorrelation(meth,plot=FALSE)
sink()
if(length(ssheet[[idcol]]) < 15){
png(filename = paste0(qcfolder,proj.id, "_Correlations_pearson_pairwise.png"),
width = 9, height = 6, units = "in", res = 96)
print(getCorrelation(meth,plot=TRUE))
dev.off()
}
png(filename = paste0(qcfolder, proj.id, "_Clustering.png"),
width = 9, height = 6, units = "in", res = 96)
clusterSamples(meth, dist="euclidean", plot=TRUE, method = "ward.D2")
dev.off()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt <- PCASamples(meth, obj.return = T, screeplot = F)
# extract PCs components
pcafolder <- paste0(qcfolder, "PCA/")
dir.create(path = pcafolder, showWarnings = F)
pca_pc1_2 <- as.data.frame(x = pca_compt$x[,1:2])
for(myvars in plot.covariates){
pca_pc1_2$condition <- as.factor(ssheet[[myvars]])
png(filename = paste0(pcafolder, proj.id, "_PCA_",myvars,".png"),
width = 9, height = 6, units = "in",res=96)
print(ggplot(data = pca_pc1_2,
mapping = aes(x = PC1, y=PC2, col=condition, label=rownames(pca_pc1_2))) +
geom_point(size=3) + geom_text_repel(size=3) + ggtitle(label = "Principal component analysis", subtitle = myvars) +
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5)) +
theme(plot.subtitle=element_text(size=12, hjust=0.5, face="italic", color="black")) +
theme(axis.title = element_text(size=12, hjust=0.5, face="bold", color="black")) +
theme(legend.text = element_text(size=8, hjust=0.5)) +
theme(legend.title = element_blank()) +
theme(axis.text = element_text(size=12, hjust=0.5, color="black")))
dev.off()
}
# retrieve and store % of methylation
perc.meth <- percMethylation(meth)
saveRDS(perc.meth,"pctmethly.rds")
base::rownames(perc.meth) <- paste0(meth$chr, "_", meth$start)
# Perform diff methylation
myDiff=calculateDiffMeth(meth)
write.table(myDiff,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
difftest <- read.table(paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), header = T)
difftest$comparison <- proj.id
difftest$qvalue_r <- as.character(cut(x = difftest$qvalue,
breaks = c(-1, 1e-100, 1e-10, 1e-02, 1),
labels = c("***","**","*","ns")))
myindex <- abs(difftest$meth.diff) < delta.meth
difftest$meth.diff <- abs(difftest$meth.diff)
difftest$qvalue_r[myindex] <- "ns"
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
# Adding color list and annotations
library(RColorBrewer)
color_list <- list()
annrows <- NULL
anncols <- subset.data.frame(difftest, select = c("qvalue_r","meth.diff"))
base::rownames(anncols) <- difftest$start
rows.annot.vars.cat = plot.categorical.vars
rows.annot.vars.con = plot.continuous.vars
if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
rownames(ssheet) <- ssheet$SampleID
annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
catcolors <- NULL
for (varcon in 1:length(rows.annot.vars.con)) {
color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
}
for (varcat in 1:length(rows.annot.vars.cat)) {
ncolor <- 1
for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
}
else{
catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
}
color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
ncolor <- ncolor + 1
}
}
}
color_list[["qvalue_r"]] = c("*" = "#6497b1", "**" = "#03396c", "***" = "#011f4b", ns= "#c4cacf")
color_list[["meth.diff"]] = colorRampPalette(brewer.pal(n = 11, "Reds"))(100)
color_list[["miR-126"]] <- brewer.pal(n = 9, "PuRd")
# Heatmap
pctmeth_matrix <- t(perc.meth)
base::colnames(pctmeth_matrix) <- gsub(x = base::colnames(pctmeth_matrix), pattern = "chr[0-9]+_", replacement = "")
pheatmap(mat = pctmeth_matrix, main = gsub(x = proj.id, pattern = "_", replacement = " "),
filename = paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.pdf"), width = plot.width, height = plot.height,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annrows,
cellwidth = cellw,
cellheight = cellh,
display_numbers = T,
fontsize = fontsize,
fontsize_number = fontnumsize,
number_format = "%.0f",
#border_color = "#CBBEB5",
annotation_col = anncols,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col = c(8),
annotation_colors = color_list,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
saveRDS(object = pctmeth_matrix, paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.rds"))
saveRDS(object = list(anncol = anncols, annrow = annrows, anncolors = color_list), paste0(outfolder, proj.id,"_annotations_matrix_pheatmap.rds"))
saveRDS(object = difftest, paste0(outfolder, proj.id,"_CpG_differential_methylation.rds"))
saveRDS(object = perc.meth, paste0(outfolder, proj.id,"_CpG_percent_methylation.rds"))
write.table(x = perc.meth, file = paste0(outfolder, proj.id,"_CpG_percent_methylation.txt"))
write.table(x = difftest, file = paste0(outfolder, proj.id,"_CpG_differential_methylation.txt"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit.rds"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit_meth.rds"))
}
\ No newline at end of file
MethylAnalysis_bulk <- function(methcall.folder = NULL, # input folder with .cov files (bismark)
outfolder = NULL, # output folder (if not exist it will be created)
proj.id = "myproject", # project id to prefix in putput files
samplesheet = NULL,
sheetnum = 1,
design.var = "Disease", # variable to subset samplesheet
case.var = "Case", # 1 in treatment vector
control.var = "Control", # 0 in treatment vector
idcol = "SampleID",
mincoverage = 10,
lowperc = 5,
lowcount = 10,
assem="hg38",
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
pipeline.meth = "bismarkCoverage",
plot.covariates = c("Condition","Batch","MRD",
"Torelapse","Chemorefractory",
"Sex","CytoA","Tissue"),
idstoremove = NULL,
delta.meth = 10,
plot.categorical.vars = c("Condition","Batch","MRD","Torelapse","Chemorefractory","Sex","CytoA"),
plot.continuous.vars = c("miR-126","egfl7"),
plot.height = 9,
plot.width = 12,
cellw = 15,
cellh = 15,
fontnumsize = 5,
fontsize = 8
){
# load libraries
require(methylKit)
require(ggplot2)
require(reshape2)
require(plyr)
require(ggrepel)
library(GenomicRanges)
library(openxlsx)
library(pheatmap)
# check vars
if(is.null(methcall.folder)){
stop("Please set methcall.folder")
}
if(is.null(outfolder)){
stop("Please set outfolder")
}
if(is.null(samplesheet)){
stop("Please set samplesheet")
}
# Setup variables
infolder = paste0(methcall.folder, "/")
dir.create(infolder, showWarnings = F)
outfolder = paste0(outfolder, "/")
dir.create(outfolder, showWarnings = F)
qcfolder <- paste0(outfolder, "QC/")
dir.create(qcfolder, showWarnings = F)
# reading sample sheet with metadata
ssheet <- read.xlsx(samplesheet, sheet = sheetnum)
if(!is.null(idstoremove)){
ssheet <- subset.data.frame(x = ssheet, subset = !ssheet[[idcol]] %in% idstoremove)
}
# defining design vector according to variable
ssheet <- subset.data.frame(x = ssheet, subset = ssheet[[design.var]] %in% c(case.var, control.var))
d.vec <- ssheet[[design.var]]
d.vector <- ifelse(d.vec == case.var, 1, 0)
# initialzing coverage .cov files list
covs <- list()
sampleids <- as.list(ssheet[[idcol]])
names(sampleids) <- ssheet[[idcol]]
for (i in ssheet[[idcol]]) {
covs[[i]] <- paste0(infolder, "/", i, ".bismark.cov")
}
saveRDS(object = covs, file = "covs_object.rds")
# creating object
myobj <- methRead(location = covs,
sample.id = sampleids,
assembly = assem,
pipeline = pipeline.meth,
treatment = d.vector,
context="CpG",
mincov = mincoverage)
saveRDS(myobj, file = "Myobject.rds")
names(myobj) <- ssheet[[idcol]]
saveRDS(myobj, file = "Myobject_with_names.rds")
# subsetting if declared
if(isTRUE(do.subset)){
my.win = GRanges(seqnames = chr.subset, ranges = IRanges(start = start.subset, end = end.subset))
myobj <- selectByOverlap(myobj,my.win)
}
saveRDS(myobj, file = "Myobject_with_names_aftersubset.rds")
# filtering on minimum coverage
myobj <- filterByCoverage(methylObj = myobj, lo.count=lowcount, lo.perc = lowperc)
# Normalization
myobj <- normalizeCoverage(obj = myobj)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder <- paste0(qcfolder, "Metrics/")
dir.create(path = metricsfolder, showWarnings = F)
for (id in ssheet[[idcol]]) {
png(filename = paste0(metricsfolder,proj.id,"_CpG_pct_methylation_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getMethylationStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
png(filename = paste0(metricsfolder, proj.id,"_Coverage_stats_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getCoverageStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth <- unite(object = myobj, destrand=FALSE) # for debugging
saveRDS(meth, "savemeth.tmp.rds")
# Perform correlation
sink(paste0(qcfolder, proj.id, "_Correlations.txt"))
getCorrelation(meth,plot=FALSE)
sink()
if(length(ssheet[[idcol]]) < 15){
png(filename = paste0(qcfolder,proj.id, "_Correlations_pearson_pairwise.png"),
width = 9, height = 6, units = "in", res = 96)
print(getCorrelation(meth,plot=TRUE))
dev.off()
}
png(filename = paste0(qcfolder, proj.id, "_Clustering.png"),
width = 9, height = 6, units = "in", res = 96)
clusterSamples(meth, dist="euclidean", plot=TRUE, method = "ward.D2")
dev.off()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt <- PCASamples(meth, obj.return = T, screeplot = F)
# extract PCs components
pcafolder <- paste0(qcfolder, "PCA/")
dir.create(path = pcafolder, showWarnings = F)
pca_pc1_2 <- as.data.frame(x = pca_compt$x[,1:2])
for(myvars in plot.covariates){
pca_pc1_2$condition <- as.factor(ssheet[[myvars]])
png(filename = paste0(pcafolder, proj.id, "_PCA_",myvars,".png"),
width = 9, height = 6, units = "in",res=96)
print(ggplot(data = pca_pc1_2,
mapping = aes(x = PC1, y=PC2, col=condition, label=rownames(pca_pc1_2))) +
geom_point(size=3) + geom_text_repel(size=3) + ggtitle(label = "Principal component analysis", subtitle = myvars) +
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5)) +
theme(plot.subtitle=element_text(size=12, hjust=0.5, face="italic", color="black")) +
theme(axis.title = element_text(size=12, hjust=0.5, face="bold", color="black")) +
theme(legend.text = element_text(size=8, hjust=0.5)) +
theme(legend.title = element_blank()) +
theme(axis.text = element_text(size=12, hjust=0.5, color="black")))
dev.off()
}
# retrieve and store % of methylation
perc.meth <- percMethylation(meth)
saveRDS(perc.meth,"pctmethly.rds")
base::rownames(perc.meth) <- paste0(meth$chr, "_", meth$start)
# Perform diff methylation
myDiff=calculateDiffMeth(meth)
write.table(myDiff,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
difftest <- read.table(paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), header = T)
difftest$comparison <- proj.id
difftest$qvalue_r <- as.character(cut(x = difftest$qvalue,
breaks = c(-1, 1e-100, 1e-10, 1e-02, 1),
labels = c("***","**","*","ns")))
myindex <- abs(difftest$meth.diff) < delta.meth
difftest$meth.diff <- abs(difftest$meth.diff)
difftest$qvalue_r[myindex] <- "ns"
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
# Adding color list and annotations
library(RColorBrewer)
color_list <- list()
annrows <- NULL
anncols <- subset.data.frame(difftest, select = c("qvalue_r","meth.diff"))
base::rownames(anncols) <- difftest$start
rows.annot.vars.cat = plot.categorical.vars
rows.annot.vars.con = plot.continuous.vars
if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
rownames(ssheet) <- ssheet$SampleID
annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
catcolors <- NULL
for (varcon in 1:length(rows.annot.vars.con)) {
color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
}
for (varcat in 1:length(rows.annot.vars.cat)) {
ncolor <- 1
for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
}
else{
catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
}
color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
ncolor <- ncolor + 1
}
}
}
color_list[["qvalue_r"]] = c("*" = "#6497b1", "**" = "#03396c", "***" = "#011f4b", ns= "#c4cacf")
color_list[["meth.diff"]] = colorRampPalette(brewer.pal(n = 11, "Reds"))(100)
color_list[["miR-126"]] <- brewer.pal(n = 9, "PuRd")
# Heatmap
pctmeth_matrix <- t(perc.meth)
base::colnames(pctmeth_matrix) <- gsub(x = base::colnames(pctmeth_matrix), pattern = "chr[0-9]+_", replacement = "")
pheatmap(mat = pctmeth_matrix, main = gsub(x = proj.id, pattern = "_", replacement = " "),
filename = paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.pdf"), width = plot.width, height = plot.height,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annrows,
cellwidth = cellw,
cellheight = cellh,
display_numbers = T,
fontsize = fontsize,
fontsize_number = fontnumsize,
number_format = "%.0f",
#border_color = "#CBBEB5",
annotation_col = anncols,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col = c(8),
annotation_colors = color_list,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
saveRDS(object = pctmeth_matrix, paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.rds"))
saveRDS(object = list(anncol = anncols, annrow = annrows, anncolors = color_list), paste0(outfolder, proj.id,"_annotations_matrix_pheatmap.rds"))
saveRDS(object = difftest, paste0(outfolder, proj.id,"_CpG_differential_methylation.rds"))
saveRDS(object = perc.meth, paste0(outfolder, proj.id,"_CpG_percent_methylation.rds"))
write.table(x = perc.meth, file = paste0(outfolder, proj.id,"_CpG_percent_methylation.txt"))
write.table(x = difftest, file = paste0(outfolder, proj.id,"_CpG_differential_methylation.txt"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit.rds"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit_meth.rds"))
}
\ No newline at end of file
MethylAnalysis_bulk_v2 <- function(methcall.folder = NULL, # input folder with .cov files (bismark)
outfolder = NULL, # output folder (if not exist it will be created)
proj.id = "myproject", # project id to prefix in putput files
samplesheet = NULL,
sheetnum = 1,
design.var = "Disease", # variable to subset samplesheet
case.var = "Case", # 1 in treatment vector
control.var = "Control", # 0 in treatment vector
idcol = "SampleID",
mincoverage = 10,
lowperc = 5,
lowcount = 10,
assem="hg38",
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
pipeline.meth = "bismarkCoverage",
plot.covariates = c("Condition","Batch","MRD",
"Torelapse","Chemorefractory",
"Sex","CytoA","Tissue"),
idstoremove = NULL,
delta.meth = 10,
plot.categorical.vars = c("Condition","Batch","MRD","Torelapse","Chemorefractory","Sex","CytoA"),
plot.continuous.vars = c("miR-126","egfl7"),
plot.height = 9,
plot.width = 12,
cellw = 15,
cellh = 15,
fontnumsize = 5,
fontsize = 8
){
# load libraries
require(methylKit)
require(ggplot2)
require(reshape2)
require(plyr)
require(ggrepel)
library(GenomicRanges)
library(openxlsx)
library(pheatmap)
# check vars
if(is.null(methcall.folder)){
stop("Please set methcall.folder")
}
if(is.null(outfolder)){
stop("Please set outfolder")
}
if(is.null(samplesheet)){
stop("Please set samplesheet")
}
# Setup variables
infolder = paste0(methcall.folder, "/")
dir.create(infolder, showWarnings = F)
outfolder = paste0(outfolder, "/")
dir.create(outfolder, showWarnings = F)
qcfolder <- paste0(outfolder, "QC/")
dir.create(qcfolder, showWarnings = F)
# reading sample sheet with metadata
ssheet <- read.xlsx(samplesheet, sheet = sheetnum)
if(!is.null(idstoremove)){
ssheet <- subset.data.frame(x = ssheet, subset = !ssheet[[idcol]] %in% idstoremove)
}
# defining design vector according to variable
ssheet <- subset.data.frame(x = ssheet, subset = ssheet[[design.var]] %in% c(case.var, control.var))
d.vec <- ssheet[[design.var]]
d.vector <- ifelse(d.vec == case.var, 1, 0)
# initialzing coverage .cov files list
covs <- list()
sampleids <- as.list(ssheet[[idcol]])
names(sampleids) <- ssheet[[idcol]]
for (i in ssheet[[idcol]]) {
covs[[i]] <- paste0(infolder, "/", i, ".bismark.cov")
}
saveRDS(object = covs, file = "covs_object.rds")
# creating object
myobj <- methRead(location = covs,
sample.id = sampleids,
assembly = assem,
pipeline = pipeline.meth,
treatment = d.vector,
context="CpG",
mincov = mincoverage)
saveRDS(myobj, file = "Myobject.rds")
names(myobj) <- ssheet[[idcol]]
saveRDS(myobj, file = "Myobject_with_names.rds")
# subsetting if declared
if(isTRUE(do.subset)){
my.win = GRanges(seqnames = chr.subset, ranges = IRanges(start = start.subset, end = end.subset))
myobj <- selectByOverlap(myobj,my.win)
}
saveRDS(myobj, file = "Myobject_with_names_aftersubset.rds")
# filtering on minimum coverage
myobj <- filterByCoverage(methylObj = myobj, lo.count=lowcount, lo.perc = lowperc)
# Normalization
myobj <- normalizeCoverage(obj = myobj)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder <- paste0(qcfolder, "Metrics/")
dir.create(path = metricsfolder, showWarnings = F)
for (id in ssheet[[idcol]]) {
png(filename = paste0(metricsfolder,proj.id,"_CpG_pct_methylation_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getMethylationStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
png(filename = paste0(metricsfolder, proj.id,"_Coverage_stats_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getCoverageStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth <- unite(object = myobj, destrand=FALSE) # for debugging
saveRDS(meth, "savemeth.tmp.rds")
# Perform correlation
sink(paste0(qcfolder, proj.id, "_Correlations.txt"))
getCorrelation(meth,plot=FALSE)
sink()
if(length(ssheet[[idcol]]) < 15){
png(filename = paste0(qcfolder,proj.id, "_Correlations_pearson_pairwise.png"),
width = 9, height = 6, units = "in", res = 96)
print(getCorrelation(meth,plot=TRUE))
dev.off()
}
png(filename = paste0(qcfolder, proj.id, "_Clustering.png"),
width = 9, height = 6, units = "in", res = 96)
clusterSamples(meth, dist="euclidean", plot=TRUE, method = "ward.D2")
dev.off()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt <- PCASamples(meth, obj.return = T, screeplot = F)
# extract PCs components
pcafolder <- paste0(qcfolder, "PCA/")
dir.create(path = pcafolder, showWarnings = F)
pca_pc1_2 <- as.data.frame(x = pca_compt$x[,1:2])
for(myvars in plot.covariates){
pca_pc1_2$condition <- as.factor(ssheet[[myvars]])
png(filename = paste0(pcafolder, proj.id, "_PCA_",myvars,".png"),
width = 9, height = 6, units = "in",res=96)
print(ggplot(data = pca_pc1_2,
mapping = aes(x = PC1, y=PC2, col=condition, label=rownames(pca_pc1_2))) +
geom_point(size=3) + geom_text_repel(size=3) + ggtitle(label = "Principal component analysis", subtitle = myvars) +
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5)) +
theme(plot.subtitle=element_text(size=12, hjust=0.5, face="italic", color="black")) +
theme(axis.title = element_text(size=12, hjust=0.5, face="bold", color="black")) +
theme(legend.text = element_text(size=8, hjust=0.5)) +
theme(legend.title = element_blank()) +
theme(axis.text = element_text(size=12, hjust=0.5, color="black")))
dev.off()
}
# retrieve and store % of methylation
perc.meth <- percMethylation(meth)
saveRDS(perc.meth,"pctmethly.rds")
base::rownames(perc.meth) <- paste0(meth$chr, "_", meth$start)
# Perform diff methylation
myDiff=calculateDiffMeth(meth)
write.table(myDiff,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
difftest <- read.table(paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), header = T)
difftest$comparison <- proj.id
difftest$qvalue_r <- as.character(cut(x = difftest$qvalue,
breaks = c(-1, 1e-100, 1e-10, 1e-02, 1),
labels = c("***","**","*","ns")))
myindex <- abs(difftest$meth.diff) < delta.meth
difftest$meth.diff <- abs(difftest$meth.diff)
difftest$qvalue_r[myindex] <- "ns"
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
# Adding color list and annotations
library(RColorBrewer)
color_list <- list()
annrows <- NULL
anncols <- subset.data.frame(difftest, select = c("qvalue_r","meth.diff"))
base::rownames(anncols) <- difftest$start
rows.annot.vars.cat = plot.categorical.vars
rows.annot.vars.con = plot.continuous.vars
# if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
# rownames(ssheet) <- ssheet$SampleID
# annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
# concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
# catcolors <- NULL
# for (varcon in 1:length(rows.annot.vars.con)) {
# color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
# }
# for (varcat in 1:length(rows.annot.vars.cat)) {
# ncolor <- 1
# for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
# if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
# catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
# }
# else{
# catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
# }
# color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
# ncolor <- ncolor + 1
# }
# }
# }
color_list[["qvalue_r"]] = c("*" = "#6497b1", "**" = "#03396c", "***" = "#011f4b", ns= "#c4cacf")
color_list[["meth.diff"]] = colorRampPalette(brewer.pal(n = 11, "Reds"))(100)
color_list[["miR-126"]] <- brewer.pal(n = 9, "PuRd")
# Heatmap
pctmeth_matrix <- t(perc.meth)
base::colnames(pctmeth_matrix) <- gsub(x = base::colnames(pctmeth_matrix), pattern = "chr[0-9]+_", replacement = "")
pheatmap(mat = pctmeth_matrix, main = gsub(x = proj.id, pattern = "_", replacement = " "),
filename = paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.pdf"), width = plot.width, height = plot.height,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annrows,
cellwidth = cellw,
cellheight = cellh,
display_numbers = T,
fontsize = fontsize,
fontsize_number = fontnumsize,
number_format = "%.0f",
#border_color = "#CBBEB5",
annotation_col = anncols,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col = c(8),
annotation_colors = color_list,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
saveRDS(object = pctmeth_matrix, paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.rds"))
saveRDS(object = list(anncol = anncols, annrow = annrows, anncolors = color_list), paste0(outfolder, proj.id,"_annotations_matrix_pheatmap.rds"))
saveRDS(object = difftest, paste0(outfolder, proj.id,"_CpG_differential_methylation.rds"))
saveRDS(object = perc.meth, paste0(outfolder, proj.id,"_CpG_percent_methylation.rds"))
write.table(x = perc.meth, file = paste0(outfolder, proj.id,"_CpG_percent_methylation.txt"))
write.table(x = difftest, file = paste0(outfolder, proj.id,"_CpG_differential_methylation.txt"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit.rds"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit_meth.rds"))
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment