Commit 8891fc61 authored by Matteo Barcella's avatar Matteo Barcella
Browse files

Adding scripts for differential methylation analysis and heatmap in manuscript

parent 0c684c53
#CpGs from position 136669244 to 13666449 (the entire fragment of the intron 4 putative transcription starting site),
#CpGs from position 136670325 to 136670386 (5’ of the intron 7) and
#CpGs from position 136670827 to 136670945 (3’ of the intron 7).
# load data:
source("MethylAnalysis.R")
MethylAnalysis(methcall.folder = "input/",
outfolder = "Adults_vs_Prog",
samplesheet = "SampleSheets_BisulphiteExp_Sets.xlsx",
sheetnum = 2,
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
idcol = "SampleID",
design.var = "TestVar",
case.var = "Case",
control.var = "Controls",
mincoverage = 10,
idstoremove = c("S150310"),
assem = "hg38",
pipeline.meth = "bismarkCoverage",
proj.id = "Adults_vs_Prog_Case_vs_Control",
plot.covariates = c("Condition","MRD","Cytogenetics","TestVar"),
lowperc = 5,
lowcount = 10,
delta.meth = 10,
plot.height = 12, plot.width = 18,
plot.categorical.vars = c("Condition","Cytogenetics"),
plot.continuous.vars = c("miR-126"),cellw = 14, cellh = 8, fontnumsize = 7, fontsize = 7
)
library(openxlsx)
library(pheatmap)
library(RColorBrewer)
# load data pct methylation
pctmeth_matrix <- readRDS(file = "Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_CpG_percent_methylation_matrix_pheatmap.rds")
# fixing colors
annotations <- readRDS("Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_annotations_matrix_pheatmap.rds")
annotations2 <- annotations
annotations2$anncolors$Condition <- c("#006600","#929292","#c9c9c9","#b30101") # #006600: verdescuro, #b30101: rosso scuro, grigio-scuro: #929292, grigiochiaro: #c9c9c9
names(annotations2$anncolors$Condition) <- names(annotations$anncolors$Condition)
annotations2$anncolors$Cytogenetics <- c("#0080cc","#f6a145","white") # https://www.color-hex.com/color-palette/96440
names(annotations2$anncolors$Cytogenetics) <- names(annotations$anncolors$Cytogenetics)
png(filename = "Adults_vs_Prog/Adults_vs_Prog_Case_vs_Control_CpG_percent_methylation_matrix_pheatmap_CUSTOM.png",
width = 15, height = 7, units = "in", res = 300)
pheatmap(mat = pctmeth_matrix,
width = 15, height = 7,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annotations2$annrow,
cellwidth = 14,
cellheight = 13,
display_numbers = T,
fontsize = 10,
fontsize_number = 7,
number_format = "%.0f",
border_color = FALSE,
annotation_col = annotations2$anncol,
legend = F,
annotation_legend = TRUE,
annotation_names_col = T,
#labels_row = lrow,
#labels_col = lcol,
# gaps_row = c(23),
gaps_col = c(9,43),
annotation_colors = annotations2$anncolors,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
dev.off()
#################
# Disease 12-27 #
#################
source("MethylAnalysis_bulk.R")
MethylAnalysis_bulk(methcall.folder = "input/",
outfolder = "S120167_Case_vs_Controls",
samplesheet = "SampleSheets_BisulphiteExp_Sets.xlsx",
sheetnum = 4,
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
idcol = "SampleID",
design.var = "TestVar",
case.var = "POP",
control.var = "DGN",
mincoverage = 10,
idstoremove = NULL,
assem = "hg38",
pipeline.meth = "bismarkCoverage",
proj.id = "S120167_POP_vs_DGN",
plot.covariates = c("Condition","TestVar"),
lowperc = 5,
lowcount = 10,
delta.meth = 10,
plot.height = 12, plot.width = 18,
plot.categorical.vars = c("Condition"),
plot.continuous.vars = c("miR-126"),cellw = 14, cellh = 8, fontnumsize = 7, fontsize = 7
)
# RUNNING GFPLOW VS GFPHIGH to get diff meth results and stats
source("MethylAnalysis_bulk_v2.R")
MethylAnalysis_bulk_v2(methcall.folder = "input/",
outfolder = "S120167_GFP_L_vs_GFP_H",
samplesheet = "SampleSheets_BisulphiteExp_Sets.xlsx",
sheetnum = 4,
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
idcol = "SampleID",
design.var = "Condition",
case.var = "GFP_L",
control.var = "GFP_H",
mincoverage = 10,
idstoremove = NULL,
assem = "hg38",
pipeline.meth = "bismarkCoverage",
proj.id = "S120167_GFP_L_vs_GFP_H",
plot.covariates = c("Condition"),
lowperc = 5,
lowcount = 10,
delta.meth = 10,
plot.height = 12, plot.width = 18,
plot.categorical.vars = c("Condition"),
plot.continuous.vars = c("miR-126"),
cellw = 14,
cellh = 8,
fontnumsize = 7,
fontsize = 7
)
# FIXING THE IMAGE
pctmeth_matrix <- readRDS(file = "S120167_Case_vs_Controls/S120167_POP_vs_DGN_CpG_percent_methylation_matrix_pheatmap.rds")
annotations <- readRDS("S120167_Case_vs_Controls/S120167_POP_vs_DGN_annotations_matrix_pheatmap.rds")
annotations_GFP_L_vs_GFP_H <- readRDS("S120167_GFP_L_vs_GFP_H/S120167_GFP_L_vs_GFP_H_annotations_matrix_pheatmap.rds")
# replacing diff meth and GFP_L vs GFP_H qvalue_r in POP vs DGN chart
annotations$anncol$qvalue_r <- annotations_GFP_L_vs_GFP_H$anncol$qvalue_r
annotations$anncol$meth.diff <- annotations_GFP_L_vs_GFP_H$anncol$meth.diff
annotations2 <- annotations
annotations2$anncolors$Condition <- c("#656161","#0b64a1","#df4041") # #006600: verdescuro, #b30101: rosso scuro, grigio-scuro: #929292, grigiochiaro: #c9c9c9
names(annotations2$anncolors$Condition) <- names(annotations$anncolors$Condition)
annotations2$anncolors$qvalue_r <- annotations_GFP_L_vs_GFP_H$anncolors$qvalue_r
annotations2$anncolors$meth.diff <- annotations_GFP_L_vs_GFP_H$anncolors$meth.diff
annotations2$annrow$`miR-126` <- NULL
# plot heatmap custom
png(filename = "S120167_Case_vs_Controls/S120167_GFP_Lvs_GFP_H_CpG_percent_methylation_matrix_pheatmap_CUSTOM_CpG_main.png",
width = 15, height = 5, units = "in", res = 300)
pheatmap(mat = pctmeth_matrix[,pctmeth_matrix_ADULTs_CpG],
#filename = "S10272_Case_vs_Controls/S10272_GFP_Lvs_GFP_H_CpG_percent_methylation_matrix_pheatmap_CUSTOM.png",
width = 15, height = 7,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annotations2$annrow,
cellwidth = 14,
cellheight = 13,
display_numbers = T,
fontsize = 10,
fontsize_number = 7,
number_format = "%.0f",
border_color = FALSE,
annotation_col = annotations2$anncol,
legend = F,
annotation_legend = TRUE,
annotation_names_col = T,
#labels_row = lrow,
#labels_col = lcol,
# gaps_row = c(23),
gaps_col = c(9,43),
annotation_colors = annotations2$anncolors,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
dev.off()
MethylAnalysis <- function(methcall.folder = NULL, # input folder with .cov files (bismark)
outfolder = NULL, # output folder (if not exist it will be created)
proj.id = "myproject", # project id to prefix in putput files
samplesheet = NULL,
sheetnum = 1,
design.var = "Disease", # variable to subset samplesheet
case.var = "Case", # 1 in treatment vector
control.var = "Control", # 0 in treatment vector
idcol = "SampleID",
mincoverage = 10,
lowperc = 5,
lowcount = 10,
assem="hg38",
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
pipeline.meth = "bismarkCoverage",
plot.covariates = c("Condition","Batch","MRD",
"Torelapse","Chemorefractory",
"Sex","CytoA","Tissue"),
idstoremove = NULL,
delta.meth = 10,
plot.categorical.vars = c("Condition","Batch","MRD","Torelapse","Chemorefractory","Sex","CytoA"),
plot.continuous.vars = c("miR-126","egfl7"),
plot.height = 9,
plot.width = 12,
cellw = 15,
cellh = 15,
fontnumsize = 5,
fontsize = 8
){
# load libraries
require(methylKit)
require(ggplot2)
require(reshape2)
require(plyr)
require(ggrepel)
library(GenomicRanges)
library(openxlsx)
library(pheatmap)
# check vars
if(is.null(methcall.folder)){
stop("Please set methcall.folder")
}
if(is.null(outfolder)){
stop("Please set outfolder")
}
if(is.null(samplesheet)){
stop("Please set samplesheet")
}
# Setup variables
infolder = paste0(methcall.folder, "/")
dir.create(infolder, showWarnings = F)
outfolder = paste0(outfolder, "/")
dir.create(outfolder, showWarnings = F)
qcfolder <- paste0(outfolder, "QC/")
dir.create(qcfolder, showWarnings = F)
# reading sample sheet with metadata
ssheet <- read.xlsx(samplesheet, sheet = sheetnum)
if(!is.null(idstoremove)){
ssheet <- subset.data.frame(x = ssheet, subset = !ssheet[[idcol]] %in% idstoremove)
}
# defining design vector according to variable
ssheet <- subset.data.frame(x = ssheet, subset = ssheet[[design.var]] %in% c(case.var, control.var))
d.vec <- ssheet[[design.var]]
d.vector <- ifelse(d.vec == case.var, 1, 0)
# initialzing coverage .cov files list
covs <- list()
sampleids <- as.list(ssheet[[idcol]])
names(sampleids) <- ssheet[[idcol]]
for (i in ssheet[[idcol]]) {
covs[[i]] <- paste0(infolder, "/", i, ".bismark.cov")
}
saveRDS(object = covs, file = "covs_object.rds")
# creating object
myobj <- methRead(location = covs,
sample.id = sampleids,
assembly = assem,
pipeline = pipeline.meth,
treatment = d.vector,
context="CpG",
mincov = mincoverage)
saveRDS(myobj, file = "Myobject.rds")
names(myobj) <- ssheet[[idcol]]
saveRDS(myobj, file = "Myobject_with_names.rds")
# subsetting if declared
if(isTRUE(do.subset)){
my.win = GRanges(seqnames = chr.subset, ranges = IRanges(start = start.subset, end = end.subset))
myobj <- selectByOverlap(myobj,my.win)
}
saveRDS(myobj, file = "Myobject_with_names_aftersubset.rds")
# filtering on minimum coverage
myobj <- filterByCoverage(methylObj = myobj, lo.count=lowcount, lo.perc = lowperc)
# Normalization
myobj <- normalizeCoverage(obj = myobj)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder <- paste0(qcfolder, "Metrics/")
dir.create(path = metricsfolder, showWarnings = F)
for (id in ssheet[[idcol]]) {
png(filename = paste0(metricsfolder,proj.id,"_CpG_pct_methylation_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getMethylationStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
png(filename = paste0(metricsfolder, proj.id,"_Coverage_stats_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getCoverageStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth <- unite(object = myobj, destrand=FALSE) # for debugging
saveRDS(meth, "savemeth.tmp.rds")
# Perform correlation
sink(paste0(qcfolder, proj.id, "_Correlations.txt"))
getCorrelation(meth,plot=FALSE)
sink()
if(length(ssheet[[idcol]]) < 15){
png(filename = paste0(qcfolder,proj.id, "_Correlations_pearson_pairwise.png"),
width = 9, height = 6, units = "in", res = 96)
print(getCorrelation(meth,plot=TRUE))
dev.off()
}
png(filename = paste0(qcfolder, proj.id, "_Clustering.png"),
width = 9, height = 6, units = "in", res = 96)
clusterSamples(meth, dist="euclidean", plot=TRUE, method = "ward.D2")
dev.off()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt <- PCASamples(meth, obj.return = T, screeplot = F)
# extract PCs components
pcafolder <- paste0(qcfolder, "PCA/")
dir.create(path = pcafolder, showWarnings = F)
pca_pc1_2 <- as.data.frame(x = pca_compt$x[,1:2])
for(myvars in plot.covariates){
pca_pc1_2$condition <- as.factor(ssheet[[myvars]])
png(filename = paste0(pcafolder, proj.id, "_PCA_",myvars,".png"),
width = 9, height = 6, units = "in",res=96)
print(ggplot(data = pca_pc1_2,
mapping = aes(x = PC1, y=PC2, col=condition, label=rownames(pca_pc1_2))) +
geom_point(size=3) + geom_text_repel(size=3) + ggtitle(label = "Principal component analysis", subtitle = myvars) +
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5)) +
theme(plot.subtitle=element_text(size=12, hjust=0.5, face="italic", color="black")) +
theme(axis.title = element_text(size=12, hjust=0.5, face="bold", color="black")) +
theme(legend.text = element_text(size=8, hjust=0.5)) +
theme(legend.title = element_blank()) +
theme(axis.text = element_text(size=12, hjust=0.5, color="black")))
dev.off()
}
# retrieve and store % of methylation
perc.meth <- percMethylation(meth)
saveRDS(perc.meth,"pctmethly.rds")
base::rownames(perc.meth) <- paste0(meth$chr, "_", meth$start)
# Perform diff methylation
myDiff=calculateDiffMeth(meth)
write.table(myDiff,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
difftest <- read.table(paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), header = T)
difftest$comparison <- proj.id
difftest$qvalue_r <- as.character(cut(x = difftest$qvalue,
breaks = c(-1, 1e-100, 1e-10, 1e-02, 1),
labels = c("***","**","*","ns")))
myindex <- abs(difftest$meth.diff) < delta.meth
difftest$meth.diff <- abs(difftest$meth.diff)
difftest$qvalue_r[myindex] <- "ns"
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
# Adding color list and annotations
library(RColorBrewer)
color_list <- list()
annrows <- NULL
anncols <- subset.data.frame(difftest, select = c("qvalue_r","meth.diff"))
base::rownames(anncols) <- difftest$start
rows.annot.vars.cat = plot.categorical.vars
rows.annot.vars.con = plot.continuous.vars
if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
rownames(ssheet) <- ssheet$SampleID
annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
catcolors <- NULL
for (varcon in 1:length(rows.annot.vars.con)) {
color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
}
for (varcat in 1:length(rows.annot.vars.cat)) {
ncolor <- 1
for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
}
else{
catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
}
color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
ncolor <- ncolor + 1
}
}
}
color_list[["qvalue_r"]] = c("*" = "#6497b1", "**" = "#03396c", "***" = "#011f4b", ns= "#c4cacf")
color_list[["meth.diff"]] = colorRampPalette(brewer.pal(n = 11, "Reds"))(100)
color_list[["miR-126"]] <- brewer.pal(n = 9, "PuRd")
# Heatmap
pctmeth_matrix <- t(perc.meth)
base::colnames(pctmeth_matrix) <- gsub(x = base::colnames(pctmeth_matrix), pattern = "chr[0-9]+_", replacement = "")
pheatmap(mat = pctmeth_matrix, main = gsub(x = proj.id, pattern = "_", replacement = " "),
filename = paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.pdf"), width = plot.width, height = plot.height,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annrows,
cellwidth = cellw,
cellheight = cellh,
display_numbers = T,
fontsize = fontsize,
fontsize_number = fontnumsize,
number_format = "%.0f",
#border_color = "#CBBEB5",
annotation_col = anncols,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col = c(8),
annotation_colors = color_list,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
saveRDS(object = pctmeth_matrix, paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.rds"))
saveRDS(object = list(anncol = anncols, annrow = annrows, anncolors = color_list), paste0(outfolder, proj.id,"_annotations_matrix_pheatmap.rds"))
saveRDS(object = difftest, paste0(outfolder, proj.id,"_CpG_differential_methylation.rds"))
saveRDS(object = perc.meth, paste0(outfolder, proj.id,"_CpG_percent_methylation.rds"))
write.table(x = perc.meth, file = paste0(outfolder, proj.id,"_CpG_percent_methylation.txt"))
write.table(x = difftest, file = paste0(outfolder, proj.id,"_CpG_differential_methylation.txt"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit.rds"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit_meth.rds"))
}
\ No newline at end of file
MethylAnalysis_bulk <- function(methcall.folder = NULL, # input folder with .cov files (bismark)
outfolder = NULL, # output folder (if not exist it will be created)
proj.id = "myproject", # project id to prefix in putput files
samplesheet = NULL,
sheetnum = 1,
design.var = "Disease", # variable to subset samplesheet
case.var = "Case", # 1 in treatment vector
control.var = "Control", # 0 in treatment vector
idcol = "SampleID",
mincoverage = 10,
lowperc = 5,
lowcount = 10,
assem="hg38",
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
pipeline.meth = "bismarkCoverage",
plot.covariates = c("Condition","Batch","MRD",
"Torelapse","Chemorefractory",
"Sex","CytoA","Tissue"),
idstoremove = NULL,
delta.meth = 10,
plot.categorical.vars = c("Condition","Batch","MRD","Torelapse","Chemorefractory","Sex","CytoA"),
plot.continuous.vars = c("miR-126","egfl7"),
plot.height = 9,
plot.width = 12,
cellw = 15,
cellh = 15,
fontnumsize = 5,
fontsize = 8
){
# load libraries
require(methylKit)
require(ggplot2)
require(reshape2)
require(plyr)
require(ggrepel)
library(GenomicRanges)
library(openxlsx)
library(pheatmap)
# check vars
if(is.null(methcall.folder)){
stop("Please set methcall.folder")
}
if(is.null(outfolder)){
stop("Please set outfolder")
}
if(is.null(samplesheet)){
stop("Please set samplesheet")
}
# Setup variables
infolder = paste0(methcall.folder, "/")
dir.create(infolder, showWarnings = F)
outfolder = paste0(outfolder, "/")
dir.create(outfolder, showWarnings = F)
qcfolder <- paste0(outfolder, "QC/")
dir.create(qcfolder, showWarnings = F)
# reading sample sheet with metadata
ssheet <- read.xlsx(samplesheet, sheet = sheetnum)
if(!is.null(idstoremove)){
ssheet <- subset.data.frame(x = ssheet, subset = !ssheet[[idcol]] %in% idstoremove)
}
# defining design vector according to variable
ssheet <- subset.data.frame(x = ssheet, subset = ssheet[[design.var]] %in% c(case.var, control.var))
d.vec <- ssheet[[design.var]]
d.vector <- ifelse(d.vec == case.var, 1, 0)
# initialzing coverage .cov files list
covs <- list()
sampleids <- as.list(ssheet[[idcol]])
names(sampleids) <- ssheet[[idcol]]
for (i in ssheet[[idcol]]) {
covs[[i]] <- paste0(infolder, "/", i, ".bismark.cov")
}
saveRDS(object = covs, file = "covs_object.rds")
# creating object
myobj <- methRead(location = covs,
sample.id = sampleids,
assembly = assem,
pipeline = pipeline.meth,
treatment = d.vector,
context="CpG",
mincov = mincoverage)
saveRDS(myobj, file = "Myobject.rds")
names(myobj) <- ssheet[[idcol]]
saveRDS(myobj, file = "Myobject_with_names.rds")
# subsetting if declared
if(isTRUE(do.subset)){
my.win = GRanges(seqnames = chr.subset, ranges = IRanges(start = start.subset, end = end.subset))
myobj <- selectByOverlap(myobj,my.win)
}
saveRDS(myobj, file = "Myobject_with_names_aftersubset.rds")
# filtering on minimum coverage
myobj <- filterByCoverage(methylObj = myobj, lo.count=lowcount, lo.perc = lowperc)
# Normalization
myobj <- normalizeCoverage(obj = myobj)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder <- paste0(qcfolder, "Metrics/")
dir.create(path = metricsfolder, showWarnings = F)
for (id in ssheet[[idcol]]) {
png(filename = paste0(metricsfolder,proj.id,"_CpG_pct_methylation_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getMethylationStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
png(filename = paste0(metricsfolder, proj.id,"_Coverage_stats_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getCoverageStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth <- unite(object = myobj, destrand=FALSE) # for debugging
saveRDS(meth, "savemeth.tmp.rds")
# Perform correlation
sink(paste0(qcfolder, proj.id, "_Correlations.txt"))
getCorrelation(meth,plot=FALSE)
sink()
if(length(ssheet[[idcol]]) < 15){
png(filename = paste0(qcfolder,proj.id, "_Correlations_pearson_pairwise.png"),
width = 9, height = 6, units = "in", res = 96)
print(getCorrelation(meth,plot=TRUE))
dev.off()
}
png(filename = paste0(qcfolder, proj.id, "_Clustering.png"),
width = 9, height = 6, units = "in", res = 96)
clusterSamples(meth, dist="euclidean", plot=TRUE, method = "ward.D2")
dev.off()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt <- PCASamples(meth, obj.return = T, screeplot = F)
# extract PCs components
pcafolder <- paste0(qcfolder, "PCA/")
dir.create(path = pcafolder, showWarnings = F)
pca_pc1_2 <- as.data.frame(x = pca_compt$x[,1:2])
for(myvars in plot.covariates){
pca_pc1_2$condition <- as.factor(ssheet[[myvars]])
png(filename = paste0(pcafolder, proj.id, "_PCA_",myvars,".png"),
width = 9, height = 6, units = "in",res=96)
print(ggplot(data = pca_pc1_2,
mapping = aes(x = PC1, y=PC2, col=condition, label=rownames(pca_pc1_2))) +
geom_point(size=3) + geom_text_repel(size=3) + ggtitle(label = "Principal component analysis", subtitle = myvars) +
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5)) +
theme(plot.subtitle=element_text(size=12, hjust=0.5, face="italic", color="black")) +
theme(axis.title = element_text(size=12, hjust=0.5, face="bold", color="black")) +
theme(legend.text = element_text(size=8, hjust=0.5)) +
theme(legend.title = element_blank()) +
theme(axis.text = element_text(size=12, hjust=0.5, color="black")))
dev.off()
}
# retrieve and store % of methylation
perc.meth <- percMethylation(meth)
saveRDS(perc.meth,"pctmethly.rds")
base::rownames(perc.meth) <- paste0(meth$chr, "_", meth$start)
# Perform diff methylation
myDiff=calculateDiffMeth(meth)
write.table(myDiff,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
difftest <- read.table(paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), header = T)
difftest$comparison <- proj.id
difftest$qvalue_r <- as.character(cut(x = difftest$qvalue,
breaks = c(-1, 1e-100, 1e-10, 1e-02, 1),
labels = c("***","**","*","ns")))
myindex <- abs(difftest$meth.diff) < delta.meth
difftest$meth.diff <- abs(difftest$meth.diff)
difftest$qvalue_r[myindex] <- "ns"
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
# Adding color list and annotations
library(RColorBrewer)
color_list <- list()
annrows <- NULL
anncols <- subset.data.frame(difftest, select = c("qvalue_r","meth.diff"))
base::rownames(anncols) <- difftest$start
rows.annot.vars.cat = plot.categorical.vars
rows.annot.vars.con = plot.continuous.vars
if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
rownames(ssheet) <- ssheet$SampleID
annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
catcolors <- NULL
for (varcon in 1:length(rows.annot.vars.con)) {
color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
}
for (varcat in 1:length(rows.annot.vars.cat)) {
ncolor <- 1
for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
}
else{
catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
}
color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
ncolor <- ncolor + 1
}
}
}
color_list[["qvalue_r"]] = c("*" = "#6497b1", "**" = "#03396c", "***" = "#011f4b", ns= "#c4cacf")
color_list[["meth.diff"]] = colorRampPalette(brewer.pal(n = 11, "Reds"))(100)
color_list[["miR-126"]] <- brewer.pal(n = 9, "PuRd")
# Heatmap
pctmeth_matrix <- t(perc.meth)
base::colnames(pctmeth_matrix) <- gsub(x = base::colnames(pctmeth_matrix), pattern = "chr[0-9]+_", replacement = "")
pheatmap(mat = pctmeth_matrix, main = gsub(x = proj.id, pattern = "_", replacement = " "),
filename = paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.pdf"), width = plot.width, height = plot.height,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annrows,
cellwidth = cellw,
cellheight = cellh,
display_numbers = T,
fontsize = fontsize,
fontsize_number = fontnumsize,
number_format = "%.0f",
#border_color = "#CBBEB5",
annotation_col = anncols,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col = c(8),
annotation_colors = color_list,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
saveRDS(object = pctmeth_matrix, paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.rds"))
saveRDS(object = list(anncol = anncols, annrow = annrows, anncolors = color_list), paste0(outfolder, proj.id,"_annotations_matrix_pheatmap.rds"))
saveRDS(object = difftest, paste0(outfolder, proj.id,"_CpG_differential_methylation.rds"))
saveRDS(object = perc.meth, paste0(outfolder, proj.id,"_CpG_percent_methylation.rds"))
write.table(x = perc.meth, file = paste0(outfolder, proj.id,"_CpG_percent_methylation.txt"))
write.table(x = difftest, file = paste0(outfolder, proj.id,"_CpG_differential_methylation.txt"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit.rds"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit_meth.rds"))
}
\ No newline at end of file
MethylAnalysis_bulk_v2 <- function(methcall.folder = NULL, # input folder with .cov files (bismark)
outfolder = NULL, # output folder (if not exist it will be created)
proj.id = "myproject", # project id to prefix in putput files
samplesheet = NULL,
sheetnum = 1,
design.var = "Disease", # variable to subset samplesheet
case.var = "Case", # 1 in treatment vector
control.var = "Control", # 0 in treatment vector
idcol = "SampleID",
mincoverage = 10,
lowperc = 5,
lowcount = 10,
assem="hg38",
do.subset = T,
chr.subset = "chr9",
start.subset = 136668000,
end.subset = 136671000,
pipeline.meth = "bismarkCoverage",
plot.covariates = c("Condition","Batch","MRD",
"Torelapse","Chemorefractory",
"Sex","CytoA","Tissue"),
idstoremove = NULL,
delta.meth = 10,
plot.categorical.vars = c("Condition","Batch","MRD","Torelapse","Chemorefractory","Sex","CytoA"),
plot.continuous.vars = c("miR-126","egfl7"),
plot.height = 9,
plot.width = 12,
cellw = 15,
cellh = 15,
fontnumsize = 5,
fontsize = 8
){
# load libraries
require(methylKit)
require(ggplot2)
require(reshape2)
require(plyr)
require(ggrepel)
library(GenomicRanges)
library(openxlsx)
library(pheatmap)
# check vars
if(is.null(methcall.folder)){
stop("Please set methcall.folder")
}
if(is.null(outfolder)){
stop("Please set outfolder")
}
if(is.null(samplesheet)){
stop("Please set samplesheet")
}
# Setup variables
infolder = paste0(methcall.folder, "/")
dir.create(infolder, showWarnings = F)
outfolder = paste0(outfolder, "/")
dir.create(outfolder, showWarnings = F)
qcfolder <- paste0(outfolder, "QC/")
dir.create(qcfolder, showWarnings = F)
# reading sample sheet with metadata
ssheet <- read.xlsx(samplesheet, sheet = sheetnum)
if(!is.null(idstoremove)){
ssheet <- subset.data.frame(x = ssheet, subset = !ssheet[[idcol]] %in% idstoremove)
}
# defining design vector according to variable
ssheet <- subset.data.frame(x = ssheet, subset = ssheet[[design.var]] %in% c(case.var, control.var))
d.vec <- ssheet[[design.var]]
d.vector <- ifelse(d.vec == case.var, 1, 0)
# initialzing coverage .cov files list
covs <- list()
sampleids <- as.list(ssheet[[idcol]])
names(sampleids) <- ssheet[[idcol]]
for (i in ssheet[[idcol]]) {
covs[[i]] <- paste0(infolder, "/", i, ".bismark.cov")
}
saveRDS(object = covs, file = "covs_object.rds")
# creating object
myobj <- methRead(location = covs,
sample.id = sampleids,
assembly = assem,
pipeline = pipeline.meth,
treatment = d.vector,
context="CpG",
mincov = mincoverage)
saveRDS(myobj, file = "Myobject.rds")
names(myobj) <- ssheet[[idcol]]
saveRDS(myobj, file = "Myobject_with_names.rds")
# subsetting if declared
if(isTRUE(do.subset)){
my.win = GRanges(seqnames = chr.subset, ranges = IRanges(start = start.subset, end = end.subset))
myobj <- selectByOverlap(myobj,my.win)
}
saveRDS(myobj, file = "Myobject_with_names_aftersubset.rds")
# filtering on minimum coverage
myobj <- filterByCoverage(methylObj = myobj, lo.count=lowcount, lo.perc = lowperc)
# Normalization
myobj <- normalizeCoverage(obj = myobj)
# saveRDS(object = myobj, file = "Initial_object.rds")
# Calculate basic stats and PCs
metricsfolder <- paste0(qcfolder, "Metrics/")
dir.create(path = metricsfolder, showWarnings = F)
for (id in ssheet[[idcol]]) {
png(filename = paste0(metricsfolder,proj.id,"_CpG_pct_methylation_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getMethylationStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
png(filename = paste0(metricsfolder, proj.id,"_Coverage_stats_sample_", id, ".png"),
width = 9, height = 6, units = "in", res = 96)
print(getCoverageStats(myobj[[id]],plot=TRUE,both.strands=FALSE))
dev.off()
}
# create meth obj
#meth <- unite(object = myobj, destrand=FALSE)
meth <- unite(object = myobj, destrand=FALSE) # for debugging
saveRDS(meth, "savemeth.tmp.rds")
# Perform correlation
sink(paste0(qcfolder, proj.id, "_Correlations.txt"))
getCorrelation(meth,plot=FALSE)
sink()
if(length(ssheet[[idcol]]) < 15){
png(filename = paste0(qcfolder,proj.id, "_Correlations_pearson_pairwise.png"),
width = 9, height = 6, units = "in", res = 96)
print(getCorrelation(meth,plot=TRUE))
dev.off()
}
png(filename = paste0(qcfolder, proj.id, "_Clustering.png"),
width = 9, height = 6, units = "in", res = 96)
clusterSamples(meth, dist="euclidean", plot=TRUE, method = "ward.D2")
dev.off()
# Re-plotting PCs (custom chart)
# compute PCs and store in object
pca_compt <- PCASamples(meth, obj.return = T, screeplot = F)
# extract PCs components
pcafolder <- paste0(qcfolder, "PCA/")
dir.create(path = pcafolder, showWarnings = F)
pca_pc1_2 <- as.data.frame(x = pca_compt$x[,1:2])
for(myvars in plot.covariates){
pca_pc1_2$condition <- as.factor(ssheet[[myvars]])
png(filename = paste0(pcafolder, proj.id, "_PCA_",myvars,".png"),
width = 9, height = 6, units = "in",res=96)
print(ggplot(data = pca_pc1_2,
mapping = aes(x = PC1, y=PC2, col=condition, label=rownames(pca_pc1_2))) +
geom_point(size=3) + geom_text_repel(size=3) + ggtitle(label = "Principal component analysis", subtitle = myvars) +
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5)) +
theme(plot.subtitle=element_text(size=12, hjust=0.5, face="italic", color="black")) +
theme(axis.title = element_text(size=12, hjust=0.5, face="bold", color="black")) +
theme(legend.text = element_text(size=8, hjust=0.5)) +
theme(legend.title = element_blank()) +
theme(axis.text = element_text(size=12, hjust=0.5, color="black")))
dev.off()
}
# retrieve and store % of methylation
perc.meth <- percMethylation(meth)
saveRDS(perc.meth,"pctmethly.rds")
base::rownames(perc.meth) <- paste0(meth$chr, "_", meth$start)
# Perform diff methylation
myDiff=calculateDiffMeth(meth)
write.table(myDiff,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
difftest <- read.table(paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), header = T)
difftest$comparison <- proj.id
difftest$qvalue_r <- as.character(cut(x = difftest$qvalue,
breaks = c(-1, 1e-100, 1e-10, 1e-02, 1),
labels = c("***","**","*","ns")))
myindex <- abs(difftest$meth.diff) < delta.meth
difftest$meth.diff <- abs(difftest$meth.diff)
difftest$qvalue_r[myindex] <- "ns"
write.table(difftest,paste0(outfolder,proj.id,"_DiffMeth_single_CpG.txt"), row.names = F)
# Adding color list and annotations
library(RColorBrewer)
color_list <- list()
annrows <- NULL
anncols <- subset.data.frame(difftest, select = c("qvalue_r","meth.diff"))
base::rownames(anncols) <- difftest$start
rows.annot.vars.cat = plot.categorical.vars
rows.annot.vars.con = plot.continuous.vars
# if(!is.null(rows.annot.vars.cat) | !is.null(rows.annot.vars.con)){
# rownames(ssheet) <- ssheet$SampleID
# annrows <- subset.data.frame(x = ssheet, select = c(rows.annot.vars.cat, rows.annot.vars.con))
# concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
# catcolors <- NULL
# for (varcon in 1:length(rows.annot.vars.con)) {
# color_list[[rows.annot.vars.con[varcon]]] <- colorRampPalette(c("lightgrey", concolors[varcon]))(10)
# }
# for (varcat in 1:length(rows.annot.vars.cat)) {
# ncolor <- 1
# for (val in unique(ssheet[[rows.annot.vars.cat[varcat]]])[order(unique(ssheet[[rows.annot.vars.cat[varcat]]]))]){
# if(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])) > 9){
# catcolors <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = "Set3"))(length(unique(ssheet[[rows.annot.vars.cat[varcat]]])))
# }
# else{
# catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
# }
# color_list[[rows.annot.vars.cat[varcat]]][[val]] <- catcolors[ncolor]
# ncolor <- ncolor + 1
# }
# }
# }
color_list[["qvalue_r"]] = c("*" = "#6497b1", "**" = "#03396c", "***" = "#011f4b", ns= "#c4cacf")
color_list[["meth.diff"]] = colorRampPalette(brewer.pal(n = 11, "Reds"))(100)
color_list[["miR-126"]] <- brewer.pal(n = 9, "PuRd")
# Heatmap
pctmeth_matrix <- t(perc.meth)
base::colnames(pctmeth_matrix) <- gsub(x = base::colnames(pctmeth_matrix), pattern = "chr[0-9]+_", replacement = "")
pheatmap(mat = pctmeth_matrix, main = gsub(x = proj.id, pattern = "_", replacement = " "),
filename = paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.pdf"), width = plot.width, height = plot.height,
na_col = "black",
cluster_cols = FALSE,
cluster_rows = TRUE,
annotation_row = annrows,
cellwidth = cellw,
cellheight = cellh,
display_numbers = T,
fontsize = fontsize,
fontsize_number = fontnumsize,
number_format = "%.0f",
#border_color = "#CBBEB5",
annotation_col = anncols,
#labels_row = lrow,
#labels_col = lcol,
#gaps_row = gaps.row,
gaps_col = c(8),
annotation_colors = color_list,
color = c("#F5F5F5","#EEEEEE","#CCCCCC","#999999", "#666666","#333333","#000000"), breaks = c(0,10,20,30,50,70,90,100)
)
saveRDS(object = pctmeth_matrix, paste0(outfolder, proj.id,"_CpG_percent_methylation_matrix_pheatmap.rds"))
saveRDS(object = list(anncol = anncols, annrow = annrows, anncolors = color_list), paste0(outfolder, proj.id,"_annotations_matrix_pheatmap.rds"))
saveRDS(object = difftest, paste0(outfolder, proj.id,"_CpG_differential_methylation.rds"))
saveRDS(object = perc.meth, paste0(outfolder, proj.id,"_CpG_percent_methylation.rds"))
write.table(x = perc.meth, file = paste0(outfolder, proj.id,"_CpG_percent_methylation.txt"))
write.table(x = difftest, file = paste0(outfolder, proj.id,"_CpG_differential_methylation.txt"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit.rds"))
saveRDS(myobj, file = paste0(outfolder,proj.id,"_methylkit_meth.rds"))
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment