#' Run a methylKit CpG methylation analysis for one case/control comparison
#'
#' Reads one `<SampleID>.bismark.cov` file per sample listed in an Excel sample
#' sheet, optionally restricts the data to a genomic window, filters and
#' normalises coverage, writes QC plots (per-sample statistics, correlation,
#' clustering, PCA coloured by covariates), computes per-CpG differential
#' methylation and draws an annotated percent-methylation heatmap.
#'
#' Outputs are written below `outfolder` (QC material in `outfolder/QC/`).
#' Several debugging snapshots (`covs_object.rds`, `Myobject*.rds`,
#' `savemeth.tmp.rds`, `pctmethly.rds`) are additionally written to the current
#' working directory.
#'
#' @param methcall.folder Folder containing the `<SampleID>.bismark.cov` files.
#'   Must exist.
#' @param outfolder Output folder; created (recursively) when missing.
#' @param proj.id Project id used as prefix of the output files and, with
#'   underscores replaced by spaces, as heatmap title.
#' @param samplesheet Path to the Excel sample sheet (read with
#'   `openxlsx::read.xlsx`).
#' @param sheetnum Sheet of `samplesheet` to read.
#' @param design.var Sample sheet column that defines the comparison.
#' @param case.var Value of `design.var` coded as 1 in the treatment vector.
#' @param control.var Value of `design.var` coded as 0 in the treatment vector.
#' @param idcol Sample sheet column holding the sample ids.
#' @param mincoverage Passed to `methRead()` as `mincov`.
#' @param lowperc,lowcount Passed to `filterByCoverage()` as `lo.perc` and
#'   `lo.count`.
#' @param assem Genome assembly label passed to `methRead()`.
#' @param do.subset When `TRUE`, keep only data overlapping the window given by
#'   `chr.subset`, `start.subset` and `end.subset`.
#' @param chr.subset,start.subset,end.subset Genomic window used when
#'   `do.subset` is `TRUE`.
#' @param pipeline.meth Passed to `methRead()` as `pipeline`.
#' @param plot.covariates Sample sheet columns; one PCA plot is written per
#'   column.
#' @param idstoremove Optional sample ids to drop from the sample sheet.
#' @param delta.meth CpGs with an absolute methylation difference below this
#'   value are labelled `"ns"` regardless of their q-value.
#' @param plot.categorical.vars,plot.continuous.vars Sample sheet columns used
#'   as heatmap row annotations (either may be `NULL`).
#' @param plot.height,plot.width Heatmap PDF size passed to `pheatmap()`.
#' @param cellw,cellh Heatmap cell width/height passed to `pheatmap()`.
#' @param fontnumsize,fontsize Heatmap font sizes passed to `pheatmap()`.
#'
#' @return `NULL`, invisibly. The function is called for its side effects.
MethylAnalysis <- function(methcall.folder = NULL, # input folder with .cov files (bismark)
                           outfolder = NULL, # output folder (created if it does not exist)
                           proj.id = "myproject", # project id to prefix in output files
                           samplesheet = NULL,
                           sheetnum = 1,
                           design.var = "Disease", # variable to subset samplesheet
                           case.var = "Case", # 1 in treatment vector
                           control.var = "Control", # 0 in treatment vector
                           idcol = "SampleID",
                           mincoverage = 10,
                           lowperc = 5,
                           lowcount = 10,
                           assem = "hg38",
                           do.subset = TRUE,
                           chr.subset = "chr9",
                           start.subset = 136668000,
                           end.subset = 136671000,
                           pipeline.meth = "bismarkCoverage",
                           plot.covariates = c("Condition", "Batch", "MRD",
                                               "Torelapse", "Chemorefractory",
                                               "Sex", "CytoA", "Tissue"),
                           idstoremove = NULL,
                           delta.meth = 10,
                           plot.categorical.vars = c("Condition", "Batch", "MRD",
                                                     "Torelapse", "Chemorefractory",
                                                     "Sex", "CytoA"),
                           plot.continuous.vars = c("miR-126", "egfl7"),
                           plot.height = 9,
                           plot.width = 12,
                           cellw = 15,
                           cellh = 15,
                           fontnumsize = 5,
                           fontsize = 8) {
  # check vars first, so misuse fails immediately and before any package load
  if (is.null(methcall.folder)) {
    stop("Please set methcall.folder")
  }
  if (is.null(outfolder)) {
    stop("Please set outfolder")
  }
  if (is.null(samplesheet)) {
    stop("Please set samplesheet")
  }

  # load libraries; library() errors on a missing package, require() would
  # only return FALSE and let the run fail later with an unrelated message
  library(methylKit)
  library(ggplot2)
  library(ggrepel)
  library(GenomicRanges)
  library(openxlsx)
  library(pheatmap)
  library(RColorBrewer)
  # reshape2 and plyr are not used below. They are still attached when
  # available so code that relied on this function loading them keeps working.
  for (optional.pkg in c("reshape2", "plyr")) {
    suppressWarnings(require(optional.pkg, character.only = TRUE, quietly = TRUE))
  }

  # Setup variables
  if (!dir.exists(methcall.folder)) {
    # an input folder that has to be created cannot contain any .cov file
    stop("methcall.folder does not exist: ", methcall.folder)
  }
  outfolder <- paste0(outfolder, "/")
  dir.create(outfolder, showWarnings = FALSE, recursive = TRUE)
  qcfolder <- paste0(outfolder, "QC/")
  dir.create(qcfolder, showWarnings = FALSE)

  # Open a 9x6 inch PNG device, evaluate the plotting expression and always
  # close the device again, even when plotting fails.
  save_png <- function(path, plot.expr) {
    png(filename = path, width = 9, height = 6, units = "in", res = 96)
    on.exit(dev.off(), add = TRUE)
    force(plot.expr)
    invisible(NULL)
  }

  # reading sample sheet with metadata
  ssheet <- read.xlsx(samplesheet, sheet = sheetnum)
  needed.cols <- unique(c(idcol, design.var, plot.covariates,
                          plot.categorical.vars, plot.continuous.vars))
  absent.cols <- setdiff(needed.cols, colnames(ssheet))
  if (length(absent.cols) > 0) {
    stop("Columns missing from samplesheet: ",
         paste(absent.cols, collapse = ", "))
  }
  if (!is.null(idstoremove)) {
    ssheet <- ssheet[!ssheet[[idcol]] %in% idstoremove, , drop = FALSE]
  }

  # defining design vector according to variable
  ssheet <- ssheet[ssheet[[design.var]] %in% c(case.var, control.var), ,
                   drop = FALSE]
  if (nrow(ssheet) == 0) {
    stop("No sample left in samplesheet for ", design.var, " in (",
         case.var, ", ", control.var, ")")
  }
  d.vector <- ifelse(ssheet[[design.var]] == case.var, 1, 0)

  # ids are used as list names and row names below, so they must be unique
  # character strings (numeric ids would otherwise index by position)
  ids <- as.character(ssheet[[idcol]])
  if (anyDuplicated(ids) > 0) {
    stop("Duplicated sample ids in column ", idcol, ": ",
         paste(unique(ids[duplicated(ids)]), collapse = ", "))
  }

  # initializing coverage .cov files list
  covs <- as.list(file.path(methcall.folder, paste0(ids, ".bismark.cov")))
  names(covs) <- ids
  sampleids <- as.list(ids)
  names(sampleids) <- ids
  saveRDS(object = covs, file = "covs_object.rds") # debug snapshot (working dir)

  # creating object
  myobj <- methRead(location = covs,
                    sample.id = sampleids,
                    assembly = assem,
                    pipeline = pipeline.meth,
                    treatment = d.vector,
                    context = "CpG",
                    mincov = mincoverage)
  saveRDS(myobj, file = "Myobject.rds") # debug snapshot (working dir)
  names(myobj) <- ids
  saveRDS(myobj, file = "Myobject_with_names.rds") # debug snapshot (working dir)

  # subsetting if declared
  if (isTRUE(do.subset)) {
    my.win <- GRanges(seqnames = chr.subset,
                      ranges = IRanges(start = start.subset, end = end.subset))
    myobj <- selectByOverlap(myobj, my.win)
  }
  saveRDS(myobj, file = "Myobject_with_names_aftersubset.rds") # debug snapshot

  # filtering on minimum coverage
  myobj <- filterByCoverage(methylObj = myobj,
                            lo.count = lowcount,
                            lo.perc = lowperc)

  # Normalization
  myobj <- normalizeCoverage(obj = myobj)

  # Calculate basic stats: one methylation and one coverage histogram per sample
  metricsfolder <- paste0(qcfolder, "Metrics/")
  dir.create(path = metricsfolder, showWarnings = FALSE)
  for (k in seq_along(ids)) {
    # samples are addressed by position; myobj keeps the order of `covs`
    save_png(
      paste0(metricsfolder, proj.id, "_CpG_pct_methylation_sample_", ids[k], ".png"),
      print(getMethylationStats(myobj[[k]], plot = TRUE, both.strands = FALSE))
    )
    save_png(
      paste0(metricsfolder, proj.id, "_Coverage_stats_sample_", ids[k], ".png"),
      print(getCoverageStats(myobj[[k]], plot = TRUE, both.strands = FALSE))
    )
  }

  # create meth obj
  meth <- methylKit::unite(object = myobj, destrand = FALSE)
  saveRDS(meth, "savemeth.tmp.rds") # for debugging (working dir)

  # Perform correlation; the console output is diverted into a text file and
  # the diversion is removed even when getCorrelation() fails
  sink(paste0(qcfolder, proj.id, "_Correlations.txt"))
  tryCatch(getCorrelation(meth, plot = FALSE), finally = sink())

  if (length(ids) < 15) {
    save_png(paste0(qcfolder, proj.id, "_Correlations_pearson_pairwise.png"),
             print(getCorrelation(meth, plot = TRUE)))
  }

  save_png(paste0(qcfolder, proj.id, "_Clustering.png"),
           clusterSamples(meth, dist = "euclidean", plot = TRUE, method = "ward.D2"))

  # Re-plotting PCs (custom chart)
  # compute PCs and store in object
  pca_compt <- PCASamples(meth, obj.return = TRUE, screeplot = FALSE)

  # extract the first two principal components
  pcafolder <- paste0(qcfolder, "PCA/")
  dir.create(path = pcafolder, showWarnings = FALSE)
  pca_pc1_2 <- as.data.frame(x = pca_compt$x[, 1:2])

  for (myvars in plot.covariates) {
    pca_pc1_2$condition <- as.factor(ssheet[[myvars]])
    save_png(
      paste0(pcafolder, proj.id, "_PCA_", myvars, ".png"),
      print(
        ggplot(data = pca_pc1_2,
               mapping = aes(x = PC1, y = PC2, col = condition,
                             label = rownames(pca_pc1_2))) +
          geom_point(size = 3) +
          geom_text_repel(size = 3) +
          ggtitle(label = "Principal component analysis", subtitle = myvars) +
          theme(
            plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
            plot.subtitle = element_text(size = 12, hjust = 0.5,
                                         face = "italic", color = "black"),
            axis.title = element_text(size = 12, hjust = 0.5,
                                      face = "bold", color = "black"),
            legend.text = element_text(size = 8, hjust = 0.5),
            legend.title = element_blank(),
            axis.text = element_text(size = 12, hjust = 0.5, color = "black")
          )
      )
    )
  }

  # retrieve and store % of methylation (rows named "<chr>_<start>")
  perc.meth <- percMethylation(meth)
  saveRDS(perc.meth, "pctmethly.rds") # debug snapshot (working dir)
  base::rownames(perc.meth) <- paste0(meth$chr, "_", meth$start)

  # Perform diff methylation. The result is written and read back to obtain a
  # plain data.frame; the file is overwritten with the annotated table below.
  diff.file <- paste0(outfolder, proj.id, "_DiffMeth_single_CpG.txt")
  myDiff <- calculateDiffMeth(meth)
  write.table(myDiff, diff.file, row.names = FALSE)
  difftest <- read.table(diff.file, header = TRUE)
  difftest$comparison <- proj.id
  difftest$qvalue_r <- as.character(cut(x = difftest$qvalue,
                                        breaks = c(-1, 1e-100, 1e-10, 1e-02, 1),
                                        labels = c("***", "**", "*", "ns")))
  # CpGs below the effect-size threshold are reported as not significant;
  # from here on meth.diff holds the absolute difference
  myindex <- abs(difftest$meth.diff) < delta.meth
  difftest$meth.diff <- abs(difftest$meth.diff)
  difftest$qvalue_r[myindex] <- "ns"
  write.table(difftest, diff.file, row.names = FALSE)

  # Adding color list and annotations
  color_list <- list()
  annrows <- NULL
  anncols <- difftest[, c("qvalue_r", "meth.diff"), drop = FALSE]
  base::rownames(anncols) <- difftest$start

  rows.annot.vars.cat <- plot.categorical.vars
  rows.annot.vars.con <- plot.continuous.vars

  if (!is.null(rows.annot.vars.cat) || !is.null(rows.annot.vars.con)) {
    # row names must be the sample ids so they match the heatmap rows
    rownames(ssheet) <- ids
    annrows <- ssheet[, c(rows.annot.vars.cat, rows.annot.vars.con), drop = FALSE]

    # continuous variables: light grey -> one Set1 colour per variable
    concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
    for (k in seq_along(rows.annot.vars.con)) {
      color_list[[rows.annot.vars.con[k]]] <-
        colorRampPalette(c("lightgrey", concolors[k]))(10)
    }

    # categorical variables: one Set3 colour per sorted level, stored as a
    # vector named by level (interpolated when there are more than 9 levels)
    set3 <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
    for (catvar in rows.annot.vars.cat) {
      lev <- sort(unique(ssheet[[catvar]]))
      catcolors <- if (length(lev) > 9) {
        colorRampPalette(set3)(length(lev))
      } else {
        set3
      }
      color_list[[catvar]] <- setNames(catcolors[seq_along(lev)],
                                       as.character(lev))
    }
  }

  color_list[["qvalue_r"]] <- c("*" = "#6497b1", "**" = "#03396c",
                                "***" = "#011f4b", ns = "#c4cacf")
  # "Reds" provides at most 9 colours
  color_list[["meth.diff"]] <- colorRampPalette(brewer.pal(n = 9, "Reds"))(100)
  color_list[["miR-126"]] <- brewer.pal(n = 9, "PuRd")

  # Heatmap: samples in rows, CpGs in columns. Column names are reduced to the
  # start position (text after the last "_") so they match rownames(anncols),
  # whatever the chromosome/contig name looks like.
  pctmeth_matrix <- t(perc.meth)
  base::colnames(pctmeth_matrix) <- sub(pattern = "^.*_", replacement = "",
                                        x = base::colnames(pctmeth_matrix))

  pheatmap(mat = pctmeth_matrix,
           main = gsub(x = proj.id, pattern = "_", replacement = " "),
           filename = paste0(outfolder, proj.id,
                             "_CpG_percent_methylation_matrix_pheatmap.pdf"),
           width = plot.width,
           height = plot.height,
           na_col = "black",
           cluster_cols = FALSE,
           cluster_rows = TRUE,
           annotation_row = annrows,
           cellwidth = cellw,
           cellheight = cellh,
           display_numbers = TRUE,
           fontsize = fontsize,
           fontsize_number = fontnumsize,
           number_format = "%.0f",
           annotation_col = anncols,
           # NOTE(review): fixed gap after the 8th CpG column; presumably tuned
           # for the default window - confirm for other regions
           gaps_col = c(8),
           annotation_colors = color_list,
           color = c("#F5F5F5", "#EEEEEE", "#CCCCCC", "#999999",
                     "#666666", "#333333", "#000000"),
           breaks = c(0, 10, 20, 30, 50, 70, 90, 100))

  # Persist results
  saveRDS(object = pctmeth_matrix,
          paste0(outfolder, proj.id, "_CpG_percent_methylation_matrix_pheatmap.rds"))
  saveRDS(object = list(anncol = anncols, annrow = annrows, anncolors = color_list),
          paste0(outfolder, proj.id, "_annotations_matrix_pheatmap.rds"))
  saveRDS(object = difftest,
          paste0(outfolder, proj.id, "_CpG_differential_methylation.rds"))
  saveRDS(object = perc.meth,
          paste0(outfolder, proj.id, "_CpG_percent_methylation.rds"))
  write.table(x = perc.meth,
              file = paste0(outfolder, proj.id, "_CpG_percent_methylation.txt"))
  write.table(x = difftest,
              file = paste0(outfolder, proj.id, "_CpG_differential_methylation.txt"))
  saveRDS(myobj, file = paste0(outfolder, proj.id, "_methylkit.rds"))
  # the "_meth" file holds the united object (it used to duplicate myobj)
  saveRDS(meth, file = paste0(outfolder, proj.id, "_methylkit_meth.rds"))

  invisible(NULL)
}