Compare_variants_AF_v3 <- function(dblist, sampleid_1, sampleid_2, my.width, my.height, my.res, tag = "demotag", outfolder = "Outfold/"){ library(ggplot2) library(plotly) library(ggrepel) library(reshape2) library(cowplot) library(openxlsx) dir.create(path = outfolder, recursive = T) db <- dblist Common_vars <- intersect(x = db[[sampleid_1]]$variantkey, y = db[[sampleid_2]]$variantkey) fulldf <- rbind.data.frame(x = db[[sampleid_1]], y = db[[sampleid_2]]) fulldf_annot <- subset.data.frame(x = fulldf, select = c("variantkey","ID","HGVS_P","Effect","Impact", "dbNSFP_CADD_phred", "dbNSFP_PROVEAN_pred", "dbNSFP_MutationTaster_pred", "dbNSFP_SIFT_pred", "dbNSFP_Polyphen2_HVAR_pred")) fulldf_annot <- fulldf_annot %>% distinct(variantkey,.keep_all = TRUE) sampleid_1_sub <- subset.data.frame(x = db[[sampleid_1]], subset = variantkey %in% Common_vars, select = c("variantkey","AF","DP","FILTER","Gene")) colnames(sampleid_1_sub) <- c("variantkey","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","Gene") sampleid_2_sub <- subset.data.frame(x = db[[sampleid_2]], subset = variantkey %in% Common_vars, select = c("variantkey","AF","DP","FILTER","Gene")) colnames(sampleid_2_sub) <- c("variantkey","AF_GFP_L","DP_GFP_L","FILTER_GFP_L","Gene") # merge datasets common_df <- merge.data.frame(x = sampleid_1_sub, y = sampleid_2_sub, by = c('variantkey','Gene')) common_df_melted <- melt(common_df, id.vars = c("variantkey","Gene", "DP_GFP_H", "DP_GFP_L","FILTER_GFP_H","FILTER_GFP_L")) # plotting common variants AF common_df_tmp <- common_df colnames(common_df_tmp) = c("variantkey","Gene","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","AF_GFP_L","DP_GFP_L","FILTER_GFP_L") common_df_melted_tmp <- melt(common_df_tmp, id.vars = c("variantkey","Gene", "DP_GFP_H", "DP_GFP_L","FILTER_GFP_H","FILTER_GFP_L")) g_point <- ggplot(data = common_df_tmp, mapping = aes(x = AF_GFP_H, y = AF_GFP_L)) + geom_point() + xlab(label = "AF_GFP_H") + ylab(label = "AF_GFP_L") g_density <- ggplot(data = common_df_melted_tmp, mapping = aes(value, fill = variable)) + geom_density() + facet_wrap(facets = ~ variable, nrow = 2) + theme(legend.position = "none", axis.title = element_blank()) title <- ggdraw() + draw_label( paste0("\tAllele Fraction\n", sampleid_1, " vs ", sampleid_2), fontface = 'bold', x = 0, hjust = -0.5 ) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(1, 1, 1, 1) ) aa <- plot_grid(plotlist = list(g_density, g_point), ncol = 2) png(filename = paste0(outfolder,"AF_Comparison_",sampleid_1,"_",sampleid_2,"_common_",tag,".png"), width = my.width, height = my.height, units = "in", res = my.res) print(plot_grid( title, aa, ncol = 1, # rel_heights values control vertical title margins rel_heights = c(0.1, 1) )) dev.off() # Merging union_vars <- union(x = db[[sampleid_1]]$variantkey, y = db[[sampleid_2]]$variantkey) sampleid_1_full <- subset.data.frame(x = db[[sampleid_1]], select = c("variantkey","AF","DP","FILTER","Gene")) colnames(sampleid_1_full) <- c("variantkey","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","Gene") # sampleid_2_full <- subset.data.frame(x = db[[sampleid_2]], select = c("variantkey","AF","DP","FILTER","Gene")) colnames(sampleid_2_full) <- c("variantkey","AF_GFP_L","DP_GFP_L","FILTER_GFP_L","Gene") # union_df <- merge.data.frame(x = sampleid_1_full, y=sampleid_2_full, by = c("variantkey","Gene"), all = T) union_df$AF_GFP_H[is.na(union_df$AF_GFP_H)] <- 0 union_df$AF_GFP_L[is.na(union_df$AF_GFP_L)] <- 0 union_df$DP_GFP_H[is.na(union_df$DP_GFP_H)] <- 0 union_df$DP_GFP_L[is.na(union_df$DP_GFP_L)] <- 0 colnames(union_df) <- c("variantkey","Gene","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","AF_GFP_L","DP_GFP_L","FILTER_GFP_L") # Plotting Both png(filename = paste0(outfolder,"AF_Comparison_",sampleid_1,"_",sampleid_2,"_union_",tag,".png"), width = 12, height = 9, units = "in", res = 200) print(ggplot(data = union_df, mapping = aes(x = AF_GFP_H, y = AF_GFP_L, label = Gene)) + geom_point(size = 0.5) + #geom_text(size=2) + ggtitle(label = paste0("AF distribution ", tag), subtitle = paste0(sampleid_1, " vs ", sampleid_2)) + theme(plot.subtitle = element_text(hjust = 0.5))) dev.off() # retrieve info from db for complete annotation common_annot <- merge.data.frame(x = common_df, y = fulldf_annot, by = 'variantkey', all.x = T, sort = F) union_annot <- merge.data.frame(x = union_df, y = fulldf_annot, by = 'variantkey', all.x = T, sort = F) resultdata <- list(common = common_df, commonannot = common_annot, union = union_df, unionannot = union_annot, comvars = fulldf) write.xlsx(x = resultdata, file = paste0(outfolder,"Comparison_",sampleid_1,"_",sampleid_2,"_", tag, ".xlsx"), asTable = T) saveRDS(object = resultdata, file = paste0(outfolder,"Comparison_",sampleid_1,"_",sampleid_2,"_", tag, ".rds")) return(resultdata) }