From 8582746dd76867d4e7252f04f48dc3728922dcee Mon Sep 17 00:00:00 2001 From: Matteo Barcella Date: Thu, 3 Aug 2023 17:15:54 +0200 Subject: [PATCH] typos --- WES/Compare_variants_AF_v3.R | 122 +++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 WES/Compare_variants_AF_v3.R diff --git a/WES/Compare_variants_AF_v3.R b/WES/Compare_variants_AF_v3.R new file mode 100644 index 0000000..1dd1718 --- /dev/null +++ b/WES/Compare_variants_AF_v3.R @@ -0,0 +1,122 @@ +Compare_variants_AF_v3 <- function(dblist, sampleid_1, sampleid_2, + my.width, my.height, my.res, + tag = "demotag", + outfolder = "Outfold/"){ + + library(ggplot2) + library(plotly) + library(ggrepel) + library(reshape2) + library(cowplot) + library(openxlsx) + + dir.create(path = outfolder, recursive = T) + + db <- dblist + + Common_vars <- intersect(x = db[[sampleid_1]]$variantkey, y = db[[sampleid_2]]$variantkey) + + fulldf <- rbind.data.frame(x = db[[sampleid_1]], y = db[[sampleid_2]]) + + fulldf_annot <- subset.data.frame(x = fulldf, select = c("variantkey","ID","HGVS_P","Effect","Impact", + "dbNSFP_CADD_phred", "dbNSFP_PROVEAN_pred", + "dbNSFP_MutationTaster_pred", "dbNSFP_SIFT_pred", + "dbNSFP_Polyphen2_HVAR_pred")) + fulldf_annot <- fulldf_annot %>% distinct(variantkey,.keep_all = TRUE) + + sampleid_1_sub <- subset.data.frame(x = db[[sampleid_1]], subset = variantkey %in% Common_vars, + select = c("variantkey","AF","DP","FILTER","Gene")) + colnames(sampleid_1_sub) <- c("variantkey","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","Gene") + + sampleid_2_sub <- subset.data.frame(x = db[[sampleid_2]], subset = variantkey %in% Common_vars, + select = c("variantkey","AF","DP","FILTER","Gene")) + colnames(sampleid_2_sub) <- c("variantkey","AF_GFP_L","DP_GFP_L","FILTER_GFP_L","Gene") + + # merge datasets + + common_df <- merge.data.frame(x = sampleid_1_sub, y = sampleid_2_sub, by = c('variantkey','Gene')) + common_df_melted <- melt(common_df, id.vars = c("variantkey","Gene", "DP_GFP_H", "DP_GFP_L","FILTER_GFP_H","FILTER_GFP_L")) + + # plotting common variants AF + + common_df_tmp <- common_df + colnames(common_df_tmp) = c("variantkey","Gene","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","AF_GFP_L","DP_GFP_L","FILTER_GFP_L") + + common_df_melted_tmp <- melt(common_df_tmp, id.vars = c("variantkey","Gene", "DP_GFP_H", "DP_GFP_L","FILTER_GFP_H","FILTER_GFP_L")) + + g_point <- ggplot(data = common_df_tmp, mapping = aes(x = AF_GFP_H, y = AF_GFP_L)) + geom_point() + xlab(label = "AF_GFP_H") + ylab(label = "AF_GFP_L") + g_density <- ggplot(data = common_df_melted_tmp, mapping = aes(value, fill = variable)) + geom_density() + + facet_wrap(facets = ~ variable, nrow = 2) + theme(legend.position = "none", axis.title = element_blank()) + + title <- ggdraw() + + draw_label( + paste0("\tAllele Fraction\n", sampleid_1, " vs ", sampleid_2), + fontface = 'bold', + x = 0, + hjust = -0.5 + ) + + theme( + # add margin on the left of the drawing canvas, + # so title is aligned with left edge of first plot + plot.margin = margin(1, 1, 1, 1) + ) + + aa <- plot_grid(plotlist = list(g_density, g_point), ncol = 2) + + png(filename = paste0(outfolder,"AF_Comparison_",sampleid_1,"_",sampleid_2,"_common_",tag,".png"), + width = my.width, height = my.height, units = "in", res = my.res) + print(plot_grid( + title, + aa, + ncol = 1, + # rel_heights values control vertical title margins + rel_heights = c(0.1, 1) + )) + dev.off() + + # Merging + + union_vars <- union(x = db[[sampleid_1]]$variantkey, y = db[[sampleid_2]]$variantkey) + + sampleid_1_full <- subset.data.frame(x = db[[sampleid_1]], select = c("variantkey","AF","DP","FILTER","Gene")) + colnames(sampleid_1_full) <- c("variantkey","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","Gene") + # + sampleid_2_full <- subset.data.frame(x = db[[sampleid_2]], select = c("variantkey","AF","DP","FILTER","Gene")) + colnames(sampleid_2_full) <- c("variantkey","AF_GFP_L","DP_GFP_L","FILTER_GFP_L","Gene") + # + union_df <- merge.data.frame(x = sampleid_1_full, y=sampleid_2_full, by = c("variantkey","Gene"), all = T) + + union_df$AF_GFP_H[is.na(union_df$AF_GFP_H)] <- 0 + union_df$AF_GFP_L[is.na(union_df$AF_GFP_L)] <- 0 + + union_df$DP_GFP_H[is.na(union_df$DP_GFP_H)] <- 0 + union_df$DP_GFP_L[is.na(union_df$DP_GFP_L)] <- 0 + + colnames(union_df) <- c("variantkey","Gene","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","AF_GFP_L","DP_GFP_L","FILTER_GFP_L") + + # Plotting Both + + png(filename = paste0(outfolder,"AF_Comparison_",sampleid_1,"_",sampleid_2,"_union_",tag,".png"), width = 12, height = 9, units = "in", res = 200) + print(ggplot(data = union_df, mapping = aes(x = AF_GFP_H, y = AF_GFP_L, label = Gene)) + + geom_point(size = 0.5) + #geom_text(size=2) + + ggtitle(label = paste0("AF distribution ", tag), subtitle = paste0(sampleid_1, " vs ", sampleid_2)) + + theme(plot.subtitle = element_text(hjust = 0.5))) + dev.off() + + # retrieve info from db for complete annotation + + common_annot <- merge.data.frame(x = common_df, y = fulldf_annot, by = 'variantkey', all.x = T, sort = F) + union_annot <- merge.data.frame(x = union_df, y = fulldf_annot, by = 'variantkey', all.x = T, sort = F) + + resultdata <- list(common = common_df, commonannot = common_annot, + union = union_df, unionannot = union_annot, + comvars = fulldf) + + write.xlsx(x = resultdata, file = paste0(outfolder,"Comparison_",sampleid_1,"_",sampleid_2,"_", tag, ".xlsx"), asTable = T) + + saveRDS(object = resultdata, file = paste0(outfolder,"Comparison_",sampleid_1,"_",sampleid_2,"_", tag, ".rds")) + + return(resultdata) +} + + -- GitLab