typos

8582746d · Matteo Barcella · 38a09ebb · 8582746d
Commit 8582746d authored Aug 03, 2023 by Matteo Barcella
--- a/WES/Compare_variants_AF_v3.R
+++ b/WES/Compare_variants_AF_v3.R
+Compare_variants_AF_v3 <- function(dblist, sampleid_1, sampleid_2, 
+                                my.width, my.height, my.res, 
+                                tag = "demotag",
+                                outfolder = "Outfold/"){
+  
+  library(ggplot2)
+  library(plotly)
+  library(ggrepel)
+  library(reshape2)
+  library(cowplot)
+  library(openxlsx)
+  
+  dir.create(path = outfolder, recursive = T)
+  
+  db <- dblist
+  
+  Common_vars <- intersect(x = db[[sampleid_1]]$variantkey, y = db[[sampleid_2]]$variantkey)
+  
+  fulldf <- rbind.data.frame(x = db[[sampleid_1]], y = db[[sampleid_2]])
+  
+  fulldf_annot <- subset.data.frame(x = fulldf, select = c("variantkey","ID","HGVS_P","Effect","Impact", 
+                                                           "dbNSFP_CADD_phred", "dbNSFP_PROVEAN_pred",
+                                                           "dbNSFP_MutationTaster_pred", "dbNSFP_SIFT_pred",
+                                                           "dbNSFP_Polyphen2_HVAR_pred"))
+  fulldf_annot <- fulldf_annot %>% distinct(variantkey,.keep_all = TRUE)
+  
+  sampleid_1_sub <- subset.data.frame(x = db[[sampleid_1]], subset = variantkey %in% Common_vars, 
+                                      select = c("variantkey","AF","DP","FILTER","Gene"))
+  colnames(sampleid_1_sub) <- c("variantkey","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","Gene")
+  
+  sampleid_2_sub <- subset.data.frame(x = db[[sampleid_2]], subset = variantkey %in% Common_vars, 
+                                      select = c("variantkey","AF","DP","FILTER","Gene"))
+  colnames(sampleid_2_sub) <- c("variantkey","AF_GFP_L","DP_GFP_L","FILTER_GFP_L","Gene")
+  
+  # merge datasets
+  
+  common_df <- merge.data.frame(x = sampleid_1_sub, y = sampleid_2_sub, by = c('variantkey','Gene'))
+  common_df_melted <- melt(common_df, id.vars = c("variantkey","Gene", "DP_GFP_H", "DP_GFP_L","FILTER_GFP_H","FILTER_GFP_L"))
+  
+  # plotting common variants AF
+  
+  common_df_tmp <- common_df
+  colnames(common_df_tmp) = c("variantkey","Gene","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","AF_GFP_L","DP_GFP_L","FILTER_GFP_L")
+  
+  common_df_melted_tmp <- melt(common_df_tmp, id.vars = c("variantkey","Gene", "DP_GFP_H", "DP_GFP_L","FILTER_GFP_H","FILTER_GFP_L"))
+  
+  g_point <- ggplot(data = common_df_tmp, mapping = aes(x = AF_GFP_H, y = AF_GFP_L)) + geom_point() + xlab(label = "AF_GFP_H") + ylab(label = "AF_GFP_L")
+  g_density <- ggplot(data = common_df_melted_tmp, mapping = aes(value, fill = variable)) + geom_density() + 
+    facet_wrap(facets = ~ variable, nrow = 2) + theme(legend.position = "none", axis.title = element_blank())
+  
+  title <- ggdraw() +
+    draw_label(
+      paste0("\tAllele Fraction\n", sampleid_1, " vs ", sampleid_2),
+      fontface = 'bold',
+      x = 0,
+      hjust = -0.5
+    ) +
+    theme(
+      # add margin on the left of the drawing canvas,
+      # so title is aligned with left edge of first plot
+      plot.margin = margin(1, 1, 1, 1)
+    )
+
+  aa <- plot_grid(plotlist = list(g_density, g_point), ncol = 2)
+  
+  png(filename = paste0(outfolder,"AF_Comparison_",sampleid_1,"_",sampleid_2,"_common_",tag,".png"), 
+      width = my.width, height = my.height, units = "in", res = my.res)
+  print(plot_grid(
+    title, 
+    aa,
+    ncol = 1,
+    # rel_heights values control vertical title margins
+    rel_heights = c(0.1, 1)
+  ))
+  dev.off()
+  
+  # Merging
+  
+   union_vars <- union(x = db[[sampleid_1]]$variantkey, y = db[[sampleid_2]]$variantkey)
+   
+   sampleid_1_full <- subset.data.frame(x = db[[sampleid_1]], select = c("variantkey","AF","DP","FILTER","Gene"))
+   colnames(sampleid_1_full) <- c("variantkey","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","Gene")
+  # 
+   sampleid_2_full <- subset.data.frame(x = db[[sampleid_2]], select = c("variantkey","AF","DP","FILTER","Gene"))
+   colnames(sampleid_2_full) <- c("variantkey","AF_GFP_L","DP_GFP_L","FILTER_GFP_L","Gene")
+  # 
+  union_df <- merge.data.frame(x = sampleid_1_full, y=sampleid_2_full, by = c("variantkey","Gene"), all = T)
+ 
+  union_df$AF_GFP_H[is.na(union_df$AF_GFP_H)] <- 0
+  union_df$AF_GFP_L[is.na(union_df$AF_GFP_L)] <- 0
+
+  union_df$DP_GFP_H[is.na(union_df$DP_GFP_H)] <- 0
+  union_df$DP_GFP_L[is.na(union_df$DP_GFP_L)] <- 0
+  
+  colnames(union_df) <- c("variantkey","Gene","AF_GFP_H","DP_GFP_H","FILTER_GFP_H","AF_GFP_L","DP_GFP_L","FILTER_GFP_L")
+  
+  # Plotting Both
+
+  png(filename = paste0(outfolder,"AF_Comparison_",sampleid_1,"_",sampleid_2,"_union_",tag,".png"), width = 12, height = 9, units = "in", res = 200)
+  print(ggplot(data = union_df, mapping = aes(x = AF_GFP_H, y = AF_GFP_L, label = Gene)) + 
+          geom_point(size = 0.5) + #geom_text(size=2) + 
+          ggtitle(label = paste0("AF distribution ", tag), subtitle = paste0(sampleid_1, " vs ", sampleid_2)) +
+          theme(plot.subtitle = element_text(hjust = 0.5))) 
+  dev.off()
+
+  # retrieve info from db for complete annotation 
+  
+  common_annot <- merge.data.frame(x = common_df, y = fulldf_annot, by = 'variantkey', all.x = T, sort = F)
+  union_annot <- merge.data.frame(x = union_df, y = fulldf_annot, by = 'variantkey', all.x = T, sort = F)
+    
+  resultdata <- list(common = common_df, commonannot = common_annot, 
+                     union = union_df, unionannot = union_annot,
+                     comvars = fulldf)
+  
+  write.xlsx(x = resultdata, file = paste0(outfolder,"Comparison_",sampleid_1,"_",sampleid_2,"_", tag, ".xlsx"), asTable = T)
+  
+  saveRDS(object = resultdata, file = paste0(outfolder,"Comparison_",sampleid_1,"_",sampleid_2,"_", tag, ".rds"))
+  
+  return(resultdata)
+}
+
+