Commit 8582746d authored by Matteo Barcella
Browse files

typos

parent 38a09ebb
#' Compare variant allele fractions (AF) between two samples
#'
#' Pulls two per-sample variant tables out of `dblist`, builds the table of
#' variants shared by both samples (inner join) and the table of variants seen
#' in either sample (full outer join, missing AF/DP set to 0), attaches
#' annotation columns, draws one PNG per table and writes the results to an
#' `.xlsx` workbook and an `.rds` file inside `outfolder`.
#'
#' Output columns are always labelled `*_GFP_H` (first sample) and `*_GFP_L`
#' (second sample), whatever the sample identifiers are.
#'
#' @param dblist List of per-sample data frames. Each must provide the columns
#'   `variantkey`, `AF`, `DP`, `FILTER`, `Gene`, `ID`, `HGVS_P`, `Effect`,
#'   `Impact`, `dbNSFP_CADD_phred`, `dbNSFP_PROVEAN_pred`,
#'   `dbNSFP_MutationTaster_pred`, `dbNSFP_SIFT_pred` and
#'   `dbNSFP_Polyphen2_HVAR_pred`.
#' @param sampleid_1,sampleid_2 Identifiers of the two elements of `dblist` to
#'   compare; also used in plot titles and output file names.
#' @param my.width,my.height Size, in inches, of the common-variant PNG.
#' @param my.res Resolution (ppi) of the common-variant PNG.
#' @param tag Label appended to output file names and to the union plot title.
#' @param outfolder Output directory; created if it does not exist. A trailing
#'   path separator is added when missing.
#' @param union.width,union.height,union.res Size (inches) and resolution of
#'   the union PNG. The defaults reproduce the previously hard-coded values.
#' @return A named list with elements `common`, `commonannot`, `union`,
#'   `unionannot` and `comvars` (the two input tables stacked row-wise).
Compare_variants_AF_v3 <- function(dblist, sampleid_1, sampleid_2,
                                   my.width, my.height, my.res,
                                   tag = "demotag",
                                   outfolder = "Outfold/",
                                   union.width = 12, union.height = 9,
                                   union.res = 200) {
  # Fail fast, before any directory or file is created, when a sample name
  # cannot be found (a missing name would otherwise yield NULL and a cryptic
  # error much later).
  for (sample_id in list(sampleid_1, sampleid_2)) {
    if (is.character(sample_id) && length(sample_id) == 1L &&
        !(sample_id %in% names(dblist))) {
      stop("Sample '", sample_id, "' not found in dblist", call. = FALSE)
    }
  }

  # The full set of packages is still attached so the search path after the
  # call is the same as before this revision.
  library(ggplot2)
  library(plotly)
  library(ggrepel)
  library(reshape2)
  library(cowplot)
  library(openxlsx)

  # File names are built with paste0(outfolder, ...), so the folder must end
  # with a separator or the files land next to it with a mangled prefix.
  if (!grepl("[/\\\\]$", outfolder)) {
    outfolder <- paste0(outfolder, "/")
  }
  dir.create(path = outfolder, recursive = TRUE, showWarnings = FALSE)

  # Render a plot to a PNG file; on.exit() closes the device even when
  # printing the plot raises an error.
  save_png <- function(plot_obj, path, width, height, res) {
    png(filename = path, width = width, height = height, units = "in", res = res)
    on.exit(dev.off(), add = TRUE)
    print(plot_obj)
    invisible(path)
  }

  # Keep the per-sample value columns and suffix them with the sample label.
  value_cols <- c("variantkey", "AF", "DP", "FILTER", "Gene")
  labelled_values <- function(sample_df, label) {
    out <- subset.data.frame(x = sample_df, select = value_cols)
    colnames(out) <- c("variantkey", paste0(c("AF_", "DP_", "FILTER_"), label), "Gene")
    out
  }

  annot_cols <- c("variantkey", "ID", "HGVS_P", "Effect", "Impact",
                  "dbNSFP_CADD_phred", "dbNSFP_PROVEAN_pred",
                  "dbNSFP_MutationTaster_pred", "dbNSFP_SIFT_pred",
                  "dbNSFP_Polyphen2_HVAR_pred")
  join_keys <- c("variantkey", "Gene")
  file_stub <- paste0(sampleid_1, "_", sampleid_2)

  sample_1 <- dblist[[sampleid_1]]
  sample_2 <- dblist[[sampleid_2]]

  # One annotation row per variant: the first occurrence wins.
  fulldf <- rbind.data.frame(x = sample_1, y = sample_2)
  fulldf_annot <- subset.data.frame(x = fulldf, select = annot_cols)
  fulldf_annot <- fulldf_annot[!duplicated(fulldf_annot$variantkey), , drop = FALSE]

  sample_1_full <- labelled_values(sample_1, "GFP_H")
  sample_2_full <- labelled_values(sample_2, "GFP_L")

  # Variants present in both samples ----
  common_vars <- intersect(x = sample_1$variantkey, y = sample_2$variantkey)
  common_df <- merge.data.frame(
    x = sample_1_full[sample_1_full$variantkey %in% common_vars, , drop = FALSE],
    y = sample_2_full[sample_2_full$variantkey %in% common_vars, , drop = FALSE],
    by = join_keys
  )

  if (nrow(common_df) > 0) {
    # Long format: one row per (variant, sample) allele fraction.
    common_long <- melt(
      common_df,
      id.vars = c("variantkey", "Gene", "DP_GFP_H", "DP_GFP_L",
                  "FILTER_GFP_H", "FILTER_GFP_L")
    )
    scatter_plot <- ggplot(data = common_df,
                           mapping = aes(x = AF_GFP_H, y = AF_GFP_L)) +
      geom_point() +
      xlab(label = "AF_GFP_H") +
      ylab(label = "AF_GFP_L")
    density_plot <- ggplot(data = common_long,
                           mapping = aes(value, fill = variable)) +
      geom_density() +
      facet_wrap(facets = ~ variable, nrow = 2) +
      theme(legend.position = "none", axis.title = element_blank())
    title_panel <- ggdraw() +
      draw_label(
        paste0("\tAllele Fraction\n", sampleid_1, " vs ", sampleid_2),
        fontface = 'bold',
        x = 0,
        hjust = -0.5
      ) +
      theme(plot.margin = margin(1, 1, 1, 1))
    panels <- plot_grid(plotlist = list(density_plot, scatter_plot), ncol = 2)
    save_png(
      # rel_heights keeps the title strip thin relative to the plots
      plot_grid(title_panel, panels, ncol = 1, rel_heights = c(0.1, 1)),
      path = paste0(outfolder, "AF_Comparison_", file_stub, "_common_", tag, ".png"),
      width = my.width, height = my.height, res = my.res
    )
  } else {
    # A faceted density plot cannot be built from zero rows; skip it so the
    # union outputs below are still produced.
    warning("No variants shared by ", sampleid_1, " and ", sampleid_2,
            "; skipping the common-variant plot", call. = FALSE)
  }

  # Variants present in either sample ----
  union_df <- merge.data.frame(x = sample_1_full, y = sample_2_full,
                               by = join_keys, all = TRUE)
  # A variant absent from one sample gets AF = 0 and DP = 0 there.
  for (value_col in c("AF_GFP_H", "AF_GFP_L", "DP_GFP_H", "DP_GFP_L")) {
    union_df[[value_col]][is.na(union_df[[value_col]])] <- 0
  }

  union_plot <- ggplot(data = union_df,
                       mapping = aes(x = AF_GFP_H, y = AF_GFP_L, label = Gene)) +
    geom_point(size = 0.5) +
    ggtitle(label = paste0("AF distribution ", tag),
            subtitle = paste0(sampleid_1, " vs ", sampleid_2)) +
    theme(plot.subtitle = element_text(hjust = 0.5))
  save_png(
    union_plot,
    path = paste0(outfolder, "AF_Comparison_", file_stub, "_union_", tag, ".png"),
    width = union.width, height = union.height, res = union.res
  )

  # Attach the annotation columns to both comparison tables ----
  common_annot <- merge.data.frame(x = common_df, y = fulldf_annot,
                                   by = 'variantkey', all.x = TRUE, sort = FALSE)
  union_annot <- merge.data.frame(x = union_df, y = fulldf_annot,
                                  by = 'variantkey', all.x = TRUE, sort = FALSE)

  resultdata <- list(common = common_df, commonannot = common_annot,
                     union = union_df, unionannot = union_annot,
                     comvars = fulldf)
  result_stub <- paste0(outfolder, "Comparison_", file_stub, "_", tag)
  write.xlsx(x = resultdata, file = paste0(result_stub, ".xlsx"), asTable = TRUE)
  saveRDS(object = resultdata, file = paste0(result_stub, ".rds"))
  resultdata
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment