Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
casertanucera_leukemia2023
Commits
8582746d
Commit
8582746d
authored
Aug 03, 2023
by
Matteo Barcella
Browse files
typos
parent
38a09ebb
Changes
1
Hide whitespace changes
Inline
Side-by-side
WES/Compare_variants_AF_v3.R
0 → 100644
View file @
8582746d
Compare_variants_AF_v3
<-
function
(
dblist
,
sampleid_1
,
sampleid_2
,
my.width
,
my.height
,
my.res
,
tag
=
"demotag"
,
outfolder
=
"Outfold/"
){
library
(
ggplot2
)
library
(
plotly
)
library
(
ggrepel
)
library
(
reshape2
)
library
(
cowplot
)
library
(
openxlsx
)
dir.create
(
path
=
outfolder
,
recursive
=
T
)
db
<-
dblist
Common_vars
<-
intersect
(
x
=
db
[[
sampleid_1
]]
$
variantkey
,
y
=
db
[[
sampleid_2
]]
$
variantkey
)
fulldf
<-
rbind.data.frame
(
x
=
db
[[
sampleid_1
]],
y
=
db
[[
sampleid_2
]])
fulldf_annot
<-
subset.data.frame
(
x
=
fulldf
,
select
=
c
(
"variantkey"
,
"ID"
,
"HGVS_P"
,
"Effect"
,
"Impact"
,
"dbNSFP_CADD_phred"
,
"dbNSFP_PROVEAN_pred"
,
"dbNSFP_MutationTaster_pred"
,
"dbNSFP_SIFT_pred"
,
"dbNSFP_Polyphen2_HVAR_pred"
))
fulldf_annot
<-
fulldf_annot
%>%
distinct
(
variantkey
,
.keep_all
=
TRUE
)
sampleid_1_sub
<-
subset.data.frame
(
x
=
db
[[
sampleid_1
]],
subset
=
variantkey
%in%
Common_vars
,
select
=
c
(
"variantkey"
,
"AF"
,
"DP"
,
"FILTER"
,
"Gene"
))
colnames
(
sampleid_1_sub
)
<-
c
(
"variantkey"
,
"AF_GFP_H"
,
"DP_GFP_H"
,
"FILTER_GFP_H"
,
"Gene"
)
sampleid_2_sub
<-
subset.data.frame
(
x
=
db
[[
sampleid_2
]],
subset
=
variantkey
%in%
Common_vars
,
select
=
c
(
"variantkey"
,
"AF"
,
"DP"
,
"FILTER"
,
"Gene"
))
colnames
(
sampleid_2_sub
)
<-
c
(
"variantkey"
,
"AF_GFP_L"
,
"DP_GFP_L"
,
"FILTER_GFP_L"
,
"Gene"
)
# merge datasets
common_df
<-
merge.data.frame
(
x
=
sampleid_1_sub
,
y
=
sampleid_2_sub
,
by
=
c
(
'variantkey'
,
'Gene'
))
common_df_melted
<-
melt
(
common_df
,
id.vars
=
c
(
"variantkey"
,
"Gene"
,
"DP_GFP_H"
,
"DP_GFP_L"
,
"FILTER_GFP_H"
,
"FILTER_GFP_L"
))
# plotting common variants AF
common_df_tmp
<-
common_df
colnames
(
common_df_tmp
)
=
c
(
"variantkey"
,
"Gene"
,
"AF_GFP_H"
,
"DP_GFP_H"
,
"FILTER_GFP_H"
,
"AF_GFP_L"
,
"DP_GFP_L"
,
"FILTER_GFP_L"
)
common_df_melted_tmp
<-
melt
(
common_df_tmp
,
id.vars
=
c
(
"variantkey"
,
"Gene"
,
"DP_GFP_H"
,
"DP_GFP_L"
,
"FILTER_GFP_H"
,
"FILTER_GFP_L"
))
g_point
<-
ggplot
(
data
=
common_df_tmp
,
mapping
=
aes
(
x
=
AF_GFP_H
,
y
=
AF_GFP_L
))
+
geom_point
()
+
xlab
(
label
=
"AF_GFP_H"
)
+
ylab
(
label
=
"AF_GFP_L"
)
g_density
<-
ggplot
(
data
=
common_df_melted_tmp
,
mapping
=
aes
(
value
,
fill
=
variable
))
+
geom_density
()
+
facet_wrap
(
facets
=
~
variable
,
nrow
=
2
)
+
theme
(
legend.position
=
"none"
,
axis.title
=
element_blank
())
title
<-
ggdraw
()
+
draw_label
(
paste0
(
"\tAllele Fraction\n"
,
sampleid_1
,
" vs "
,
sampleid_2
),
fontface
=
'bold'
,
x
=
0
,
hjust
=
-0.5
)
+
theme
(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin
=
margin
(
1
,
1
,
1
,
1
)
)
aa
<-
plot_grid
(
plotlist
=
list
(
g_density
,
g_point
),
ncol
=
2
)
png
(
filename
=
paste0
(
outfolder
,
"AF_Comparison_"
,
sampleid_1
,
"_"
,
sampleid_2
,
"_common_"
,
tag
,
".png"
),
width
=
my.width
,
height
=
my.height
,
units
=
"in"
,
res
=
my.res
)
print
(
plot_grid
(
title
,
aa
,
ncol
=
1
,
# rel_heights values control vertical title margins
rel_heights
=
c
(
0.1
,
1
)
))
dev.off
()
# Merging
union_vars
<-
union
(
x
=
db
[[
sampleid_1
]]
$
variantkey
,
y
=
db
[[
sampleid_2
]]
$
variantkey
)
sampleid_1_full
<-
subset.data.frame
(
x
=
db
[[
sampleid_1
]],
select
=
c
(
"variantkey"
,
"AF"
,
"DP"
,
"FILTER"
,
"Gene"
))
colnames
(
sampleid_1_full
)
<-
c
(
"variantkey"
,
"AF_GFP_H"
,
"DP_GFP_H"
,
"FILTER_GFP_H"
,
"Gene"
)
#
sampleid_2_full
<-
subset.data.frame
(
x
=
db
[[
sampleid_2
]],
select
=
c
(
"variantkey"
,
"AF"
,
"DP"
,
"FILTER"
,
"Gene"
))
colnames
(
sampleid_2_full
)
<-
c
(
"variantkey"
,
"AF_GFP_L"
,
"DP_GFP_L"
,
"FILTER_GFP_L"
,
"Gene"
)
#
union_df
<-
merge.data.frame
(
x
=
sampleid_1_full
,
y
=
sampleid_2_full
,
by
=
c
(
"variantkey"
,
"Gene"
),
all
=
T
)
union_df
$
AF_GFP_H
[
is.na
(
union_df
$
AF_GFP_H
)]
<-
0
union_df
$
AF_GFP_L
[
is.na
(
union_df
$
AF_GFP_L
)]
<-
0
union_df
$
DP_GFP_H
[
is.na
(
union_df
$
DP_GFP_H
)]
<-
0
union_df
$
DP_GFP_L
[
is.na
(
union_df
$
DP_GFP_L
)]
<-
0
colnames
(
union_df
)
<-
c
(
"variantkey"
,
"Gene"
,
"AF_GFP_H"
,
"DP_GFP_H"
,
"FILTER_GFP_H"
,
"AF_GFP_L"
,
"DP_GFP_L"
,
"FILTER_GFP_L"
)
# Plotting Both
png
(
filename
=
paste0
(
outfolder
,
"AF_Comparison_"
,
sampleid_1
,
"_"
,
sampleid_2
,
"_union_"
,
tag
,
".png"
),
width
=
12
,
height
=
9
,
units
=
"in"
,
res
=
200
)
print
(
ggplot
(
data
=
union_df
,
mapping
=
aes
(
x
=
AF_GFP_H
,
y
=
AF_GFP_L
,
label
=
Gene
))
+
geom_point
(
size
=
0.5
)
+
#geom_text(size=2) +
ggtitle
(
label
=
paste0
(
"AF distribution "
,
tag
),
subtitle
=
paste0
(
sampleid_1
,
" vs "
,
sampleid_2
))
+
theme
(
plot.subtitle
=
element_text
(
hjust
=
0.5
)))
dev.off
()
# retrieve info from db for complete annotation
common_annot
<-
merge.data.frame
(
x
=
common_df
,
y
=
fulldf_annot
,
by
=
'variantkey'
,
all.x
=
T
,
sort
=
F
)
union_annot
<-
merge.data.frame
(
x
=
union_df
,
y
=
fulldf_annot
,
by
=
'variantkey'
,
all.x
=
T
,
sort
=
F
)
resultdata
<-
list
(
common
=
common_df
,
commonannot
=
common_annot
,
union
=
union_df
,
unionannot
=
union_annot
,
comvars
=
fulldf
)
write.xlsx
(
x
=
resultdata
,
file
=
paste0
(
outfolder
,
"Comparison_"
,
sampleid_1
,
"_"
,
sampleid_2
,
"_"
,
tag
,
".xlsx"
),
asTable
=
T
)
saveRDS
(
object
=
resultdata
,
file
=
paste0
(
outfolder
,
"Comparison_"
,
sampleid_1
,
"_"
,
sampleid_2
,
"_"
,
tag
,
".rds"
))
return
(
resultdata
)
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment