Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Fiumara_BasePrimeEd2022
Fiumara_BasePrimeEd2022_WES
Commits
90f1cd57
Commit
90f1cd57
authored
Jul 31, 2023
by
Stefano Beretta
Browse files
Upload New File
parent
911e890d
Changes
1
Hide whitespace changes
Inline
Side-by-side
scripts/WESstd_plot_results.R
0 → 100644
View file @
90f1cd57
library
(
dplyr
)
library
(
ggplot2
)
library
(
RColorBrewer
)
library
(
CMplot
)
library
(
gtable
)
library
(
circlize
)
library
(
grid
)
library
(
gridExtra
)
library
(
stringr
)
library
(
e1071
)
library
(
openxlsx
)
library
(
VennDiagram
)
library
(
scales
)
library
(
ggVennDiagram
)
library
(
reshape2
)
#########################
### Utility Functions ###
#########################
# A helper function to define a region on the layout
define_region
<-
function
(
row
,
col
){
viewport
(
layout.pos.row
=
row
,
layout.pos.col
=
col
)
}
# Function to define colors for SNVs
snv_colors2
<-
function
(
del
=
FALSE
,
alpha
=
1
)
{
nuc
<-
c
(
"A"
,
"C"
,
"G"
,
"T"
)
# Point deletions
dels
<-
alpha
(
brewer.pal
(
5
,
"Greys"
)[
2
:
5
],
1
)
names
(
dels
)
<-
paste0
(
nuc
,
"*"
)
# Transitions
ts
<-
alpha
(
brewer.pal
(
5
,
"Blues"
)[
2
:
5
],
1
)
names
(
ts
)
<-
c
(
"CT"
,
"GA"
,
"AG"
,
"TC"
)
# Tansversions
tv1
<-
alpha
(
brewer.pal
(
5
,
"YlGn"
)[
2
:
5
],
1
)
names
(
tv1
)
<-
c
(
"AC"
,
"TG"
,
"AT"
,
"TA"
)
tv2
<-
alpha
(
brewer.pal
(
5
,
"OrRd"
)[
2
:
5
],
1
)
names
(
tv2
)
<-
c
(
"CA"
,
"GT"
,
"CG"
,
"GC"
)
var_cols
<-
c
(
ts
,
tv1
,
tv2
)
if
(
del
)
{
var_cols
<-
c
(
var_cols
,
dels
)
}
return
(
var_cols
)
}
# Circos + Legend
legend_plot
=
function
(
t
,
sample
)
{
t
<-
mutate
(
t
,
TYPE
=
case_when
(
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
ALT
!=
"*"
~
"Substitution"
,
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
ALT
==
"*"
~
"Deletion"
,
nchar
(
REF
)
>
nchar
(
ALT
)
&
nchar
(
REF
)
-
nchar
(
ALT
)
>=
1
~
"Deletion"
,
nchar
(
REF
)
<
nchar
(
ALT
)
&
nchar
(
ALT
)
-
nchar
(
REF
)
>=
1
~
"Insertion"
,
TRUE
~
"Other"
))
# Substitutions
t.subs
<-
t
%>%
filter
(
TYPE
==
"Substitution"
)
%>%
tbl_df
()
t.subs
$
SUB
<-
paste0
(
t.subs
$
REF
,
t.subs
$
ALT
)
t.subs
$
SUB
<-
factor
(
t.subs
$
SUB
,
levels
=
sort
(
unique
(
t.subs
$
SUB
)))
cols_names
<-
levels
(
t.subs
$
SUB
)
cols
<-
snv_colors2
(
del
=
F
)
t.subs
<-
merge
(
t.subs
,
data.frame
(
cols
,
SUB
=
names
(
cols
)),
all.x
=
TRUE
)
tt.subs
<-
t.subs
%>%
filter
(
Sample
==
sample
)
%>%
tbl_df
()
# Deletions / Insertions
t.other
<-
t
%>%
filter
(
TYPE
!=
"Substitution"
)
%>%
tbl_df
()
tt.other
<-
t.other
%>%
filter
(
Sample
==
sample
)
%>%
tbl_df
()
if
(
nrow
(
tt.other
)
>
0
)
{
tt.other
$
AF
<-
1
}
tt.other
$
col
<-
ifelse
(
tt.other
$
TYPE
==
"Deletion"
,
"Red"
,
"Blue"
)
# Circos
par
(
mar
=
rep
(
0
,
4
))
chr.names
<-
paste0
(
"chr"
,
c
(
1
:
22
,
"X"
,
"Y"
))
circos.par
(
"start.degree"
=
90
)
circos.initializeWithIdeogram
(
species
=
"hg38"
,
chromosome.index
=
chr.names
,
plotType
=
c
())
# Legends (center)
legend
(
-0.5
,
0.85
,
lty
=
1
,
lwd
=
2
,
seg.len
=
1
,
col
=
c
(
"Red"
,
"Blue"
),
legend
=
c
(
"Deletion"
,
"Insertion"
),
bty
=
'n'
,
#xjust = 0,
x.intersp
=
0.4
,
y.intersp
=
1
,
inset
=
c
(
0.05
,
0
),
title.adj
=
0.1
,
title
=
"Small Indels"
)
legend
(
-0.5
,
0.45
,
pch
=
16
,
col
=
cols
,
legend
=
names
(
cols
),
pt.cex
=
1.2
,
bty
=
'n'
,
ncol
=
3
,
#xjust = 0,
x.intersp
=
0.4
,
y.intersp
=
1
,
inset
=
c
(
0.05
,
0
),
border
=
"NA"
,
title.adj
=
0.1
,
title
=
"Substitution"
)
if
(
"snpEff_Impact"
%in%
colnames
(
tt.subs
))
{
legend
(
-0.5
,
-0.2
,
pch
=
c
(
10
,
13
),
col
=
"red"
,
legend
=
c
(
"Relevant Impact"
,
"Target Gene"
),
pt.cex
=
1.8
,
bty
=
'n'
,
ncol
=
1
,
#xjust = 0,
x.intersp
=
0.6
,
y.intersp
=
1
,
inset
=
c
(
0.2
,
0
),
border
=
"NA"
,
title.adj
=
0
,
title
=
"Highlight Variants"
)
}
circos.clear
()
}
plot_circos
<-
function
(
t
,
sample
)
{
# Substitutions
t
<-
mutate
(
t
,
TYPE
=
case_when
(
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
ALT
!=
"*"
~
"Substitution"
,
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
ALT
==
"*"
~
"Deletion"
,
nchar
(
REF
)
>
nchar
(
ALT
)
&
nchar
(
REF
)
-
nchar
(
ALT
)
>=
1
~
"Deletion"
,
nchar
(
REF
)
<
nchar
(
ALT
)
&
nchar
(
ALT
)
-
nchar
(
REF
)
>=
1
~
"Insertion"
,
TRUE
~
"Other"
))
t.subs
<-
t
%>%
filter
(
TYPE
==
"Substitution"
)
%>%
tbl_df
()
t.subs
$
SUB
<-
paste0
(
t.subs
$
REF
,
t.subs
$
ALT
)
t.subs
$
SUB
<-
factor
(
t.subs
$
SUB
,
levels
=
sort
(
unique
(
t.subs
$
SUB
)))
cols_names
<-
levels
(
t.subs
$
SUB
)
cols
<-
snv_colors2
(
del
=
F
,
alpha
=
0.8
)
t.subs
<-
merge
(
t.subs
,
data.frame
(
cols
,
SUB
=
names
(
cols
)),
all.x
=
TRUE
)
tt.subs
<-
t.subs
%>%
filter
(
Sample
==
sample
)
%>%
tbl_df
()
# Deletions / Insertions
t.other
<-
t
%>%
filter
(
TYPE
!=
"Substitution"
)
%>%
tbl_df
()
tt.other
<-
t.other
%>%
filter
(
Sample
==
sample
)
%>%
tbl_df
()
if
(
nrow
(
tt.other
)
>
0
)
{
tt.other
$
VAF
<-
1
}
tt.other
$
col
<-
ifelse
(
tt.other
$
TYPE
==
"Deletion"
,
"Red"
,
"Blue"
)
if
(
"IMPACT"
%in%
colnames
(
tt.other
))
{
hio
<-
subset
(
tt.other
,
IMPACT
%in%
c
(
"HIGH"
,
"MODERATE"
))
if
(
nrow
(
hio
)
>
0
)
{
print
(
hio
)
}
}
# Circos
par
(
mar
=
rep
(
0
,
4
))
chr.names
<-
paste0
(
"chr"
,
c
(
1
:
22
,
"X"
,
"Y"
))
circos.par
(
"start.degree"
=
90
)
circos.initializeWithIdeogram
(
species
=
"hg38"
,
chromosome.index
=
chr.names
,
plotType
=
c
(
"ideogram"
,
"labels"
))
lines
<-
seq
(
0
,
1
,
0.2
)
line_factors
<-
expand.grid
(
x
=
get.all.sector.index
(),
y
=
lines
)
circos.trackPlotRegion
(
factors
=
line_factors
$
x
,
y
=
line_factors
$
y
,
track.height
=
0.4
,
panel.fun
=
function
(
x
,
y
)
{
xl
<-
get.cell.meta.data
(
"xlim"
)
for
(
i
in
lines
)
{
circos.lines
(
xl
,
c
(
i
,
i
),
col
=
"grey"
)
}
})
for
(
i
in
lines
)
{
circos.text
(
0
,
i
,
i
,
col
=
"black"
,
sector.index
=
"chrY"
,
track.index
=
3
,
cex
=
0.5
,
adj
=
c
(
0.01
,
0.01
))
}
circos.trackPoints
(
tt.subs
$
CHROM
,
tt.subs
$
POS
,
tt.subs
$
VAF
,
pch
=
16
,
cex
=
1.5
,
col
=
as.character
(
tt.subs
$
cols
))
if
(
"snpEff_Impact"
%in%
colnames
(
tt.subs
))
{
hi
<-
subset
(
tt.subs
,
snpEff_Impact
%in%
c
(
"HIGH"
,
"MODERATE"
))
if
(
nrow
(
hi
)
>
0
)
{
circos.trackPoints
(
hi
$
CHROM
,
hi
$
POS
,
hi
$
VAF
,
pch
=
10
,
cex
=
1.8
,
col
=
"red"
,
track.index
=
3
)
}
b2m_df
<-
data.frame
(
"CHROM"
=
c
(
"chr15"
),
"POS"
=
c
(
44715702
),
"VAF"
=
c
(
0.7
))
circos.trackPoints
(
b2m_df
$
CHROM
,
b2m_df
$
POS
,
b2m_df
$
VAF
,
pch
=
13
,
cex
=
2.5
,
col
=
"red"
,
track.index
=
3
)
}
circos.trackPlotRegion
(
factors
=
line_factors
$
x
,
ylim
=
c
(
0
,
1
),
track.height
=
0.1
)
if
(
nrow
(
tt.other
)
>
0
)
{
circos.trackLines
(
tt.other
$
CHROM
,
tt.other
$
POS
,
tt.other
$
VAF
,
track.index
=
4
,
col
=
as.character
(
tt.other
$
col
),
lwd
=
2
,
type
=
'h'
)
}
text
(
0
,
0
,
sample
,
cex
=
1.5
)
circos.clear
()
}
# Variant Plots
plot_variants
<-
function
(
full.t
,
out_dir
,
plot_prefix
,
fill_by
)
{
dir.create
(
path
=
out_dir
,
showWarnings
=
F
)
tt
<-
full.t
%>%
filter
(
grepl
(
"chr"
,
CHROM
))
%>%
filter
(
!
CHROM
%in%
c
(
"chrY"
,
"chrM"
))
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
))
%>%
summarise
(
Count
=
n
())
write.xlsx
(
x
=
list
(
"NumVariants"
=
tt
),
file
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantCounts.xlsx"
),
sep
=
"/"
))
p
<-
ggplot
(
tt
,
aes
(
x
=
Sample
,
y
=
Count
,
fill
=
get
(
fill_by
)))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
90
,
hjust
=
1
,
vjust
=
0.5
),
legend.position
=
"none"
)
+
geom_bar
(
stat
=
"identity"
)
+
scale_y_continuous
(
labels
=
scales
::
comma
,
n.breaks
=
8
)
+
xlab
(
""
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantCounts.pdf"
),
sep
=
"/"
),
plot
=
p
,
width
=
6
,
height
=
6
)
tt
<-
full.t
%>%
filter
(
grepl
(
"chr"
,
CHROM
))
%>%
filter
(
!
CHROM
%in%
c
(
"chrY"
,
"chrM"
))
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
),
REF
,
ALT
)
%>%
summarise
(
Count
=
n
())
tt
<-
mutate
(
tt
,
TYPE
=
case_when
(
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
ALT
!=
"*"
~
"SNV"
,
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
ALT
==
"*"
~
"DEL"
,
nchar
(
REF
)
>
nchar
(
ALT
)
&
nchar
(
REF
)
-
nchar
(
ALT
)
>=
1
~
"DEL"
,
nchar
(
REF
)
<
nchar
(
ALT
)
&
nchar
(
ALT
)
-
nchar
(
REF
)
>=
1
~
"INS"
,
TRUE
~
"Other"
))
write.xlsx
(
x
=
list
(
"VariantClass"
=
tt
),
file
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantClassification.xlsx"
),
sep
=
"/"
))
tt_type
<-
tt
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
),
TYPE
)
%>%
summarise
(
SumCount
=
sum
(
Count
))
%>%
arrange
(
desc
(
SumCount
))
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
))
%>%
mutate
(
CountPerc
=
SumCount
/
sum
(
SumCount
))
write.xlsx
(
x
=
list
(
"VariantType"
=
tt_type
),
file
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantType.xlsx"
),
sep
=
"/"
))
p
<-
ggplot
(
tt_type
,
aes
(
x
=
Sample
,
y
=
SumCount
,
fill
=
TYPE
))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
),
legend.position
=
"top"
)
+
guides
(
fill
=
guide_legend
(
ncol
=
6
))
+
xlab
(
""
)
+
ylab
(
"Count"
)
+
scale_fill_brewer
(
palette
=
"Set1"
,
name
=
""
)
+
geom_bar
(
stat
=
"identity"
,
position
=
"stack"
)
+
scale_y_continuous
(
labels
=
scales
::
comma
,
n.breaks
=
8
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantTypes.pdf"
),
sep
=
"/"
),
plot
=
p
,
width
=
6
,
height
=
6
)
p
<-
ggplot
(
tt_type
,
aes
(
x
=
Sample
,
y
=
CountPerc
,
fill
=
TYPE
))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
),
legend.position
=
"top"
)
+
guides
(
fill
=
guide_legend
(
ncol
=
6
))
+
xlab
(
""
)
+
ylab
(
"Count"
)
+
scale_fill_brewer
(
palette
=
"Set1"
,
name
=
""
)
+
geom_bar
(
stat
=
"identity"
,
position
=
"stack"
)
+
scale_y_continuous
(
labels
=
scales
::
percent_format
(
accuracy
=
2
),
n.breaks
=
10
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantTypesPerc.pdf"
),
sep
=
"/"
),
plot
=
p
,
width
=
6
,
height
=
6
)
tt
<-
full.t
%>%
filter
(
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
grepl
(
"chr"
,
CHROM
))
%>%
filter
(
!
CHROM
%in%
c
(
"chrY"
,
"chrM"
))
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
),
REF
,
ALT
)
%>%
summarise
(
Count
=
n
())
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
))
%>%
mutate
(
CountPerc
=
Count
/
sum
(
Count
))
tt
$
Variant
<-
paste0
(
tt
$
REF
,
tt
$
ALT
)
tt
$
Variant
<-
factor
(
tt
$
Variant
,
levels
=
sort
(
unique
(
tt
$
Variant
)))
write.xlsx
(
x
=
list
(
"SNV"
=
tt
),
file
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_SNVcounts.xlsx"
),
sep
=
"/"
))
tt
$
Variant
<-
factor
(
tt
$
Variant
,
levels
=
rev
(
names
(
snv_colors2
())))
p2
<-
ggplot
(
tt
,
aes
(
x
=
Sample
,
y
=
Count
,
fill
=
Variant
))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
))
+
geom_bar
(
stat
=
"identity"
)
+
scale_fill_manual
(
values
=
snv_colors2
(
del
=
F
))
+
xlab
(
""
)
+
scale_y_continuous
(
labels
=
scales
::
comma
,
n.breaks
=
8
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_SNVcounts.pdf"
),
sep
=
"/"
),
plot
=
p2
,
width
=
7
,
height
=
7
)
p2p
<-
ggplot
(
tt
,
aes
(
x
=
Sample
,
y
=
CountPerc
,
fill
=
Variant
))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
))
+
geom_bar
(
stat
=
"identity"
)
+
scale_fill_manual
(
values
=
snv_colors2
(
del
=
F
))
+
scale_y_continuous
(
labels
=
scales
::
percent_format
(
accuracy
=
2
),
n.breaks
=
10
)
+
xlab
(
""
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_SNVcountsPerc.pdf"
),
sep
=
"/"
),
plot
=
p2p
,
width
=
7
,
height
=
7
)
}
###############
### General ###
###############
wdir
<-
"Fiumara_BasePrimeEd2022_WES/WESstd"
dp_sing
<-
"50"
out_dir
<-
paste
(
wdir
,
"results"
,
sep
=
"/"
)
dir.create
(
out_dir
,
showWarnings
=
F
)
# Samples
samples
<-
c
(
"227-BM-A0-5"
,
"227-BM-A0-6"
,
"227-BM-A2-5"
,
"227-BM-A2-6"
,
"227-BM-A3-5"
,
"227-BM-A3-6"
,
"227-BM-B0-5"
,
"227-BM-B1-5"
,
"227-BM-B3-5"
,
"227-BM-B4-5"
,
"227-BM-C0-5"
,
"227-BM-C1-5"
,
"227-BM-D0-6"
,
"227-BM-D1-6"
,
"227-BM-D3-6"
,
"227-vitro-A"
,
"227-vitro-B"
,
"227-vitro-C"
,
"227-vitro-D"
)
################
### Variants ###
################
full.t
<-
data.frame
()
for
(
ss
in
samples
)
{
print
(
ss
)
t
<-
read.table
(
gzfile
(
paste
(
wdir
,
"data"
,
paste0
(
ss
,
"-GQ80_DP"
,
dp_sing
,
"_PASS.ANNOT.vcf.gz"
),
sep
=
"/"
)),
comment.char
=
"#"
)
colnames
(
t
)
<-
c
(
"CHROM"
,
"POS"
,
"ID"
,
"REF"
,
"ALT"
,
"QUAL"
,
"FILTER"
,
"ANNOT"
,
"FORMAT"
,
"SAMPLE"
)
t
$
Vars
<-
paste
(
t
$
CHROM
,
t
$
POS
,
t
$
REF
,
t
$
ALT
,
sep
=
"-"
)
t
$
Sample
<-
ss
t
$
Treatment
<-
substr
(
as.character
(
str_split_fixed
(
ss
,
"-"
,
3
)[,
3
]),
1
,
1
)
t
$
Group
<-
gsub
(
"BM"
,
"Sample"
,
as.character
(
str_split_fixed
(
ss
,
"-"
,
3
)[,
2
]))
t
$
Vars
<-
paste
(
t
$
CHROM
,
t
$
POS
,
t
$
REF
,
t
$
ALT
,
sep
=
"-"
)
t
$
DP
<-
as.numeric
(
str_split_fixed
(
t
$
SAMPLE
,
":"
,
5
)[,
3
])
t
$
AD
<-
as.numeric
(
str_split_fixed
(
str_split_fixed
(
t
$
SAMPLE
,
":"
,
5
)[,
2
],
","
,
2
)[,
1
])
t
$
GQ
<-
as.numeric
(
str_split_fixed
(
t
$
SAMPLE
,
":"
,
5
)[,
4
])
t
$
VAF
<-
(
t
$
DP
-
t
$
AD
)
/
t
$
DP
full.t
<-
rbind
(
full.t
,
t
)
}
chr
<-
c
(
paste0
(
"chr"
,
seq
(
1
,
22
)),
"chrX"
)
full.t
<-
subset
(
full.t
,
CHROM
%in%
chr
)
full.t
$
CHROM
<-
factor
(
full.t
$
CHROM
,
levels
=
chr
)
full.t
$
Sample
<-
factor
(
full.t
$
Sample
,
levels
=
unique
(
full.t
$
Sample
))
write.table
(
x
=
full.t
,
file
=
paste
(
out_dir
,
"Full_res_PASS.tsv"
,
sep
=
"/"
),
sep
=
"\t"
,
quote
=
F
,
row.names
=
F
,
col.names
=
T
)
# Vitro
vitros
<-
list
()
for
(
vv
in
c
(
"227-vitro-A"
,
"227-vitro-B"
,
"227-vitro-C"
,
"227-vitro-D"
))
{
print
(
vv
)
t
<-
subset
(
full.t
,
Sample
==
vv
)
vitros
[[
vv
]]
<-
t
$
Vars
}
venn
<-
Venn
(
vitros
)
data
<-
process_data
(
venn
)
vreg
<-
venn_region
(
data
)
vreg
$
NumInters
<-
as.character
(
str_count
(
venn_region
(
data
)
$
name
,
"[..]"
))
cc
<-
brewer.pal
(
name
=
"Accent"
,
n
=
8
)
cc_cat
<-
cc
[
1
:
length
(
unique
(
vreg
$
NumInters
))]
names
(
cc_cat
)
<-
unique
(
vreg
$
NumInters
)
vp
<-
ggplot
()
+
# change mapping of color filling
geom_sf
(
aes
(
fill
=
NumInters
),
data
=
vreg
,
show.legend
=
FALSE
)
+
scale_fill_manual
(
values
=
cc_cat
)
+
# adjust edge size and color
geom_sf
(
color
=
"black"
,
size
=
1
,
data
=
venn_setedge
(
data
),
show.legend
=
FALSE
)
+
# show set label in bold
geom_sf_text
(
aes
(
label
=
name
),
fontface
=
"bold"
,
data
=
venn_setlabel
(
data
),
size
=
8
,
nudge_x
=
c
(
0.05
,
0.06
,
-0.06
,
-0.06
))
+
# add a alternative region name
geom_sf_label
(
aes
(
label
=
paste0
(
count
,
"\n"
,
"("
,
round
(
count
/
sum
(
count
)
*
100
,
1
),
"%)"
)),
data
=
vreg
,
alpha
=
0.5
,
size
=
5
)
+
theme_void
()
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
"Vitro_DP"
,
dp_sing
,
"_intersection_Venn.pdf"
),
sep
=
"/"
),
plot
=
vp
,
width
=
9
,
height
=
7
)
# Remove Multivars
vitros
<-
list
()
for
(
vv
in
c
(
"227-vitro-A"
,
"227-vitro-B"
,
"227-vitro-C"
,
"227-vitro-D"
))
{
print
(
vv
)
t
<-
subset
(
full.t
,
Sample
==
vv
)
t
<-
t
[
grep
(
pattern
=
","
,
x
=
t
$
ALT
,
invert
=
T
),]
vitros
[[
vv
]]
<-
t
$
Vars
}
venn
<-
Venn
(
vitros
)
data
<-
process_data
(
venn
)
vreg
<-
venn_region
(
data
)
vreg
$
NumInters
<-
as.character
(
str_count
(
venn_region
(
data
)
$
name
,
"[..]"
))
cc
<-
brewer.pal
(
name
=
"Accent"
,
n
=
8
)
cc_cat
<-
cc
[
1
:
length
(
unique
(
vreg
$
NumInters
))]
names
(
cc_cat
)
<-
unique
(
vreg
$
NumInters
)
vp
<-
ggplot
()
+
# change mapping of color filling
geom_sf
(
aes
(
fill
=
NumInters
),
data
=
vreg
,
show.legend
=
FALSE
)
+
scale_fill_manual
(
values
=
cc_cat
)
+
# adjust edge size and color
geom_sf
(
color
=
"black"
,
size
=
1
,
data
=
venn_setedge
(
data
),
show.legend
=
FALSE
)
+
# show set label in bold
geom_sf_text
(
aes
(
label
=
name
),
fontface
=
"bold"
,
data
=
venn_setlabel
(
data
),
size
=
8
,
nudge_x
=
c
(
0.05
,
0.06
,
-0.06
,
-0.06
))
+
# add a alternative region name
geom_sf_label
(
aes
(
label
=
paste0
(
count
,
"\n"
,
"("
,
round
(
count
/
sum
(
count
)
*
100
,
1
),
"%)"
)),
data
=
vreg
,
alpha
=
0.5
,
size
=
5
)
+
theme_void
()
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
"Vitro_DP"
,
dp_sing
,
"NoMulti_intersection_Venn.pdf"
),
sep
=
"/"
),
plot
=
vp
,
width
=
9
,
height
=
7
)
plot_variants
(
full.t
=
subset
(
full.t
,
Group
==
"vitro"
),
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"Vitro_DP"
,
dp_sing
),
fill_by
=
"Treatment"
)
# Control
vitro_ctrl
<-
subset
(
full.t
,
Sample
==
"227-vitro-D"
)
vitro_ctrl_vars
<-
vitro_ctrl
$
Vars
# Remove Control variants from vitro
full.t_noctrl
<-
subset
(
full.t
,
!
Vars
%in%
vitro_ctrl_vars
)
full.t_noctrl
%>%
group_by
(
Sample
)
%>%
summarise
(
n
())
plot_variants
(
full.t
=
subset
(
full.t_noctrl
,
Group
==
"vitro"
),
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"Vitro_DP"
,
dp_sing
,
"_NoVitroCtrl"
),
fill_by
=
"Treatment"
)
vitros_noctrl
<-
list
()
for
(
vv
in
c
(
"227-vitro-A"
,
"227-vitro-B"
,
"227-vitro-C"
))
{
print
(
vv
)
t
<-
subset
(
full.t_noctrl
,
Sample
==
vv
)
vitros_noctrl
[[
vv
]]
<-
t
$
Vars
}
venn
<-
Venn
(
vitros_noctrl
)
data
<-
process_data
(
venn
)
vreg
<-
venn_region
(
data
)
vreg
$
NumInters
<-
as.character
(
str_count
(
venn_region
(
data
)
$
name
,
"[..]"
))
cc
<-
brewer.pal
(
name
=
"Accent"
,
n
=
8
)
cc_cat
<-
cc
[
1
:
length
(
unique
(
vreg
$
NumInters
))]
names
(
cc_cat
)
<-
unique
(
vreg
$
NumInters
)
vp
<-
ggplot
()
+
# change mapping of color filling
geom_sf
(
aes
(
fill
=
NumInters
),
data
=
vreg
,
show.legend
=
FALSE
)
+
scale_fill_manual
(
values
=
cc_cat
)
+
# adjust edge size and color
geom_sf
(
color
=
"black"
,
size
=
1
,
data
=
venn_setedge
(
data
),
show.legend
=
FALSE
)
+
# show set label in bold
geom_sf_text
(
aes
(
label
=
name
),
fontface
=
"bold"
,
data
=
venn_setlabel
(
data
),
size
=
8
,
nudge_y
=
c
(
-450
,
0
,
-450
),
nudge_x
=
c
(
100
,
0
,
-100
))
+
# add a alternative region name
geom_sf_label
(
aes
(
label
=
paste0
(
count
,
"\n"
,
"("
,
round
(
count
/
sum
(
count
)
*
100
,
1
),
"%)"
)),
data
=
vreg
,
alpha
=
0.5
,
size
=
5
)
+
theme_void
()
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
"Vitro_DP"
,
dp_sing
,
"_NoVitroCtrl_intersection_Venn.pdf"
),
sep
=
"/"
),
plot
=
vp
,
width
=
9
,
height
=
7
)
# Samples
plot_variants
(
full.t
=
subset
(
full.t
,
Group
==
"Sample"
),
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"Sample_DP"
,
dp_sing
),
fill_by
=
"Treatment"
)
plot_variants
(
full.t
=
subset
(
full.t_noctrl
,
Group
==
"Sample"
),
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"Sample_DP"
,
dp_sing
,
"_NoVitroCtrl"
),
fill_by
=
"Treatment"
)
full.t_noctrl_nomulti
<-
full.t_noctrl
[
grep
(
pattern
=
","
,
x
=
full.t_noctrl
$
ALT
,
invert
=
T
),]
plot_variants
(
full.t
=
subset
(
full.t_noctrl_nomulti
,
Group
==
"Sample"
),
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"Sample_DP"
,
dp_sing
,
"NoMulti_NoVitroCtrl"
),
fill_by
=
"Treatment"
)
plot_variants
(
full.t
=
full.t_noctrl
,
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"Full_DP"
,
dp_sing
,
"_NoVitroCtrl"
),
fill_by
=
"Treatment"
)
## Parse snpEff Annotation
full.t_noctrl_nomulti
$
snpEff_tmp
<-
lapply
(
full.t_noctrl_nomulti
$
ANNOT
,
function
(
x
){
tmp
<-
strsplit
(
x
,
";"
)[[
1
]]
ann_pos
<-
length
(
tmp
)
if
(
!
startsWith
(
tmp
[
length
(
tmp
)],
"ANN"
))
{
for
(
i
in
seq
(
1
,
length
(
tmp
)))
{
if
(
startsWith
(
tmp
[
i
],
"ANN"
))
{
ann_pos
<-
i
}
}
}
tmp
<-
tmp
[
ann_pos
]
})
full.t_noctrl_nomulti
$
snpEff_Effect
<-
sapply
(
full.t_noctrl_nomulti
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
2
]
},
USE.NAMES
=
F
)
full.t_noctrl_nomulti
$
snpEff_Impact
<-
sapply
(
full.t_noctrl_nomulti
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
3
]
},
USE.NAMES
=
F
)
full.t_noctrl_nomulti
$
snpEff_Gene
<-
sapply
(
full.t_noctrl_nomulti
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
4
]
},
USE.NAMES
=
F
)
full.t_noctrl_nomulti
$
snpEff_GeneName
<-
sapply
(
full.t_noctrl_nomulti
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
5
]
},
USE.NAMES
=
F
)
full.t_noctrl_nomulti
$
snpEff_BioType
<-
sapply
(
full.t_noctrl_nomulti
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
8
]
},
USE.NAMES
=
F
)
full.t_noctrl_nomulti
$
snpEff_tmp
<-
NULL
write.table
(
x
=
full.t_noctrl_nomulti
,
file
=
paste
(
out_dir
,
"Full_NoMulti_noGerm_res.tsv"
,
sep
=
"/"
),
sep
=
"\t"
,
quote
=
F
,
row.names
=
F
,
col.names
=
T
)
snpeff_df
<-
full.t_noctrl_nomulti
%>%
group_by
(
Sample
,
Group
,
Treatment
,
snpEff_Effect
,
snpEff_Impact
)
%>%
summarise
(
Count
=
n
())
snpeff_df
<-
melt
(
reshape2
::
dcast
(
snpeff_df
,
Sample
+
Group
+
Treatment
~
snpEff_Effect
+
snpEff_Impact
,
value.var
=
"Count"
),
id.vars
=
c
(
"Sample"
,
"Group"
,
"Treatment"
))
snpeff_df
$
snpEff_Effect
<-
sapply
(
snpeff_df
$
variable
,
function
(
x
){
tmp
<-
strsplit
(
as.character
(
x
),
"_"
)[[
1
]]
# only consider the nearest match
tmp
<-
tmp
[
1
:
(
length
(
tmp
)
-1
)]
paste
(
tmp
,
collapse
=
"_"
)
},
USE.NAMES
=
F
)
snpeff_df
$
snpEff_Impact
<-
sapply
(
snpeff_df
$
variable
,
function
(
x
){
tmp
<-
strsplit
(
as.character
(
x
),
"_"
)[[
1
]]
# only consider the nearest match
tmp
<-
tmp
[
length
(
tmp
)]
},
USE.NAMES
=
F
)
snpeff_df
$
variable
<-
NULL
snpeff_df
$
snpEff_Impact
<-
factor
(
snpeff_df
$
snpEff_Impact
,
levels
=
c
(
"HIGH"
,
"MODERATE"
,
"LOW"
,
"MODIFIER"
))
mpoint
<-
(
max
(
snpeff_df
$
value
,
na.rm
=
T
)
-
min
(
snpeff_df
$
value
,
na.rm
=
T
))
/
2
p
<-
ggplot
(
snpeff_df
,
aes
(
x
=
snpEff_Effect
,
y
=
Sample
,
fill
=
value
))
+
theme_bw
(
base_size
=
14
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
70
,
hjust
=
1
))
+
geom_tile
(
aes
(
height
=
.98
,
width
=
.98
))
+
geom_text
(
data
=
subset
(
snpeff_df
,
value
<
1000
),
aes
(
label
=
value
),
color
=
"white"
,
size
=
4
)
+
ylab
(
""
)
+
xlab
(
""
)
+
ggtitle
(
"snpEff Variant Classification"
)
+
scale_fill_gradient2
(
low
=
"blue"
,
high
=
"red"
,
mid
=
"yellow"
,
midpoint
=
mpoint
,
na.value
=
"grey"
)
+
facet_grid
(
Treatment
~
snpEff_Impact
,
scales
=
"free"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
"Full_DP"
,
dp_sing
,
"NoMulti_NoVitroCtrl_snpEff.pdf"
),
sep
=
"/"
),
plot
=
p
,
width
=
15
,
height
=
12
)
# Samples
samples.t_noctrl_nomulti
<-
subset
(
full.t_noctrl_nomulti
,
Group
==
"Sample"
)
############################
### Cancer-related Genes ###
############################
offt_gene
<-
c
(
"ABCB1"
,
"ABCC2"
,
"ABL1"
,
"ABL2"
,
"AKT1"
,
"AKT2"
,
"AKT3"
,
"ALK"
,
"ANGPTL7"
,
"APC"
,
"ASXL1"
,
"ATM"
,
"ATRX"
,
"BCYRN1"
,
"BRAF"
,
"BRCA1"
,
"BRCA2"
,
"CBL"
,
"CDA"
,
"CDH1"
,
"CDKN2A"
,
"CDKN2B"
,
"CEBPA"
,
"CHD7"
,
"CHIC2"
,
"CREBBP"
,
"CRLF2"
,
"CSF1R"
,
"CTNNB1"
,
"CYP19A1"
,
"CYP2A13"
,
"CYP2A6"
,
"CYP2A7"
,
"CYP2B6"
,
"CYP2B7P"
,
"CYP2C19"
,
"CYP2C9"
,
"CYP2D6"
,
"CYP2D7"
,
"DACH1"
,
"DDR1"
,
"DDR2"
,
"DDX3X"
,
"DDX54"
,
"DNMT3A"
,
"DPYD"
,
"DPYD-AS1"
,
"EGFR"
,
"EGFR-AS1"
,
"ERBB2"
,
"ERBB3"
,
"ERBB4"
,
"ERG"
,
"ESR1"
,
"EVI2A"
,
"EVI2B"
,
"EZH2"
,
"FBXW7"
,
"FGFR1"
,
"FGFR2"
,
"FGFR3"
,
"FGFR4"
,
"FLT1"
,
"FLT3"
,
"FLT4"
,
"FSTL5"
,
"GNA11"
,
"GNAQ"
,
"GNAS"
,
"GNAS-AS1"
,
"GSTP1"
,
"GTF2IP1"
,
"H3F3A"
,
"HNF1A"
,
"HRAS"
,
"IDH1"
,
"IDH2"
,
"IKZF1"
,
"IL1RAPL1"
,
"IL2RA"
,
"IL2RB"
,
"IL2RG"
,
"INPP4B"
,
"JAK1"
,
"JAK2"
,
"JAK3"
,
"KDM6A"
,
"KDR"
,
"KIT"
,
"KMT2A"
,
"KRAS"
,
"LAMA2"
,
"LCK"
,
"LOC100287072"
,
"LOC101928052"
,
"LPAR6"
,
"LTK"
,
"MAP2K1"
,
"MAP2K2"
,
"MAP2K4"
,
"MAP3K1"
,
"MAPK1"
,
"MED13"
,
"MEIKIN"
,
"MET"
,
"MGC32805"
,
"MGST2"
,
"MIR548AZ"
,
"MLH1"
,
"MPL"
,
"MST1R"
,
"MTOR"
,
"MTOR-AS1"
,
"MYC"
,
"MYD88"
,
"NELL2"
,
"NF1"
,
"NOTCH1"
,
"NPM1"
,
"NRAS"
,
"OMG"
,
"PDGFRA"
,
"PDGFRB"
,
"PHF6"
,
"PIK3CA"
,
"PIK3R1"
,
"PSMB1"
,
"PSMB2"
,
"PSMB5"
,
"PSMD1"
,
"PSMD2"
,
"PTCH1"
,
"PTEN"
,
"PTENP1"
,
"PTPN11"
,
"PVT1"
,
"RAF1"
,
"RARA"
,
"RARA-AS1"
,
"RARB"
,
"RARG"
,
"RB1"
,
"RET"
,
"ROS1"
,
"RPS6KB1"
,
"RUNDC3B"
,
"RUNX1"
,
"RXRA"
,
"RXRB"
,
"RXRG"
,
"SDCCAG8"
,
"SHH"
,
"SHOC2"
,
"SLC22A1"
,
"SLC22A2"
,
"SLC31A1"
,
"SLC34A2"
,
"SLC45A3"
,
"SLCO1B1"
,
"SMAD4"
,
"SMARCA4"
,
"SMARCB1"
,
"SMO"
,
"SNCAIP"
,
"SOS1"
,
"SPRED1"
,
"SRC"
,
"STK11"
,
"SUFU"
,
"TAS2R38"
,
"TET2"
,
"TMEM75"
,
"TMPRSS2"
,
"TP53"
,
"TPX2"
,
"TRRAP"
,
"TYK2"
,
"UGT1A9"
,
"UTY"
,
"VHL"
,
"WT1"
,
"YES1"
)
offt_vars
<-
subset
(
full.t_noctrl_nomulti
,
snpEff_GeneName
%in%
offt_gene
)
pdf
(
paste
(
out_dir
,
paste0
(
"Full_DP"
,
dp_sing
,
"NoMulti_NoVitroCtrl_Circos_Pannello_Genes.pdf"
),
sep
=
"/"
),
width
=
12
,
height
=
9
)
par
(
mfrow
=
c
(
3
,
4
))
for
(
gg
in
unique
(
offt_vars
$
Treatment
))
{
offt_vars_gg
<-
subset
(
offt_vars
,
Treatment
==
gg
)
print
(
gg
)
for
(
ss
in
unique
(
offt_vars_gg
$
Sample
))
{
plot_circos
(
t
=
offt_vars
,
sample
=
ss
)
}
}
legend_plot
(
t
=
offt_vars
,
sample
=
ss
)
dev.off
()
write.xlsx
(
x
=
list
(
"Pannello"
=
offt_vars
),
file
=
paste
(
out_dir
,
paste0
(
"Full_DP"
,
dp_sing
,
"NoMulti_NoVitroCtrl_Circos_Pannello_Genes.xlsx"
),
sep
=
"/"
))
offt_vars
<-
subset
(
samples.t_noctrl_nomulti
,
snpEff_GeneName
%in%
offt_gene
)
pdf
(
paste
(
out_dir
,
paste0
(
"Sample_DP"
,
dp_sing
,
"NoMulti_NoVitroCtrl_Circos_Pannello_Genes.pdf"
),
sep
=
"/"
),
width
=
12
,
height
=
9
)
par
(
mfrow
=
c
(
3
,
4
))
for
(
gg
in
unique
(
offt_vars
$
Treatment
))
{
offt_vars_gg
<-
subset
(
offt_vars
,
Treatment
==
gg
)
print
(
gg
)
for
(
ss
in
unique
(
offt_vars_gg
$
Sample
))
{
plot_circos
(
t
=
offt_vars
,
sample
=
ss
)
}
}
legend_plot
(
t
=
offt_vars
,
sample
=
ss
)
dev.off
()
write.xlsx
(
x
=
list
(
"Pannello"
=
offt_vars
),
file
=
paste
(
out_dir
,
paste0
(
"Sample_DP"
,
dp_sing
,
"NoMulti_NoVitroCtrl_Circos_Pannello_Genes.xlsx"
),
sep
=
"/"
))
offt_vars
<-
subset
(
samples.t_noctrl_nomulti
,
snpEff_GeneName
%in%
offt_gene
&
snpEff_Impact
%in%
c
(
"HIGH"
,
"MODERATE"
,
"MODIFIER"
))
pdf
(
paste
(
out_dir
,
"Groups_Circos_Pannello_Genes_HighModMod.pdf"
,
sep
=
"/"
),
width
=
12
,
height
=
9
)
par
(
mfrow
=
c
(
2
,
3
))
for
(
gg
in
unique
(
offt_vars
$
Treatment
))
{
offt_vars_gg
<-
subset
(
offt_vars
,
Treatment
==
gg
)
offt_vars_gg
$
Sample
<-
gg
plot_circos
(
t
=
offt_vars_gg
,
sample
=
gg
)
}
legend_plot
(
t
=
offt_vars
,
sample
=
gg
)
dev.off
()
write.xlsx
(
x
=
list
(
"Pannello"
=
offt_vars
),
file
=
paste
(
out_dir
,
"Groups_Circos_Pannello_Genes_HighModMod.xlsx"
,
sep
=
"/"
))
# Merge samples 5 and 6 of group A
tt_A
<-
data.frame
()
for
(
contr
in
c
(
"227-BM-A0"
,
"227-BM-A2"
,
"227-BM-A3"
))
{
tt5_name
<-
paste0
(
contr
,
"-5"
)
tt5
<-
subset
(
full.t
,
Sample
==
tt5_name
)
tt6_name
<-
paste0
(
contr
,
"-6"
)
tt6
<-
subset
(
full.t
,
Sample
==
tt6_name
)
vl
<-
list
(
tt5
$
Vars
,
tt6
$
Vars
)
names
(
vl
)
<-
c
(
tt5_name
,
tt6_name
)
venn
<-
Venn
(
vl
)
data
<-
process_data
(
venn
)
vreg
<-
venn_region
(
data
)
vreg
$
NumInters
<-
as.character
(
str_count
(
venn_region
(
data
)
$
name
,
"[..]"
))
cc
<-
brewer.pal
(
name
=
"Accent"
,
n
=
8
)
cc_cat
<-
cc
[
1
:
length
(
unique
(
vreg
$
NumInters
))]
names
(
cc_cat
)
<-
unique
(
vreg
$
NumInters
)
vp
<-
ggplot
()
+
# change mapping of color filling
geom_sf
(
aes
(
fill
=
NumInters
),
data
=
vreg
,
show.legend
=
FALSE
)
+
scale_fill_manual
(
values
=
cc_cat
)
+
# adjust edge size and color
geom_sf
(
color
=
"black"
,
size
=
1
,
data
=
venn_setedge
(
data
),
show.legend
=
FALSE
)
+
# show set label in bold
geom_sf_text
(
aes
(
label
=
name
),
fontface
=
"bold"
,
data
=
venn_setlabel
(
data
),
size
=
6
)
+
# add a alternative region name
geom_sf_label
(
aes
(
label
=
paste0
(
count
,
"\n"
,
"("
,
round
(
count
/
sum
(
count
)
*
100
,
1
),
"%)"
)),
data
=
vreg
,
alpha
=
0.5
,
size
=
5
)
+
theme_void
()
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
contr
,
"_Samples_DP"
,
dp_sing
,
"_intersection_Venn.pdf"
),
sep
=
"/"
),
plot
=
vp
,
width
=
5
,
height
=
3
)
tt5_only
<-
subset
(
tt5
,
Vars
%in%
setdiff
(
tt5
$
Vars
,
tt6
$
Vars
))
tt5_only
$
Sample
<-
contr
tt6_only
<-
subset
(
tt6
,
Vars
%in%
setdiff
(
tt6
$
Vars
,
tt5
$
Vars
))
tt6_only
$
Sample
<-
contr
tt5_common
<-
subset
(
tt5
,
Vars
%in%
intersect
(
tt5
$
Vars
,
tt6
$
Vars
))
tt6_common
<-
subset
(
tt6
,
Vars
%in%
intersect
(
tt5
$
Vars
,
tt6
$
Vars
))
tt_common
<-
merge
(
tt5_common
,
tt6_common
,
by
=
c
(
"CHROM"
,
"POS"
,
"ID"
,
"REF"
,
"ALT"
,
"FILTER"
,
"Group"
,
"Treatment"
,
"Vars"
))
tt_common
$
Sample.x
<-
NULL
tt_common
$
Sample.y
<-
NULL
tt_common
$
Sample
<-
contr
tt_common
$
VAF
<-
rowMeans
(
tt_common
[,
c
(
"VAF.x"
,
"VAF.y"
)])
tt_common
$
VAF.x
<-
NULL
tt_common
$
VAF.y
<-
NULL
tt_common
$
DP
<-
rowMeans
(
tt_common
[,
c
(
"DP.x"
,
"DP.y"
)])
tt_common
$
DP.x
<-
NULL
tt_common
$
DP.y
<-
NULL
tt_common
$
GQ
<-
rowMeans
(
tt_common
[,
c
(
"GQ.x"
,
"GQ.y"
)])
tt_common
$
GQ.x
<-
NULL
tt_common
$
GQ.y
<-
NULL
tt_common
$
QUAL
<-
rowMeans
(
tt_common
[,
c
(
"QUAL.x"
,
"QUAL.y"
)])
tt_common
$
QUAL.x
<-
NULL
tt_common
$
QUAL.y
<-
NULL
tt_common
$
ANNOT
<-
tt_common
$
ANNOT.x
tt_common
$
ANNOT.x
<-
NULL
tt_common
$
ANNOT.y
<-
NULL
tt_common
$
FORMAT
<-
tt_common
$
FORMAT.x
tt_common
$
FORMAT.x
<-
NULL
tt_common
$
FORMAT.y
<-
NULL
tt_common
$
SAMPLE
<-
tt_common
$
SAMPLE.x
tt_common
$
SAMPLE.x
<-
NULL
tt_common
$
SAMPLE.y
<-
NULL
tt_common
<-
tt_common
[,
colnames
(
tt5_only
)]
tt_contr
<-
rbind
(
tt_common
,
tt5_only
,
tt6_only
)
if
(
nrow
(
tt_A
)
==
0
)
{
tt_A
<-
tt_contr
}
else
{
tt_A
<-
rbind
(
tt_A
,
tt_contr
)
}
}
table
(
full.t
$
Sample
)
full.t.unionA
<-
subset
(
full.t
,
!
Sample
%in%
c
(
paste0
(
c
(
"227-BM-A0"
,
"227-BM-A2"
,
"227-BM-A3"
),
"-5"
),
paste0
(
c
(
"227-BM-A0"
,
"227-BM-A2"
,
"227-BM-A3"
),
"-6"
)))
full.t.unionA
<-
rbind
(
full.t.unionA
,
tt_A
)
plot_variants
(
full.t
=
subset
(
full.t.unionA
,
Group
==
"Sample"
),
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"SampleUnionA_DP"
,
dp_sing
),
fill_by
=
"Treatment"
)
full.t.unionA_noctrl
<-
subset
(
full.t.unionA
,
!
Vars
%in%
vitro_ctrl_vars
)
plot_variants
(
full.t
=
subset
(
full.t.unionA_noctrl
,
Group
==
"Sample"
),
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"SampleUnionA_DP"
,
dp_sing
,
"_NoVitroCtrl"
),
fill_by
=
"Treatment"
)
full.t.unionA_noctrl_nomulti
<-
full.t.unionA_noctrl
[
grep
(
pattern
=
","
,
x
=
full.t.unionA_noctrl
$
ALT
,
invert
=
T
),]
plot_variants
(
full.t
=
subset
(
full.t.unionA_noctrl_nomulti
,
Group
==
"Sample"
),
out_dir
=
out_dir
,
plot_prefix
=
paste0
(
"SampleUnionA_DP"
,
dp_sing
,
"NoMulti_NoVitroCtrl"
),
fill_by
=
"Treatment"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment