Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Fiumara_BasePrimeEd2022
Fiumara_BasePrimeEd2022_WES
Commits
a1aa39cb
Commit
a1aa39cb
authored
Jul 31, 2023
by
Stefano Beretta
Browse files
Upload New File
parent
6fe5b298
Changes
1
Show whitespace changes
Inline
Side-by-side
scripts/WEScolonies_plot_results.R
0 → 100644
View file @
a1aa39cb
library
(
dplyr
)
library
(
ggplot2
)
library
(
RColorBrewer
)
library
(
CMplot
)
library
(
gtable
)
library
(
circlize
)
library
(
grid
)
library
(
gridExtra
)
library
(
stringr
)
library
(
e1071
)
library
(
openxlsx
)
library
(
VennDiagram
)
library
(
scales
)
library
(
ggVennDiagram
)
library
(
reshape2
)
# Function to define colors for SNVs
snv_colors2
<-
function
(
del
=
FALSE
,
alpha
=
1
)
{
nuc
<-
c
(
"A"
,
"C"
,
"G"
,
"T"
)
# Point deletions
dels
<-
alpha
(
brewer.pal
(
5
,
"Greys"
)[
2
:
5
],
1
)
names
(
dels
)
<-
paste0
(
nuc
,
"*"
)
# Transitions
ts
<-
alpha
(
brewer.pal
(
5
,
"Blues"
)[
2
:
5
],
1
)
names
(
ts
)
<-
c
(
"CT"
,
"GA"
,
"AG"
,
"TC"
)
# Tansversions
tv1
<-
alpha
(
brewer.pal
(
5
,
"YlGn"
)[
2
:
5
],
1
)
names
(
tv1
)
<-
c
(
"AC"
,
"TG"
,
"AT"
,
"TA"
)
tv2
<-
alpha
(
brewer.pal
(
5
,
"OrRd"
)[
2
:
5
],
1
)
names
(
tv2
)
<-
c
(
"CA"
,
"GT"
,
"CG"
,
"GC"
)
var_cols
<-
c
(
ts
,
tv1
,
tv2
)
if
(
del
)
{
var_cols
<-
c
(
var_cols
,
dels
)
}
return
(
var_cols
)
}
# Variant Plots
plot_variants
<-
function
(
full.t
,
out_dir
,
plot_prefix
,
fill_by
)
{
dir.create
(
path
=
out_dir
,
showWarnings
=
F
)
tt
<-
full.t
%>%
filter
(
grepl
(
"chr"
,
CHROM
))
%>%
filter
(
!
CHROM
%in%
c
(
"chrY"
,
"chrM"
))
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
))
%>%
summarise
(
Count
=
n
())
write.xlsx
(
x
=
list
(
"NumVariants"
=
tt
),
file
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantCounts.xlsx"
),
sep
=
"/"
))
p
<-
ggplot
(
tt
,
aes
(
x
=
Sample
,
y
=
Count
,
fill
=
get
(
fill_by
)))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
90
,
hjust
=
1
,
vjust
=
0.5
),
legend.position
=
"none"
)
+
geom_bar
(
stat
=
"identity"
)
+
scale_y_continuous
(
labels
=
scales
::
comma
,
n.breaks
=
8
)
+
xlab
(
""
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantCounts.pdf"
),
sep
=
"/"
),
plot
=
p
,
width
=
6
,
height
=
6
)
tt
<-
full.t
%>%
filter
(
grepl
(
"chr"
,
CHROM
))
%>%
filter
(
!
CHROM
%in%
c
(
"chrY"
,
"chrM"
))
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
),
REF
,
ALT
)
%>%
summarise
(
Count
=
n
())
tt
<-
mutate
(
tt
,
TYPE
=
case_when
(
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
ALT
!=
"*"
~
"SNV"
,
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
ALT
==
"*"
~
"DEL"
,
nchar
(
REF
)
>
nchar
(
ALT
)
&
nchar
(
REF
)
-
nchar
(
ALT
)
>=
1
~
"DEL"
,
nchar
(
REF
)
<
nchar
(
ALT
)
&
nchar
(
ALT
)
-
nchar
(
REF
)
>=
1
~
"INS"
,
TRUE
~
"Other"
))
write.xlsx
(
x
=
list
(
"VariantClass"
=
tt
),
file
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantClassification.xlsx"
),
sep
=
"/"
))
tt_type
<-
tt
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
),
TYPE
)
%>%
summarise
(
SumCount
=
sum
(
Count
))
%>%
arrange
(
desc
(
SumCount
))
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
))
%>%
mutate
(
CountPerc
=
SumCount
/
sum
(
SumCount
))
write.xlsx
(
x
=
list
(
"VariantType"
=
tt_type
),
file
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantType.xlsx"
),
sep
=
"/"
))
p
<-
ggplot
(
tt_type
,
aes
(
x
=
Sample
,
y
=
SumCount
,
fill
=
TYPE
))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
),
legend.position
=
"top"
)
+
guides
(
fill
=
guide_legend
(
ncol
=
6
))
+
xlab
(
""
)
+
ylab
(
"Count"
)
+
scale_fill_brewer
(
palette
=
"Set1"
,
name
=
""
)
+
geom_bar
(
stat
=
"identity"
,
position
=
"stack"
)
+
scale_y_continuous
(
labels
=
scales
::
comma
,
n.breaks
=
8
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantTypes.pdf"
),
sep
=
"/"
),
plot
=
p
,
width
=
6
,
height
=
6
)
p
<-
ggplot
(
tt_type
,
aes
(
x
=
Sample
,
y
=
CountPerc
,
fill
=
TYPE
))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
),
legend.position
=
"top"
)
+
guides
(
fill
=
guide_legend
(
ncol
=
6
))
+
xlab
(
""
)
+
ylab
(
"Count"
)
+
scale_fill_brewer
(
palette
=
"Set1"
,
name
=
""
)
+
geom_bar
(
stat
=
"identity"
,
position
=
"stack"
)
+
scale_y_continuous
(
labels
=
scales
::
percent_format
(
accuracy
=
2
),
n.breaks
=
10
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_VariantTypesPerc.pdf"
),
sep
=
"/"
),
plot
=
p
,
width
=
6
,
height
=
6
)
tt
<-
full.t
%>%
filter
(
nchar
(
REF
)
==
1
&
nchar
(
ALT
)
==
1
&
grepl
(
"chr"
,
CHROM
))
%>%
filter
(
!
CHROM
%in%
c
(
"chrY"
,
"chrM"
))
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
),
REF
,
ALT
)
%>%
summarise
(
Count
=
n
())
%>%
group_by
(
Sample
,
!!!
syms
(
fill_by
))
%>%
mutate
(
CountPerc
=
Count
/
sum
(
Count
))
tt
$
Variant
<-
paste0
(
tt
$
REF
,
tt
$
ALT
)
tt
$
Variant
<-
factor
(
tt
$
Variant
,
levels
=
sort
(
unique
(
tt
$
Variant
)))
write.xlsx
(
x
=
list
(
"SNV"
=
tt
),
file
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_SNVcounts.xlsx"
),
sep
=
"/"
))
tt
$
Variant
<-
factor
(
tt
$
Variant
,
levels
=
rev
(
names
(
snv_colors2
())))
p2
<-
ggplot
(
tt
,
aes
(
x
=
Sample
,
y
=
Count
,
fill
=
Variant
))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
))
+
geom_bar
(
stat
=
"identity"
)
+
scale_fill_manual
(
values
=
snv_colors2
(
del
=
F
))
+
xlab
(
""
)
+
scale_y_continuous
(
labels
=
scales
::
comma
,
n.breaks
=
8
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_SNVcounts.pdf"
),
sep
=
"/"
),
plot
=
p2
,
width
=
7
,
height
=
7
)
p2p
<-
ggplot
(
tt
,
aes
(
x
=
Sample
,
y
=
CountPerc
,
fill
=
Variant
))
+
theme_bw
(
base_size
=
12
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
))
+
geom_bar
(
stat
=
"identity"
)
+
scale_fill_manual
(
values
=
snv_colors2
(
del
=
F
))
+
scale_y_continuous
(
labels
=
scales
::
percent_format
(
accuracy
=
2
),
n.breaks
=
10
)
+
xlab
(
""
)
+
facet_grid
(
.
~
get
(
fill_by
),
scales
=
"free_x"
,
space
=
"free"
)
ggsave
(
filename
=
paste
(
out_dir
,
paste0
(
plot_prefix
,
"_SNVcountsPerc.pdf"
),
sep
=
"/"
),
plot
=
p2p
,
width
=
7
,
height
=
7
)
}
###############
### General ###
###############
wdir
<-
"Fiumara_BasePrimeEd2022_WES/WEScolonies"
out_dir
<-
paste
(
wdir
,
"results"
,
sep
=
"/"
)
dir.create
(
out_dir
,
showWarnings
=
F
)
# Samples
samples
<-
c
(
"S1-BE4CC"
,
"S2-BE4CCw-osgRNA"
,
"S3-ABE8CC"
,
"S4-BE4arca"
,
"S5-mockcolonies"
,
"S6-mock24h"
)
################
### Variants ###
################
full.t
<-
data.frame
()
for
(
ss
in
samples
)
{
print
(
ss
)
t
<-
read.table
(
gzfile
(
paste
(
wdir
,
"data"
,
paste0
(
ss
,
"-DP200_PASS_sGQ80_sDP10.ANNOT.vcf.gz"
),
sep
=
"/"
)),
comment.char
=
"#"
,
sep
=
"\t"
)
colnames
(
t
)
<-
c
(
"CHROM"
,
"POS"
,
"ID"
,
"REF"
,
"ALT"
,
"QUAL"
,
"FILTER"
,
"ANNOT"
,
"FORMAT"
,
"SAMPLE"
)
t
$
Sample
<-
ss
t
$
Vars
<-
paste
(
t
$
CHROM
,
t
$
POS
,
t
$
REF
,
t
$
ALT
,
sep
=
"-"
)
t
$
Group
<-
str_split_fixed
(
ss
,
"-"
,
2
)[,
2
]
t
$
DP
<-
as.numeric
(
str_split_fixed
(
t
$
SAMPLE
,
":"
,
5
)[,
3
])
t
$
AD
<-
as.numeric
(
str_split_fixed
(
str_split_fixed
(
t
$
SAMPLE
,
":"
,
5
)[,
2
],
","
,
2
)[,
1
])
t
$
VAF
<-
(
t
$
DP
-
t
$
AD
)
/
t
$
DP
full.t
<-
rbind
(
full.t
,
t
)
}
# Filter CHR only
chr
<-
c
(
paste0
(
"chr"
,
seq
(
1
,
22
)),
"chrX"
)
full.t
<-
subset
(
full.t
,
CHROM
%in%
chr
)
full.t
$
CHROM
<-
factor
(
full.t
$
CHROM
,
levels
=
chr
)
full.t
$
Sample
<-
factor
(
full.t
$
Sample
,
levels
=
unique
(
full.t
$
Sample
))
# Remove Multi
full.t.nomulti
<-
full.t
[
grep
(
","
,
full.t
$
ALT
,
value
=
F
,
invert
=
T
),
]
write.table
(
x
=
full.t.nomulti
,
file
=
paste
(
out_dir
,
"Full_NoMulti_res.tsv"
,
sep
=
"/"
),
sep
=
"\t"
,
quote
=
F
,
row.names
=
F
,
col.names
=
T
)
plot_variants
(
full.t
=
full.t.nomulti
,
out_dir
=
out_dir
,
plot_prefix
=
"Full_NoMulti"
,
fill_by
=
"Group"
)
# Control
vitro_ctrl
<-
subset
(
full.t.nomulti
,
Sample
==
"S6-mock24h"
)
vitro_ctrl_vars
<-
vitro_ctrl
$
Vars
# Remove germline
full.t.nomulti.noGerm
<-
subset
(
full.t.nomulti
,
!
(
Vars
%in%
vitro_ctrl_vars
))
plot_variants
(
full.t
=
full.t.nomulti.noGerm
,
out_dir
=
out_dir
,
plot_prefix
=
"Full_NoMulti_noGerm"
,
fill_by
=
"Group"
)
## Parse snpEff Annotation
full.t.nomulti.noGerm
$
snpEff_tmp
<-
lapply
(
full.t.nomulti.noGerm
$
ANNOT
,
function
(
x
){
tmp
<-
strsplit
(
x
,
";"
)[[
1
]]
ann_pos
<-
length
(
tmp
)
if
(
!
startsWith
(
tmp
[
length
(
tmp
)],
"ANN"
))
{
for
(
i
in
seq
(
1
,
length
(
tmp
)))
{
if
(
startsWith
(
tmp
[
i
],
"ANN"
))
{
ann_pos
<-
i
}
}
}
tmp
<-
tmp
[
ann_pos
]
})
full.t.nomulti.noGerm
$
snpEff_Effect
<-
sapply
(
full.t.nomulti.noGerm
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
2
]
},
USE.NAMES
=
F
)
full.t.nomulti.noGerm
$
snpEff_Impact
<-
sapply
(
full.t.nomulti.noGerm
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
3
]
},
USE.NAMES
=
F
)
full.t.nomulti.noGerm
$
snpEff_Gene
<-
sapply
(
full.t.nomulti.noGerm
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
4
]
},
USE.NAMES
=
F
)
full.t.nomulti.noGerm
$
snpEff_GeneName
<-
sapply
(
full.t.nomulti.noGerm
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
5
]
},
USE.NAMES
=
F
)
full.t.nomulti.noGerm
$
snpEff_BioType
<-
sapply
(
full.t.nomulti.noGerm
$
snpEff_tmp
,
function
(
tmp
){
strsplit
(
tmp
,
"|"
,
fixed
=
T
)[[
1
]][
8
]
},
USE.NAMES
=
F
)
full.t.nomulti.noGerm
$
snpEff_tmp
<-
NULL
write.table
(
x
=
full.t.nomulti.noGerm
,
file
=
paste
(
out_dir
,
"Full_NoMulti_noGerm_res.tsv"
,
sep
=
"/"
),
sep
=
"\t"
,
quote
=
F
,
row.names
=
F
,
col.names
=
T
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment