From aba1e70e64758e9960d21304ee5fc4515c854949 Mon Sep 17 00:00:00 2001 From: Matteo Barcella Date: Fri, 4 Aug 2023 14:39:47 +0000 Subject: [PATCH] Update README.md --- WES/README.md | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/WES/README.md b/WES/README.md index 406706c..b382d35 100644 --- a/WES/README.md +++ b/WES/README.md @@ -43,26 +43,35 @@ Below the workflow and commands used: We implemented several layers of annotation using different datasets including [SNPeff](http://pcingola.github.io/SnpEff/) (v.86), [dbSNP](https://www.ncbi.nlm.nih.gov/snp/) (v.152), [dbNSFP4](http://database.liulab.science/dbNSFP) (4.0a for academic). ``` -snpEff -v -csvStats ${sampleid}\_rawstats_canon.csv -s ${sampleid}\_rawstats_canon.html -canon GRCh38.86 Mutect2_raw_call_${sampleid}\_exome_refseq_light_filtered.vcf.gz > $sampleid\_raw_annot_refseq_light_filtered.vcf +snpEff -v -csvStats SampleID_rawstats_canon.csv -s SampleID_rawstats_canon.html -canon GRCh38.86 Mutect2_raw_call_SampleID_exome_refseq_light_filtered.vcf.gz > SampleID_raw_annot_refseq_light_filtered.vcf # dbSNP annotation -SnpSift annotate $dbsnp ${sampleid}\_raw_annot_refseq_light_filtered.vcf > ${sampleid}\_dbsnp_refseq_light_filtered.vcf -gatk IndexFeatureFile -I ${sampleid}\_dbsnp_refseq_light_filtered.vcf +SnpSift annotate $dbsnp SampleID_raw_annot_refseq_light_filtered.vcf > SampleID_dbsnp_refseq_light_filtered.vcf +gatk IndexFeatureFile -I SampleID_dbsnp_refseq_light_filtered.vcf # Cosmic annotation (coding) -SnpSift annotate $cosmic ${sampleid}\_dbsnp_refseq_light_filtered.vcf > ${sampleid}\_dbsnp_cc_refseq_light_filtered.vcf -gatk IndexFeatureFile -I ${sampleid}\_dbsnp_cc_refseq_light_filtered.vcf +SnpSift annotate $cosmic SampleID_dbsnp_refseq_light_filtered.vcf > SampleID_dbsnp_cc_refseq_light_filtered.vcf +gatk IndexFeatureFile -I SampleID_dbsnp_cc_refseq_light_filtered.vcf # Cosmic annotation (non coding) -SnpSift annotate $cosmicnc 
${sampleid}\_dbsnp_cc_refseq_light_filtered.vcf > ${sampleid}\_dbsnp_cc_cnc_refseq_light_filtered.vcf -gatk IndexFeatureFile -I ${sampleid}\_dbsnp_cc_cnc_refseq_light_filtered.vcf +SnpSift annotate $cosmicnc SampleID_dbsnp_cc_refseq_light_filtered.vcf > SampleID_dbsnp_cc_cnc_refseq_light_filtered.vcf +gatk IndexFeatureFile -I SampleID_dbsnp_cc_cnc_refseq_light_filtered.vcf # dbNSFP4 annotation -SnpSift dbnsfp -db dbNSFP4.0a/dbNSFP4.0a.txt.gz -v ${sampleid}\_dbsnp_cc_cnc_refseq_light_filtered.vcf > ${sampleid}\_dbsnp_cc_cnc_dbnsfp_refseq_light_filtered.vcf +SnpSift dbnsfp -db dbNSFP4.0a/dbNSFP4.0a.txt.gz -v SampleID_dbsnp_cc_cnc_refseq_light_filtered.vcf > SampleID_dbsnp_cc_cnc_dbnsfp_refseq_light_filtered.vcf + +# NOTE: a PASS-only version is also produced by keeping only the variants flagged PASS: + +bcftools_viewCommand=view -f PASS -O z Mutect2_raw_call_SampleID_exome_refseq_light_filtered_DP10.vcf.gz +- SnpEff -csvStats SampleID_rawstats_canon.csv -s SampleID_rawstats_canon.html GRCh38.86 Mutect2_raw_call_SampleID_exome_refseq_light_filtered_DP10_PASS.vcf.gz +- SnpSift Annotate dbSNP.v152.vcf.gz SampleID_raw_annot_refseq_light.vcf +- SnpSift Annotate CosmicCodingMuts.vcf.gz SampleID_dbsnp_refseq_light.vcf +- SnpSift Annotate CosmicNonCodingVariants.vcf.gz SampleID_dbsnp_cc_refseq_light.vcf +- SnpSift DbNsfp SampleID_dbsnp_cc_cnc_refseq_light.vcf ``` @@ -71,7 +80,10 @@ SnpSift dbnsfp -db dbNSFP4.0a/dbNSFP4.0a.txt.gz -v ${sampleid}\_dbsnp_cc_cnc_ref We leverage SnpSift extractfield function for selecting fields of interest and performing Allele Fraction analysis. 
``` -- SnpSift extractFields ${sampleid}_dbsnp_cc_cnc_dbnsfp_refseq_light_filtered.vcf CHROM POS REF ALT GEN[0].AF GEN[0].DP GEN[0].AD[0] GEN[0].AD[1] GEN[0].GT ID FILTER ANN[0].HGVS_P ANN[0].GENE ANN[0].BIOTYPE ANN[0].RANK ANN[0].EFFECT ANN[0].IMPACT COMMON G5 dbNSFP_ExAC_Adj_AF dbNSFP_1000Gp3_AF dbNSFP_ExAC_AF dbNSFP_phastCons100way_vertebrate dbNSFP_FATHMM_pred dbNSFP_GERP___RS dbNSFP_GERP___NR dbNSFP_CADD_phred dbNSFP_MetaSVM_pred dbNSFP_LRT_pred dbNSFP_PROVEAN_pred dbNSFP_MutationTaster_pred dbNSFP_MutationAssessor_pred dbNSFP_SIFT_pred dbNSFP_Polyphen2_HVAR_pred dbNSFP_Polyphen2_HDIV_pred > ${sampleid}_dbsnp_cc_cnc_dbnsfp_refseq_light_raw_filtered.fields.txt +- SnpSift extractFields SampleID_dbsnp_cc_cnc_dbnsfp_refseq_light_filtered.vcf CHROM POS REF ALT GEN[0].AF GEN[0].DP GEN[0].AD[0] GEN[0].AD[1] GEN[0].GT ID FILTER ANN[0].HGVS_P ANN[0].GENE ANN[0].BIOTYPE ANN[0].RANK ANN[0].EFFECT ANN[0].IMPACT COMMON G5 dbNSFP_ExAC_Adj_AF dbNSFP_1000Gp3_AF dbNSFP_ExAC_AF dbNSFP_phastCons100way_vertebrate dbNSFP_FATHMM_pred dbNSFP_GERP___RS dbNSFP_GERP___NR dbNSFP_CADD_phred dbNSFP_MetaSVM_pred dbNSFP_LRT_pred dbNSFP_PROVEAN_pred dbNSFP_MutationTaster_pred dbNSFP_MutationAssessor_pred dbNSFP_SIFT_pred dbNSFP_Polyphen2_HVAR_pred dbNSFP_Polyphen2_HDIV_pred > SampleID_dbsnp_cc_cnc_dbnsfp_refseq_light_raw_filtered.fields.txt + +SnpSift extractFields SampleID_dbsnp_cc_cnc_dbnsfp_refseq_light.vcf CHROM POS REF ALT GEN[0].AF GEN[0].DP GEN[0].AD[0] GEN[0].AD[1] GEN[0].GT ID FILTER ANN[0].HGVS_P ANN[0].GENE ANN[0].BIOTYPE ANN[0].RANK ANN[0].EFFECT ANN[0].IMPACT COMMON G5 dbNSFP_ExAC_Adj_AF dbNSFP_1000Gp3_AF dbNSFP_ExAC_AF dbNSFP_phastCons100way_vertebrate dbNSFP_FATHMM_pred dbNSFP_GERP___RS dbNSFP_GERP___NR dbNSFP_CADD_phred dbNSFP_MetaSVM_pred dbNSFP_LRT_pred dbNSFP_PROVEAN_pred dbNSFP_MutationTaster_pred dbNSFP_MutationAssessor_pred dbNSFP_SIFT_pred dbNSFP_Polyphen2_HVAR_pred dbNSFP_Polyphen2_HDIV_pred > SampleID_dbsnp_cc_cnc_dbnsfp_refseq_light.fields.txt + ``` -- GitLab