diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java index ab70e280a..1c22f8fe1 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java @@ -88,15 +88,7 @@ import Jama.Matrix; * Build a recalibration model to score variant quality for filtering purposes * *

- * This tool performs the first pass in a two-stage process called VQSR; the second pass is performed by the - * ApplyRecalibration tool. - * In brief, the first pass consists of creating a Gaussian mixture model by looking at the distribution of annotation - * values over a high quality subset of the input call set, and then scoring all input variants according to the model. - * The second pass consists of filtering variants based on score cutoffs identified in the first pass. - *

- * - *

- * The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set. + * The purpose of variant recalibration is to assign a well-calibrated probability to each variant call in a call set. * You can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call. * The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship * between SNP call annotations (such as QD, MQ, and ReadPosRankSum, for example) and the probability that a SNP is a true genetic @@ -106,6 +98,14 @@ import Jama.Matrix; * probability that each call is real. The score that gets added to the INFO field of each variant is called the VQSLOD. It is * the log odds of being a true variant versus being false under the trained Gaussian mixture model. *

+ * + *

+ * This tool performs the first pass in a two-stage process called VQSR; the second pass is performed by the + * ApplyRecalibration tool. + * In brief, the first pass consists of creating a Gaussian mixture model by looking at the distribution of annotation + * values over a high quality subset of the input call set, and then scoring all input variants according to the model. + * The second pass consists of filtering variants based on score cutoffs identified in the first pass. + *

* *

VQSR is probably the hardest part of the Best Practices to get right, so be sure to read the * method documentation, @@ -115,7 +115,10 @@ import Jama.Matrix; * *

Inputs

* * @@ -135,7 +138,7 @@ import Jama.Matrix; * -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \ * -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \ * -resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.high_confidence.vcf \ - * -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \ + * -resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_135.b37.vcf \ * -an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR -an InbreedingCoeff \ * -mode SNP \ * -recalFile output.recal \ @@ -153,7 +156,7 @@ import Jama.Matrix; * -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \ * -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \ * -resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.high_confidence.vcf \ - * -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \ + * -resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_135.b37.vcf \ * -an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR -an InbreedingCoeff \ * -mode SNP \ * -recalFile output.AS.recal \ @@ -191,9 +194,13 @@ public class VariantRecalibrator extends RodWalker> inputCollections; final private List> input = new ArrayList<>();