Un-exclude SD and TRA from HC annotators
This commit is contained in:
Geraldine Van der Auwera 2015-05-03 21:25:55 +02:00
commit 8a4a4f3fcf
8 changed files with 62 additions and 29 deletions

View File

@ -57,7 +57,7 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardUGAnnotation;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
@ -79,7 +79,7 @@ import java.util.Map;
* <p>This annotation gives you the count of all reads that have MAPQ = 0 across all samples. The count of reads with MAPQ0 can be used for quality control; high counts typically indicate regions where it is difficult to make confident calls.</p>
*
* <h3>Caveat</h3>
* <p>This annotation is excluded by HaplotypeCaller because HC filters out all reads with MQ0 upfront, so the annotation would always return a value of 0 anyway.</p>
* <p>It is not useful to apply this annotation with HaplotypeCaller because HC filters out all reads with MQ0 upfront, so the annotation will always return a value of 0.</p>
*
* <h3>Related annotations</h3>
* <ul>
@ -88,7 +88,7 @@ import java.util.Map;
* </ul>
*
*/
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public class MappingQualityZero extends InfoFieldAnnotation implements StandardUGAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,

View File

@ -55,7 +55,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardUGAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
@ -76,12 +76,12 @@ import java.util.*;
*
* <h3>Caveats</h3>
* <ul>
* <li>This annotation is not compatible with HaplotypeCaller; its purpose is to compensate for the UnifiedGenotyper's inability to integrate SNPs and indels in the same model (unlike HaplotypeCaller)</li>
* <li>In its current form, this annotation is not compatible with HaplotypeCaller. It is only meant to be used with UnifiedGenotyper, as its purpose is to compensate for the UnifiedGenotyper's inability to integrate SNPs and indels in the same model (unlike HaplotypeCaller).</li>
* <li>By default, the UnifiedGenotyper will not call variants where the fraction of spanning deletions is above a certain threshold. This threshold can be adjusted using the `--max_deletion_fraction` argument.</li>
* </ul>
*
*/
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
public class SpanningDeletions extends InfoFieldAnnotation implements StandardUGAnnotation {
private final static Logger logger = Logger.getLogger(SpanningDeletions.class);
private boolean walkerIdentityCheckWarningLogged = false;

View File

@ -58,7 +58,7 @@ import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardUGAnnotation;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
@ -76,13 +76,13 @@ import java.util.*;
*
* <p>A tandem repeat unit is composed of one or more nucleotides that are repeated multiple times in series. Repetitive sequences are difficult to map to the reference because they are associated with multiple alignment possibilities. Knowing the number of repeat units in a set of tandem repeats tells you the number of different positions the tandem repeat can be placed in. The observation of many tandem repeat units multiplies the number of possible representations that can be made of the region.</p>
*
* <h3>Caveats</h3>
* <h3>Caveat</h3>
* <ul>
* <li>This annotation is currently not compatible with HaplotypeCaller.</li>
* </ul>
*
*/
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardUGAnnotation, ActiveRegionBasedAnnotation {
private final static Logger logger = Logger.getLogger(TandemRepeatAnnotator.class);
private boolean walkerIdentityCheckWarningLogged = false;

View File

@ -219,7 +219,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
* Keep in mind that RODRequiringAnnotations are not intended to be used as a group, because they require specific ROD inputs.
*/
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls. The single value 'none' removes the default group", required=false)
protected String[] annotationClassesToUse = { "Standard" };
protected String[] annotationClassesToUse = { "Standard", "StandardUG" };
// the calculation arguments
private UnifiedGenotypingEngine genotypingEngine = null;

View File

@ -293,12 +293,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"ClippingRankSumTest", "DepthPerSampleHC"}));
/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments,
* so these annotations will be excluded even if they are explicitly included with the other options.
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the
* -A or -G arguments, so these annotations will be excluded even if they are explicitly included with the other
* options. When HaplotypeCaller is run with -ERC GVCF or -ERC BP_RESOLUTION, some annotations are excluded from the
* output by default because they will only be meaningful once they have been recalculated by GenotypeGVCFs. As
* of version 3.3 this concerns ChromosomeCounts, FisherStrand, StrandOddsRatio and QualByDepth.
*
*/
@Advanced
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<>(Arrays.asList(new String[]{"SpanningDeletions", "TandemRepeatAnnotator", "MappingQualityZero"}));
protected List<String> annotationsToExclude = new ArrayList<>(Arrays.asList(new String[]{}));
/**
* Which groups of annotations to add to the output VCF file. The single value 'none' removes the default group. See
@ -506,7 +510,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
}
if (dontGenotype && emitReferenceConfidence())
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
throw new UserException("You cannot request gVCF output and 'do not genotype' at the same time");
if ( emitReferenceConfidence() ) {

View File

@ -74,6 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
final static String REF = b37KGReference;
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
final static String standardAnnotations = " -G Standard -G StandardUG ";
public static String baseTestString() {
return "-T VariantAnnotator -R " + b36KGReference + " --no_cmdline_in_header -o %s";
@ -98,7 +99,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
baseTestString() + standardAnnotations + "--variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("92eb47332dd9d7ee7fbe3120dc39c594"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -106,7 +107,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
baseTestString() + standardAnnotations + "--variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("c367bf7cebd7b26305f8d4736788aec8"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -132,7 +133,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
baseTestString() + standardAnnotations + "--variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("098dcad8d90d90391755a0191c9db59c"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -140,7 +141,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
baseTestString() + standardAnnotations + "--variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("f3bbfbc179d2e1bae49890f1e9dfde34"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -148,7 +149,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
baseTestString() + standardAnnotations + "-XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("7267450fc4d002f75a24ca17278e0950"));
executeTest("test exclude annotations", spec);
}
@ -181,7 +182,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
baseTestString() + standardAnnotations + "--variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("18592c72d83ee84e1326acb999518c38"));
executeTest("test overwriting header", spec);
}
@ -189,7 +190,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
baseTestString() + standardAnnotations + "--variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("6de950b381d2d92b21bab6144e8f0714"));
executeTest("not passing it any reads", spec);
}
@ -197,7 +198,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
baseTestString() + " --dbsnp " + b36dbSNP129 + standardAnnotations + "--variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("e0bd85747c87ea4df6ef67f593cbacbf"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -205,7 +206,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testMultipleIdsWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + standardAnnotations + "--variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
Arrays.asList("194a942f17104292192fb564a3c96610"));
executeTest("adding multiple IDs with dbSNP", spec);
}
@ -213,7 +214,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf" + standardAnnotations + "--variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("9e41ae733a76632b40eda38e3cef909d"));
executeTest("getting DB tag with HM3", spec);
}
@ -221,7 +222,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testDBTagWithTwoComps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf --comp:foo " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf --comp:foo " + privateTestDir + "fakeHM3.vcf " + standardAnnotations + " --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("7b718bae0444f1896a6e86da80531218"));
executeTest("getting DB tag with 2 comps", spec);
}
@ -237,7 +238,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf" + standardAnnotations + "--variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("0bed7b4f6ed0556c5e7d398353a9fa91"));
executeTest("using expression", spec);
}
@ -245,7 +246,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testUsingExpressionMultiAllele() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations-multiAllele.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty-multiAllele.vcf -E foo.AF -E foo.AC -L " + privateTestDir + "vcfexample3empty-multiAllele.vcf", 1,
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations-multiAllele.vcf" + standardAnnotations + "--variant " + privateTestDir + "vcfexample3empty-multiAllele.vcf -E foo.AF -E foo.AC -L " + privateTestDir + "vcfexample3empty-multiAllele.vcf", 1,
Arrays.asList("195cf0f5b1aa5c7d00a0595dcca02f4c"));
executeTest("using expression with multi-alleles", spec);
}
@ -253,7 +254,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf" + standardAnnotations + "--variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("b3fe9d3bdb18ca2629543f849a7d27ed"));
executeTest("using expression with ID", spec);
}

View File

@ -50,7 +50,7 @@ import java.util.List;
* <p>This annotation gives you the count of all reads that have MAPQ = 0 for each sample. The count of reads with MAPQ0 can be used for quality control; high counts typically indicate regions where it is difficult to make confident calls.</p>
*
* <h3>Caveat</h3>
* <p>This annotation is excluded by HaplotypeCaller because HC filters out all reads with MQ0 upfront, so the annotation would always return a value of 0 anyway.</p>
* <p>It is not useful to apply this annotation with HaplotypeCaller because HC filters out all reads with MQ0 upfront, so the annotation will always return a value of 0.</p>
*
* <h3>Related annotations</h3>
* <ul>

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.tools.walkers.annotator.interfaces;
/**
 * Marker interface that tags an annotation as belonging to the "StandardUG" group.
 *
 * <p>The interface declares no members of its own; implementing it only classifies
 * the annotation. UnifiedGenotyper lists "StandardUG" next to "Standard" in its
 * default annotation groups, and annotations such as MappingQualityZero,
 * SpanningDeletions and TandemRepeatAnnotator implement this interface instead of
 * StandardAnnotation.</p>
 *
 * <p>NOTE(review): presumably the group name "StandardUG" passed with -G is resolved
 * to this interface by the annotation engine via its simple name; confirm against
 * the group lookup code.</p>
 */
public interface StandardUGAnnotation extends AnnotationType {}