From eb463b505dbe5559e8a8da9a870ed096b23ef769 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 6 Feb 2014 16:17:15 -0500 Subject: [PATCH] Remove a whole bunch of unused annotations from gVCF output. AC,AF,AN,FS,QD - they'll all be recomputed later. BLOCK_SIZE and MIN_GQ were not necessary. I also made the StrandBiasBySample annotation forced on when in gVCF mode. It turns out that its output wasn't compatible with BCF so I patched it (and the variant jar too). --- .../sting/gatk/walkers/annotator/FisherStrand.java | 14 +++++++------- .../gatk/walkers/annotator/StrandBiasBySample.java | 3 +-- .../walkers/haplotypecaller/HaplotypeCaller.java | 8 ++++++++ .../sting/utils/gvcf/GVCFWriter.java | 14 ++++++++++---- .../HaplotypeCallerGVCFIntegrationTest.java | 4 ++-- .../variantutils/CombineGVCFsIntegrationTest.java | 7 +------ 6 files changed, 29 insertions(+), 21 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 95be967a2..f3785d63a 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -195,15 +195,15 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat * @param table the table used by the FisherStrand annotation * @return the array used by the per-sample Strand Bias annotation */ - public static int[] getContingencyArray( final int[][] table ) { + public static List getContingencyArray( final int[][] table ) { if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); } if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); } - final int[] array = new int[4]; // TODO - if we ever want to do something clever with multi-allelic sites this will need to change - array[0] = table[0][0]; - array[1] = table[0][1]; - array[2] = table[1][0]; - array[3] = table[1][1]; - return array; + final List list = new ArrayList<>(4); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change + list.add(table[0][0]); + list.add(table[0][1]); + list.add(table[1][0]); + list.add(table[1][1]); + return list; } /** diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasBySample.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasBySample.java index 4b1e48a36..ec1c1e729 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasBySample.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasBySample.java @@ -50,7 +50,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.variant.variantcontext.Genotype; @@ -67,7 +66,7 @@ import java.util.*; * Date: 8/28/13 */ -public class StrandBiasBySample extends GenotypeAnnotation implements ExperimentalAnnotation { +public class StrandBiasBySample extends GenotypeAnnotation { public final static String STRAND_BIAS_BY_SAMPLE_KEY_NAME = "SB"; diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 6a7c60825..2db37fc03 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -549,6 +549,14 @@ public class HaplotypeCaller extends ActiveRegionWalker, In SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0; SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0; logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output"); + + // also, we don't need to output several of the annotations + annotationsToExclude.add("ChromosomeCounts"); + annotationsToExclude.add("FisherStrand"); + annotationsToExclude.add("QualByDepth"); + + // but we definitely want certain other ones + annotationsToUse.add("StrandBiasBySample"); } if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/gvcf/GVCFWriter.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/gvcf/GVCFWriter.java index 4eabded4b..aa269779b 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/gvcf/GVCFWriter.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/gvcf/GVCFWriter.java @@ -145,9 +145,11 @@ public class GVCFWriter implements VariantContextWriter { public void writeHeader(VCFHeader header) { if ( header == null ) throw new IllegalArgumentException("header cannot be null"); header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY)); - header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block")); header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block")); - header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block")); + + // These annotations are no longer standard + //header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block")); + //header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block")); for ( final HomRefBlock partition : GQPartitions ) { header.addMetaDataLine(partition.toVCFHeaderLine()); @@ -225,7 +227,9 @@ public class GVCFWriter implements VariantContextWriter { vcb.attributes(new HashMap(2)); // clear the attributes vcb.stop(block.getStop()); vcb.attribute(VCFConstants.END_KEY, block.getStop()); - vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize()); + + // This annotation is no longer standard + //vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize()); // create the single Genotype with GQ and DP annotations final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, block.getRef())); @@ -233,9 +237,11 @@ public class GVCFWriter implements VariantContextWriter { gb.GQ(block.getMedianGQ()); gb.DP(block.getMedianDP()); gb.attribute(MIN_DP_FORMAT_FIELD, block.getMinDP()); - gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ()); gb.PL(block.getMinPLs()); + // This annotation is no longer standard + //gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ()); + return vcb.genotypes(gb.make()).make(); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index c6229cd89..b533d391f 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -67,10 +67,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { // this functionality can be adapted to provide input data for whatever you might want in your data tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7735be71f57e62929947c289cd48bb9c"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "f0a761c310519133ed4f3ad465d986fc"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "1b5697be7ae90723368677d4d66a440a"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "aa7c0e3bec4ac307068f85f58d48625f"}); - tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "cf2167a563f86af4df26733e2aa6ced6"}); + tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"}); return tests.toArray(new Object[][]{}); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java index 10d1f206f..04582686d 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -112,8 +112,6 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { Assert.assertEquals(first.getStart(), 69511); Assert.assertEquals(first.getEnd(), 69511); Assert.assertEquals(first.getGenotypes().size(), 2); - Assert.assertTrue(first.getGenotype("NA1").isCalled()); - Assert.assertTrue(first.getGenotype("NA2").isNoCall()); } @Test @@ -131,7 +129,6 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { Assert.assertEquals(first.getEnd(), 69635); Assert.assertEquals(first.getNAlleles(), 3); Assert.assertEquals(first.getGenotypes().size(), 2); - Assert.assertTrue(first.getGenotype("NA1").isHet()); } @Test @@ -149,19 +146,17 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { Assert.assertEquals(first.getEnd(), 69776); Assert.assertEquals(first.getNAlleles(), 3); Assert.assertEquals(first.getGenotypes().size(), 2); - Assert.assertTrue(first.getGenotype("NA1").isHet()); final VariantContext second = allVCs.get(1); Assert.assertEquals(second.getStart(), 69773); Assert.assertEquals(second.getEnd(), 69783); Assert.assertEquals(second.getGenotypes().size(), 2); - Assert.assertTrue(second.getGenotype("NA1").isHomRef()); } @Test public void testMD5s() throws Exception { final String cmd = baseTestString(" -L 1:69485-69791"); - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d90227fd360761d9534b1080b17159dd")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("ad4916ff9ab1479845558ddaaae131a6")); spec.disableShadowBCF(); executeTest("testMD5s", spec); }