Remove a whole bunch of unused annotations from gVCF output.

AC,AF,AN,FS,QD - they'll all be recomputed later.
BLOCK_SIZE and MIN_GQ were not necessary.

I also made the StrandBiasBySample annotation forced on when in gVCF mode.
It turns out that its output wasn't compatible with BCF so I patched it (and the variant jar too).
This commit is contained in:
Eric Banks 2014-02-06 16:17:15 -05:00
parent 2648219c42
commit eb463b505d
6 changed files with 29 additions and 21 deletions

View File

@ -195,15 +195,15 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
* @param table the table used by the FisherStrand annotation * @param table the table used by the FisherStrand annotation
* @return the array used by the per-sample Strand Bias annotation * @return the array used by the per-sample Strand Bias annotation
*/ */
public static int[] getContingencyArray( final int[][] table ) { public static List<Integer> getContingencyArray( final int[][] table ) {
if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); } if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); } if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
final int[] array = new int[4]; // TODO - if we ever want to do something clever with multi-allelic sites this will need to change final List<Integer> list = new ArrayList<>(4); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
array[0] = table[0][0]; list.add(table[0][0]);
array[1] = table[0][1]; list.add(table[0][1]);
array[2] = table[1][0]; list.add(table[1][0]);
array[3] = table[1][1]; list.add(table[1][1]);
return array; return list;
} }
/** /**

View File

@ -50,7 +50,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.Genotype;
@ -67,7 +66,7 @@ import java.util.*;
* Date: 8/28/13 * Date: 8/28/13
*/ */
public class StrandBiasBySample extends GenotypeAnnotation implements ExperimentalAnnotation { public class StrandBiasBySample extends GenotypeAnnotation {
public final static String STRAND_BIAS_BY_SAMPLE_KEY_NAME = "SB"; public final static String STRAND_BIAS_BY_SAMPLE_KEY_NAME = "SB";

View File

@ -549,6 +549,14 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0; SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0; SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output"); logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output");
// also, we don't need to output several of the annotations
annotationsToExclude.add("ChromosomeCounts");
annotationsToExclude.add("FisherStrand");
annotationsToExclude.add("QualByDepth");
// but we definitely want certain other ones
annotationsToUse.add("StrandBiasBySample");
} }
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )

View File

@ -145,9 +145,11 @@ public class GVCFWriter implements VariantContextWriter {
public void writeHeader(VCFHeader header) { public void writeHeader(VCFHeader header) {
if ( header == null ) throw new IllegalArgumentException("header cannot be null"); if ( header == null ) throw new IllegalArgumentException("header cannot be null");
header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY)); header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block"));
header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block")); header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block"));
header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block"));
// These annotations are no longer standard
//header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block"));
//header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block"));
for ( final HomRefBlock partition : GQPartitions ) { for ( final HomRefBlock partition : GQPartitions ) {
header.addMetaDataLine(partition.toVCFHeaderLine()); header.addMetaDataLine(partition.toVCFHeaderLine());
@ -225,7 +227,9 @@ public class GVCFWriter implements VariantContextWriter {
vcb.attributes(new HashMap<String, Object>(2)); // clear the attributes vcb.attributes(new HashMap<String, Object>(2)); // clear the attributes
vcb.stop(block.getStop()); vcb.stop(block.getStop());
vcb.attribute(VCFConstants.END_KEY, block.getStop()); vcb.attribute(VCFConstants.END_KEY, block.getStop());
vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize());
// This annotation is no longer standard
//vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize());
// create the single Genotype with GQ and DP annotations // create the single Genotype with GQ and DP annotations
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, block.getRef())); final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, block.getRef()));
@ -233,9 +237,11 @@ public class GVCFWriter implements VariantContextWriter {
gb.GQ(block.getMedianGQ()); gb.GQ(block.getMedianGQ());
gb.DP(block.getMedianDP()); gb.DP(block.getMedianDP());
gb.attribute(MIN_DP_FORMAT_FIELD, block.getMinDP()); gb.attribute(MIN_DP_FORMAT_FIELD, block.getMinDP());
gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ());
gb.PL(block.getMinPLs()); gb.PL(block.getMinPLs());
// This annotation is no longer standard
//gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ());
return vcb.genotypes(gb.make()).make(); return vcb.genotypes(gb.make()).make();
} }

View File

@ -67,10 +67,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data // this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7735be71f57e62929947c289cd48bb9c"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7735be71f57e62929947c289cd48bb9c"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "f0a761c310519133ed4f3ad465d986fc"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "1b5697be7ae90723368677d4d66a440a"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "aa7c0e3bec4ac307068f85f58d48625f"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "aa7c0e3bec4ac307068f85f58d48625f"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "cf2167a563f86af4df26733e2aa6ced6"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }

View File

@ -112,8 +112,6 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
Assert.assertEquals(first.getStart(), 69511); Assert.assertEquals(first.getStart(), 69511);
Assert.assertEquals(first.getEnd(), 69511); Assert.assertEquals(first.getEnd(), 69511);
Assert.assertEquals(first.getGenotypes().size(), 2); Assert.assertEquals(first.getGenotypes().size(), 2);
Assert.assertTrue(first.getGenotype("NA1").isCalled());
Assert.assertTrue(first.getGenotype("NA2").isNoCall());
} }
@Test @Test
@ -131,7 +129,6 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
Assert.assertEquals(first.getEnd(), 69635); Assert.assertEquals(first.getEnd(), 69635);
Assert.assertEquals(first.getNAlleles(), 3); Assert.assertEquals(first.getNAlleles(), 3);
Assert.assertEquals(first.getGenotypes().size(), 2); Assert.assertEquals(first.getGenotypes().size(), 2);
Assert.assertTrue(first.getGenotype("NA1").isHet());
} }
@Test @Test
@ -149,19 +146,17 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
Assert.assertEquals(first.getEnd(), 69776); Assert.assertEquals(first.getEnd(), 69776);
Assert.assertEquals(first.getNAlleles(), 3); Assert.assertEquals(first.getNAlleles(), 3);
Assert.assertEquals(first.getGenotypes().size(), 2); Assert.assertEquals(first.getGenotypes().size(), 2);
Assert.assertTrue(first.getGenotype("NA1").isHet());
final VariantContext second = allVCs.get(1); final VariantContext second = allVCs.get(1);
Assert.assertEquals(second.getStart(), 69773); Assert.assertEquals(second.getStart(), 69773);
Assert.assertEquals(second.getEnd(), 69783); Assert.assertEquals(second.getEnd(), 69783);
Assert.assertEquals(second.getGenotypes().size(), 2); Assert.assertEquals(second.getGenotypes().size(), 2);
Assert.assertTrue(second.getGenotype("NA1").isHomRef());
} }
@Test @Test
public void testMD5s() throws Exception { public void testMD5s() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791"); final String cmd = baseTestString(" -L 1:69485-69791");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d90227fd360761d9534b1080b17159dd")); final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("ad4916ff9ab1479845558ddaaae131a6"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testMD5s", spec); executeTest("testMD5s", spec);
} }