Remove a whole bunch of unused annotations from gVCF output.

AC,AF,AN,FS,QD - they'll all be recomputed later.
BLOCK_SIZE and MIN_GQ were not necessary.

I also made the StrandBiasBySample annotation forced on when in gVCF mode.
It turns out that its output wasn't compatible with BCF so I patched it (and the variant jar too).
This commit is contained in:
Eric Banks 2014-02-06 16:17:15 -05:00
parent 2648219c42
commit eb463b505d
6 changed files with 29 additions and 21 deletions

View File

@ -195,15 +195,15 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
* @param table the table used by the FisherStrand annotation
* @return the array used by the per-sample Strand Bias annotation
*/
public static int[] getContingencyArray( final int[][] table ) {
public static List<Integer> getContingencyArray( final int[][] table ) {
if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
final int[] array = new int[4]; // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
array[0] = table[0][0];
array[1] = table[0][1];
array[2] = table[1][0];
array[3] = table[1][1];
return array;
final List<Integer> list = new ArrayList<>(4); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
list.add(table[0][0]);
list.add(table[0][1]);
list.add(table[1][0]);
list.add(table[1][1]);
return list;
}
/**

View File

@ -50,7 +50,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.variantcontext.Genotype;
@ -67,7 +66,7 @@ import java.util.*;
* Date: 8/28/13
*/
public class StrandBiasBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public class StrandBiasBySample extends GenotypeAnnotation {
public final static String STRAND_BIAS_BY_SAMPLE_KEY_NAME = "SB";

View File

@ -549,6 +549,14 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output");
// also, we don't need to output several of the annotations
annotationsToExclude.add("ChromosomeCounts");
annotationsToExclude.add("FisherStrand");
annotationsToExclude.add("QualByDepth");
// but we definitely want certain other ones
annotationsToUse.add("StrandBiasBySample");
}
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )

View File

@ -145,9 +145,11 @@ public class GVCFWriter implements VariantContextWriter {
public void writeHeader(VCFHeader header) {
if ( header == null ) throw new IllegalArgumentException("header cannot be null");
header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block"));
header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block"));
header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block"));
// These annotations are no longer standard
//header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block"));
//header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block"));
for ( final HomRefBlock partition : GQPartitions ) {
header.addMetaDataLine(partition.toVCFHeaderLine());
@ -225,7 +227,9 @@ public class GVCFWriter implements VariantContextWriter {
vcb.attributes(new HashMap<String, Object>(2)); // clear the attributes
vcb.stop(block.getStop());
vcb.attribute(VCFConstants.END_KEY, block.getStop());
vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize());
// This annotation is no longer standard
//vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize());
// create the single Genotype with GQ and DP annotations
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, block.getRef()));
@ -233,9 +237,11 @@ public class GVCFWriter implements VariantContextWriter {
gb.GQ(block.getMedianGQ());
gb.DP(block.getMedianDP());
gb.attribute(MIN_DP_FORMAT_FIELD, block.getMinDP());
gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ());
gb.PL(block.getMinPLs());
// This annotation is no longer standard
//gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ());
return vcb.genotypes(gb.make()).make();
}

View File

@ -67,10 +67,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7735be71f57e62929947c289cd48bb9c"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "f0a761c310519133ed4f3ad465d986fc"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "1b5697be7ae90723368677d4d66a440a"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "aa7c0e3bec4ac307068f85f58d48625f"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "cf2167a563f86af4df26733e2aa6ced6"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
return tests.toArray(new Object[][]{});
}

View File

@ -112,8 +112,6 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
Assert.assertEquals(first.getStart(), 69511);
Assert.assertEquals(first.getEnd(), 69511);
Assert.assertEquals(first.getGenotypes().size(), 2);
Assert.assertTrue(first.getGenotype("NA1").isCalled());
Assert.assertTrue(first.getGenotype("NA2").isNoCall());
}
@Test
@ -131,7 +129,6 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
Assert.assertEquals(first.getEnd(), 69635);
Assert.assertEquals(first.getNAlleles(), 3);
Assert.assertEquals(first.getGenotypes().size(), 2);
Assert.assertTrue(first.getGenotype("NA1").isHet());
}
@Test
@ -149,19 +146,17 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
Assert.assertEquals(first.getEnd(), 69776);
Assert.assertEquals(first.getNAlleles(), 3);
Assert.assertEquals(first.getGenotypes().size(), 2);
Assert.assertTrue(first.getGenotype("NA1").isHet());
final VariantContext second = allVCs.get(1);
Assert.assertEquals(second.getStart(), 69773);
Assert.assertEquals(second.getEnd(), 69783);
Assert.assertEquals(second.getGenotypes().size(), 2);
Assert.assertTrue(second.getGenotype("NA1").isHomRef());
}
@Test
public void testMD5s() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d90227fd360761d9534b1080b17159dd"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("ad4916ff9ab1479845558ddaaae131a6"));
spec.disableShadowBCF();
executeTest("testMD5s", spec);
}