From bfbf1686cd0f71c94dea59c84b6c74c71f0ae1af Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 12 Sep 2012 07:08:03 -0400 Subject: [PATCH] Fixed nasty bug with defaulting to diploid no-call genotypes -- For the pooled caller we were writing diploid no-calls even when other samples were haploid. Changed maxPloidy function to return a defaultPloidy, rather than 0, in the case where all samples are missing. -- VCF/BCF Writers now create missing genotypes with the ploidy of other samples, or 2 if none are available at all. -- Updating integration tests for general ploidy, as previously we wrote ./. even when other calls were 0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/1/1/1/1/1, but now we write ./././././././././././././././././././././././. (ugly but correct) --- .../UnifiedGenotyperGeneralPloidyIntegrationTest.java | 6 +++--- .../sting/utils/codecs/vcf/VCFCompoundHeaderLine.java | 4 ++-- .../sting/utils/variantcontext/GenotypeBuilder.java | 11 +++++++++-- .../sting/utils/variantcontext/GenotypesContext.java | 11 +++++++++-- .../sting/utils/variantcontext/VariantContext.java | 9 +++++---- .../utils/variantcontext/writer/BCF2FieldWriter.java | 6 +----- .../sting/utils/variantcontext/writer/BCF2Writer.java | 2 +- .../sting/utils/variantcontext/writer/VCFWriter.java | 4 +++- 8 files changed, 33 insertions(+), 20 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index e0bf07809..a4a618887 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; import java.util.Arrays; -import org.testng.annotations.Test; /** * Created by IntelliJ IDEA. @@ -52,7 +52,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","90af837f372e3d5143af30bf5c8c2b75"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca"); } @Test(enabled = true) @@ -62,7 +62,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","26598044436c8044f22ffa767b06a0f0"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58"); } @Test(enabled = true) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index 667de3dea..5273806a7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -88,8 +88,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF case UNBOUNDED: return -1; case A: return vc.getNAlleles() - 1; case G: - final int ploidy = vc.getMaxPloidy(); - return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy); + final int ploidy = vc.getMaxPloidy(2); + return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy); default: throw new ReviewedStingException("Unknown count type: " + countType); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index 9337a78f9..8fd792d3b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -53,6 +53,7 @@ import java.util.*; */ @Invariant({"alleles != null"}) public final class GenotypeBuilder { + private static final List HAPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL); private static final List DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private String sampleName = null; @@ -99,8 +100,14 @@ public final class GenotypeBuilder { * @param sampleName the name of this sample * @return an initialized Genotype with sampleName that's a diploid ./. no call genotype */ - public static Genotype createMissing(final String sampleName) { - return new GenotypeBuilder(sampleName).alleles(DIPLOID_NO_CALL).make(); + public static Genotype createMissing(final String sampleName, final int ploidy) { + final GenotypeBuilder builder = new GenotypeBuilder(sampleName); + switch ( ploidy ) { + case 1: builder.alleles(HAPLOID_NO_CALL); break; + case 2: builder.alleles(DIPLOID_NO_CALL); break; + default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break; + } + return builder.make(); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java index 02ea1a1f2..f306bac4d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java @@ -413,18 +413,25 @@ public class GenotypesContext implements List { } /** - * What is the max ploidy among all samples? Returns 0 if no genotypes are present + * What is the max ploidy among all samples? Returns defaultPloidy if no genotypes are present * + * @param defaultPloidy the default ploidy, if all samples are no-called * @return */ @Ensures("result >= 0") - public int getMaxPloidy() { + public int getMaxPloidy(final int defaultPloidy) { + if ( defaultPloidy < 0 ) throw new IllegalArgumentException("defaultPloidy must be greater than or equal to 0"); + if ( maxPloidy == -1 ) { maxPloidy = 0; // necessary in the case where there are no genotypes for ( final Genotype g : getGenotypes() ) { maxPloidy = Math.max(g.getPloidy(), maxPloidy); } + + // everything is no called so we return the default ploidy + if ( maxPloidy == 0 ) maxPloidy = defaultPloidy; } + return maxPloidy; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index dd16cf7e1..abac84202 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -642,14 +642,15 @@ public class VariantContext implements Feature { // to enable tribble integratio } /** - * Returns the maximum ploidy of all samples in this VC, or -1 if there are no genotypes + * Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes * * This function is caching, so it's only expensive on the first call * - * @return -1, or the max ploidy + * @param defaultPloidy the default ploidy, if all samples are no-called + * @return default, or the max ploidy */ - public int getMaxPloidy() { - return genotypes.getMaxPloidy(); + public int getMaxPloidy(final int defaultPloidy) { + return genotypes.getMaxPloidy(defaultPloidy); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java index 497c68c0c..61c0129bb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java @@ -272,11 +272,7 @@ public abstract class BCF2FieldWriter { encodingType = BCF2Type.INT8; buildAlleleMap(vc); - nValuesPerGenotype = vc.getMaxPloidy(); - - // deal with the case where we have no call everywhere, in which case we write out diploid - if ( nValuesPerGenotype == 0 ) - nValuesPerGenotype = 2; + nValuesPerGenotype = vc.getMaxPloidy(2); super.start(encoder, vc); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index a338c7c0d..536f07f90 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -353,7 +353,7 @@ class BCF2Writer extends IndexingVariantContextWriter { writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); - if ( g == null ) g = GenotypeBuilder.createMissing(name); + if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 93b3b603f..f5306b6da 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -338,11 +338,13 @@ class VCFWriter extends IndexingVariantContextWriter { */ private void addGenotypeData(VariantContext vc, Map alleleMap, List genotypeFormatKeys) throws IOException { + final int ploidy = vc.getMaxPloidy(2); + for ( String sample : mHeader.getGenotypeSamples() ) { mWriter.write(VCFConstants.FIELD_SEPARATOR); Genotype g = vc.getGenotype(sample); - if ( g == null ) g = GenotypeBuilder.createMissing(sample); + if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy); final List attrs = new ArrayList(genotypeFormatKeys.size()); for ( String field : genotypeFormatKeys ) {