Fixed nasty bug with defaulting to diploid no-call genotypes

-- For the pooled caller we were writing diploid no-calls even when other samples were haploid.  Changed maxPloidy function to return a defaultPloidy, rather than 0, in the case where all samples are missing.
-- VCF/BCF Writers now create missing genotypes with the ploidy of other samples, or 2 if none are available at all.
-- Updating integration tests for general ploidy, as previously we wrote ./. even when other calls were 0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/1/1/1/1/1, but now we write ./././././././././././././././././././././././. (ugly but correct)
This commit is contained in:
Mark DePristo 2012-09-12 07:08:03 -04:00
parent d1ba17df5d
commit bfbf1686cd
8 changed files with 33 additions and 20 deletions

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays; import java.util.Arrays;
import org.testng.annotations.Test;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -52,7 +52,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testINDEL_GGA_Pools() { public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","90af837f372e3d5143af30bf5c8c2b75"); PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca");
} }
@Test(enabled = true) @Test(enabled = true)
@ -62,7 +62,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","26598044436c8044f22ffa767b06a0f0"); PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58");
} }
@Test(enabled = true) @Test(enabled = true)

View File

@ -88,8 +88,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
case UNBOUNDED: return -1; case UNBOUNDED: return -1;
case A: return vc.getNAlleles() - 1; case A: return vc.getNAlleles() - 1;
case G: case G:
final int ploidy = vc.getMaxPloidy(); final int ploidy = vc.getMaxPloidy(2);
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy); return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy);
default: default:
throw new ReviewedStingException("Unknown count type: " + countType); throw new ReviewedStingException("Unknown count type: " + countType);
} }

View File

@ -53,6 +53,7 @@ import java.util.*;
*/ */
@Invariant({"alleles != null"}) @Invariant({"alleles != null"})
public final class GenotypeBuilder { public final class GenotypeBuilder {
private static final List<Allele> HAPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL);
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
private String sampleName = null; private String sampleName = null;
@ -99,8 +100,14 @@ public final class GenotypeBuilder {
* @param sampleName the name of this sample * @param sampleName the name of this sample
* @return an initialized Genotype with sampleName that's a diploid ./. no call genotype * @return an initialized Genotype with sampleName that's a diploid ./. no call genotype
*/ */
public static Genotype createMissing(final String sampleName) { public static Genotype createMissing(final String sampleName, final int ploidy) {
return new GenotypeBuilder(sampleName).alleles(DIPLOID_NO_CALL).make(); final GenotypeBuilder builder = new GenotypeBuilder(sampleName);
switch ( ploidy ) {
case 1: builder.alleles(HAPLOID_NO_CALL); break;
case 2: builder.alleles(DIPLOID_NO_CALL); break;
default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break;
}
return builder.make();
} }
/** /**

View File

@ -413,18 +413,25 @@ public class GenotypesContext implements List<Genotype> {
} }
/** /**
* What is the max ploidy among all samples? Returns 0 if no genotypes are present * What is the max ploidy among all samples? Returns defaultPloidy if no genotypes are present
* *
* @param defaultPloidy the default ploidy, if all samples are no-called
* @return * @return
*/ */
@Ensures("result >= 0") @Ensures("result >= 0")
public int getMaxPloidy() { public int getMaxPloidy(final int defaultPloidy) {
if ( defaultPloidy < 0 ) throw new IllegalArgumentException("defaultPloidy must be greater than or equal to 0");
if ( maxPloidy == -1 ) { if ( maxPloidy == -1 ) {
maxPloidy = 0; // necessary in the case where there are no genotypes maxPloidy = 0; // necessary in the case where there are no genotypes
for ( final Genotype g : getGenotypes() ) { for ( final Genotype g : getGenotypes() ) {
maxPloidy = Math.max(g.getPloidy(), maxPloidy); maxPloidy = Math.max(g.getPloidy(), maxPloidy);
} }
// everything is no called so we return the default ploidy
if ( maxPloidy == 0 ) maxPloidy = defaultPloidy;
} }
return maxPloidy; return maxPloidy;
} }

View File

@ -642,14 +642,15 @@ public class VariantContext implements Feature { // to enable tribble integratio
} }
/** /**
* Returns the maximum ploidy of all samples in this VC, or -1 if there are no genotypes * Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes
* *
* This function is caching, so it's only expensive on the first call * This function is caching, so it's only expensive on the first call
* *
* @return -1, or the max ploidy * @param defaultPloidy the default ploidy, if all samples are no-called
* @return default, or the max ploidy
*/ */
public int getMaxPloidy() { public int getMaxPloidy(final int defaultPloidy) {
return genotypes.getMaxPloidy(); return genotypes.getMaxPloidy(defaultPloidy);
} }
/** /**

View File

@ -272,11 +272,7 @@ public abstract class BCF2FieldWriter {
encodingType = BCF2Type.INT8; encodingType = BCF2Type.INT8;
buildAlleleMap(vc); buildAlleleMap(vc);
nValuesPerGenotype = vc.getMaxPloidy(); nValuesPerGenotype = vc.getMaxPloidy(2);
// deal with the case where we have no call everywhere, in which case we write out diploid
if ( nValuesPerGenotype == 0 )
nValuesPerGenotype = 2;
super.start(encoder, vc); super.start(encoder, vc);
} }

View File

@ -353,7 +353,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
writer.start(encoder, vc); writer.start(encoder, vc);
for ( final String name : sampleNames ) { for ( final String name : sampleNames ) {
Genotype g = vc.getGenotype(name); Genotype g = vc.getGenotype(name);
if ( g == null ) g = GenotypeBuilder.createMissing(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype);
writer.addGenotype(encoder, vc, g); writer.addGenotype(encoder, vc, g);
} }
writer.done(encoder, vc); writer.done(encoder, vc);

View File

@ -338,11 +338,13 @@ class VCFWriter extends IndexingVariantContextWriter {
*/ */
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys) private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
throws IOException { throws IOException {
final int ploidy = vc.getMaxPloidy(2);
for ( String sample : mHeader.getGenotypeSamples() ) { for ( String sample : mHeader.getGenotypeSamples() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR); mWriter.write(VCFConstants.FIELD_SEPARATOR);
Genotype g = vc.getGenotype(sample); Genotype g = vc.getGenotype(sample);
if ( g == null ) g = GenotypeBuilder.createMissing(sample); if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy);
final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size()); final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
for ( String field : genotypeFormatKeys ) { for ( String field : genotypeFormatKeys ) {