Fixed nasty bug with defaulting to diploid no-call genotypes

-- For the pooled caller we were writing diploid no-calls even when other samples were haploid.  Changed maxPloidy function to return a defaultPloidy, rather than 0, in the case where all samples are missing.
-- VCF/BCF Writers now create missing genotypes with the ploidy of other samples, or 2 if none are available at all.
-- Updating integration tests for general ploidy, as previously we wrote ./. even when other calls were 0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/1/1/1/1/1, but now we write ./././././././././././././././././././././././. (ugly but correct)
This commit is contained in:
Mark DePristo 2012-09-12 07:08:03 -04:00
parent d1ba17df5d
commit bfbf1686cd
8 changed files with 33 additions and 20 deletions

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
import org.testng.annotations.Test;
/**
* Created by IntelliJ IDEA.
@ -52,7 +52,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","90af837f372e3d5143af30bf5c8c2b75");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca");
}
@Test(enabled = true)
@ -62,7 +62,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","26598044436c8044f22ffa767b06a0f0");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58");
}
@Test(enabled = true)

View File

@ -88,8 +88,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
case UNBOUNDED: return -1;
case A: return vc.getNAlleles() - 1;
case G:
final int ploidy = vc.getMaxPloidy();
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy);
final int ploidy = vc.getMaxPloidy(2);
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy);
default:
throw new ReviewedStingException("Unknown count type: " + countType);
}

View File

@ -53,6 +53,7 @@ import java.util.*;
*/
@Invariant({"alleles != null"})
public final class GenotypeBuilder {
private static final List<Allele> HAPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL);
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
private String sampleName = null;
@ -99,8 +100,14 @@ public final class GenotypeBuilder {
* @param sampleName the name of this sample
* @return an initialized Genotype with sampleName that's a diploid ./. no call genotype
*/
public static Genotype createMissing(final String sampleName) {
return new GenotypeBuilder(sampleName).alleles(DIPLOID_NO_CALL).make();
public static Genotype createMissing(final String sampleName, final int ploidy) {
final GenotypeBuilder builder = new GenotypeBuilder(sampleName);
switch ( ploidy ) {
case 1: builder.alleles(HAPLOID_NO_CALL); break;
case 2: builder.alleles(DIPLOID_NO_CALL); break;
default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break;
}
return builder.make();
}
/**

View File

@ -413,18 +413,25 @@ public class GenotypesContext implements List<Genotype> {
}
/**
* What is the max ploidy among all samples? Returns 0 if no genotypes are present
* What is the max ploidy among all samples? Returns defaultPloidy if no genotypes are present
*
* @param defaultPloidy the default ploidy, if all samples are no-called
* @return
*/
@Ensures("result >= 0")
public int getMaxPloidy() {
public int getMaxPloidy(final int defaultPloidy) {
if ( defaultPloidy < 0 ) throw new IllegalArgumentException("defaultPloidy must be greater than or equal to 0");
if ( maxPloidy == -1 ) {
maxPloidy = 0; // necessary in the case where there are no genotypes
for ( final Genotype g : getGenotypes() ) {
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
}
// everything is no called so we return the default ploidy
if ( maxPloidy == 0 ) maxPloidy = defaultPloidy;
}
return maxPloidy;
}

View File

@ -642,14 +642,15 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
/**
* Returns the maximum ploidy of all samples in this VC, or -1 if there are no genotypes
* Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes
*
* This function is caching, so it's only expensive on the first call
*
* @return -1, or the max ploidy
* @param defaultPloidy the default ploidy, if all samples are no-called
* @return default, or the max ploidy
*/
public int getMaxPloidy() {
return genotypes.getMaxPloidy();
public int getMaxPloidy(final int defaultPloidy) {
return genotypes.getMaxPloidy(defaultPloidy);
}
/**

View File

@ -272,11 +272,7 @@ public abstract class BCF2FieldWriter {
encodingType = BCF2Type.INT8;
buildAlleleMap(vc);
nValuesPerGenotype = vc.getMaxPloidy();
// deal with the case where we have no call everywhere, in which case we write out diploid
if ( nValuesPerGenotype == 0 )
nValuesPerGenotype = 2;
nValuesPerGenotype = vc.getMaxPloidy(2);
super.start(encoder, vc);
}

View File

@ -353,7 +353,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
writer.start(encoder, vc);
for ( final String name : sampleNames ) {
Genotype g = vc.getGenotype(name);
if ( g == null ) g = GenotypeBuilder.createMissing(name);
if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype);
writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);

View File

@ -338,11 +338,13 @@ class VCFWriter extends IndexingVariantContextWriter {
*/
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
throws IOException {
final int ploidy = vc.getMaxPloidy(2);
for ( String sample : mHeader.getGenotypeSamples() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
Genotype g = vc.getGenotype(sample);
if ( g == null ) g = GenotypeBuilder.createMissing(sample);
if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy);
final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
for ( String field : genotypeFormatKeys ) {