Fixed nasty bug with defaulting to diploid no-call genotypes
-- For the pooled caller we were writing diploid no-calls even when other samples were haploid. Changed maxPloidy function to return a defaultPloidy, rather than 0, in the case where all samples are missing. -- VCF/BCF Writers now create missing genotypes with the ploidy of other samples, or 2 if none are available at all. -- Updating integration tests for general ploidy, as previously we wrote ./. even when other calls were 0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/1/1/1/1/1, but now we write ./././././././././././././././././././././././. (ugly but correct)
This commit is contained in:
parent
d1ba17df5d
commit
bfbf1686cd
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -52,7 +52,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","90af837f372e3d5143af30bf5c8c2b75");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","26598044436c8044f22ffa767b06a0f0");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
|
|||
|
|
@ -88,8 +88,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
case UNBOUNDED: return -1;
|
||||
case A: return vc.getNAlleles() - 1;
|
||||
case G:
|
||||
final int ploidy = vc.getMaxPloidy();
|
||||
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy);
|
||||
final int ploidy = vc.getMaxPloidy(2);
|
||||
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy);
|
||||
default:
|
||||
throw new ReviewedStingException("Unknown count type: " + countType);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ import java.util.*;
|
|||
*/
|
||||
@Invariant({"alleles != null"})
|
||||
public final class GenotypeBuilder {
|
||||
private static final List<Allele> HAPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL);
|
||||
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
private String sampleName = null;
|
||||
|
|
@ -99,8 +100,14 @@ public final class GenotypeBuilder {
|
|||
* @param sampleName the name of this sample
|
||||
* @return an initialized Genotype with sampleName that's a diploid ./. no call genotype
|
||||
*/
|
||||
public static Genotype createMissing(final String sampleName) {
|
||||
return new GenotypeBuilder(sampleName).alleles(DIPLOID_NO_CALL).make();
|
||||
public static Genotype createMissing(final String sampleName, final int ploidy) {
|
||||
final GenotypeBuilder builder = new GenotypeBuilder(sampleName);
|
||||
switch ( ploidy ) {
|
||||
case 1: builder.alleles(HAPLOID_NO_CALL); break;
|
||||
case 2: builder.alleles(DIPLOID_NO_CALL); break;
|
||||
default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break;
|
||||
}
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -413,18 +413,25 @@ public class GenotypesContext implements List<Genotype> {
|
|||
}
|
||||
|
||||
/**
|
||||
* What is the max ploidy among all samples? Returns 0 if no genotypes are present
|
||||
* What is the max ploidy among all samples? Returns defaultPloidy if no genotypes are present
|
||||
*
|
||||
* @param defaultPloidy the default ploidy, if all samples are no-called
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getMaxPloidy() {
|
||||
public int getMaxPloidy(final int defaultPloidy) {
|
||||
if ( defaultPloidy < 0 ) throw new IllegalArgumentException("defaultPloidy must be greater than or equal to 0");
|
||||
|
||||
if ( maxPloidy == -1 ) {
|
||||
maxPloidy = 0; // necessary in the case where there are no genotypes
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
|
||||
}
|
||||
|
||||
// everything is no called so we return the default ploidy
|
||||
if ( maxPloidy == 0 ) maxPloidy = defaultPloidy;
|
||||
}
|
||||
|
||||
return maxPloidy;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -642,14 +642,15 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum ploidy of all samples in this VC, or -1 if there are no genotypes
|
||||
* Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes
|
||||
*
|
||||
* This function is caching, so it's only expensive on the first call
|
||||
*
|
||||
* @return -1, or the max ploidy
|
||||
* @param defaultPloidy the default ploidy, if all samples are no-called
|
||||
* @return default, or the max ploidy
|
||||
*/
|
||||
public int getMaxPloidy() {
|
||||
return genotypes.getMaxPloidy();
|
||||
public int getMaxPloidy(final int defaultPloidy) {
|
||||
return genotypes.getMaxPloidy(defaultPloidy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -272,11 +272,7 @@ public abstract class BCF2FieldWriter {
|
|||
|
||||
encodingType = BCF2Type.INT8;
|
||||
buildAlleleMap(vc);
|
||||
nValuesPerGenotype = vc.getMaxPloidy();
|
||||
|
||||
// deal with the case where we have no call everywhere, in which case we write out diploid
|
||||
if ( nValuesPerGenotype == 0 )
|
||||
nValuesPerGenotype = 2;
|
||||
nValuesPerGenotype = vc.getMaxPloidy(2);
|
||||
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -353,7 +353,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
writer.start(encoder, vc);
|
||||
for ( final String name : sampleNames ) {
|
||||
Genotype g = vc.getGenotype(name);
|
||||
if ( g == null ) g = GenotypeBuilder.createMissing(name);
|
||||
if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype);
|
||||
writer.addGenotype(encoder, vc, g);
|
||||
}
|
||||
writer.done(encoder, vc);
|
||||
|
|
|
|||
|
|
@ -338,11 +338,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
*/
|
||||
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
||||
throws IOException {
|
||||
final int ploidy = vc.getMaxPloidy(2);
|
||||
|
||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
Genotype g = vc.getGenotype(sample);
|
||||
if ( g == null ) g = GenotypeBuilder.createMissing(sample);
|
||||
if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy);
|
||||
|
||||
final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||
for ( String field : genotypeFormatKeys ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue