Fixed nasty bug with defaulting to diploid no-call genotypes
-- For the pooled caller we were writing diploid no-calls even when other samples were haploid. Changed maxPloidy function to return a defaultPloidy, rather than 0, in the case where all samples are missing. -- VCF/BCF Writers now create missing genotypes with the ploidy of other samples, or 2 if none are available at all. -- Updating integration tests for general ploidy, as previously we wrote ./. even when other calls were 0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/1/1/1/1/1, but now we write ./././././././././././././././././././././././. (ugly but correct)
This commit is contained in:
parent
d1ba17df5d
commit
bfbf1686cd
|
|
@ -1,9 +1,9 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.testng.annotations.Test;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -52,7 +52,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testINDEL_GGA_Pools() {
|
public void testINDEL_GGA_Pools() {
|
||||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","90af837f372e3d5143af30bf5c8c2b75");
|
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","26598044436c8044f22ffa767b06a0f0");
|
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
|
|
|
||||||
|
|
@ -88,8 +88,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
case UNBOUNDED: return -1;
|
case UNBOUNDED: return -1;
|
||||||
case A: return vc.getNAlleles() - 1;
|
case A: return vc.getNAlleles() - 1;
|
||||||
case G:
|
case G:
|
||||||
final int ploidy = vc.getMaxPloidy();
|
final int ploidy = vc.getMaxPloidy(2);
|
||||||
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy);
|
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy);
|
||||||
default:
|
default:
|
||||||
throw new ReviewedStingException("Unknown count type: " + countType);
|
throw new ReviewedStingException("Unknown count type: " + countType);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
@Invariant({"alleles != null"})
|
@Invariant({"alleles != null"})
|
||||||
public final class GenotypeBuilder {
|
public final class GenotypeBuilder {
|
||||||
|
private static final List<Allele> HAPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL);
|
||||||
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||||
|
|
||||||
private String sampleName = null;
|
private String sampleName = null;
|
||||||
|
|
@ -99,8 +100,14 @@ public final class GenotypeBuilder {
|
||||||
* @param sampleName the name of this sample
|
* @param sampleName the name of this sample
|
||||||
* @return an initialized Genotype with sampleName that's a diploid ./. no call genotype
|
* @return an initialized Genotype with sampleName that's a diploid ./. no call genotype
|
||||||
*/
|
*/
|
||||||
public static Genotype createMissing(final String sampleName) {
|
public static Genotype createMissing(final String sampleName, final int ploidy) {
|
||||||
return new GenotypeBuilder(sampleName).alleles(DIPLOID_NO_CALL).make();
|
final GenotypeBuilder builder = new GenotypeBuilder(sampleName);
|
||||||
|
switch ( ploidy ) {
|
||||||
|
case 1: builder.alleles(HAPLOID_NO_CALL); break;
|
||||||
|
case 2: builder.alleles(DIPLOID_NO_CALL); break;
|
||||||
|
default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break;
|
||||||
|
}
|
||||||
|
return builder.make();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -413,18 +413,25 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* What is the max ploidy among all samples? Returns 0 if no genotypes are present
|
* What is the max ploidy among all samples? Returns defaultPloidy if no genotypes are present
|
||||||
*
|
*
|
||||||
|
* @param defaultPloidy the default ploidy, if all samples are no-called
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
@Ensures("result >= 0")
|
@Ensures("result >= 0")
|
||||||
public int getMaxPloidy() {
|
public int getMaxPloidy(final int defaultPloidy) {
|
||||||
|
if ( defaultPloidy < 0 ) throw new IllegalArgumentException("defaultPloidy must be greater than or equal to 0");
|
||||||
|
|
||||||
if ( maxPloidy == -1 ) {
|
if ( maxPloidy == -1 ) {
|
||||||
maxPloidy = 0; // necessary in the case where there are no genotypes
|
maxPloidy = 0; // necessary in the case where there are no genotypes
|
||||||
for ( final Genotype g : getGenotypes() ) {
|
for ( final Genotype g : getGenotypes() ) {
|
||||||
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
|
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// everything is no called so we return the default ploidy
|
||||||
|
if ( maxPloidy == 0 ) maxPloidy = defaultPloidy;
|
||||||
}
|
}
|
||||||
|
|
||||||
return maxPloidy;
|
return maxPloidy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -642,14 +642,15 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the maximum ploidy of all samples in this VC, or -1 if there are no genotypes
|
* Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes
|
||||||
*
|
*
|
||||||
* This function is caching, so it's only expensive on the first call
|
* This function is caching, so it's only expensive on the first call
|
||||||
*
|
*
|
||||||
* @return -1, or the max ploidy
|
* @param defaultPloidy the default ploidy, if all samples are no-called
|
||||||
|
* @return default, or the max ploidy
|
||||||
*/
|
*/
|
||||||
public int getMaxPloidy() {
|
public int getMaxPloidy(final int defaultPloidy) {
|
||||||
return genotypes.getMaxPloidy();
|
return genotypes.getMaxPloidy(defaultPloidy);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -272,11 +272,7 @@ public abstract class BCF2FieldWriter {
|
||||||
|
|
||||||
encodingType = BCF2Type.INT8;
|
encodingType = BCF2Type.INT8;
|
||||||
buildAlleleMap(vc);
|
buildAlleleMap(vc);
|
||||||
nValuesPerGenotype = vc.getMaxPloidy();
|
nValuesPerGenotype = vc.getMaxPloidy(2);
|
||||||
|
|
||||||
// deal with the case where we have no call everywhere, in which case we write out diploid
|
|
||||||
if ( nValuesPerGenotype == 0 )
|
|
||||||
nValuesPerGenotype = 2;
|
|
||||||
|
|
||||||
super.start(encoder, vc);
|
super.start(encoder, vc);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -353,7 +353,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
writer.start(encoder, vc);
|
writer.start(encoder, vc);
|
||||||
for ( final String name : sampleNames ) {
|
for ( final String name : sampleNames ) {
|
||||||
Genotype g = vc.getGenotype(name);
|
Genotype g = vc.getGenotype(name);
|
||||||
if ( g == null ) g = GenotypeBuilder.createMissing(name);
|
if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype);
|
||||||
writer.addGenotype(encoder, vc, g);
|
writer.addGenotype(encoder, vc, g);
|
||||||
}
|
}
|
||||||
writer.done(encoder, vc);
|
writer.done(encoder, vc);
|
||||||
|
|
|
||||||
|
|
@ -338,11 +338,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
||||||
*/
|
*/
|
||||||
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
final int ploidy = vc.getMaxPloidy(2);
|
||||||
|
|
||||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||||
|
|
||||||
Genotype g = vc.getGenotype(sample);
|
Genotype g = vc.getGenotype(sample);
|
||||||
if ( g == null ) g = GenotypeBuilder.createMissing(sample);
|
if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy);
|
||||||
|
|
||||||
final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||||
for ( String field : genotypeFormatKeys ) {
|
for ( String field : genotypeFormatKeys ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue