Merge pull request #621 from broadinstitute/vrr_combine_gvcf_bugfix

Fix for CombineGVCFs and GenotypeGVCFs recurrent exception about missing...
This commit is contained in:
Ryan Poplin 2014-05-02 11:52:32 -04:00
commit 38b7cfbda9
7 changed files with 70 additions and 29 deletions

View File

@ -198,9 +198,7 @@ public class ReferenceConfidenceModel {
final int offset = curPos.getStart() - refSpan.getStart(); final int offset = curPos.getStart() - refSpan.getStart();
final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls); final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls);
if ( overlappingSite != null ) { if ( overlappingSite != null && overlappingSite.getStart() == curPos.getStart() ) {
// we have some overlapping site, add it to the list of positions
if ( overlappingSite.getStart() == curPos.getStart() )
results.add(overlappingSite); results.add(overlappingSite);
} else { } else {
// otherwise emit a reference confidence variant context // otherwise emit a reference confidence variant context

View File

@ -299,7 +299,7 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
final GenotypesContext genotypes = GenotypesContext.create(); final GenotypesContext genotypes = GenotypesContext.create();
for ( final VariantContext vc : VCs ) { for ( final VariantContext vc : VCs ) {
for ( final Genotype g : vc.getGenotypes() ) for ( final Genotype g : vc.getGenotypes() )
genotypes.add(new GenotypeBuilder(g).alleles(Arrays.asList(refAllele, refAllele)).make()); genotypes.add(new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy())).make());
} }
return new VariantContextBuilder("", first.getChr(), start, end, Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)).attributes(attrs).genotypes(genotypes).make(); return new VariantContextBuilder("", first.getChr(), start, end, Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)).attributes(attrs).genotypes(genotypes).make();

View File

@ -68,7 +68,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data // this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "96fea2caf0a40df3feb268e8b14da670"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d5980df681bc57e10c681901494b46d2"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "19efc8020f31d1b68d80c50df0629e50"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "19efc8020f31d1b68d80c50df0629e50"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});

View File

@ -323,8 +323,8 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final VariantContext vcStart = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start, Arrays.asList("A", "C")); final VariantContext vcStart = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start, Arrays.asList("A", "C"));
final VariantContext vcEnd = GATKVariantContextUtils.makeFromAlleles("test", "chr1", stop, Arrays.asList("A", "C")); final VariantContext vcEnd = GATKVariantContextUtils.makeFromAlleles("test", "chr1", stop, Arrays.asList("A", "C"));
final VariantContext vcMiddle = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 2, Arrays.asList("A", "C")); final VariantContext vcMiddle = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 2, Arrays.asList("A", "C"));
final VariantContext vcDel = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 4, Arrays.asList("ACG", "A")); final VariantContext vcDel = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 4, Arrays.asList("AAC", "A"));
final VariantContext vcIns = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 8, Arrays.asList("A", "ACG")); final VariantContext vcIns = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 8, Arrays.asList("G", "GCG"));
final List<VariantContext> allCalls = Arrays.asList(vcStart, vcEnd, vcMiddle, vcDel, vcIns); final List<VariantContext> allCalls = Arrays.asList(vcStart, vcEnd, vcMiddle, vcDel, vcIns);
@ -365,7 +365,19 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
} }
if ( call != null ) { if ( call != null ) {
Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead"); if (call.isVariant() && refModel.getType() == VariantContext.Type.SYMBOLIC ) {
//Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
Assert.assertTrue(call.getReference().length() > 1); // must be a deletion.
Assert.assertTrue(call.getStart() < refModel.getStart()); // the deletion must not start at the same position
Assert.assertEquals(call.getReference().getBaseString().substring(refModel.getStart() - call.getStart(),
refModel.getStart() - call.getStart() + 1), refModel.getReference().getBaseString(), "" + data.getRefHap()); // the reference must be the same.
Assert.assertTrue(refModel.getGenotype(0).getGQ() <= 0); // No confidence in the reference hom-ref call across the deletion
Assert.assertEquals(refModel.getAlleles().size(),2); // the reference and the lonelly <NON_REF>
Assert.assertEquals(refModel.getAlleles().get(1),GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
} else {
Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
}
} else { } else {
final int expectedDP = expectedDPs.get(curPos.getStart() - data.getActiveRegion().getLocation().getStart()); final int expectedDP = expectedDPs.get(curPos.getStart() - data.getActiveRegion().getLocation().getStart());
Assert.assertEquals(refModel.getStart(), loc.getStart() + i); Assert.assertEquals(refModel.getStart(), loc.getStart() + i);

View File

@ -76,15 +76,15 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
Assert.assertEquals(first.getStart(), 69491); Assert.assertEquals(first.getStart(), 69491);
Assert.assertEquals(first.getEnd(), 69497); Assert.assertEquals(first.getEnd(), 69497);
Assert.assertEquals(first.getGenotypes().size(), 2); Assert.assertEquals(first.getGenotypes().size(), 2);
Assert.assertTrue(first.getGenotype("NA1").isCalled()); Assert.assertTrue(first.getGenotype("NA1").isNoCall());
Assert.assertTrue(first.getGenotype("NA2").isNoCall()); Assert.assertTrue(first.getGenotype("NA2").isNoCall());
final VariantContext second = allVCs.get(1); final VariantContext second = allVCs.get(1);
Assert.assertEquals(second.getStart(), 69498); Assert.assertEquals(second.getStart(), 69498);
Assert.assertEquals(second.getEnd(), 69506); Assert.assertEquals(second.getEnd(), 69506);
Assert.assertEquals(second.getGenotypes().size(), 2); Assert.assertEquals(second.getGenotypes().size(), 2);
Assert.assertTrue(second.getGenotype("NA1").isCalled()); Assert.assertTrue(second.getGenotype("NA1").isNoCall());
Assert.assertTrue(second.getGenotype("NA2").isCalled()); Assert.assertTrue(second.getGenotype("NA2").isNoCall());
} }
@Test @Test
@ -161,7 +161,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test @Test
public void testMD5s() throws Exception { public void testMD5s() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791"); final String cmd = baseTestString(" -L 1:69485-69791");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c279e089fd15359e75867b1318cb4d50")); final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("1df56fdfc71729cc82ba5dbfc75a72c4"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testMD5s", spec); executeTest("testMD5s", spec);
} }
@ -169,7 +169,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test @Test
public void testBasepairResolutionOutput() throws Exception { public void testBasepairResolutionOutput() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution"); final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a068fb2c35cdd14df1e8f1f92f4114b4")); final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("9c8fc4d9e330fbe41a00b7f71a784f4e"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testBasepairResolutionOutput", spec); executeTest("testBasepairResolutionOutput", spec);
} }
@ -177,7 +177,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test @Test
public void testBasepairResolutionInput() throws Exception { public void testBasepairResolutionInput() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf"; final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("0bd914cfa16349ee0439bfa5033a4f2a")); final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6116f3c70cd5288f3e8b89b1953a1e15"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testBasepairResolutionInput", spec); executeTest("testBasepairResolutionInput", spec);
} }

View File

@ -126,4 +126,21 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
Arrays.asList("2b0f8662be950e49911dfd2d93776712")); Arrays.asList("2b0f8662be950e49911dfd2d93776712"));
executeTest("testSamplesWithDifferentLs", spec); executeTest("testSamplesWithDifferentLs", spec);
} }
@Test(enabled = true)
public void testNoPLsException() {
// Test with input files with (1) 0/0 and (2) ./.
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
1,
Arrays.asList("97bf0aad80b3992704166bbaca0cc455"));
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.nocall.vcf",
1,
Arrays.asList("97bf0aad80b3992704166bbaca0cc455"));
executeTest("testNoPLsException.1", spec1);
executeTest("testNoPLsException.2", spec2);
}
} }

View File

@ -1546,26 +1546,40 @@ public class GATKVariantContextUtils {
// only add if the name is new // only add if the name is new
final String name = g.getSampleName(); final String name = g.getSampleName();
if ( !mergedGenotypes.containsSample(name) ) { if ( !mergedGenotypes.containsSample(name) ) {
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g).alleles(noCallAlleles(g.getPloidy()));
if ( !g.hasPL() ) { if (g.hasPL()) {
if ( g.isNoCall() ) { // we need to modify it even if it already contains all of the alleles because we need to purge the <ALT> PLs out anyways
mergedGenotypes.add(g); final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart());
continue; final int[] PLs = generatePLs(g, indexesOfRelevantAlleles);
} final int[] AD = g.hasAD() ? generateAD(g.getAD(), indexesOfRelevantAlleles) : null;
throw new UserException("cannot merge genotypes from samples without PLs; sample " + g.getSampleName() + " does not have likelihoods at position " + VC.getChr() + ":" + VC.getStart()); genotypeBuilder.PL(PLs).AD(AD).noGQ();
} }
mergedGenotypes.add(genotypeBuilder.make());
// we need to modify it even if it already contains all of the alleles because we need to purge the <ALT> PLs out anyways
final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart());
final int[] PLs = generatePLs(g, indexesOfRelevantAlleles);
final int[] AD = g.hasAD() ? generateAD(g.getAD(), indexesOfRelevantAlleles) : null;
final Genotype newG = new GenotypeBuilder(g).name(name).alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(PLs).AD(AD).noGQ().make();
mergedGenotypes.add(newG);
} }
} }
} }
/**
* Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy.
*
* @param ploidy the required ploidy.
*
* @return never {@code null}, but an empty list if {@code ploidy} is equal or less than 0. The returned list
* might or might not be mutable.
*/
public static List<Allele> noCallAlleles(final int ploidy) {
if (ploidy <= 0)
return Collections.EMPTY_LIST;
else if (ploidy == 1)
return Collections.singletonList(Allele.NO_CALL);
else {
final List<Allele> result = new ArrayList<>(ploidy);
for (int i = 0; i < ploidy; i++)
result.add(Allele.NO_CALL);
return result;
}
}
/** /**
* Determines the allele mapping from myAlleles to the targetAlleles, substituting the generic "<ALT>" as appropriate. * Determines the allele mapping from myAlleles to the targetAlleles, substituting the generic "<ALT>" as appropriate.
* If the myAlleles set does not contain "<ALT>" as an allele, it throws an exception. * If the myAlleles set does not contain "<ALT>" as an allele, it throws an exception.