Merge pull request #621 from broadinstitute/vrr_combine_gvcf_bugfix

Fix for CombineGVCFs and GenotypeGVCFs recurrent exception about missing...
This commit is contained in:
Ryan Poplin 2014-05-02 11:52:32 -04:00
commit 38b7cfbda9
7 changed files with 70 additions and 29 deletions

View File

@ -198,9 +198,7 @@ public class ReferenceConfidenceModel {
final int offset = curPos.getStart() - refSpan.getStart();
final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls);
if ( overlappingSite != null ) {
// we have some overlapping site, add it to the list of positions
if ( overlappingSite.getStart() == curPos.getStart() )
if ( overlappingSite != null && overlappingSite.getStart() == curPos.getStart() ) {
results.add(overlappingSite);
} else {
// otherwise emit a reference confidence variant context

View File

@ -299,7 +299,7 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
final GenotypesContext genotypes = GenotypesContext.create();
for ( final VariantContext vc : VCs ) {
for ( final Genotype g : vc.getGenotypes() )
genotypes.add(new GenotypeBuilder(g).alleles(Arrays.asList(refAllele, refAllele)).make());
genotypes.add(new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy())).make());
}
return new VariantContextBuilder("", first.getChr(), start, end, Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)).attributes(attrs).genotypes(genotypes).make();

View File

@ -68,7 +68,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "96fea2caf0a40df3feb268e8b14da670"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d5980df681bc57e10c681901494b46d2"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "19efc8020f31d1b68d80c50df0629e50"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});

View File

@ -323,8 +323,8 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final VariantContext vcStart = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start, Arrays.asList("A", "C"));
final VariantContext vcEnd = GATKVariantContextUtils.makeFromAlleles("test", "chr1", stop, Arrays.asList("A", "C"));
final VariantContext vcMiddle = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 2, Arrays.asList("A", "C"));
final VariantContext vcDel = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 4, Arrays.asList("ACG", "A"));
final VariantContext vcIns = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 8, Arrays.asList("A", "ACG"));
final VariantContext vcDel = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 4, Arrays.asList("AAC", "A"));
final VariantContext vcIns = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 8, Arrays.asList("G", "GCG"));
final List<VariantContext> allCalls = Arrays.asList(vcStart, vcEnd, vcMiddle, vcDel, vcIns);
@ -365,7 +365,19 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
}
if ( call != null ) {
Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
if (call.isVariant() && refModel.getType() == VariantContext.Type.SYMBOLIC ) {
//Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
Assert.assertTrue(call.getReference().length() > 1); // must be a deletion.
Assert.assertTrue(call.getStart() < refModel.getStart()); // the deletion must not start at the same position
Assert.assertEquals(call.getReference().getBaseString().substring(refModel.getStart() - call.getStart(),
refModel.getStart() - call.getStart() + 1), refModel.getReference().getBaseString(), "" + data.getRefHap()); // the reference must be the same.
Assert.assertTrue(refModel.getGenotype(0).getGQ() <= 0); // No confidence in the reference hom-ref call across the deletion
Assert.assertEquals(refModel.getAlleles().size(),2); // the reference and the lonelly <NON_REF>
Assert.assertEquals(refModel.getAlleles().get(1),GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
} else {
Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
}
} else {
final int expectedDP = expectedDPs.get(curPos.getStart() - data.getActiveRegion().getLocation().getStart());
Assert.assertEquals(refModel.getStart(), loc.getStart() + i);

View File

@ -76,15 +76,15 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
Assert.assertEquals(first.getStart(), 69491);
Assert.assertEquals(first.getEnd(), 69497);
Assert.assertEquals(first.getGenotypes().size(), 2);
Assert.assertTrue(first.getGenotype("NA1").isCalled());
Assert.assertTrue(first.getGenotype("NA1").isNoCall());
Assert.assertTrue(first.getGenotype("NA2").isNoCall());
final VariantContext second = allVCs.get(1);
Assert.assertEquals(second.getStart(), 69498);
Assert.assertEquals(second.getEnd(), 69506);
Assert.assertEquals(second.getGenotypes().size(), 2);
Assert.assertTrue(second.getGenotype("NA1").isCalled());
Assert.assertTrue(second.getGenotype("NA2").isCalled());
Assert.assertTrue(second.getGenotype("NA1").isNoCall());
Assert.assertTrue(second.getGenotype("NA2").isNoCall());
}
@Test
@ -161,7 +161,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testMD5s() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c279e089fd15359e75867b1318cb4d50"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("1df56fdfc71729cc82ba5dbfc75a72c4"));
spec.disableShadowBCF();
executeTest("testMD5s", spec);
}
@ -169,7 +169,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testBasepairResolutionOutput() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a068fb2c35cdd14df1e8f1f92f4114b4"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("9c8fc4d9e330fbe41a00b7f71a784f4e"));
spec.disableShadowBCF();
executeTest("testBasepairResolutionOutput", spec);
}
@ -177,7 +177,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testBasepairResolutionInput() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("0bd914cfa16349ee0439bfa5033a4f2a"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6116f3c70cd5288f3e8b89b1953a1e15"));
spec.disableShadowBCF();
executeTest("testBasepairResolutionInput", spec);
}

View File

@ -126,4 +126,21 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
Arrays.asList("2b0f8662be950e49911dfd2d93776712"));
executeTest("testSamplesWithDifferentLs", spec);
}
@Test(enabled = true)
public void testNoPLsException() {
// Test with input files with (1) 0/0 and (2) ./.
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
1,
Arrays.asList("97bf0aad80b3992704166bbaca0cc455"));
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.nocall.vcf",
1,
Arrays.asList("97bf0aad80b3992704166bbaca0cc455"));
executeTest("testNoPLsException.1", spec1);
executeTest("testNoPLsException.2", spec2);
}
}

View File

@ -1546,26 +1546,40 @@ public class GATKVariantContextUtils {
// only add if the name is new
final String name = g.getSampleName();
if ( !mergedGenotypes.containsSample(name) ) {
if ( !g.hasPL() ) {
if ( g.isNoCall() ) {
mergedGenotypes.add(g);
continue;
}
throw new UserException("cannot merge genotypes from samples without PLs; sample " + g.getSampleName() + " does not have likelihoods at position " + VC.getChr() + ":" + VC.getStart());
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g).alleles(noCallAlleles(g.getPloidy()));
if (g.hasPL()) {
// we need to modify it even if it already contains all of the alleles because we need to purge the <ALT> PLs out anyways
final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart());
final int[] PLs = generatePLs(g, indexesOfRelevantAlleles);
final int[] AD = g.hasAD() ? generateAD(g.getAD(), indexesOfRelevantAlleles) : null;
genotypeBuilder.PL(PLs).AD(AD).noGQ();
}
// we need to modify it even if it already contains all of the alleles because we need to purge the <ALT> PLs out anyways
final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart());
final int[] PLs = generatePLs(g, indexesOfRelevantAlleles);
final int[] AD = g.hasAD() ? generateAD(g.getAD(), indexesOfRelevantAlleles) : null;
final Genotype newG = new GenotypeBuilder(g).name(name).alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(PLs).AD(AD).noGQ().make();
mergedGenotypes.add(newG);
mergedGenotypes.add(genotypeBuilder.make());
}
}
}
/**
* Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy.
*
* @param ploidy the required ploidy.
*
* @return never {@code null}, but an empty list if {@code ploidy} is equal or less than 0. The returned list
* might or might not be mutable.
*/
public static List<Allele> noCallAlleles(final int ploidy) {
if (ploidy <= 0)
return Collections.EMPTY_LIST;
else if (ploidy == 1)
return Collections.singletonList(Allele.NO_CALL);
else {
final List<Allele> result = new ArrayList<>(ploidy);
for (int i = 0; i < ploidy; i++)
result.add(Allele.NO_CALL);
return result;
}
}
/**
* Determines the allele mapping from myAlleles to the targetAlleles, substituting the generic "<ALT>" as appropriate.
* If the myAlleles set does not contain "<ALT>" as an allele, it throws an exception.