Merge pull request #621 from broadinstitute/vrr_combine_gvcf_bugfix
Fix for CombineGVCFs and GenotypeGVCFs recurrent exception about missing...
This commit is contained in:
commit
38b7cfbda9
|
|
@ -198,9 +198,7 @@ public class ReferenceConfidenceModel {
|
|||
final int offset = curPos.getStart() - refSpan.getStart();
|
||||
|
||||
final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls);
|
||||
if ( overlappingSite != null ) {
|
||||
// we have some overlapping site, add it to the list of positions
|
||||
if ( overlappingSite.getStart() == curPos.getStart() )
|
||||
if ( overlappingSite != null && overlappingSite.getStart() == curPos.getStart() ) {
|
||||
results.add(overlappingSite);
|
||||
} else {
|
||||
// otherwise emit a reference confidence variant context
|
||||
|
|
|
|||
|
|
@ -299,7 +299,7 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
|||
final GenotypesContext genotypes = GenotypesContext.create();
|
||||
for ( final VariantContext vc : VCs ) {
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
genotypes.add(new GenotypeBuilder(g).alleles(Arrays.asList(refAllele, refAllele)).make());
|
||||
genotypes.add(new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy())).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder("", first.getChr(), start, end, Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)).attributes(attrs).genotypes(genotypes).make();
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "96fea2caf0a40df3feb268e8b14da670"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d5980df681bc57e10c681901494b46d2"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "19efc8020f31d1b68d80c50df0629e50"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
|
||||
|
|
|
|||
|
|
@ -323,8 +323,8 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
|
|||
final VariantContext vcStart = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start, Arrays.asList("A", "C"));
|
||||
final VariantContext vcEnd = GATKVariantContextUtils.makeFromAlleles("test", "chr1", stop, Arrays.asList("A", "C"));
|
||||
final VariantContext vcMiddle = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 2, Arrays.asList("A", "C"));
|
||||
final VariantContext vcDel = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 4, Arrays.asList("ACG", "A"));
|
||||
final VariantContext vcIns = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 8, Arrays.asList("A", "ACG"));
|
||||
final VariantContext vcDel = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 4, Arrays.asList("AAC", "A"));
|
||||
final VariantContext vcIns = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start + 8, Arrays.asList("G", "GCG"));
|
||||
|
||||
final List<VariantContext> allCalls = Arrays.asList(vcStart, vcEnd, vcMiddle, vcDel, vcIns);
|
||||
|
||||
|
|
@ -365,7 +365,19 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
if ( call != null ) {
|
||||
Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
|
||||
if (call.isVariant() && refModel.getType() == VariantContext.Type.SYMBOLIC ) {
|
||||
//Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
|
||||
Assert.assertTrue(call.getReference().length() > 1); // must be a deletion.
|
||||
Assert.assertTrue(call.getStart() < refModel.getStart()); // the deletion must not start at the same position
|
||||
Assert.assertEquals(call.getReference().getBaseString().substring(refModel.getStart() - call.getStart(),
|
||||
refModel.getStart() - call.getStart() + 1), refModel.getReference().getBaseString(), "" + data.getRefHap()); // the reference must be the same.
|
||||
Assert.assertTrue(refModel.getGenotype(0).getGQ() <= 0); // No confidence in the reference hom-ref call across the deletion
|
||||
Assert.assertEquals(refModel.getAlleles().size(),2); // the reference and the lonelly <NON_REF>
|
||||
Assert.assertEquals(refModel.getAlleles().get(1),GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
} else {
|
||||
Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
|
||||
}
|
||||
|
||||
} else {
|
||||
final int expectedDP = expectedDPs.get(curPos.getStart() - data.getActiveRegion().getLocation().getStart());
|
||||
Assert.assertEquals(refModel.getStart(), loc.getStart() + i);
|
||||
|
|
|
|||
|
|
@ -76,15 +76,15 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
Assert.assertEquals(first.getStart(), 69491);
|
||||
Assert.assertEquals(first.getEnd(), 69497);
|
||||
Assert.assertEquals(first.getGenotypes().size(), 2);
|
||||
Assert.assertTrue(first.getGenotype("NA1").isCalled());
|
||||
Assert.assertTrue(first.getGenotype("NA1").isNoCall());
|
||||
Assert.assertTrue(first.getGenotype("NA2").isNoCall());
|
||||
|
||||
final VariantContext second = allVCs.get(1);
|
||||
Assert.assertEquals(second.getStart(), 69498);
|
||||
Assert.assertEquals(second.getEnd(), 69506);
|
||||
Assert.assertEquals(second.getGenotypes().size(), 2);
|
||||
Assert.assertTrue(second.getGenotype("NA1").isCalled());
|
||||
Assert.assertTrue(second.getGenotype("NA2").isCalled());
|
||||
Assert.assertTrue(second.getGenotype("NA1").isNoCall());
|
||||
Assert.assertTrue(second.getGenotype("NA2").isNoCall());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -161,7 +161,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMD5s() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c279e089fd15359e75867b1318cb4d50"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("1df56fdfc71729cc82ba5dbfc75a72c4"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMD5s", spec);
|
||||
}
|
||||
|
|
@ -169,7 +169,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBasepairResolutionOutput() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a068fb2c35cdd14df1e8f1f92f4114b4"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("9c8fc4d9e330fbe41a00b7f71a784f4e"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBasepairResolutionOutput", spec);
|
||||
}
|
||||
|
|
@ -177,7 +177,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBasepairResolutionInput() throws Exception {
|
||||
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("0bd914cfa16349ee0439bfa5033a4f2a"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6116f3c70cd5288f3e8b89b1953a1e15"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBasepairResolutionInput", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -126,4 +126,21 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("2b0f8662be950e49911dfd2d93776712"));
|
||||
executeTest("testSamplesWithDifferentLs", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testNoPLsException() {
|
||||
// Test with input files with (1) 0/0 and (2) ./.
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
|
||||
1,
|
||||
Arrays.asList("97bf0aad80b3992704166bbaca0cc455"));
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.nocall.vcf",
|
||||
1,
|
||||
Arrays.asList("97bf0aad80b3992704166bbaca0cc455"));
|
||||
executeTest("testNoPLsException.1", spec1);
|
||||
executeTest("testNoPLsException.2", spec2);
|
||||
}
|
||||
}
|
||||
|
|
@ -1546,26 +1546,40 @@ public class GATKVariantContextUtils {
|
|||
// only add if the name is new
|
||||
final String name = g.getSampleName();
|
||||
if ( !mergedGenotypes.containsSample(name) ) {
|
||||
|
||||
if ( !g.hasPL() ) {
|
||||
if ( g.isNoCall() ) {
|
||||
mergedGenotypes.add(g);
|
||||
continue;
|
||||
}
|
||||
throw new UserException("cannot merge genotypes from samples without PLs; sample " + g.getSampleName() + " does not have likelihoods at position " + VC.getChr() + ":" + VC.getStart());
|
||||
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g).alleles(noCallAlleles(g.getPloidy()));
|
||||
if (g.hasPL()) {
|
||||
// we need to modify it even if it already contains all of the alleles because we need to purge the <ALT> PLs out anyways
|
||||
final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart());
|
||||
final int[] PLs = generatePLs(g, indexesOfRelevantAlleles);
|
||||
final int[] AD = g.hasAD() ? generateAD(g.getAD(), indexesOfRelevantAlleles) : null;
|
||||
genotypeBuilder.PL(PLs).AD(AD).noGQ();
|
||||
}
|
||||
|
||||
// we need to modify it even if it already contains all of the alleles because we need to purge the <ALT> PLs out anyways
|
||||
final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart());
|
||||
final int[] PLs = generatePLs(g, indexesOfRelevantAlleles);
|
||||
final int[] AD = g.hasAD() ? generateAD(g.getAD(), indexesOfRelevantAlleles) : null;
|
||||
|
||||
final Genotype newG = new GenotypeBuilder(g).name(name).alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(PLs).AD(AD).noGQ().make();
|
||||
mergedGenotypes.add(newG);
|
||||
mergedGenotypes.add(genotypeBuilder.make());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy.
|
||||
*
|
||||
* @param ploidy the required ploidy.
|
||||
*
|
||||
* @return never {@code null}, but an empty list if {@code ploidy} is equal or less than 0. The returned list
|
||||
* might or might not be mutable.
|
||||
*/
|
||||
public static List<Allele> noCallAlleles(final int ploidy) {
|
||||
if (ploidy <= 0)
|
||||
return Collections.EMPTY_LIST;
|
||||
else if (ploidy == 1)
|
||||
return Collections.singletonList(Allele.NO_CALL);
|
||||
else {
|
||||
final List<Allele> result = new ArrayList<>(ploidy);
|
||||
for (int i = 0; i < ploidy; i++)
|
||||
result.add(Allele.NO_CALL);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the allele mapping from myAlleles to the targetAlleles, substituting the generic "<ALT>" as appropriate.
|
||||
* If the myAlleles set does not contain "<ALT>" as an allele, it throws an exception.
|
||||
|
|
|
|||
Loading…
Reference in New Issue