Fix loss of key alternative haplotypes due to a change on threading start policy required when recovering dangling heads.

Story:

  - https://www.pivotaltracker.com/story/show/67601310

Change:

  - Unless recover-danging-heads is active, the threading starting location policy is the original one. i.e. just at already existing unique kmer vertices.

Tests:

  - HaplotypeCallerIntegrationTest#testMissingKeyAlternativeHaplotypesBugFix
This commit is contained in:
Valentin Ruano-Rubio 2014-03-29 17:27:35 -04:00
parent 5abb7ea2db
commit 258b2bce28
5 changed files with 61 additions and 11 deletions

View File

@ -149,6 +149,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples); final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
// add the reference sequence to the graph // add the reference sequence to the graph
rtgraph.addSequence("ref", refHaplotype.getBases(), true); rtgraph.addSequence("ref", refHaplotype.getBases(), true);

View File

@ -66,6 +66,8 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
private final static boolean WRITE_GRAPH = false; private final static boolean WRITE_GRAPH = false;
private final static boolean DEBUG_NON_UNIQUE_CALC = false; private final static boolean DEBUG_NON_UNIQUE_CALC = false;
private boolean startThreadingOnlyAtExistingVertex = false;
/** for debugging info printing */ /** for debugging info printing */
private static int counter = 0; private static int counter = 0;
@ -252,13 +254,48 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
for ( int i = seqForKmers.start; i < seqForKmers.stop - kmerSize; i++ ) { for ( int i = seqForKmers.start; i < seqForKmers.stop - kmerSize; i++ ) {
final Kmer kmer1 = new Kmer(seqForKmers.sequence, i, kmerSize); final Kmer kmer1 = new Kmer(seqForKmers.sequence, i, kmerSize);
if ( !nonUniqueKmers.contains(kmer1) ) if ( isThreadingStart(kmer1) )
return i; return i;
} }
return -1; return -1;
} }
/**
* Checks whether a kmer can be the threading start based on the current threading start location policy.
*
* @see #setThreadingStartOnlyAtExistingVertex(boolean)
* @see #getThreadingStartOnlyAtExistingVertex()
*
* @param kmer the query kmer.
* @return {@code true} if we can start thread the sequence at this kmer, {@code false} otherwise.
*/
protected boolean isThreadingStart(final Kmer kmer) {
if (kmer == null)
throw new IllegalArgumentException();
return startThreadingOnlyAtExistingVertex ? uniqueKmers.containsKey(kmer) : !nonUniqueKmers.contains(kmer);
}
/**
* Changes the threading start location policy.
*
* @param value {@code true} if threading will start only at existing vertices in the graph, {@code false} if
* it can start at any unique kmer.
*/
public void setThreadingStartOnlyAtExistingVertex(final boolean value) {
startThreadingOnlyAtExistingVertex = value;
}
/**
* Indicates the threading start location policy.
*
* @return {@code true} if threading will start only at existing vertices in the graph, {@code false} if
* it can start at any unique kmer.
*/
public boolean getThreadingStartOnlyAtExistingVertex() {
return startThreadingOnlyAtExistingVertex;
}
/** /**
* Build the read threaded assembly graph if it hasn't already been constructed from the sequences that have * Build the read threaded assembly graph if it hasn't already been constructed from the sequences that have
* been added to the graph. * been added to the graph.

View File

@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0); final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth"; final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("388200d107fb47326df78a971a52698f")); final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011"));
specAnn.disableShadowBCF(); specAnn.disableShadowBCF();
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0); final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleComplex1() { public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7278afd47e5851c954359441cac2f0b8"); HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45");
} }
private void HCTestSymbolicVariants(String bam, String args, String md5) { private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleGGAComplex() { public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"cbdd34c454d69b266e3681ddfc33c7a3"); "724a05b7df716647014f29c0fe86e071");
} }
@Test @Test
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() { public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"61972c7c0d378e756f3b4d99aed9d0cf"); "9689b89bea8282137fade0905b5f2716");
} }
} }

View File

@ -89,7 +89,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerSingleSample() { public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "c208ef58d464465c68b5c26501122ad7"); HCTest(NA12878_BAM, "", "96f299a5cf411900b8eda3845c3ce465");
} }
@Test @Test
@ -99,7 +99,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerGraphBasedSingleSample() { public void testHaplotypeCallerGraphBasedSingleSample() {
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "049ba1794a1ce2b15566bb1e9431fccf"); HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "83fe0694621bc1e0240f6f79eb6d6999");
} }
@Test @Test
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() { public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("7c3254ead383e2b9a51b242f6de2a5b2")); Arrays.asList("2b240d51aa9b0d1f65f4e899a2feb97e"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
} }
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() { public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("eda8f91091fe462205d687ec49fc61e7")); Arrays.asList("6f6213bfc62e9cd9db56a7277ebe04e0"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
} }
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestConservativePcrIndelModelWGS() { public void HCTestConservativePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("4e10d49b8af23d5ef3a28cb702d10a4b")); Arrays.asList("7592274ecd2b5ef4624dd2ed659536ef"));
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
} }
@ -309,13 +309,24 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec); executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
} }
@Test(priority= -3)
public void testMissingKeyAlternativeHaplotypesBugFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e33053579a21d4cb23ba84107595891f"));
spec.disableShadowBCF();
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
}
@Test @Test
public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() { public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list", hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cbda30145523bf05e0413157f1a00b3e")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("0f6384c8e170840ef1490d262ac2e06e"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec); executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec);
} }
} }