Fix loss of key alternative haplotypes due to a change on threading start policy required when recovering dangling heads.

Story:

  - https://www.pivotaltracker.com/story/show/67601310

Change:

  - Unless recover-danging-heads is active, the threading starting location policy is the original one. i.e. just at already existing unique kmer vertices.

Tests:

  - HaplotypeCallerIntegrationTest#testMissingKeyAlternativeHaplotypesBugFix
This commit is contained in:
Valentin Ruano-Rubio 2014-03-29 17:27:35 -04:00
parent 5abb7ea2db
commit 258b2bce28
5 changed files with 61 additions and 11 deletions

View File

@ -149,6 +149,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
// add the reference sequence to the graph
rtgraph.addSequence("ref", refHaplotype.getBases(), true);

View File

@ -66,6 +66,8 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
private final static boolean WRITE_GRAPH = false;
private final static boolean DEBUG_NON_UNIQUE_CALC = false;
private boolean startThreadingOnlyAtExistingVertex = false;
/** for debugging info printing */
private static int counter = 0;
@ -252,13 +254,48 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
for ( int i = seqForKmers.start; i < seqForKmers.stop - kmerSize; i++ ) {
final Kmer kmer1 = new Kmer(seqForKmers.sequence, i, kmerSize);
if ( !nonUniqueKmers.contains(kmer1) )
if ( isThreadingStart(kmer1) )
return i;
}
return -1;
}
/**
* Checks whether a kmer can be the threading start based on the current threading start location policy.
*
* @see #setThreadingStartOnlyAtExistingVertex(boolean)
* @see #getThreadingStartOnlyAtExistingVertex()
*
* @param kmer the query kmer.
* @return {@code true} if we can start thread the sequence at this kmer, {@code false} otherwise.
*/
protected boolean isThreadingStart(final Kmer kmer) {
if (kmer == null)
throw new IllegalArgumentException();
return startThreadingOnlyAtExistingVertex ? uniqueKmers.containsKey(kmer) : !nonUniqueKmers.contains(kmer);
}
/**
* Changes the threading start location policy.
*
* @param value {@code true} if threading will start only at existing vertices in the graph, {@code false} if
* it can start at any unique kmer.
*/
public void setThreadingStartOnlyAtExistingVertex(final boolean value) {
startThreadingOnlyAtExistingVertex = value;
}
/**
* Indicates the threading start location policy.
*
* @return {@code true} if threading will start only at existing vertices in the graph, {@code false} if
* it can start at any unique kmer.
*/
public boolean getThreadingStartOnlyAtExistingVertex() {
return startThreadingOnlyAtExistingVertex;
}
/**
* Build the read threaded assembly graph if it hasn't already been constructed from the sequences that have
* been added to the graph.

View File

@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("388200d107fb47326df78a971a52698f"));
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011"));
specAnn.disableShadowBCF();
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7278afd47e5851c954359441cac2f0b8");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"cbdd34c454d69b266e3681ddfc33c7a3");
"724a05b7df716647014f29c0fe86e071");
}
@Test
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"61972c7c0d378e756f3b4d99aed9d0cf");
"9689b89bea8282137fade0905b5f2716");
}
}

View File

@ -89,7 +89,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "c208ef58d464465c68b5c26501122ad7");
HCTest(NA12878_BAM, "", "96f299a5cf411900b8eda3845c3ce465");
}
@Test
@ -99,7 +99,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerGraphBasedSingleSample() {
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "049ba1794a1ce2b15566bb1e9431fccf");
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "83fe0694621bc1e0240f6f79eb6d6999");
}
@Test
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("7c3254ead383e2b9a51b242f6de2a5b2"));
Arrays.asList("2b240d51aa9b0d1f65f4e899a2feb97e"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("eda8f91091fe462205d687ec49fc61e7"));
Arrays.asList("6f6213bfc62e9cd9db56a7277ebe04e0"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestConservativePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("4e10d49b8af23d5ef3a28cb702d10a4b"));
Arrays.asList("7592274ecd2b5ef4624dd2ed659536ef"));
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
}
@ -309,13 +309,24 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
}
@Test(priority= -3)
public void testMissingKeyAlternativeHaplotypesBugFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e33053579a21d4cb23ba84107595891f"));
spec.disableShadowBCF();
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
}
@Test
public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cbda30145523bf05e0413157f1a00b3e"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("0f6384c8e170840ef1490d262ac2e06e"));
spec.disableShadowBCF();
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec);
}
}