Fix loss of key alternative haplotypes due to a change on threading start policy required when recovering dangling heads.
Story: - https://www.pivotaltracker.com/story/show/67601310 Change: - Unless recover-danging-heads is active, the threading starting location policy is the original one. i.e. just at already existing unique kmer vertices. Tests: - HaplotypeCallerIntegrationTest#testMissingKeyAlternativeHaplotypesBugFix
This commit is contained in:
parent
5abb7ea2db
commit
258b2bce28
|
|
@ -149,6 +149,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
|
||||
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
|
||||
|
||||
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
|
||||
|
||||
// add the reference sequence to the graph
|
||||
rtgraph.addSequence("ref", refHaplotype.getBases(), true);
|
||||
|
||||
|
|
|
|||
|
|
@ -66,6 +66,8 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
|||
private final static boolean WRITE_GRAPH = false;
|
||||
private final static boolean DEBUG_NON_UNIQUE_CALC = false;
|
||||
|
||||
private boolean startThreadingOnlyAtExistingVertex = false;
|
||||
|
||||
/** for debugging info printing */
|
||||
private static int counter = 0;
|
||||
|
||||
|
|
@ -252,13 +254,48 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
|||
|
||||
for ( int i = seqForKmers.start; i < seqForKmers.stop - kmerSize; i++ ) {
|
||||
final Kmer kmer1 = new Kmer(seqForKmers.sequence, i, kmerSize);
|
||||
if ( !nonUniqueKmers.contains(kmer1) )
|
||||
if ( isThreadingStart(kmer1) )
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a kmer can be the threading start based on the current threading start location policy.
|
||||
*
|
||||
* @see #setThreadingStartOnlyAtExistingVertex(boolean)
|
||||
* @see #getThreadingStartOnlyAtExistingVertex()
|
||||
*
|
||||
* @param kmer the query kmer.
|
||||
* @return {@code true} if we can start thread the sequence at this kmer, {@code false} otherwise.
|
||||
*/
|
||||
protected boolean isThreadingStart(final Kmer kmer) {
|
||||
if (kmer == null)
|
||||
throw new IllegalArgumentException();
|
||||
return startThreadingOnlyAtExistingVertex ? uniqueKmers.containsKey(kmer) : !nonUniqueKmers.contains(kmer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes the threading start location policy.
|
||||
*
|
||||
* @param value {@code true} if threading will start only at existing vertices in the graph, {@code false} if
|
||||
* it can start at any unique kmer.
|
||||
*/
|
||||
public void setThreadingStartOnlyAtExistingVertex(final boolean value) {
|
||||
startThreadingOnlyAtExistingVertex = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates the threading start location policy.
|
||||
*
|
||||
* @return {@code true} if threading will start only at existing vertices in the graph, {@code false} if
|
||||
* it can start at any unique kmer.
|
||||
*/
|
||||
public boolean getThreadingStartOnlyAtExistingVertex() {
|
||||
return startThreadingOnlyAtExistingVertex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the read threaded assembly graph if it hasn't already been constructed from the sequences that have
|
||||
* been added to the graph.
|
||||
|
|
|
|||
|
|
@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
|
||||
|
||||
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("388200d107fb47326df78a971a52698f"));
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011"));
|
||||
specAnn.disableShadowBCF();
|
||||
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7278afd47e5851c954359441cac2f0b8");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"cbdd34c454d69b266e3681ddfc33c7a3");
|
||||
"724a05b7df716647014f29c0fe86e071");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
||||
"61972c7c0d378e756f3b4d99aed9d0cf");
|
||||
"9689b89bea8282137fade0905b5f2716");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "c208ef58d464465c68b5c26501122ad7");
|
||||
HCTest(NA12878_BAM, "", "96f299a5cf411900b8eda3845c3ce465");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -99,7 +99,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedSingleSample() {
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "049ba1794a1ce2b15566bb1e9431fccf");
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "83fe0694621bc1e0240f6f79eb6d6999");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("7c3254ead383e2b9a51b242f6de2a5b2"));
|
||||
Arrays.asList("2b240d51aa9b0d1f65f4e899a2feb97e"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("eda8f91091fe462205d687ec49fc61e7"));
|
||||
Arrays.asList("6f6213bfc62e9cd9db56a7277ebe04e0"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestConservativePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("4e10d49b8af23d5ef3a28cb702d10a4b"));
|
||||
Arrays.asList("7592274ecd2b5ef4624dd2ed659536ef"));
|
||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -309,13 +309,24 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
|
||||
}
|
||||
|
||||
@Test(priority= -3)
|
||||
public void testMissingKeyAlternativeHaplotypesBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e33053579a21d4cb23ba84107595891f"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||
hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list",
|
||||
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cbda30145523bf05e0413157f1a00b3e"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("0f6384c8e170840ef1490d262ac2e06e"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue