Merge pull request #729 from broadinstitute/eb_improve_dangling_head_merging_PT74221002

Improve the accuracy of dangling head merging in the HC assembler.
This commit is contained in:
Eric Banks 2014-09-08 10:28:51 -04:00
commit 78a61dc3e8
8 changed files with 92 additions and 73 deletions

View File

@ -364,16 +364,22 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false)
protected int numPruningSamples = 1;
/**
* This mode is currently experimental and should only be used in the RNA-seq calling pipeline.
*/
@Advanced
@Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="Should we enable dangling head recovery in the read threading assembler?", required = false)
protected boolean recoverDanglingHeads = false;
@Deprecated
@Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="This argument is no longer needed as it is now the default behavior", required = false)
protected boolean DEPRECATED_RecoverDanglingHeads = false;
@Hidden
@Argument(fullName="doNotRecoverDanglingTails", shortName="doNotRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false)
protected boolean doNotRecoverDanglingTails = false;
@Argument(fullName="doNotRecoverDanglingBranches", shortName="doNotRecoverDanglingBranches", doc="Should we disable dangling head and tail recovery in the read threading assembler?", required = false)
protected boolean doNotRecoverDanglingBranches = false;
/**
* When constructing the assembly graph we are often left with "dangling" branches. The assembly engine attempts to rescue these branches
* by merging them back into the main graph. This argument describes the minimum length of a dangling branch needed for the engine to
* try to rescue it. A smaller number here will lead to higher sensitivity to real variation but also to a higher number of false positives.
*/
@Advanced
@Argument(fullName="minDanglingBranchLength", shortName="minDanglingBranchLength", doc="Minimum length of a dangling branch to attempt recovery in the read threading assembler", required = false)
protected int minDanglingBranchLength = 4;
@Advanced
@Argument(fullName="consensus", shortName="consensus", doc="In 1000G consensus mode. Inject all provided alleles to the assembly graph but don't forcibly genotype all of them.", required = false)
@ -678,7 +684,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
}
}
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY;
@ -750,8 +756,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
assemblyEngine.setDebug(SCAC.DEBUG);
assemblyEngine.setDebugGraphTransformations(debugGraphTransformations);
assemblyEngine.setAllowCyclesInKmerGraphToGeneratePaths(allowCyclesInKmerGraphToGeneratePaths);
assemblyEngine.setRecoverDanglingTails(!doNotRecoverDanglingTails);
assemblyEngine.setRecoverDanglingHeads(recoverDanglingHeads);
assemblyEngine.setRecoverDanglingBranches(!doNotRecoverDanglingBranches);
assemblyEngine.setMinDanglingBranchLength(minDanglingBranchLength);
assemblyEngine.setMinBaseQualityToUseInAssembly(MIN_BASE_QUALTY_SCORE);
MIN_TAIL_QUALITY = (byte)(MIN_BASE_QUALTY_SCORE - 1);

View File

@ -86,8 +86,8 @@ public abstract class LocalAssemblyEngine {
protected boolean debug = false;
protected boolean allowCyclesInKmerGraphToGeneratePaths = false;
protected boolean debugGraphTransformations = false;
protected boolean recoverDanglingTails = true;
protected boolean recoverDanglingHeads = true;
protected boolean recoverDanglingBranches = true;
protected int minDanglingBranchLength = 0;
protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE;
protected int pruneFactor = 2;
@ -439,19 +439,11 @@ public abstract class LocalAssemblyEngine {
this.debugGraphTransformations = debugGraphTransformations;
}
public boolean isRecoverDanglingTails() {
return recoverDanglingTails;
public boolean isRecoverDanglingBranches() { return recoverDanglingBranches; }
public void setRecoverDanglingBranches(final boolean recoverDanglingBranches) {
this.recoverDanglingBranches = recoverDanglingBranches;
}
public void setRecoverDanglingTails(boolean recoverDanglingTails) {
this.recoverDanglingTails = recoverDanglingTails;
}
public boolean isRecoverDanglingHeads() {
return recoverDanglingHeads;
}
public void setRecoverDanglingHeads(boolean recoverDanglingHeads) {
this.recoverDanglingHeads = recoverDanglingHeads;
}
public void setMinDanglingBranchLength(final int minDanglingBranchLength) { this.minDanglingBranchLength = minDanglingBranchLength; }
}

View File

@ -60,8 +60,7 @@ import java.util.*;
public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSampleEdge> {
private static final int MAX_CIGAR_COMPLEXITY = 3;
private static final int MIN_DANGLING_TAIL_LENGTH = 5; // SNP + 3 stabilizing nodes + the LCA
private static final int MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE = 1;
private int maxMismatchesInDanglingHead = -1;
protected boolean alreadyBuilt;
@ -73,6 +72,10 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
super(kmerSize, edgeFactory);
}
protected void setMaxMismatchesInDanglingHead(final int maxMismatchesInDanglingHead) {
this.maxMismatchesInDanglingHead = maxMismatchesInDanglingHead;
}
/**
* Edge factory that encapsulates the numPruningSamples assembly parameter
*/
@ -119,8 +122,9 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* Try to recover dangling tails
*
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
*/
public void recoverDanglingTails(final int pruneFactor) {
public void recoverDanglingTails(final int pruneFactor, final int minDanglingBranchLength) {
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built");
int attempted = 0;
@ -128,7 +132,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
for ( final MultiDeBruijnVertex v : vertexSet() ) {
if ( outDegreeOf(v) == 0 && ! isRefSink(v) ) {
attempted++;
nRecovered += recoverDanglingTail(v, pruneFactor);
nRecovered += recoverDanglingTail(v, pruneFactor, minDanglingBranchLength);
}
}
@ -139,8 +143,9 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* Try to recover dangling heads
*
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
*/
public void recoverDanglingHeads(final int pruneFactor) {
public void recoverDanglingHeads(final int pruneFactor, final int minDanglingBranchLength) {
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingHeads requires the graph be already built");
// we need to build a list of dangling heads because that process can modify the graph (and otherwise generate
@ -157,7 +162,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
// now we can try to recover the dangling heads
for ( final MultiDeBruijnVertex v : danglingHeads ) {
attempted++;
nRecovered += recoverDanglingHead(v, pruneFactor);
nRecovered += recoverDanglingHead(v, pruneFactor, minDanglingBranchLength);
}
logger.debug("Recovered " + nRecovered + " of " + attempted + " dangling heads");
@ -168,13 +173,14 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
*
* @param vertex the vertex to recover
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
* @return 1 if we successfully recovered the vertex and 0 otherwise
*/
protected int recoverDanglingTail(final MultiDeBruijnVertex vertex, final int pruneFactor) {
protected int recoverDanglingTail(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
if ( outDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling tail for " + vertex + " but it has out-degree > 0");
// generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths
final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor);
final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor, minDanglingBranchLength);
// if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
if ( danglingTailMergeResult == null || ! cigarIsOkayToMerge(danglingTailMergeResult.cigar, false, true) )
@ -189,13 +195,14 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
*
* @param vertex the vertex to recover
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
* @return 1 if we successfully recovered a vertex and 0 otherwise
*/
protected int recoverDanglingHead(final MultiDeBruijnVertex vertex, final int pruneFactor) {
protected int recoverDanglingHead(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
if ( inDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling head for " + vertex + " but it has in-degree > 0");
// generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths
final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor);
final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor, minDanglingBranchLength);
// if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
if ( danglingHeadMergeResult == null || ! cigarIsOkayToMerge(danglingHeadMergeResult.cigar, true, false) )
@ -230,7 +237,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
if ( requireLastElementM && elements.get(numElements - 1).getOperator() != CigarOperator.M )
return false;
// TODO -- do we want to check whether the Ms mismatch too much also?
// note that there are checks for too many mismatches in the dangling branch later in the process
return true;
}
@ -313,11 +320,12 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @return a SmithWaterman object which can be null if no proper alignment could be generated
*/
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
final int minTailPathLength = Math.max(1, minDanglingBranchLength); // while heads can be 0, tails absolutely cannot
// find the lowest common ancestor path between this vertex and the diverging master path if available
final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor);
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < minTailPathLength + 1 ) // add 1 to include the LCA
return null;
// now get the reference path from the LCA
@ -340,11 +348,11 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @return a SmithWaterman object which can be null if no proper alignment could be generated
*/
protected DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
protected DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
// find the highest common descendant path between vertex and the reference source if available
final List<MultiDeBruijnVertex> altPath = findPathDownwardsToHighestCommonDescendantOfReference(vertex, pruneFactor);
if ( altPath == null || isRefSink(altPath.get(0)) )
if ( altPath == null || isRefSink(altPath.get(0)) || altPath.size() < minDanglingBranchLength + 1 ) // add 1 to include the LCA
return null;
// now get the reference path from the LCA
@ -477,14 +485,15 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* @param maxIndex the maximum index to traverse (not inclusive)
* @return the index of the ideal prefix match or -1 if it cannot find one, must be less than maxIndex
*/
protected static int bestPrefixMatch(final byte[] path1, final byte[] path2, final int maxIndex) {
protected int bestPrefixMatch(final byte[] path1, final byte[] path2, final int maxIndex) {
final int maxMismatches = getMaxMismatches(maxIndex);
int mismatches = 0;
int index = 0;
int lastGoodIndex = -1;
while ( index < maxIndex ) {
if ( path1[index] != path2[index] ) {
if ( ++mismatches > MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE )
return lastGoodIndex;
if ( ++mismatches > maxMismatches )
return -1;
lastGoodIndex = index;
}
index++;
@ -493,6 +502,17 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
return lastGoodIndex;
}
/**
* Determine the maximum number of mismatches permitted on the branch.
* Unless it's preset (e.g. by unit tests) it should be the length of the branch divided by the kmer size.
*
* @param lengthOfDanglingBranch the length of the branch itself
* @return positive integer
*/
private int getMaxMismatches(final int lengthOfDanglingBranch) {
return maxMismatchesInDanglingHead > 0 ? maxMismatchesInDanglingHead : Math.max(1, (lengthOfDanglingBranch / kmerSize));
}
protected boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend) {
final int indexOfLastDanglingNode = danglingHeadMergeResult.danglingPath.size() - 1;

View File

@ -69,7 +69,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
/** The min and max kmer sizes to try when building the graph. */
private final List<Integer> kmerSizes;
private final int maxAllowedPathsForReadThreadingAssembler;
private final boolean dontIncreaseKmerSizesForCycles;
private final boolean allowNonUniqueKmersInRef;
@ -85,7 +84,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef, final int numPruningSamples) {
super(maxAllowedPathsForReadThreadingAssembler);
this.kmerSizes = kmerSizes;
this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler;
this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef;
this.numPruningSamples = numPruningSamples;
@ -159,7 +157,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingBranches);
// add the reference sequence to the graph
rtgraph.addSequence("ref", refHaplotype.getBases(), true);
@ -199,8 +197,10 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
// look at all chains in the graph that terminate in a non-ref node (dangling sources and sinks) and see if
// we can recover them by merging some N bases from the chain back into the reference
if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(pruneFactor);
if ( recoverDanglingHeads ) rtgraph.recoverDanglingHeads(pruneFactor);
if ( recoverDanglingBranches ) {
rtgraph.recoverDanglingTails(pruneFactor, minDanglingBranchLength);
rtgraph.recoverDanglingHeads(pruneFactor, minDanglingBranchLength);
}
// remove all heading and trailing paths
if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef();

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "50f00994d0e9ae043c163425a2073675");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "29f216779f0db9a08f4907ea82f0c7fb");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"e497d592078f999d37af4badef0f3c32");
"9de64c4405e0dab99c70c2fae54d4841");
}
@Test
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"c44bd3a3bf241fc106496510450746fd");
"272e096b7dc2839d11343f35e5d5442d");
}
}

View File

@ -85,28 +85,28 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "fd31a5da51e46b021db055177841c8c2");
HCTest(CEUTRIO_BAM, "", "7bc718c6a604405e6d33c3073630cc66");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "4f80b7c7e448d719e308b7d73751bebc");
HCTest(NA12878_BAM, "", "07125724eb465a739df9c6ab191216b0");
}
@Test
public void testHaplotypeCallerMultiSampleHaploid() {
HCTest(CEUTRIO_BAM,
"-ploidy 1", "466d423a3bacd0032549bda987fb1574");
"-ploidy 1", "c93650f842aa4dfa4ef2b5f1b908a576");
}
@Test
public void testHaplotypeCallerSingleSampleHaploid() {
HCTest(NA12878_BAM, "-ploidy 1", "f588569b6d14246b7e6678ac80286012");
HCTest(NA12878_BAM, "-ploidy 1", "e5c8a8bfb4d9d522e610a2299a9b32ad");
}
@Test
public void testHaplotypeCallerSingleSampleTetraploid() {
HCTest(NA12878_BAM, "-ploidy 4", "95d4cca48fcbb82d5e41cfe60d68531e");
HCTest(NA12878_BAM, "-ploidy 4", "af711f92b33d3f42e87a719745d93a68");
}
@Test
@ -126,41 +126,41 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerGraphBasedSingleSample() {
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "4577a7c82300950eff0fae10aa4cc4ae");
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "79b0f8fa5e42ef23f3d166d84d92fa23");
}
@Test
public void testHaplotypeCallerGraphBasedMultiSampleHaploid() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "2ce7ef54258261c9e869fc69db3312e3");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "0962d7723ef1e5e20fb42e869f12e1da");
}
@Test
public void testHaplotypeCallerGraphBasedMultiSample() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "958511863caca47833b1df03a2f1e130");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "10119fbb494e966a6f1b54307cfbeb8b");
}
@Test
public void testHaplotypeCallerSingleSampleWithDbsnp() {
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "8294e5a78f2e1091369ae5d9735533cd");
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "fded195a0436242673718e6dc083c172");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" +
" -isr INTERSECTION -L " + GGA_INTERVALS_FILE,
"368f42869cc651c8fd94ec2a572bb545");
"5bc8892b68e281a3ceb4f2d141f8c723");
}
@Test
public void testHaplotypeCallerMultiSampleGGAHaploid() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"52494fae89dd37abd0029013557dfa9d");
"d1e872b1d9c484e94f826255e5dde548");
}
@Test
public void testHaplotypeCallerMultiSampleGGATetraploid() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"6a70e9d87459f274bc9b20f9b97ea274");
"874f5fc9d7eed8894f72b8d2587de4b6");
}
@Test
@ -176,7 +176,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "77f56e9211b13690ad4c30f40469381a");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f2242d1696ef542196d363cf56159851");
}
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -223,7 +223,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2be5ac87d698d7f92525a9721e66ed94"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cf6fbb3636c52cd47dd14e0bd415a320"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@ -272,7 +272,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("defb00b7f19a4626a1ace1bbf4fdf81d"));
Arrays.asList("86fb942473b3f8df2f8865209e551200"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -289,7 +289,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("6d221ed702f7f241e1af6f9016754a50"));
Arrays.asList("b6dab8a6223afeb9d0fa7c178c84c024"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -359,7 +359,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testMissingKeyAlternativeHaplotypesBugFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ca3d7b611b57835724fabd78f202bede"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("423b70c151a5d0028e104aa3372b8783"));
spec.disableShadowBCF();
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
}

View File

@ -119,7 +119,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
Assert.assertTrue(altSink != null, "We did not find a non-reference sink");
// confirm that the SW alignment agrees with our expectations
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0);
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0, 4);
if ( result == null ) {
Assert.assertFalse(cigarIsGood);
@ -209,6 +209,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
rtgraph.addSequence("ref", ref.getBytes(), true);
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M");
rtgraph.addRead(read);
rtgraph.setMaxMismatchesInDanglingHead(10);
rtgraph.buildGraphIfNecessary();
// confirm that we have just a single dangling head
@ -223,7 +224,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
Assert.assertTrue(altSource != null, "We did not find a non-reference source");
// confirm that the SW alignment agrees with our expectations
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0);
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0, 1);
if ( result == null ) {
Assert.assertFalse(shouldBeMerged);

View File

@ -85,7 +85,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest {
public SeqGraph assemble() {
assembler.removePathsNotConnectedToRef = false; // needed to pass some of the tests
assembler.setRecoverDanglingTails(false); // needed to pass some of the tests
assembler.setRecoverDanglingBranches(false); // needed to pass some of the tests
assembler.setDebugGraphTransformations(true);
final SeqGraph graph = assembler.assemble(reads, refHaplotype, Collections.<Haplotype>emptyList()).get(0).getGraph();
if ( DEBUG ) graph.printGraph(new File("test.dot"), 0);
@ -171,7 +171,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest {
Assert.assertNotNull(graph.getReferenceSinkVertex());
final List<KBestHaplotype> paths = new KBestHaplotypeFinder(graph);
Assert.assertEquals(paths.size(), 2);
Assert.assertEquals(paths.size(), 1);
}
@Test(enabled = ! DEBUG)