Improve the accuracy of dangling head merging in the HC assembler.

Dangling head merging (like with tails) in now enabled by default.
The --recoverDanglingHeads argument is now deprecated so that users know not to use it anymore.
We now also allow the user to set the minimum branch length for merging.  This will be different
for exomes and RNA (see below).

The other changes in the code itself:
1. We no longer allow an arbitrarily large number of mismatches in the dangling head for merging
2. The max number of mismatches allowed in a dangling head is proportional to the kmer size

There will be a difference in the RNA calling pipeline.  Instead of invoking '--recoverDanglingHeads'
the user will instead want to use '--minDanglingBranchLength 0'.

Below are the knowledgebase results of the master branch vs. this one.

For NA12878 DNA Exome:

master  SNPS         TRUE_POSITIVE                                36722
master  SNPS         CALLED_NOT_IN_DB_AT_ALL                       2699
master  SNPS         REASONABLE_FILTERS_WOULD_FILTER_FP_SITE        292
master  SNPS         FALSE_POSITIVE_SITE_IS_FP                       70

branch  SNPS         TRUE_POSITIVE                                36867
branch  SNPS         CALLED_NOT_IN_DB_AT_ALL                       2952
branch  SNPS         REASONABLE_FILTERS_WOULD_FILTER_FP_SITE        387
branch  SNPS         FALSE_POSITIVE_SITE_IS_FP                       94

As I discussed with Ryan in person, there are a good number of FPs that are called in the new
code, but they nearly all have bad strand bias and should be easily filtered by VQSR.
Note that there is no change for indels.

For NA12878 RNA from Ami:

master  SNPS         TRUE_POSITIVE                                11055
master  SNPS         CALLED_NOT_IN_DB_AT_ALL                        831
master  SNPS         REASONABLE_FILTERS_WOULD_FILTER_FP_SITE         44
master  SNPS         FALSE_POSITIVE_SITE_IS_FP                       96

branch  SNPS         TRUE_POSITIVE                                11113
branch  SNPS         CALLED_NOT_IN_DB_AT_ALL                        874
branch  SNPS         REASONABLE_FILTERS_WOULD_FILTER_FP_SITE         47
branch  SNPS         FALSE_POSITIVE_SITE_IS_FP                       92

Again, there's basically no change for indels.
This commit is contained in:
Eric Banks 2014-09-02 12:42:13 -04:00
parent 56a2554bb0
commit cc175bad40
8 changed files with 92 additions and 73 deletions

View File

@ -364,16 +364,22 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false)
protected int numPruningSamples = 1;
/**
* This mode is currently experimental and should only be used in the RNA-seq calling pipeline.
*/
@Advanced
@Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="Should we enable dangling head recovery in the read threading assembler?", required = false)
protected boolean recoverDanglingHeads = false;
@Deprecated
@Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="This argument is no longer needed as it is now the default behavior", required = false)
protected boolean DEPRECATED_RecoverDanglingHeads = false;
@Hidden
@Argument(fullName="doNotRecoverDanglingTails", shortName="doNotRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false)
protected boolean doNotRecoverDanglingTails = false;
@Argument(fullName="doNotRecoverDanglingBranches", shortName="doNotRecoverDanglingBranches", doc="Should we disable dangling head and tail recovery in the read threading assembler?", required = false)
protected boolean doNotRecoverDanglingBranches = false;
/**
* When constructing the assembly graph we are often left with "dangling" branches. The assembly engine attempts to rescue these branches
* by merging them back into the main graph. This argument describes the minimum length of a dangling branch needed for the engine to
* try to rescue it. A smaller number here will lead to higher sensitivity to real variation but also to a higher number of false positives.
*/
@Advanced
@Argument(fullName="minDanglingBranchLength", shortName="minDanglingBranchLength", doc="Minimum length of a dangling branch to attempt recovery in the read threading assembler", required = false)
protected int minDanglingBranchLength = 4;
@Advanced
@Argument(fullName="consensus", shortName="consensus", doc="In 1000G consensus mode. Inject all provided alleles to the assembly graph but don't forcibly genotype all of them.", required = false)
@ -678,7 +684,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
}
}
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY;
@ -750,8 +756,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
assemblyEngine.setDebug(SCAC.DEBUG);
assemblyEngine.setDebugGraphTransformations(debugGraphTransformations);
assemblyEngine.setAllowCyclesInKmerGraphToGeneratePaths(allowCyclesInKmerGraphToGeneratePaths);
assemblyEngine.setRecoverDanglingTails(!doNotRecoverDanglingTails);
assemblyEngine.setRecoverDanglingHeads(recoverDanglingHeads);
assemblyEngine.setRecoverDanglingBranches(!doNotRecoverDanglingBranches);
assemblyEngine.setMinDanglingBranchLength(minDanglingBranchLength);
assemblyEngine.setMinBaseQualityToUseInAssembly(MIN_BASE_QUALTY_SCORE);
MIN_TAIL_QUALITY = (byte)(MIN_BASE_QUALTY_SCORE - 1);

View File

@ -86,8 +86,8 @@ public abstract class LocalAssemblyEngine {
protected boolean debug = false;
protected boolean allowCyclesInKmerGraphToGeneratePaths = false;
protected boolean debugGraphTransformations = false;
protected boolean recoverDanglingTails = true;
protected boolean recoverDanglingHeads = true;
protected boolean recoverDanglingBranches = true;
protected int minDanglingBranchLength = 0;
protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE;
protected int pruneFactor = 2;
@ -439,19 +439,11 @@ public abstract class LocalAssemblyEngine {
this.debugGraphTransformations = debugGraphTransformations;
}
public boolean isRecoverDanglingTails() {
return recoverDanglingTails;
public boolean isRecoverDanglingBranches() { return recoverDanglingBranches; }
public void setRecoverDanglingBranches(final boolean recoverDanglingBranches) {
this.recoverDanglingBranches = recoverDanglingBranches;
}
public void setRecoverDanglingTails(boolean recoverDanglingTails) {
this.recoverDanglingTails = recoverDanglingTails;
}
public boolean isRecoverDanglingHeads() {
return recoverDanglingHeads;
}
public void setRecoverDanglingHeads(boolean recoverDanglingHeads) {
this.recoverDanglingHeads = recoverDanglingHeads;
}
public void setMinDanglingBranchLength(final int minDanglingBranchLength) { this.minDanglingBranchLength = minDanglingBranchLength; }
}

View File

@ -60,8 +60,7 @@ import java.util.*;
public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSampleEdge> {
private static final int MAX_CIGAR_COMPLEXITY = 3;
private static final int MIN_DANGLING_TAIL_LENGTH = 5; // SNP + 3 stabilizing nodes + the LCA
private static final int MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE = 1;
private int maxMismatchesInDanglingHead = -1;
protected boolean alreadyBuilt;
@ -73,6 +72,10 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
super(kmerSize, edgeFactory);
}
protected void setMaxMismatchesInDanglingHead(final int maxMismatchesInDanglingHead) {
this.maxMismatchesInDanglingHead = maxMismatchesInDanglingHead;
}
/**
* Edge factory that encapsulates the numPruningSamples assembly parameter
*/
@ -119,8 +122,9 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* Try to recover dangling tails
*
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
*/
public void recoverDanglingTails(final int pruneFactor) {
public void recoverDanglingTails(final int pruneFactor, final int minDanglingBranchLength) {
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built");
int attempted = 0;
@ -128,7 +132,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
for ( final MultiDeBruijnVertex v : vertexSet() ) {
if ( outDegreeOf(v) == 0 && ! isRefSink(v) ) {
attempted++;
nRecovered += recoverDanglingTail(v, pruneFactor);
nRecovered += recoverDanglingTail(v, pruneFactor, minDanglingBranchLength);
}
}
@ -139,8 +143,9 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* Try to recover dangling heads
*
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
*/
public void recoverDanglingHeads(final int pruneFactor) {
public void recoverDanglingHeads(final int pruneFactor, final int minDanglingBranchLength) {
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingHeads requires the graph be already built");
// we need to build a list of dangling heads because that process can modify the graph (and otherwise generate
@ -157,7 +162,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
// now we can try to recover the dangling heads
for ( final MultiDeBruijnVertex v : danglingHeads ) {
attempted++;
nRecovered += recoverDanglingHead(v, pruneFactor);
nRecovered += recoverDanglingHead(v, pruneFactor, minDanglingBranchLength);
}
logger.debug("Recovered " + nRecovered + " of " + attempted + " dangling heads");
@ -168,13 +173,14 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
*
* @param vertex the vertex to recover
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
* @return 1 if we successfully recovered the vertex and 0 otherwise
*/
protected int recoverDanglingTail(final MultiDeBruijnVertex vertex, final int pruneFactor) {
protected int recoverDanglingTail(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
if ( outDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling tail for " + vertex + " but it has out-degree > 0");
// generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths
final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor);
final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor, minDanglingBranchLength);
// if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
if ( danglingTailMergeResult == null || ! cigarIsOkayToMerge(danglingTailMergeResult.cigar, false, true) )
@ -189,13 +195,14 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
*
* @param vertex the vertex to recover
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
* @return 1 if we successfully recovered a vertex and 0 otherwise
*/
protected int recoverDanglingHead(final MultiDeBruijnVertex vertex, final int pruneFactor) {
protected int recoverDanglingHead(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
if ( inDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling head for " + vertex + " but it has in-degree > 0");
// generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths
final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor);
final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor, minDanglingBranchLength);
// if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
if ( danglingHeadMergeResult == null || ! cigarIsOkayToMerge(danglingHeadMergeResult.cigar, true, false) )
@ -230,7 +237,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
if ( requireLastElementM && elements.get(numElements - 1).getOperator() != CigarOperator.M )
return false;
// TODO -- do we want to check whether the Ms mismatch too much also?
// note that there are checks for too many mismatches in the dangling branch later in the process
return true;
}
@ -313,11 +320,12 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @return a SmithWaterman object which can be null if no proper alignment could be generated
*/
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
final int minTailPathLength = Math.max(1, minDanglingBranchLength); // while heads can be 0, tails absolutely cannot
// find the lowest common ancestor path between this vertex and the diverging master path if available
final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor);
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < minTailPathLength + 1 ) // add 1 to include the LCA
return null;
// now get the reference path from the LCA
@ -340,11 +348,11 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @return a SmithWaterman object which can be null if no proper alignment could be generated
*/
protected DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
protected DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
// find the highest common descendant path between vertex and the reference source if available
final List<MultiDeBruijnVertex> altPath = findPathDownwardsToHighestCommonDescendantOfReference(vertex, pruneFactor);
if ( altPath == null || isRefSink(altPath.get(0)) )
if ( altPath == null || isRefSink(altPath.get(0)) || altPath.size() < minDanglingBranchLength + 1 ) // add 1 to include the LCA
return null;
// now get the reference path from the LCA
@ -477,14 +485,15 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
* @param maxIndex the maximum index to traverse (not inclusive)
* @return the index of the ideal prefix match or -1 if it cannot find one, must be less than maxIndex
*/
protected static int bestPrefixMatch(final byte[] path1, final byte[] path2, final int maxIndex) {
protected int bestPrefixMatch(final byte[] path1, final byte[] path2, final int maxIndex) {
final int maxMismatches = getMaxMismatches(maxIndex);
int mismatches = 0;
int index = 0;
int lastGoodIndex = -1;
while ( index < maxIndex ) {
if ( path1[index] != path2[index] ) {
if ( ++mismatches > MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE )
return lastGoodIndex;
if ( ++mismatches > maxMismatches )
return -1;
lastGoodIndex = index;
}
index++;
@ -493,6 +502,17 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
return lastGoodIndex;
}
/**
* Determine the maximum number of mismatches permitted on the branch.
* Unless it's preset (e.g. by unit tests) it should be the length of the branch divided by the kmer size.
*
* @param lengthOfDanglingBranch the length of the branch itself
* @return positive integer
*/
private int getMaxMismatches(final int lengthOfDanglingBranch) {
return maxMismatchesInDanglingHead > 0 ? maxMismatchesInDanglingHead : Math.max(1, (lengthOfDanglingBranch / kmerSize));
}
protected boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend) {
final int indexOfLastDanglingNode = danglingHeadMergeResult.danglingPath.size() - 1;

View File

@ -69,7 +69,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
/** The min and max kmer sizes to try when building the graph. */
private final List<Integer> kmerSizes;
private final int maxAllowedPathsForReadThreadingAssembler;
private final boolean dontIncreaseKmerSizesForCycles;
private final boolean allowNonUniqueKmersInRef;
@ -85,7 +84,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef, final int numPruningSamples) {
super(maxAllowedPathsForReadThreadingAssembler);
this.kmerSizes = kmerSizes;
this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler;
this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef;
this.numPruningSamples = numPruningSamples;
@ -159,7 +157,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingBranches);
// add the reference sequence to the graph
rtgraph.addSequence("ref", refHaplotype.getBases(), true);
@ -199,8 +197,10 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
// look at all chains in the graph that terminate in a non-ref node (dangling sources and sinks) and see if
// we can recover them by merging some N bases from the chain back into the reference
if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(pruneFactor);
if ( recoverDanglingHeads ) rtgraph.recoverDanglingHeads(pruneFactor);
if ( recoverDanglingBranches ) {
rtgraph.recoverDanglingTails(pruneFactor, minDanglingBranchLength);
rtgraph.recoverDanglingHeads(pruneFactor, minDanglingBranchLength);
}
// remove all heading and trailing paths
if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef();

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "50f00994d0e9ae043c163425a2073675");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "29f216779f0db9a08f4907ea82f0c7fb");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"e497d592078f999d37af4badef0f3c32");
"9de64c4405e0dab99c70c2fae54d4841");
}
@Test
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"c44bd3a3bf241fc106496510450746fd");
"272e096b7dc2839d11343f35e5d5442d");
}
}

View File

@ -85,28 +85,28 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "fd31a5da51e46b021db055177841c8c2");
HCTest(CEUTRIO_BAM, "", "7bc718c6a604405e6d33c3073630cc66");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "4f80b7c7e448d719e308b7d73751bebc");
HCTest(NA12878_BAM, "", "07125724eb465a739df9c6ab191216b0");
}
@Test
public void testHaplotypeCallerMultiSampleHaploid() {
HCTest(CEUTRIO_BAM,
"-ploidy 1", "466d423a3bacd0032549bda987fb1574");
"-ploidy 1", "c93650f842aa4dfa4ef2b5f1b908a576");
}
@Test
public void testHaplotypeCallerSingleSampleHaploid() {
HCTest(NA12878_BAM, "-ploidy 1", "f588569b6d14246b7e6678ac80286012");
HCTest(NA12878_BAM, "-ploidy 1", "e5c8a8bfb4d9d522e610a2299a9b32ad");
}
@Test
public void testHaplotypeCallerSingleSampleTetraploid() {
HCTest(NA12878_BAM, "-ploidy 4", "95d4cca48fcbb82d5e41cfe60d68531e");
HCTest(NA12878_BAM, "-ploidy 4", "af711f92b33d3f42e87a719745d93a68");
}
@Test
@ -126,41 +126,41 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerGraphBasedSingleSample() {
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "4577a7c82300950eff0fae10aa4cc4ae");
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "79b0f8fa5e42ef23f3d166d84d92fa23");
}
@Test
public void testHaplotypeCallerGraphBasedMultiSampleHaploid() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "2ce7ef54258261c9e869fc69db3312e3");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "0962d7723ef1e5e20fb42e869f12e1da");
}
@Test
public void testHaplotypeCallerGraphBasedMultiSample() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "958511863caca47833b1df03a2f1e130");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "10119fbb494e966a6f1b54307cfbeb8b");
}
@Test
public void testHaplotypeCallerSingleSampleWithDbsnp() {
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "8294e5a78f2e1091369ae5d9735533cd");
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "fded195a0436242673718e6dc083c172");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" +
" -isr INTERSECTION -L " + GGA_INTERVALS_FILE,
"368f42869cc651c8fd94ec2a572bb545");
"5bc8892b68e281a3ceb4f2d141f8c723");
}
@Test
public void testHaplotypeCallerMultiSampleGGAHaploid() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"52494fae89dd37abd0029013557dfa9d");
"d1e872b1d9c484e94f826255e5dde548");
}
@Test
public void testHaplotypeCallerMultiSampleGGATetraploid() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"6a70e9d87459f274bc9b20f9b97ea274");
"874f5fc9d7eed8894f72b8d2587de4b6");
}
@Test
@ -176,7 +176,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "77f56e9211b13690ad4c30f40469381a");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f2242d1696ef542196d363cf56159851");
}
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -223,7 +223,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2be5ac87d698d7f92525a9721e66ed94"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cf6fbb3636c52cd47dd14e0bd415a320"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@ -272,7 +272,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("defb00b7f19a4626a1ace1bbf4fdf81d"));
Arrays.asList("86fb942473b3f8df2f8865209e551200"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -289,7 +289,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("6d221ed702f7f241e1af6f9016754a50"));
Arrays.asList("b6dab8a6223afeb9d0fa7c178c84c024"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -359,7 +359,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testMissingKeyAlternativeHaplotypesBugFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ca3d7b611b57835724fabd78f202bede"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("423b70c151a5d0028e104aa3372b8783"));
spec.disableShadowBCF();
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
}

View File

@ -119,7 +119,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
Assert.assertTrue(altSink != null, "We did not find a non-reference sink");
// confirm that the SW alignment agrees with our expectations
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0);
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0, 4);
if ( result == null ) {
Assert.assertFalse(cigarIsGood);
@ -209,6 +209,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
rtgraph.addSequence("ref", ref.getBytes(), true);
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M");
rtgraph.addRead(read);
rtgraph.setMaxMismatchesInDanglingHead(10);
rtgraph.buildGraphIfNecessary();
// confirm that we have just a single dangling head
@ -223,7 +224,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
Assert.assertTrue(altSource != null, "We did not find a non-reference source");
// confirm that the SW alignment agrees with our expectations
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0);
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0, 1);
if ( result == null ) {
Assert.assertFalse(shouldBeMerged);

View File

@ -85,7 +85,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest {
public SeqGraph assemble() {
assembler.removePathsNotConnectedToRef = false; // needed to pass some of the tests
assembler.setRecoverDanglingTails(false); // needed to pass some of the tests
assembler.setRecoverDanglingBranches(false); // needed to pass some of the tests
assembler.setDebugGraphTransformations(true);
final SeqGraph graph = assembler.assemble(reads, refHaplotype, Collections.<Haplotype>emptyList()).get(0).getGraph();
if ( DEBUG ) graph.printGraph(new File("test.dot"), 0);
@ -171,7 +171,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest {
Assert.assertNotNull(graph.getReferenceSinkVertex());
final List<KBestHaplotype> paths = new KBestHaplotypeFinder(graph);
Assert.assertEquals(paths.size(), 2);
Assert.assertEquals(paths.size(), 1);
}
@Test(enabled = ! DEBUG)