Improve the accuracy of dangling head merging in the HC assembler.
Dangling head merging (like with tails) in now enabled by default. The --recoverDanglingHeads argument is now deprecated so that users know not to use it anymore. We now also allow the user to set the minimum branch length for merging. This will be different for exomes and RNA (see below). The other changes in the code itself: 1. We no longer allow an arbitrarily large number of mismatches in the dangling head for merging 2. The max number of mismatches allowed in a dangling head is proportional to the kmer size There will be a difference in the RNA calling pipeline. Instead of invoking '--recoverDanglingHeads' the user will instead want to use '--minDanglingBranchLength 0'. Below are the knowledgebase results of the master branch vs. this one. For NA12878 DNA Exome: master SNPS TRUE_POSITIVE 36722 master SNPS CALLED_NOT_IN_DB_AT_ALL 2699 master SNPS REASONABLE_FILTERS_WOULD_FILTER_FP_SITE 292 master SNPS FALSE_POSITIVE_SITE_IS_FP 70 branch SNPS TRUE_POSITIVE 36867 branch SNPS CALLED_NOT_IN_DB_AT_ALL 2952 branch SNPS REASONABLE_FILTERS_WOULD_FILTER_FP_SITE 387 branch SNPS FALSE_POSITIVE_SITE_IS_FP 94 As I discussed with Ryan in person, there are a good number of FPs that are called in the new code, but they nearly all have bad strand bias and should be easily filtered by VQSR. Note that there is no change for indels. For NA12878 RNA from Ami: master SNPS TRUE_POSITIVE 11055 master SNPS CALLED_NOT_IN_DB_AT_ALL 831 master SNPS REASONABLE_FILTERS_WOULD_FILTER_FP_SITE 44 master SNPS FALSE_POSITIVE_SITE_IS_FP 96 branch SNPS TRUE_POSITIVE 11113 branch SNPS CALLED_NOT_IN_DB_AT_ALL 874 branch SNPS REASONABLE_FILTERS_WOULD_FILTER_FP_SITE 47 branch SNPS FALSE_POSITIVE_SITE_IS_FP 92 Again, there's basically no change for indels.
This commit is contained in:
parent
56a2554bb0
commit
cc175bad40
|
|
@ -364,16 +364,22 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
@Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false)
|
||||
protected int numPruningSamples = 1;
|
||||
|
||||
/**
|
||||
* This mode is currently experimental and should only be used in the RNA-seq calling pipeline.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="Should we enable dangling head recovery in the read threading assembler?", required = false)
|
||||
protected boolean recoverDanglingHeads = false;
|
||||
@Deprecated
|
||||
@Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="This argument is no longer needed as it is now the default behavior", required = false)
|
||||
protected boolean DEPRECATED_RecoverDanglingHeads = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="doNotRecoverDanglingTails", shortName="doNotRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false)
|
||||
protected boolean doNotRecoverDanglingTails = false;
|
||||
@Argument(fullName="doNotRecoverDanglingBranches", shortName="doNotRecoverDanglingBranches", doc="Should we disable dangling head and tail recovery in the read threading assembler?", required = false)
|
||||
protected boolean doNotRecoverDanglingBranches = false;
|
||||
|
||||
/**
|
||||
* When constructing the assembly graph we are often left with "dangling" branches. The assembly engine attempts to rescue these branches
|
||||
* by merging them back into the main graph. This argument describes the minimum length of a dangling branch needed for the engine to
|
||||
* try to rescue it. A smaller number here will lead to higher sensitivity to real variation but also to a higher number of false positives.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="minDanglingBranchLength", shortName="minDanglingBranchLength", doc="Minimum length of a dangling branch to attempt recovery in the read threading assembler", required = false)
|
||||
protected int minDanglingBranchLength = 4;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="consensus", shortName="consensus", doc="In 1000G consensus mode. Inject all provided alleles to the assembly graph but don't forcibly genotype all of them.", required = false)
|
||||
|
|
@ -678,7 +684,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
|
||||
simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY;
|
||||
|
|
@ -750,8 +756,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
assemblyEngine.setDebug(SCAC.DEBUG);
|
||||
assemblyEngine.setDebugGraphTransformations(debugGraphTransformations);
|
||||
assemblyEngine.setAllowCyclesInKmerGraphToGeneratePaths(allowCyclesInKmerGraphToGeneratePaths);
|
||||
assemblyEngine.setRecoverDanglingTails(!doNotRecoverDanglingTails);
|
||||
assemblyEngine.setRecoverDanglingHeads(recoverDanglingHeads);
|
||||
assemblyEngine.setRecoverDanglingBranches(!doNotRecoverDanglingBranches);
|
||||
assemblyEngine.setMinDanglingBranchLength(minDanglingBranchLength);
|
||||
assemblyEngine.setMinBaseQualityToUseInAssembly(MIN_BASE_QUALTY_SCORE);
|
||||
|
||||
MIN_TAIL_QUALITY = (byte)(MIN_BASE_QUALTY_SCORE - 1);
|
||||
|
|
|
|||
|
|
@ -86,8 +86,8 @@ public abstract class LocalAssemblyEngine {
|
|||
protected boolean debug = false;
|
||||
protected boolean allowCyclesInKmerGraphToGeneratePaths = false;
|
||||
protected boolean debugGraphTransformations = false;
|
||||
protected boolean recoverDanglingTails = true;
|
||||
protected boolean recoverDanglingHeads = true;
|
||||
protected boolean recoverDanglingBranches = true;
|
||||
protected int minDanglingBranchLength = 0;
|
||||
|
||||
protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE;
|
||||
protected int pruneFactor = 2;
|
||||
|
|
@ -439,19 +439,11 @@ public abstract class LocalAssemblyEngine {
|
|||
this.debugGraphTransformations = debugGraphTransformations;
|
||||
}
|
||||
|
||||
public boolean isRecoverDanglingTails() {
|
||||
return recoverDanglingTails;
|
||||
public boolean isRecoverDanglingBranches() { return recoverDanglingBranches; }
|
||||
|
||||
public void setRecoverDanglingBranches(final boolean recoverDanglingBranches) {
|
||||
this.recoverDanglingBranches = recoverDanglingBranches;
|
||||
}
|
||||
|
||||
public void setRecoverDanglingTails(boolean recoverDanglingTails) {
|
||||
this.recoverDanglingTails = recoverDanglingTails;
|
||||
}
|
||||
|
||||
public boolean isRecoverDanglingHeads() {
|
||||
return recoverDanglingHeads;
|
||||
}
|
||||
|
||||
public void setRecoverDanglingHeads(boolean recoverDanglingHeads) {
|
||||
this.recoverDanglingHeads = recoverDanglingHeads;
|
||||
}
|
||||
public void setMinDanglingBranchLength(final int minDanglingBranchLength) { this.minDanglingBranchLength = minDanglingBranchLength; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,8 +60,7 @@ import java.util.*;
|
|||
public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSampleEdge> {
|
||||
|
||||
private static final int MAX_CIGAR_COMPLEXITY = 3;
|
||||
private static final int MIN_DANGLING_TAIL_LENGTH = 5; // SNP + 3 stabilizing nodes + the LCA
|
||||
private static final int MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE = 1;
|
||||
private int maxMismatchesInDanglingHead = -1;
|
||||
|
||||
protected boolean alreadyBuilt;
|
||||
|
||||
|
|
@ -73,6 +72,10 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
super(kmerSize, edgeFactory);
|
||||
}
|
||||
|
||||
protected void setMaxMismatchesInDanglingHead(final int maxMismatchesInDanglingHead) {
|
||||
this.maxMismatchesInDanglingHead = maxMismatchesInDanglingHead;
|
||||
}
|
||||
|
||||
/**
|
||||
* Edge factory that encapsulates the numPruningSamples assembly parameter
|
||||
*/
|
||||
|
|
@ -119,8 +122,9 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
* Try to recover dangling tails
|
||||
*
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
|
||||
*/
|
||||
public void recoverDanglingTails(final int pruneFactor) {
|
||||
public void recoverDanglingTails(final int pruneFactor, final int minDanglingBranchLength) {
|
||||
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built");
|
||||
|
||||
int attempted = 0;
|
||||
|
|
@ -128,7 +132,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
for ( final MultiDeBruijnVertex v : vertexSet() ) {
|
||||
if ( outDegreeOf(v) == 0 && ! isRefSink(v) ) {
|
||||
attempted++;
|
||||
nRecovered += recoverDanglingTail(v, pruneFactor);
|
||||
nRecovered += recoverDanglingTail(v, pruneFactor, minDanglingBranchLength);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -139,8 +143,9 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
* Try to recover dangling heads
|
||||
*
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
|
||||
*/
|
||||
public void recoverDanglingHeads(final int pruneFactor) {
|
||||
public void recoverDanglingHeads(final int pruneFactor, final int minDanglingBranchLength) {
|
||||
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingHeads requires the graph be already built");
|
||||
|
||||
// we need to build a list of dangling heads because that process can modify the graph (and otherwise generate
|
||||
|
|
@ -157,7 +162,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
// now we can try to recover the dangling heads
|
||||
for ( final MultiDeBruijnVertex v : danglingHeads ) {
|
||||
attempted++;
|
||||
nRecovered += recoverDanglingHead(v, pruneFactor);
|
||||
nRecovered += recoverDanglingHead(v, pruneFactor, minDanglingBranchLength);
|
||||
}
|
||||
|
||||
logger.debug("Recovered " + nRecovered + " of " + attempted + " dangling heads");
|
||||
|
|
@ -168,13 +173,14 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
*
|
||||
* @param vertex the vertex to recover
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
|
||||
* @return 1 if we successfully recovered the vertex and 0 otherwise
|
||||
*/
|
||||
protected int recoverDanglingTail(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
protected int recoverDanglingTail(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
|
||||
if ( outDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling tail for " + vertex + " but it has out-degree > 0");
|
||||
|
||||
// generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths
|
||||
final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor);
|
||||
final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor, minDanglingBranchLength);
|
||||
|
||||
// if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
|
||||
if ( danglingTailMergeResult == null || ! cigarIsOkayToMerge(danglingTailMergeResult.cigar, false, true) )
|
||||
|
|
@ -189,13 +195,14 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
*
|
||||
* @param vertex the vertex to recover
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @param minDanglingBranchLength the minimum length of a dangling branch for us to try to merge it
|
||||
* @return 1 if we successfully recovered a vertex and 0 otherwise
|
||||
*/
|
||||
protected int recoverDanglingHead(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
protected int recoverDanglingHead(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
|
||||
if ( inDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling head for " + vertex + " but it has in-degree > 0");
|
||||
|
||||
// generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths
|
||||
final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor);
|
||||
final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor, minDanglingBranchLength);
|
||||
|
||||
// if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
|
||||
if ( danglingHeadMergeResult == null || ! cigarIsOkayToMerge(danglingHeadMergeResult.cigar, true, false) )
|
||||
|
|
@ -230,7 +237,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
if ( requireLastElementM && elements.get(numElements - 1).getOperator() != CigarOperator.M )
|
||||
return false;
|
||||
|
||||
// TODO -- do we want to check whether the Ms mismatch too much also?
|
||||
// note that there are checks for too many mismatches in the dangling branch later in the process
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -313,11 +320,12 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @return a SmithWaterman object which can be null if no proper alignment could be generated
|
||||
*/
|
||||
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
|
||||
final int minTailPathLength = Math.max(1, minDanglingBranchLength); // while heads can be 0, tails absolutely cannot
|
||||
|
||||
// find the lowest common ancestor path between this vertex and the diverging master path if available
|
||||
final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor);
|
||||
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
|
||||
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < minTailPathLength + 1 ) // add 1 to include the LCA
|
||||
return null;
|
||||
|
||||
// now get the reference path from the LCA
|
||||
|
|
@ -340,11 +348,11 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @return a SmithWaterman object which can be null if no proper alignment could be generated
|
||||
*/
|
||||
protected DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
protected DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
|
||||
|
||||
// find the highest common descendant path between vertex and the reference source if available
|
||||
final List<MultiDeBruijnVertex> altPath = findPathDownwardsToHighestCommonDescendantOfReference(vertex, pruneFactor);
|
||||
if ( altPath == null || isRefSink(altPath.get(0)) )
|
||||
if ( altPath == null || isRefSink(altPath.get(0)) || altPath.size() < minDanglingBranchLength + 1 ) // add 1 to include the LCA
|
||||
return null;
|
||||
|
||||
// now get the reference path from the LCA
|
||||
|
|
@ -477,14 +485,15 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
* @param maxIndex the maximum index to traverse (not inclusive)
|
||||
* @return the index of the ideal prefix match or -1 if it cannot find one, must be less than maxIndex
|
||||
*/
|
||||
protected static int bestPrefixMatch(final byte[] path1, final byte[] path2, final int maxIndex) {
|
||||
protected int bestPrefixMatch(final byte[] path1, final byte[] path2, final int maxIndex) {
|
||||
final int maxMismatches = getMaxMismatches(maxIndex);
|
||||
int mismatches = 0;
|
||||
int index = 0;
|
||||
int lastGoodIndex = -1;
|
||||
while ( index < maxIndex ) {
|
||||
if ( path1[index] != path2[index] ) {
|
||||
if ( ++mismatches > MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE )
|
||||
return lastGoodIndex;
|
||||
if ( ++mismatches > maxMismatches )
|
||||
return -1;
|
||||
lastGoodIndex = index;
|
||||
}
|
||||
index++;
|
||||
|
|
@ -493,6 +502,17 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
return lastGoodIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the maximum number of mismatches permitted on the branch.
|
||||
* Unless it's preset (e.g. by unit tests) it should be the length of the branch divided by the kmer size.
|
||||
*
|
||||
* @param lengthOfDanglingBranch the length of the branch itself
|
||||
* @return positive integer
|
||||
*/
|
||||
private int getMaxMismatches(final int lengthOfDanglingBranch) {
|
||||
return maxMismatchesInDanglingHead > 0 ? maxMismatchesInDanglingHead : Math.max(1, (lengthOfDanglingBranch / kmerSize));
|
||||
}
|
||||
|
||||
protected boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend) {
|
||||
|
||||
final int indexOfLastDanglingNode = danglingHeadMergeResult.danglingPath.size() - 1;
|
||||
|
|
|
|||
|
|
@ -69,7 +69,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
|
||||
/** The min and max kmer sizes to try when building the graph. */
|
||||
private final List<Integer> kmerSizes;
|
||||
private final int maxAllowedPathsForReadThreadingAssembler;
|
||||
|
||||
private final boolean dontIncreaseKmerSizesForCycles;
|
||||
private final boolean allowNonUniqueKmersInRef;
|
||||
|
|
@ -85,7 +84,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef, final int numPruningSamples) {
|
||||
super(maxAllowedPathsForReadThreadingAssembler);
|
||||
this.kmerSizes = kmerSizes;
|
||||
this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler;
|
||||
this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
|
||||
this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef;
|
||||
this.numPruningSamples = numPruningSamples;
|
||||
|
|
@ -159,7 +157,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
|
||||
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
|
||||
|
||||
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
|
||||
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingBranches);
|
||||
|
||||
// add the reference sequence to the graph
|
||||
rtgraph.addSequence("ref", refHaplotype.getBases(), true);
|
||||
|
|
@ -199,8 +197,10 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
|
||||
// look at all chains in the graph that terminate in a non-ref node (dangling sources and sinks) and see if
|
||||
// we can recover them by merging some N bases from the chain back into the reference
|
||||
if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(pruneFactor);
|
||||
if ( recoverDanglingHeads ) rtgraph.recoverDanglingHeads(pruneFactor);
|
||||
if ( recoverDanglingBranches ) {
|
||||
rtgraph.recoverDanglingTails(pruneFactor, minDanglingBranchLength);
|
||||
rtgraph.recoverDanglingHeads(pruneFactor, minDanglingBranchLength);
|
||||
}
|
||||
|
||||
// remove all heading and trailing paths
|
||||
if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef();
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "50f00994d0e9ae043c163425a2073675");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "29f216779f0db9a08f4907ea82f0c7fb");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"e497d592078f999d37af4badef0f3c32");
|
||||
"9de64c4405e0dab99c70c2fae54d4841");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
||||
"c44bd3a3bf241fc106496510450746fd");
|
||||
"272e096b7dc2839d11343f35e5d5442d");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,28 +85,28 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "fd31a5da51e46b021db055177841c8c2");
|
||||
HCTest(CEUTRIO_BAM, "", "7bc718c6a604405e6d33c3073630cc66");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "4f80b7c7e448d719e308b7d73751bebc");
|
||||
HCTest(NA12878_BAM, "", "07125724eb465a739df9c6ab191216b0");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleHaploid() {
|
||||
HCTest(CEUTRIO_BAM,
|
||||
"-ploidy 1", "466d423a3bacd0032549bda987fb1574");
|
||||
"-ploidy 1", "c93650f842aa4dfa4ef2b5f1b908a576");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleHaploid() {
|
||||
HCTest(NA12878_BAM, "-ploidy 1", "f588569b6d14246b7e6678ac80286012");
|
||||
HCTest(NA12878_BAM, "-ploidy 1", "e5c8a8bfb4d9d522e610a2299a9b32ad");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleTetraploid() {
|
||||
HCTest(NA12878_BAM, "-ploidy 4", "95d4cca48fcbb82d5e41cfe60d68531e");
|
||||
HCTest(NA12878_BAM, "-ploidy 4", "af711f92b33d3f42e87a719745d93a68");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -126,41 +126,41 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedSingleSample() {
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "4577a7c82300950eff0fae10aa4cc4ae");
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "79b0f8fa5e42ef23f3d166d84d92fa23");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedMultiSampleHaploid() {
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "2ce7ef54258261c9e869fc69db3312e3");
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "0962d7723ef1e5e20fb42e869f12e1da");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "958511863caca47833b1df03a2f1e130");
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "10119fbb494e966a6f1b54307cfbeb8b");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleWithDbsnp() {
|
||||
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "8294e5a78f2e1091369ae5d9735533cd");
|
||||
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "fded195a0436242673718e6dc083c172");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" +
|
||||
" -isr INTERSECTION -L " + GGA_INTERVALS_FILE,
|
||||
"368f42869cc651c8fd94ec2a572bb545");
|
||||
"5bc8892b68e281a3ceb4f2d141f8c723");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAHaploid() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"52494fae89dd37abd0029013557dfa9d");
|
||||
"d1e872b1d9c484e94f826255e5dde548");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGATetraploid() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"6a70e9d87459f274bc9b20f9b97ea274");
|
||||
"874f5fc9d7eed8894f72b8d2587de4b6");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -176,7 +176,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "77f56e9211b13690ad4c30f40469381a");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f2242d1696ef542196d363cf56159851");
|
||||
}
|
||||
|
||||
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
||||
|
|
@ -223,7 +223,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2be5ac87d698d7f92525a9721e66ed94"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cf6fbb3636c52cd47dd14e0bd415a320"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -272,7 +272,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("defb00b7f19a4626a1ace1bbf4fdf81d"));
|
||||
Arrays.asList("86fb942473b3f8df2f8865209e551200"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -289,7 +289,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("6d221ed702f7f241e1af6f9016754a50"));
|
||||
Arrays.asList("b6dab8a6223afeb9d0fa7c178c84c024"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -359,7 +359,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testMissingKeyAlternativeHaplotypesBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ca3d7b611b57835724fabd78f202bede"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("423b70c151a5d0028e104aa3372b8783"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
|
|||
Assert.assertTrue(altSink != null, "We did not find a non-reference sink");
|
||||
|
||||
// confirm that the SW alignment agrees with our expectations
|
||||
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0);
|
||||
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0, 4);
|
||||
|
||||
if ( result == null ) {
|
||||
Assert.assertFalse(cigarIsGood);
|
||||
|
|
@ -209,6 +209,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
|
|||
rtgraph.addSequence("ref", ref.getBytes(), true);
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M");
|
||||
rtgraph.addRead(read);
|
||||
rtgraph.setMaxMismatchesInDanglingHead(10);
|
||||
rtgraph.buildGraphIfNecessary();
|
||||
|
||||
// confirm that we have just a single dangling head
|
||||
|
|
@ -223,7 +224,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
|
|||
Assert.assertTrue(altSource != null, "We did not find a non-reference source");
|
||||
|
||||
// confirm that the SW alignment agrees with our expectations
|
||||
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0);
|
||||
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0, 1);
|
||||
|
||||
if ( result == null ) {
|
||||
Assert.assertFalse(shouldBeMerged);
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest {
|
|||
|
||||
public SeqGraph assemble() {
|
||||
assembler.removePathsNotConnectedToRef = false; // needed to pass some of the tests
|
||||
assembler.setRecoverDanglingTails(false); // needed to pass some of the tests
|
||||
assembler.setRecoverDanglingBranches(false); // needed to pass some of the tests
|
||||
assembler.setDebugGraphTransformations(true);
|
||||
final SeqGraph graph = assembler.assemble(reads, refHaplotype, Collections.<Haplotype>emptyList()).get(0).getGraph();
|
||||
if ( DEBUG ) graph.printGraph(new File("test.dot"), 0);
|
||||
|
|
@ -171,7 +171,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest {
|
|||
Assert.assertNotNull(graph.getReferenceSinkVertex());
|
||||
|
||||
final List<KBestHaplotype> paths = new KBestHaplotypeFinder(graph);
|
||||
Assert.assertEquals(paths.size(), 2);
|
||||
Assert.assertEquals(paths.size(), 1);
|
||||
}
|
||||
|
||||
@Test(enabled = ! DEBUG)
|
||||
|
|
|
|||
Loading…
Reference in New Issue