Improved tail merging: dangling tails can now be merged onto branches that are not entirely reference.
This is useful, for example, in cases where there are SNPs on insertions. Previously, tails were forced to be merged (incorrectly) only onto a reference node; now they can be merged onto any path in the graph from which they directly branch. Also, I've ported over Ryan's code that refuses to process any kmer size for which the reference sequence contains non-unique kmers.
This commit is contained in:
parent
5eee065133
commit
1d97b4a191
|
|
@ -273,6 +273,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
@Argument(fullName="dontIncreaseKmerSizesForCycles", shortName="dontIncreaseKmerSizesForCycles", doc="Should we disable the iterating over kmer sizes when graph cycles are detected?", required = false)
|
@Argument(fullName="dontIncreaseKmerSizesForCycles", shortName="dontIncreaseKmerSizesForCycles", doc="Should we disable the iterating over kmer sizes when graph cycles are detected?", required = false)
|
||||||
protected boolean dontIncreaseKmerSizesForCycles = false;
|
protected boolean dontIncreaseKmerSizesForCycles = false;
|
||||||
|
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="allowNonUniqueKmersInRef", shortName="allowNonUniqueKmersInRef", doc="Should we allow graphs which have non-unique kmers in the reference?", required = false)
|
||||||
|
protected boolean allowNonUniqueKmersInRef = false;
|
||||||
|
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false)
|
@Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false)
|
||||||
protected int numPruningSamples = 1;
|
protected int numPruningSamples = 1;
|
||||||
|
|
@ -615,7 +619,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
}
|
}
|
||||||
|
|
||||||
// create and setup the assembler
|
// create and setup the assembler
|
||||||
assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples);
|
assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef, numPruningSamples);
|
||||||
|
|
||||||
assemblyEngine.setErrorCorrectKmers(errorCorrectKmers);
|
assemblyEngine.setErrorCorrectKmers(errorCorrectKmers);
|
||||||
assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR);
|
assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR);
|
||||||
|
|
|
||||||
|
|
@ -260,15 +260,39 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
|
||||||
/**
|
/**
|
||||||
* Traverse the graph and get the next reference vertex if it exists
|
* Traverse the graph and get the next reference vertex if it exists
|
||||||
* @param v the current vertex, can be null
|
* @param v the current vertex, can be null
|
||||||
* @return the next reference vertex if it exists
|
* @return the next reference vertex if it exists, otherwise null
|
||||||
*/
|
*/
|
||||||
public V getNextReferenceVertex( final V v ) {
|
public V getNextReferenceVertex( final V v ) {
|
||||||
|
return getNextReferenceVertex(v, false, Collections.<MultiSampleEdge>emptyList());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Traverse the graph and get the next reference vertex if it exists
|
||||||
|
* @param v the current vertex, can be null
|
||||||
|
* @param allowNonRefPaths if true, allow sub-paths that are non-reference if there is only a single outgoing edge
|
||||||
|
* @param blacklistedEdges edges to ignore in the traversal down; useful to exclude the non-reference dangling paths
|
||||||
|
* @return the next vertex (but not necessarily on the reference path if allowNonRefPaths is true) if it exists, otherwise null
|
||||||
|
*/
|
||||||
|
public V getNextReferenceVertex( final V v, final boolean allowNonRefPaths, final Collection<MultiSampleEdge> blacklistedEdges ) {
|
||||||
if( v == null ) { return null; }
|
if( v == null ) { return null; }
|
||||||
for( final E edgeToTest : outgoingEdgesOf(v) ) {
|
|
||||||
|
// variable must be mutable because outgoingEdgesOf is an immutable collection
|
||||||
|
Set<E> edges = outgoingEdgesOf(v);
|
||||||
|
|
||||||
|
for( final E edgeToTest : edges ) {
|
||||||
if( edgeToTest.isRef() ) {
|
if( edgeToTest.isRef() ) {
|
||||||
return getEdgeTarget(edgeToTest);
|
return getEdgeTarget(edgeToTest);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if we got here, then we aren't on a reference path
|
||||||
|
if ( allowNonRefPaths ) {
|
||||||
|
edges = new HashSet<>(edges); // edges was immutable
|
||||||
|
edges.removeAll(blacklistedEdges);
|
||||||
|
if ( edges.size() == 1 )
|
||||||
|
return getEdgeTarget(edges.iterator().next());
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -315,13 +315,13 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
||||||
*/
|
*/
|
||||||
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||||
|
|
||||||
// find the lowest common ancestor path between vertex and the reference sink if available
|
// find the lowest common ancestor path between this vertex and the diverging master path if available
|
||||||
final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestorOfReference(vertex, pruneFactor);
|
final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor);
|
||||||
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
|
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// now get the reference path from the LCA
|
// now get the reference path from the LCA
|
||||||
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards);
|
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards, Arrays.asList(incomingEdgeOf(altPath.get(1))));
|
||||||
|
|
||||||
// create the Smith-Waterman strings to use
|
// create the Smith-Waterman strings to use
|
||||||
final byte[] refBases = getBasesForPath(refPath, false);
|
final byte[] refBases = getBasesForPath(refPath, false);
|
||||||
|
|
@ -348,7 +348,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// now get the reference path from the LCA
|
// now get the reference path from the LCA
|
||||||
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards);
|
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards, Collections.<MultiSampleEdge>emptyList());
|
||||||
|
|
||||||
// create the Smith-Waterman strings to use
|
// create the Smith-Waterman strings to use
|
||||||
final byte[] refBases = getBasesForPath(refPath, true);
|
final byte[] refBases = getBasesForPath(refPath, true);
|
||||||
|
|
@ -360,19 +360,19 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the path upwards in the graph from this vertex to the reference sequence, including the lowest common ancestor vertex.
|
* Finds the path upwards in the graph from this vertex to the first diverging node, including that (lowest common ancestor) vertex.
|
||||||
* Note that nodes are excluded if their pruning weight is less than the pruning factor.
|
* Note that nodes are excluded if their pruning weight is less than the pruning factor.
|
||||||
*
|
*
|
||||||
* @param vertex the original vertex
|
* @param vertex the original vertex
|
||||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||||
* @return the path if it can be determined or null if this vertex either doesn't merge onto the reference path or
|
* @return the path if it can be determined or null if this vertex either doesn't merge onto another path or
|
||||||
* has an ancestor with multiple incoming edges before hitting the reference path
|
* has an ancestor with multiple incoming edges before hitting the reference path
|
||||||
*/
|
*/
|
||||||
protected List<MultiDeBruijnVertex> findPathUpwardsToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
protected List<MultiDeBruijnVertex> findPathUpwardsToLowestCommonAncestor(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||||
final LinkedList<MultiDeBruijnVertex> path = new LinkedList<>();
|
final LinkedList<MultiDeBruijnVertex> path = new LinkedList<>();
|
||||||
|
|
||||||
MultiDeBruijnVertex v = vertex;
|
MultiDeBruijnVertex v = vertex;
|
||||||
while ( ! isReferenceNode(v) && inDegreeOf(v) == 1 ) {
|
while ( inDegreeOf(v) == 1 && outDegreeOf(v) < 2 ) {
|
||||||
final MultiSampleEdge edge = incomingEdgeOf(v);
|
final MultiSampleEdge edge = incomingEdgeOf(v);
|
||||||
// if it has too low a weight, don't use it (or previous vertexes) for the path
|
// if it has too low a weight, don't use it (or previous vertexes) for the path
|
||||||
if ( edge.getPruningMultiplicity() < pruneFactor )
|
if ( edge.getPruningMultiplicity() < pruneFactor )
|
||||||
|
|
@ -384,7 +384,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
||||||
}
|
}
|
||||||
path.addFirst(v);
|
path.addFirst(v);
|
||||||
|
|
||||||
return isReferenceNode(v) ? path : null;
|
return outDegreeOf(v) > 1 ? path : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -426,17 +426,19 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
||||||
*
|
*
|
||||||
* @param start the reference vertex to start from
|
* @param start the reference vertex to start from
|
||||||
* @param direction describes which direction to move in the graph (i.e. down to the reference sink or up to the source)
|
* @param direction describes which direction to move in the graph (i.e. down to the reference sink or up to the source)
|
||||||
|
* @param blacklistedEdges edges to ignore in the traversal down; useful to exclude the non-reference dangling paths
|
||||||
* @return the path (non-null, non-empty)
|
* @return the path (non-null, non-empty)
|
||||||
*/
|
*/
|
||||||
protected List<MultiDeBruijnVertex> getReferencePath(final MultiDeBruijnVertex start, final TraversalDirection direction) {
|
protected List<MultiDeBruijnVertex> getReferencePath(final MultiDeBruijnVertex start,
|
||||||
if ( ! isReferenceNode(start) ) throw new IllegalArgumentException("Cannot construct the reference path from a vertex that is not on that path");
|
final TraversalDirection direction,
|
||||||
|
final Collection<MultiSampleEdge> blacklistedEdges) {
|
||||||
|
|
||||||
final List<MultiDeBruijnVertex> path = new ArrayList<>();
|
final List<MultiDeBruijnVertex> path = new ArrayList<>();
|
||||||
|
|
||||||
MultiDeBruijnVertex v = start;
|
MultiDeBruijnVertex v = start;
|
||||||
while ( v != null ) {
|
while ( v != null ) {
|
||||||
path.add(v);
|
path.add(v);
|
||||||
v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v) : getPrevReferenceVertex(v));
|
v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v, true, blacklistedEdges) : getPrevReferenceVertex(v));
|
||||||
}
|
}
|
||||||
|
|
||||||
return path;
|
return path;
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
||||||
private final int maxAllowedPathsForReadThreadingAssembler;
|
private final int maxAllowedPathsForReadThreadingAssembler;
|
||||||
|
|
||||||
private final boolean dontIncreaseKmerSizesForCycles;
|
private final boolean dontIncreaseKmerSizesForCycles;
|
||||||
|
private final boolean allowNonUniqueKmersInRef;
|
||||||
private final int numPruningSamples;
|
private final int numPruningSamples;
|
||||||
protected boolean removePathsNotConnectedToRef = true;
|
protected boolean removePathsNotConnectedToRef = true;
|
||||||
private boolean justReturnRawGraph = false;
|
private boolean justReturnRawGraph = false;
|
||||||
|
|
@ -81,16 +82,17 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
||||||
this(DEFAULT_NUM_PATHS_PER_GRAPH, Arrays.asList(25));
|
this(DEFAULT_NUM_PATHS_PER_GRAPH, Arrays.asList(25));
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final int numPruningSamples) {
|
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef, final int numPruningSamples) {
|
||||||
super(maxAllowedPathsForReadThreadingAssembler);
|
super(maxAllowedPathsForReadThreadingAssembler);
|
||||||
this.kmerSizes = kmerSizes;
|
this.kmerSizes = kmerSizes;
|
||||||
this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler;
|
this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler;
|
||||||
this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
|
this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
|
||||||
|
this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef;
|
||||||
this.numPruningSamples = numPruningSamples;
|
this.numPruningSamples = numPruningSamples;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes) {
|
protected ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes) {
|
||||||
this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, 1);
|
this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, true, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** for testing purposes */
|
/** for testing purposes */
|
||||||
|
|
@ -109,7 +111,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
||||||
|
|
||||||
// first, try using the requested kmer sizes
|
// first, try using the requested kmer sizes
|
||||||
for ( final int kmerSize : kmerSizes ) {
|
for ( final int kmerSize : kmerSizes ) {
|
||||||
addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, dontIncreaseKmerSizesForCycles));
|
addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef));
|
||||||
}
|
}
|
||||||
|
|
||||||
// if none of those worked, iterate over larger sizes if allowed to do so
|
// if none of those worked, iterate over larger sizes if allowed to do so
|
||||||
|
|
@ -118,7 +120,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
||||||
int numIterations = 1;
|
int numIterations = 1;
|
||||||
while ( results.isEmpty() && numIterations <= MAX_KMER_ITERATIONS_TO_ATTEMPT ) {
|
while ( results.isEmpty() && numIterations <= MAX_KMER_ITERATIONS_TO_ATTEMPT ) {
|
||||||
// on the last attempt we will allow low complexity graphs
|
// on the last attempt we will allow low complexity graphs
|
||||||
addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, numIterations == MAX_KMER_ITERATIONS_TO_ATTEMPT));
|
final boolean lastAttempt = numIterations == MAX_KMER_ITERATIONS_TO_ATTEMPT;
|
||||||
|
addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, lastAttempt, lastAttempt));
|
||||||
kmerSize += KMER_SIZE_ITERATION_INCREASE;
|
kmerSize += KMER_SIZE_ITERATION_INCREASE;
|
||||||
numIterations++;
|
numIterations++;
|
||||||
}
|
}
|
||||||
|
|
@ -135,18 +138,25 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
||||||
* @param kmerSize kmer size
|
* @param kmerSize kmer size
|
||||||
* @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph
|
* @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph
|
||||||
* @param allowLowComplexityGraphs if true, do not check for low-complexity graphs
|
* @param allowLowComplexityGraphs if true, do not check for low-complexity graphs
|
||||||
|
* @param allowNonUniqueKmersInRef if true, do not fail if the reference has non-unique kmers
|
||||||
* @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths or is low complexity)
|
* @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths or is low complexity)
|
||||||
*/
|
*/
|
||||||
protected AssemblyResult createGraph(final List<GATKSAMRecord> reads,
|
protected AssemblyResult createGraph(final List<GATKSAMRecord> reads,
|
||||||
final Haplotype refHaplotype,
|
final Haplotype refHaplotype,
|
||||||
final int kmerSize,
|
final int kmerSize,
|
||||||
final List<Haplotype> activeAlleleHaplotypes,
|
final List<Haplotype> activeAlleleHaplotypes,
|
||||||
final boolean allowLowComplexityGraphs) {
|
final boolean allowLowComplexityGraphs,
|
||||||
|
final boolean allowNonUniqueKmersInRef) {
|
||||||
if ( refHaplotype.length() < kmerSize ) {
|
if ( refHaplotype.length() < kmerSize ) {
|
||||||
// happens in cases where the assembled region is just too small
|
// happens in cases where the assembled region is just too small
|
||||||
return new AssemblyResult(AssemblyResult.Status.FAILED, null);
|
return new AssemblyResult(AssemblyResult.Status.FAILED, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( !allowNonUniqueKmersInRef && !ReadThreadingGraph.determineNonUniqueKmers(new SequenceForKmers("ref", refHaplotype.getBases(), 0, refHaplotype.getBases().length, 1, true), kmerSize).isEmpty() ) {
|
||||||
|
if ( debug ) logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because reference contains non-unique kmers");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
|
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
|
||||||
|
|
||||||
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
|
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
|
||||||
|
|
|
||||||
|
|
@ -435,7 +435,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
||||||
* @param kmerSize the size of the kmers
|
* @param kmerSize the size of the kmers
|
||||||
* @return a non-null collection of non-unique kmers in sequence
|
* @return a non-null collection of non-unique kmers in sequence
|
||||||
*/
|
*/
|
||||||
private Collection<Kmer> determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) {
|
static protected Collection<Kmer> determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) {
|
||||||
// count up occurrences of kmers within each read
|
// count up occurrences of kmers within each read
|
||||||
final KMerCounter counter = new KMerCounter(kmerSize);
|
final KMerCounter counter = new KMerCounter(kmerSize);
|
||||||
final int stopPosition = seqForKmers.stop - kmerSize;
|
final int stopPosition = seqForKmers.stop - kmerSize;
|
||||||
|
|
|
||||||
|
|
@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||||
"b63bf0f2848f28e52fd5c7621bbd27c7");
|
"3c02e454fe6d0e296effd99fbd3cba4c");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
|
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
|
||||||
|
|
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
||||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
||||||
"972a726b0ba476f6215bf162439db2cd");
|
"f0560ba2ca20e5202181d0e57b1ee8cf");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -67,9 +67,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
||||||
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
|
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
|
||||||
|
|
||||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "662481d96a58c5475dc4752466a8d3b2"});
|
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "f321fadf3d6b4608536fba1015e9693a"});
|
||||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d9f7fd119eec40b4610cb4aae1cbed75"});
|
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "8d5626262b255ce6d4a53394ea8cd30f"});
|
||||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "cbf6d876045051a68aca784491cca6cf"});
|
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "126c94d06e00c67380051c4924054841"});
|
||||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
|
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
|
||||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"});
|
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"});
|
||||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "e6ec90da053a612f0c615e221eb34baa"});
|
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "e6ec90da053a612f0c615e221eb34baa"});
|
||||||
|
|
@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
||||||
public void testWrongGVCFNonVariantRecordOrderBugFix() {
|
public void testWrongGVCFNonVariantRecordOrderBugFix() {
|
||||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||||
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5fbc88cf1136212afac1fd0b7e0e8ce8"));
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fecd24420579f321157e5605b6cd7f2"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
||||||
}
|
}
|
||||||
|
|
@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
||||||
public void testNoCallGVCFMissingPLsBugFix() {
|
public void testNoCallGVCFMissingPLsBugFix() {
|
||||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||||
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1b77bc92962fa10b5eec86fe9400c528"));
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("95bae0b4a8fe9bfcf71762704b22a76e"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
|
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSample() {
|
public void testHaplotypeCallerMultiSample() {
|
||||||
HCTest(CEUTRIO_BAM, "", "b66f8d08e0b94ddc52bd06dadbb9b299");
|
HCTest(CEUTRIO_BAM, "", "60e2f0c3ce33a05c060035d86bc79543");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -94,17 +94,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMinBaseQuality() {
|
public void testHaplotypeCallerMinBaseQuality() {
|
||||||
HCTest(NA12878_BAM, "-mbq 15", "f753c9e284eafb1424b7f9d88193fdee");
|
HCTest(NA12878_BAM, "-mbq 15", "d063c0e5af1fd413be0500609ae36d46");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerGraphBasedSingleSample() {
|
public void testHaplotypeCallerGraphBasedSingleSample() {
|
||||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "5a49b5b98247070e8de637a706b02db9");
|
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "07910f50710349eacd2560452fac3e8d");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerGraphBasedMultiSample() {
|
public void testHaplotypeCallerGraphBasedMultiSample() {
|
||||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "f200260127d0f63cc61ce5f2287bd5a0");
|
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "c5ef449a46b80b69dde87aa52041fe50");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false) // can't annotate the rsID's yet
|
@Test(enabled = false) // can't annotate the rsID's yet
|
||||||
|
|
@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGA() {
|
public void testHaplotypeCallerMultiSampleGGA() {
|
||||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||||
"4b1271aaa70f46ed6c987cdf4610b7f0");
|
"b61e0bdf0e3180cb4f5abd3491b05aa6");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -131,7 +131,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "575850d9a8195000a2ef747f838aaf77");
|
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "ec90508248b82800eb2596348a7cf85b");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
||||||
|
|
@ -199,7 +199,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void HCTestDanglingTailMergingForDeletions() throws IOException {
|
public void HCTestDanglingTailMergingForDeletions() throws IOException {
|
||||||
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800";
|
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800 --allowNonUniqueKmersInRef";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||||
final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0);
|
final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0);
|
||||||
|
|
||||||
|
|
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestDBSNPAnnotationWGS() {
|
public void HCTestDBSNPAnnotationWGS() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||||
Arrays.asList("cf314806fcb0847e2ed99c7013e5d55d"));
|
Arrays.asList("10a7a3d49cb6cb172ae8404996c524df"));
|
||||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||||
Arrays.asList("0e68c82163d4fee30b4135d4ebd1a3ba"));
|
Arrays.asList("14384fe06359bf35e11d3802217e1a23"));
|
||||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestAggressivePcrIndelModelWGS() {
|
public void HCTestAggressivePcrIndelModelWGS() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||||
Arrays.asList("be7bf9e8f78d6a9ee569b750054991ac"));
|
Arrays.asList("daf2a533d83f1a5fd7d0e5a92d67fd64"));
|
||||||
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
|
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestConservativePcrIndelModelWGS() {
|
public void HCTestConservativePcrIndelModelWGS() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||||
Arrays.asList("01b716d421dc33f7a6bc8c387b7e3322"));
|
Arrays.asList("69bccf72c900361775d076d7d131955d"));
|
||||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -313,7 +313,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void testMissingKeyAlternativeHaplotypesBugFix() {
|
public void testMissingKeyAlternativeHaplotypesBugFix() {
|
||||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||||
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
|
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cd33c69a34a66b5baca72841fe8a2bc0"));
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4322083ad7decca26a3257be2f1d0347"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
|
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
|
||||||
}
|
}
|
||||||
|
|
@ -336,9 +336,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
// but please make sure that both outputs get the same variant,
|
// but please make sure that both outputs get the same variant,
|
||||||
// alleles all with DBSNP ids
|
// alleles all with DBSNP ids
|
||||||
// We test here that change in active region size does not have an effect in placement of indels.
|
// We test here that change in active region size does not have an effect in placement of indels.
|
||||||
final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList("0f87fd95350fade475fe3c5df8dd9e08"));
|
final String md5 = "54c2cea6e2266fe3ff05c1b5d58bb609";
|
||||||
|
final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList(md5));
|
||||||
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec);
|
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec);
|
||||||
final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList("b3a2853d51a7817999a3be8e72d9ac23"));
|
final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList(md5));
|
||||||
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
|
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
|
||||||
List<Object[]> tests = new ArrayList<>();
|
List<Object[]> tests = new ArrayList<>();
|
||||||
|
|
||||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||||
tests.add(new Object[]{nct, "a2718251ffae9db885b7f74b33dd5b57"});
|
tests.add(new Object[]{nct, "af29c396835b86d88f366629884e6ba4"});
|
||||||
}
|
}
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue