Improved tail merging: tails can now be merged into branches that are not entirely on the reference path.
This is useful, for example, in cases where there are SNPs on insertions. Before, tails were (incorrectly) forced to be merged only to a reference node; now they can be merged to any path in the graph from which they directly branch. Also, I've transferred over Ryan's code to refuse to process kmer sizes for which the reference sequence contains non-unique kmers.
This commit is contained in:
parent
5eee065133
commit
1d97b4a191
|
|
@ -273,6 +273,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
@Argument(fullName="dontIncreaseKmerSizesForCycles", shortName="dontIncreaseKmerSizesForCycles", doc="Should we disable the iterating over kmer sizes when graph cycles are detected?", required = false)
|
||||
protected boolean dontIncreaseKmerSizesForCycles = false;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="allowNonUniqueKmersInRef", shortName="allowNonUniqueKmersInRef", doc="Should we allow graphs which have non-unique kmers in the reference?", required = false)
|
||||
protected boolean allowNonUniqueKmersInRef = false;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false)
|
||||
protected int numPruningSamples = 1;
|
||||
|
|
@ -615,7 +619,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
}
|
||||
|
||||
// create and setup the assembler
|
||||
assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples);
|
||||
assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef, numPruningSamples);
|
||||
|
||||
assemblyEngine.setErrorCorrectKmers(errorCorrectKmers);
|
||||
assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR);
|
||||
|
|
|
|||
|
|
@ -260,15 +260,39 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
|
|||
/**
|
||||
* Traverse the graph and get the next reference vertex if it exists
|
||||
* @param v the current vertex, can be null
|
||||
* @return the next reference vertex if it exists
|
||||
* @return the next reference vertex if it exists, otherwise null
|
||||
*/
|
||||
public V getNextReferenceVertex( final V v ) {
|
||||
return getNextReferenceVertex(v, false, Collections.<MultiSampleEdge>emptyList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverse the graph and get the next reference vertex if it exists
|
||||
* @param v the current vertex, can be null
|
||||
* @param allowNonRefPaths if true, allow sub-paths that are non-reference if there is only a single outgoing edge
|
||||
* @param blacklistedEdges edges to ignore in the traversal down; useful to exclude the non-reference dangling paths
|
||||
* @return the next vertex (but not necessarily on the reference path if allowNonRefPaths is true) if it exists, otherwise null
|
||||
*/
|
||||
public V getNextReferenceVertex( final V v, final boolean allowNonRefPaths, final Collection<MultiSampleEdge> blacklistedEdges ) {
|
||||
if( v == null ) { return null; }
|
||||
for( final E edgeToTest : outgoingEdgesOf(v) ) {
|
||||
|
||||
// variable must be mutable because outgoingEdgesOf is an immutable collection
|
||||
Set<E> edges = outgoingEdgesOf(v);
|
||||
|
||||
for( final E edgeToTest : edges ) {
|
||||
if( edgeToTest.isRef() ) {
|
||||
return getEdgeTarget(edgeToTest);
|
||||
}
|
||||
}
|
||||
|
||||
// if we got here, then we aren't on a reference path
|
||||
if ( allowNonRefPaths ) {
|
||||
edges = new HashSet<>(edges); // edges was immutable
|
||||
edges.removeAll(blacklistedEdges);
|
||||
if ( edges.size() == 1 )
|
||||
return getEdgeTarget(edges.iterator().next());
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -315,13 +315,13 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
*/
|
||||
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
|
||||
// find the lowest common ancestor path between vertex and the reference sink if available
|
||||
final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestorOfReference(vertex, pruneFactor);
|
||||
// find the lowest common ancestor path between this vertex and the diverging master path if available
|
||||
final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor);
|
||||
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
|
||||
return null;
|
||||
|
||||
// now get the reference path from the LCA
|
||||
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards);
|
||||
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards, Arrays.asList(incomingEdgeOf(altPath.get(1))));
|
||||
|
||||
// create the Smith-Waterman strings to use
|
||||
final byte[] refBases = getBasesForPath(refPath, false);
|
||||
|
|
@ -348,7 +348,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
return null;
|
||||
|
||||
// now get the reference path from the LCA
|
||||
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards);
|
||||
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards, Collections.<MultiSampleEdge>emptyList());
|
||||
|
||||
// create the Smith-Waterman strings to use
|
||||
final byte[] refBases = getBasesForPath(refPath, true);
|
||||
|
|
@ -360,19 +360,19 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
}
|
||||
|
||||
/**
|
||||
* Finds the path upwards in the graph from this vertex to the reference sequence, including the lowest common ancestor vertex.
|
||||
* Finds the path upwards in the graph from this vertex to the first diverging node, including that (lowest common ancestor) vertex.
|
||||
* Note that nodes are excluded if their pruning weight is less than the pruning factor.
|
||||
*
|
||||
* @param vertex the original vertex
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @return the path if it can be determined or null if this vertex either doesn't merge onto the reference path or
|
||||
* @return the path if it can be determined or null if this vertex either doesn't merge onto another path or
|
||||
* has an ancestor with multiple incoming edges before hitting the reference path
|
||||
*/
|
||||
protected List<MultiDeBruijnVertex> findPathUpwardsToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
protected List<MultiDeBruijnVertex> findPathUpwardsToLowestCommonAncestor(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
final LinkedList<MultiDeBruijnVertex> path = new LinkedList<>();
|
||||
|
||||
MultiDeBruijnVertex v = vertex;
|
||||
while ( ! isReferenceNode(v) && inDegreeOf(v) == 1 ) {
|
||||
while ( inDegreeOf(v) == 1 && outDegreeOf(v) < 2 ) {
|
||||
final MultiSampleEdge edge = incomingEdgeOf(v);
|
||||
// if it has too low a weight, don't use it (or previous vertexes) for the path
|
||||
if ( edge.getPruningMultiplicity() < pruneFactor )
|
||||
|
|
@ -384,7 +384,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
}
|
||||
path.addFirst(v);
|
||||
|
||||
return isReferenceNode(v) ? path : null;
|
||||
return outDegreeOf(v) > 1 ? path : null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -426,17 +426,19 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
|
|||
*
|
||||
* @param start the reference vertex to start from
|
||||
* @param direction describes which direction to move in the graph (i.e. down to the reference sink or up to the source)
|
||||
* @param blacklistedEdges edges to ignore in the traversal down; useful to exclude the non-reference dangling paths
|
||||
* @return the path (non-null, non-empty)
|
||||
*/
|
||||
protected List<MultiDeBruijnVertex> getReferencePath(final MultiDeBruijnVertex start, final TraversalDirection direction) {
|
||||
if ( ! isReferenceNode(start) ) throw new IllegalArgumentException("Cannot construct the reference path from a vertex that is not on that path");
|
||||
protected List<MultiDeBruijnVertex> getReferencePath(final MultiDeBruijnVertex start,
|
||||
final TraversalDirection direction,
|
||||
final Collection<MultiSampleEdge> blacklistedEdges) {
|
||||
|
||||
final List<MultiDeBruijnVertex> path = new ArrayList<>();
|
||||
|
||||
MultiDeBruijnVertex v = start;
|
||||
while ( v != null ) {
|
||||
path.add(v);
|
||||
v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v) : getPrevReferenceVertex(v));
|
||||
v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v, true, blacklistedEdges) : getPrevReferenceVertex(v));
|
||||
}
|
||||
|
||||
return path;
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
private final int maxAllowedPathsForReadThreadingAssembler;
|
||||
|
||||
private final boolean dontIncreaseKmerSizesForCycles;
|
||||
private final boolean allowNonUniqueKmersInRef;
|
||||
private final int numPruningSamples;
|
||||
protected boolean removePathsNotConnectedToRef = true;
|
||||
private boolean justReturnRawGraph = false;
|
||||
|
|
@ -81,16 +82,17 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
this(DEFAULT_NUM_PATHS_PER_GRAPH, Arrays.asList(25));
|
||||
}
|
||||
|
||||
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final int numPruningSamples) {
|
||||
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef, final int numPruningSamples) {
|
||||
super(maxAllowedPathsForReadThreadingAssembler);
|
||||
this.kmerSizes = kmerSizes;
|
||||
this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler;
|
||||
this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
|
||||
this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef;
|
||||
this.numPruningSamples = numPruningSamples;
|
||||
}
|
||||
|
||||
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes) {
|
||||
this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, 1);
|
||||
protected ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes) {
|
||||
this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, true, 1);
|
||||
}
|
||||
|
||||
/** for testing purposes */
|
||||
|
|
@ -109,7 +111,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
|
||||
// first, try using the requested kmer sizes
|
||||
for ( final int kmerSize : kmerSizes ) {
|
||||
addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, dontIncreaseKmerSizesForCycles));
|
||||
addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef));
|
||||
}
|
||||
|
||||
// if none of those worked, iterate over larger sizes if allowed to do so
|
||||
|
|
@ -118,7 +120,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
int numIterations = 1;
|
||||
while ( results.isEmpty() && numIterations <= MAX_KMER_ITERATIONS_TO_ATTEMPT ) {
|
||||
// on the last attempt we will allow low complexity graphs
|
||||
addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, numIterations == MAX_KMER_ITERATIONS_TO_ATTEMPT));
|
||||
final boolean lastAttempt = numIterations == MAX_KMER_ITERATIONS_TO_ATTEMPT;
|
||||
addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, lastAttempt, lastAttempt));
|
||||
kmerSize += KMER_SIZE_ITERATION_INCREASE;
|
||||
numIterations++;
|
||||
}
|
||||
|
|
@ -135,18 +138,25 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
* @param kmerSize kmer size
|
||||
* @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph
|
||||
* @param allowLowComplexityGraphs if true, do not check for low-complexity graphs
|
||||
* @param allowNonUniqueKmersInRef if true, do not fail if the reference has non-unique kmers
|
||||
* @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths or is low complexity)
|
||||
*/
|
||||
protected AssemblyResult createGraph(final List<GATKSAMRecord> reads,
|
||||
final Haplotype refHaplotype,
|
||||
final int kmerSize,
|
||||
final List<Haplotype> activeAlleleHaplotypes,
|
||||
final boolean allowLowComplexityGraphs) {
|
||||
final boolean allowLowComplexityGraphs,
|
||||
final boolean allowNonUniqueKmersInRef) {
|
||||
if ( refHaplotype.length() < kmerSize ) {
|
||||
// happens in cases where the assembled region is just too small
|
||||
return new AssemblyResult(AssemblyResult.Status.FAILED, null);
|
||||
}
|
||||
|
||||
if ( !allowNonUniqueKmersInRef && !ReadThreadingGraph.determineNonUniqueKmers(new SequenceForKmers("ref", refHaplotype.getBases(), 0, refHaplotype.getBases().length, 1, true), kmerSize).isEmpty() ) {
|
||||
if ( debug ) logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because reference contains non-unique kmers");
|
||||
return null;
|
||||
}
|
||||
|
||||
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);
|
||||
|
||||
rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
|
||||
|
|
|
|||
|
|
@ -435,7 +435,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
|||
* @param kmerSize the size of the kmers
|
||||
* @return a non-null collection of non-unique kmers in sequence
|
||||
*/
|
||||
private Collection<Kmer> determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) {
|
||||
static protected Collection<Kmer> determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) {
|
||||
// count up occurrences of kmers within each read
|
||||
final KMerCounter counter = new KMerCounter(kmerSize);
|
||||
final int stopPosition = seqForKmers.stop - kmerSize;
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"b63bf0f2848f28e52fd5c7621bbd27c7");
|
||||
"3c02e454fe6d0e296effd99fbd3cba4c");
|
||||
}
|
||||
|
||||
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
|
||||
|
|
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
||||
"972a726b0ba476f6215bf162439db2cd");
|
||||
"f0560ba2ca20e5202181d0e57b1ee8cf");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,9 +67,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "662481d96a58c5475dc4752466a8d3b2"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d9f7fd119eec40b4610cb4aae1cbed75"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "cbf6d876045051a68aca784491cca6cf"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "f321fadf3d6b4608536fba1015e9693a"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "8d5626262b255ce6d4a53394ea8cd30f"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "126c94d06e00c67380051c4924054841"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "e6ec90da053a612f0c615e221eb34baa"});
|
||||
|
|
@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testWrongGVCFNonVariantRecordOrderBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5fbc88cf1136212afac1fd0b7e0e8ce8"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fecd24420579f321157e5605b6cd7f2"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
||||
}
|
||||
|
|
@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testNoCallGVCFMissingPLsBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1b77bc92962fa10b5eec86fe9400c528"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("95bae0b4a8fe9bfcf71762704b22a76e"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "b66f8d08e0b94ddc52bd06dadbb9b299");
|
||||
HCTest(CEUTRIO_BAM, "", "60e2f0c3ce33a05c060035d86bc79543");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -94,17 +94,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMinBaseQuality() {
|
||||
HCTest(NA12878_BAM, "-mbq 15", "f753c9e284eafb1424b7f9d88193fdee");
|
||||
HCTest(NA12878_BAM, "-mbq 15", "d063c0e5af1fd413be0500609ae36d46");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedSingleSample() {
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "5a49b5b98247070e8de637a706b02db9");
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "07910f50710349eacd2560452fac3e8d");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "f200260127d0f63cc61ce5f2287bd5a0");
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "c5ef449a46b80b69dde87aa52041fe50");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
|
|
@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"4b1271aaa70f46ed6c987cdf4610b7f0");
|
||||
"b61e0bdf0e3180cb4f5abd3491b05aa6");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -131,7 +131,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "575850d9a8195000a2ef747f838aaf77");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "ec90508248b82800eb2596348a7cf85b");
|
||||
}
|
||||
|
||||
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
||||
|
|
@ -199,7 +199,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void HCTestDanglingTailMergingForDeletions() throws IOException {
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800 --allowNonUniqueKmersInRef";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||
final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0);
|
||||
|
||||
|
|
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("cf314806fcb0847e2ed99c7013e5d55d"));
|
||||
Arrays.asList("10a7a3d49cb6cb172ae8404996c524df"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("0e68c82163d4fee30b4135d4ebd1a3ba"));
|
||||
Arrays.asList("14384fe06359bf35e11d3802217e1a23"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestAggressivePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("be7bf9e8f78d6a9ee569b750054991ac"));
|
||||
Arrays.asList("daf2a533d83f1a5fd7d0e5a92d67fd64"));
|
||||
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestConservativePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("01b716d421dc33f7a6bc8c387b7e3322"));
|
||||
Arrays.asList("69bccf72c900361775d076d7d131955d"));
|
||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -313,7 +313,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testMissingKeyAlternativeHaplotypesBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cd33c69a34a66b5baca72841fe8a2bc0"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4322083ad7decca26a3257be2f1d0347"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
|
||||
}
|
||||
|
|
@ -336,9 +336,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
// but please make sure that both outputs get the same variant,
|
||||
// alleles all with DBSNP ids
|
||||
// We test here that change in active region size does not have an effect in placement of indels.
|
||||
final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList("0f87fd95350fade475fe3c5df8dd9e08"));
|
||||
final String md5 = "54c2cea6e2266fe3ff05c1b5d58bb609";
|
||||
final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList(md5));
|
||||
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec);
|
||||
final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList("b3a2853d51a7817999a3be8e72d9ac23"));
|
||||
final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList(md5));
|
||||
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
|
|||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||
tests.add(new Object[]{nct, "a2718251ffae9db885b7f74b33dd5b57"});
|
||||
tests.add(new Object[]{nct, "af29c396835b86d88f366629884e6ba4"});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
Loading…
Reference in New Issue