diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPaths.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java
similarity index 59%
rename from protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPaths.java
rename to protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java
index 3ba85dd92..8fba6c9d5 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPaths.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java
@@ -1,185 +1,194 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
-*
+*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
-*
+*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
-*
+*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
-*
+*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
-*
+*
* 2. LICENSE
-* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
-* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
-*
-* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
-*
+*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
-*
+*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
-*
+*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
-*
+*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
-* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
-* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
-
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
-import com.google.common.collect.MinMaxPriorityQueue;
-import com.google.java.contract.Ensures;
-
-import java.io.Serializable;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.PriorityQueue;
/**
- * Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph.
- * This is different from most graph traversals because we want to test paths from any source node to any sink node.
+ * K-best sub-haplotype finder that selects the best solutions out of a collection of sub-haplotype finders.
*
- * User: ebanks, rpoplin, mdepristo
- * Date: Mar 23, 2011
+ * @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
*/
-public class KBestPaths {
- private final boolean allowCycles;
+class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
/**
- * Create a new KBestPaths finder that follows cycles in the graph
+ * Collection of subFinders that provided the actual solutions.
*/
- public KBestPaths() {
- this(true);
- }
+ private final Collection extends KBestSubHaplotypeFinder> subFinders;
/**
- * Create a new KBestPaths finder
+ * Flag indicating whether the sub-finders have been processed or not.
+ */
+ private boolean processedSubFinders = false;
+
+ /**
+ * Holds the number of k-best solution that this finder would ever return.
+ */
+ private int count = 0;
+
+ /**
+ * Holds the best {@code i} paths to the sink so far calculated where {@code i+1} is the length of this list.
*
- * @param allowCycles should we allow paths that follow cycles in the graph?
+ *
As more results are requested the array will grow. All positions and solutions are
+ * calculated up to {@code i}
.
*/
- public KBestPaths(final boolean allowCycles) {
- this.allowCycles = allowCycles;
- }
-
- protected static class MyInt { public int val = 0; }
+ private ArrayList rankedSubHaplotype;
/**
- * Compare paths such that paths with greater weight are earlier in a list
+ * Priority queue with next best haplotype solution from each sub-finder; previous ones are
+ * already part {@link #rankedSubHaplotype}.
*/
- protected static class PathComparatorTotalScore implements Comparator, Serializable {
+ private PriorityQueue nextBestSubHaplotypes;
+
+ /**
+ * Creates a new aggregated sub-haplotype finder given its sub-finders.
+ * @param finders set of sub-finders.
+ */
+ public AggregatedSubHaplotypeFinder(final Collection extends KBestSubHaplotypeFinder> finders) {
+ if (finders == null) throw new IllegalArgumentException("finder collection cannot be null");
+ this.subFinders = finders;
+ }
+
+ @Override
+ public int getCount() {
+ processSubFindersIfNeeded();
+ return count;
+ }
+
+ private void processSubFindersIfNeeded() {
+ if (processedSubFinders) return;
+
+ long count = 0;
+ nextBestSubHaplotypes = new PriorityQueue<>(subFinders.size());
+ for (final KBestSubHaplotypeFinder finder : subFinders) {
+ final int finderCount = finder.getCount();
+ if (finderCount == 0) continue;
+ count += finderCount;
+ nextBestSubHaplotypes.add(new MyKBestHaplotypeResult(finder,0));
+ }
+
+ this.count = (int) Math.min(Integer.MAX_VALUE,count);
+
+ rankedSubHaplotype = new ArrayList<>(10);
+ processedSubFinders = true;
+ }
+
+ @Override
+ public KBestHaplotype getKBest(int k) {
+ if (k < 0) throw new IllegalArgumentException("k cannot be negative");
+ processSubFindersIfNeeded();
+ if (k >= count) throw new IllegalArgumentException("k cannot be equal or greater than the count");
+ if (k < rankedSubHaplotype.size())
+ return rankedSubHaplotype.get(k);
+
+ rankedSubHaplotype.ensureCapacity(k+1);
+ for (int i = rankedSubHaplotype.size(); i <= k; i++) {
+ // since k < possibleHaplotypeCount is guarantee no to be empty.
+ if (nextBestSubHaplotypes.isEmpty())
+ throw new IllegalStateException("what the heck " + k + " " + count);
+ final MyKBestHaplotypeResult nextResult = nextBestSubHaplotypes.remove();
+ nextResult.rank = i;
+ rankedSubHaplotype.add(nextResult);
+ final int subRank = nextResult.result.rank();
+
+ // if there is no further solution from the same child we cannot add another solution from that child.
+ if (subRank + 1 >= nextResult.subFinder.getCount())
+ continue;
+ nextBestSubHaplotypes.add(new MyKBestHaplotypeResult(nextResult.subFinder, subRank + 1));
+ }
+ return rankedSubHaplotype.get(k);
+ }
+
+ /**
+ * Custom implementation of {@link KBestHaplotype} to encapsulate sub-finder results.
+ */
+ private class MyKBestHaplotypeResult extends KBestHaplotype {
+
+ private KBestSubHaplotypeFinder subFinder;
+
+ private final KBestHaplotype result;
+
+ private int rank;
+
+ private MyKBestHaplotypeResult(final KBestSubHaplotypeFinder finder, final int rank) {
+ this.subFinder = finder;
+ this.result = finder.getKBest(rank);
+ this.rank = -1;
+ }
+
@Override
- public int compare(final Path path1, final Path path2) {
- return path2.getScore() - path1.getScore();
- }
- }
-
- /**
- * @see #getKBestPaths(BaseGraph, int) retriving the best 1000 paths
- */
- public List> getKBestPaths( final BaseGraph graph ) {
- return getKBestPaths(graph, 1000);
- }
-
- /**
- * @see #getKBestPaths(BaseGraph, int, java.util.Set, java.util.Set) retriving the first 1000 paths
- * starting from all source vertices and ending with all sink vertices
- */
- public List> getKBestPaths( final BaseGraph graph, final int k ) {
- return getKBestPaths(graph, k, graph.getSources(), graph.getSinks());
- }
-
- /**
- * @see #getKBestPaths(BaseGraph, int, java.util.Set, java.util.Set) with k=1000
- */
- public List> getKBestPaths( final BaseGraph graph, final Set sources, final Set sinks ) {
- return getKBestPaths(graph, 1000, sources, sinks);
- }
-
- /**
- * @see #getKBestPaths(BaseGraph, int, java.util.Set, java.util.Set) with k=1000
- */
- public List> getKBestPaths( final BaseGraph graph, final T source, final T sink ) {
- return getKBestPaths(graph, 1000, source, sink);
- }
-
- /**
- * @see #getKBestPaths(BaseGraph, int, java.util.Set, java.util.Set) with singleton source and sink sets
- */
- public List> getKBestPaths( final BaseGraph graph, final int k, final T source, final T sink ) {
- return getKBestPaths(graph, k, Collections.singleton(source), Collections.singleton(sink));
- }
-
- /**
- * Traverse the graph and pull out the best k paths.
- * Paths are scored via their comparator function. The default being PathComparatorTotalScore()
- * @param graph the graph from which to pull paths
- * @param k the number of paths to find
- * @param sources a set of vertices we want to start paths with
- * @param sinks a set of vertices we want to end paths with
- * @return a list with at most k top-scoring paths from the graph
- */
- @Ensures({"result != null", "result.size() <= k"})
- public List> getKBestPaths( final BaseGraph graph, final int k, final Set sources, final Set sinks ) {
- if( graph == null ) { throw new IllegalArgumentException("Attempting to traverse a null graph."); }
-
- // a min max queue that will collect the best k paths
- final MinMaxPriorityQueue> bestPaths = MinMaxPriorityQueue.orderedBy(new PathComparatorTotalScore()).maximumSize(k).create();
-
- // run a DFS for best paths
- for ( final T source : sources ) {
- final Path startingPath = new Path(source, graph);
- findBestPaths(startingPath, sinks, bestPaths, new MyInt());
+ public SeqGraph graph() {
+ return result.graph();
}
- // the MinMaxPriorityQueue iterator returns items in an arbitrary order, so we need to sort the final result
- final List> toReturn = new ArrayList>(bestPaths);
- Collections.sort(toReturn, new PathComparatorTotalScore());
- return toReturn;
- }
+ @Override
+ public int score() {
+ return result.score();
+ }
- /**
- * Recursive algorithm to find the K best paths in the graph from the current path to any of the sinks
- * @param path the current path progress
- * @param sinks a set of nodes that are sinks. Will terminate and add a path if the last vertex of path is in this set
- * @param bestPaths a path to collect completed paths.
- * @param n used to limit the search by tracking the number of vertices visited across all paths
- */
- private void findBestPaths( final Path path, final Set sinks, final Collection> bestPaths, final MyInt n ) {
- if ( sinks.contains(path.getLastVertex())) {
- bestPaths.add(path);
- } else if( n.val > 10000 ) {
- // do nothing, just return, as we've done too much work already
- } else {
- // recursively run DFS
- final ArrayList edgeArrayList = new ArrayList(path.getOutgoingEdgesOfLastVertex());
- Collections.sort(edgeArrayList, new BaseEdge.EdgeWeightComparator());
- for ( final E edge : edgeArrayList ) {
- final T target = path.getGraph().getEdgeTarget(edge);
- // make sure the edge is not already in the path
- final boolean alreadyVisited = allowCycles ? path.containsEdge(edge) : path.containsVertex(target);
- if ( ! alreadyVisited ) {
- final Path newPath = new Path(path, edge);
- n.val++;
- findBestPaths(newPath, sinks, bestPaths, n);
- }
- }
+ @Override
+ public boolean isReference() {
+ return result.isReference();
+ }
+
+ @Override
+ public int rank() {
+ return rank;
+ }
+
+ @Override
+ protected SeqVertex head() {
+ return result.head();
+ }
+
+ @Override
+ protected KBestHaplotype tail() {
+ return result.tail();
}
}
-}
\ No newline at end of file
+}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java
index c9d51b81b..36216bdd2 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java
@@ -695,4 +695,21 @@ public class BaseGraph extends Default
public BaseGraph subsetToRefSource() {
return subsetToNeighbors(getReferenceSourceVertex(), 10);
}
+
+ /**
+ * Checks whether the graph contains all the vertices in a collection.
+ *
+ * @param vertices the vertices to check.
+ *
+ * @throws IllegalArgumentException if {@code vertices} is {@code null}.
+ *
+ * @return {@code true} if all the vertices in the input collection are present in this graph.
+ * Also if the input collection is empty. Otherwise it returns {@code false}.
+ */
+ public boolean containsAllVertices(final Collection extends V> vertices) {
+ if (vertices == null) throw new IllegalArgumentException("the input vertices collection cannot be null");
+ for (final V vertex : vertices)
+ if (!containsVertex(vertex)) return false;
+ return true;
+ }
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java
index 0a29bd08b..ae270ed7b 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java
@@ -46,9 +46,9 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
/**
- * Represent a trivial k-best sub haplotype finder with no solutions.
+ * Represents a trivial k-best sub haplotype finder with no solutions.
*
- *
To be used at vertices that do not have any valid path to the requested sink node
+ *
To be used at vertices that do not have any valid path to the requested sink vertices
*
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
*/
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java
index aa1f213fe..0e50ec02b 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java
@@ -52,14 +52,19 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
*/
class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
-
/**
* Caches the only solution returned by this finder.
*/
private final KBestHaplotype singleHaplotypePath;
- public EmptyPathHaplotypeFinderNode(final SeqGraph graph, final SeqVertex sink) {
- singleHaplotypePath = new MyBestHaplotypePath(graph,sink);
+ /**
+ * Constructs a new empty k-best haplotype finder.
+ *
+ * @param graph the search graph.
+ * @param vertex the source and sink vertex of the only solution returned by this finder.
+ */
+ public EmptyPathHaplotypeFinderNode(final SeqGraph graph, final SeqVertex vertex) {
+ singleHaplotypePath = new MyBestHaplotypePath(graph,vertex);
}
@Override
@@ -81,12 +86,29 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
*/
private class MyBestHaplotypePath extends KBestHaplotype {
+ /**
+ * The solution's only vertex.
+ */
private final SeqVertex vertex;
+ /**
+ * The search graph.
+ */
private final SeqGraph graph;
+ /**
+ * Whether the vertex is a reference vertex.
+ *
+ *
Initialize lazily.
+ */
private Boolean isReference;
+ /**
+ * Constructs a new empty k-best haplotype solution.
+ *
+ * @param graph the search graph.
+ * @param vertex the source and sink vertex of the only solution returned by the outer finder.
+ */
public MyBestHaplotypePath(final SeqGraph graph, final SeqVertex vertex) {
this.vertex = vertex;
this.graph = graph;
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java
index d88c17cbf..ca22f17ec 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java
@@ -52,7 +52,7 @@ import org.broadinstitute.sting.utils.haplotype.Haplotype;
*
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
*/
-public abstract class KBestHaplotype {
+public abstract class KBestHaplotype implements Comparable {
/**
* Returns the original graph searched.
@@ -143,6 +143,18 @@ public abstract class KBestHaplotype {
return path;
}
+ /**
+ * Compares k-best haplotypes based on the score where the one with larger score comes first (descending orther).
+ *
+ * @param other the other haplotype to compare to.
+ * @return {@code -1} if the current score is larger than {@code other}'s, {@code 0} if they are the same, {@code 1}
+ * if {@code other}'s score is larger.
+ */
+ public int compareTo(final KBestHaplotype other) {
+ if (other == null) throw new IllegalArgumentException("the other object cannot be null");
+ return - 1 * (score() - other.score());
+ }
+
/**
* The first vertex on the haplotype path.
*
@@ -156,6 +168,4 @@ public abstract class KBestHaplotype {
* @return {@code null} if there are no more vertices in the solution path a part from the one returned by {@link #head}.
*/
protected abstract KBestHaplotype tail();
-
-
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java
index 725fcae1a..f27cca12c 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java
@@ -56,49 +56,131 @@ import java.util.*;
*/
public class KBestHaplotypeFinder extends AbstractList implements Iterable {
-
+ /**
+ * The search graph.
+ */
private final SeqGraph graph;
- protected Map nodeBySource;
- protected SeqVertex sink;
- protected SeqVertex source;
+ /**
+ * Map of sub-haplotype finder by their source vertex.
+ */
+ protected Map finderByVertex;
+
+ /**
+ * Possible haplotype sink vertices.
+ */
+ protected Set sinks;
+
+ /**
+ * Possible haplotype source vertices.
+ */
+ protected Set sources;
+
+ /**
+ * The top finder.
+ *
+ *
If there is only a single source vertex, its finder is the top finder. However whent there
+ * is more than one possible source, we create a composite finder that alternates between individual source vertices
+ * for their best haplotypes.
+ */
+ private final KBestSubHaplotypeFinder topFinder;
/**
* Constructs a new best haplotypes finder.
*
* @param graph the seq-graph to search.
* @param source the source vertex for all haplotypes.
- * @param sink the sink vertex for all haplotypes.
+ * @param sink sink vertices for all haplotypes.
*
* @throws IllegalArgumentException if
*
any of {@code graph}, {@code source} or {@code sink} is {@code null} or
- *
either {@code source} or {@code sink} is not a vertex of {@code graph}'s.
+ *
either {@code source} or {@code sink} is not a vertex in {@code graph}.
*
*/
public KBestHaplotypeFinder(final SeqGraph graph, final SeqVertex source, final SeqVertex sink) {
- if (graph == null) throw new IllegalArgumentException("graph cannot be null");
- if (source == null) throw new IllegalArgumentException("source cannot be null");
- if (sink == null) throw new IllegalArgumentException("sink cannot be null");
- if (!graph.containsVertex(source)) throw new IllegalArgumentException("source does not belong to the graph");
- if (!graph.containsVertex(sink)) throw new IllegalArgumentException("sink does not belong to the graph");
- //TODO dealing with cycles here due to a bug in some of the graph transformations that produces cycles.
- //TODO Once that is solve, the conditional above should be removed in order the save time:
- //this.graph = graph;
- if (new CycleDetector<>(graph).detectCycles())
- this.graph = removeCycles(graph,source,sink);
- else
- this.graph = graph;
- nodeBySource = new HashMap<>(graph.vertexSet().size());
- this.sink = sink;
- this.source = source;
+ this(graph,Collections.singleton(source),Collections.singleton(sink));
}
- private static SeqGraph removeCycles(final SeqGraph original, final SeqVertex source, final SeqVertex sink) {
+ /**
+ * Constructs a new best haplotypes finder.
+ *
+ * @param graph the seq-graph to search.
+ * @param sources source vertices for all haplotypes.
+ * @param sinks sink vertices for all haplotypes.
+ *
+ * @throws IllegalArgumentException if
+ *
any of {@code graph}, {@code sources} or {@code sinks} is {@code null} or
+ *
any of {@code sources}' or any {@code sinks}' member is not a vertex in {@code graph}.
+ *
+ */
+ public KBestHaplotypeFinder(final SeqGraph graph, final Set sources, final Set sinks) {
+ if (graph == null) throw new IllegalArgumentException("graph cannot be null");
+ if (sources == null) throw new IllegalArgumentException("source cannot be null");
+ if (sinks == null) throw new IllegalArgumentException("sink cannot be null");
+ if (!graph.containsAllVertices(sources)) throw new IllegalArgumentException("source does not belong to the graph");
+ if (!graph.containsAllVertices(sinks)) throw new IllegalArgumentException("sink does not belong to the graph");
+
+ //TODO dealing with cycles here due to a bug in some of the graph transformations that produces cycles.
+ //TODO Once that is solve, the if-else below should be substituted by a throw if there is any cycles,
+ //TODO just the line commented out below if you want to trade early-bug-fail for speed.
+ //this.graph = graph;
+ this.graph = new CycleDetector<>(graph).detectCycles() ? removeCycles(graph,sources,sinks) : graph;
+
+ finderByVertex = new HashMap<>(this.graph.vertexSet().size());
+ this.sinks = sinks;
+ this.sources = sources;
+ if (sinks.size() == 0 || sources.size() == 0)
+ topFinder = DeadEndKBestSubHaplotypeFinder.INSTANCE;
+ else if (sources.size() == 1)
+ topFinder = createVertexFinder(sources.iterator().next());
+ else
+ topFinder = createAggregatedFinder();
+ }
+
+ /**
+ * Constructs a new best haplotype finder.
+ *
+ * It will consider all source and sink vertex when looking for haplotypes.
+ *
+ *
+ * @param graph the seq-graph to search for the best haplotypes.
+ */
+ public KBestHaplotypeFinder(SeqGraph graph) {
+ this(graph,graph.getSources(),graph.getSinks());
+ }
+
+ /**
+ * Creates an aggregated recursive finder to try all possible source vertices.
+ *
+ * @return never {@code null}.
+ */
+ private KBestSubHaplotypeFinder createAggregatedFinder() {
+ final List sourceFinders = new ArrayList<>(sources.size());
+ for (final SeqVertex source : sources)
+ sourceFinders.add(createVertexFinder(source));
+ return new AggregatedSubHaplotypeFinder(sourceFinders);
+ }
+
+ /**
+ * Removes edges that produces cycles and also dead vertices that do not lead to any sink vertex.
+ *
+ * @param original graph to modify.
+ * @param sources considered source vertices.
+ * @param sinks considered sink vertices.
+ * @return never {@code null}.
+ */
+ private static SeqGraph removeCycles(final SeqGraph original, final Set sources, final Set sinks) {
final Set edgesToRemove = new HashSet<>(original.edgeSet().size());
final Set vertexToRemove = new HashSet<>(original.vertexSet().size());
- if (!findGuiltyVerticesAndEdgesToRemoveCycles(original, source, sink, edgesToRemove, vertexToRemove, new HashSet(original.vertexSet().size())))
- throw new IllegalStateException("could not find any path from the source vertex to the sink vertex: " + source + " -> " + sink);
+ boolean foundSomePath = false;
+ for (final SeqVertex source : sources)
+ foundSomePath = findGuiltyVerticesAndEdgesToRemoveCycles(original, source, sinks, edgesToRemove,
+ vertexToRemove, new HashSet(original.vertexSet().size())) | foundSomePath;
+
+ if (!foundSomePath)
+ throw new IllegalStateException("could not find any path from the source vertex to the sink vertex after removing cycles: "
+ + Arrays.toString(sources.toArray()) + " => " + Arrays.toString(sinks.toArray()));
if (edgesToRemove.isEmpty() && vertexToRemove.isEmpty())
throw new IllegalStateException("cannot find a way to remove the cycles");
@@ -109,13 +191,30 @@ public class KBestHaplotypeFinder extends AbstractList implement
return result;
}
- private static boolean findGuiltyVerticesAndEdgesToRemoveCycles(final SeqGraph graph, final SeqVertex currentVertex, final SeqVertex sink,
- final Set edgesToRemove, final Set verticesToRemove,
+ /**
+ * Recursive call that looks for edges and vertices that need to be removed to get rid of cycles.
+ *
+ * @param graph the original graph.
+ * @param currentVertex current search vertex.
+ * @param sinks considered sink vertices.
+ * @param edgesToRemove collection of edges that need to be removed in order to get rid of cycles.
+ * @param verticesToRemove collection of vertices that can be removed.
+ * @param parentVertices collection of vertices that preceded the {@code currentVertex}; i.e. the it can be
+ * reached from those vertices using edges existing in {@code graph}.
+ *
+ * @return {@code true} to indicate that the some sink vertex is reachable by {@code currentVertex},
+ * {@code false} otherwise.
+ */
+ private static boolean findGuiltyVerticesAndEdgesToRemoveCycles(final SeqGraph graph,
+ final SeqVertex currentVertex,
+ final Set sinks,
+ final Set edgesToRemove,
+ final Set verticesToRemove,
final Set parentVertices) {
- if (currentVertex.equals(sink)) return true;
+ if (sinks.contains(currentVertex)) return true;
final Set outgoingEdges = graph.outgoingEdgesOf(currentVertex);
- boolean reachsSink = false;
+ boolean reachesSink = false;
parentVertices.add(currentVertex);
for (final BaseEdge edge : outgoingEdges) {
@@ -123,31 +222,28 @@ public class KBestHaplotypeFinder extends AbstractList implement
if (parentVertices.contains(child))
edgesToRemove.add(edge);
else {
- final boolean childReachSink = findGuiltyVerticesAndEdgesToRemoveCycles(graph, child, sink, edgesToRemove, verticesToRemove, parentVertices);
- reachsSink = reachsSink || childReachSink;
+ final boolean childReachSink = findGuiltyVerticesAndEdgesToRemoveCycles(graph, child, sinks,
+ edgesToRemove, verticesToRemove, parentVertices);
+ reachesSink = reachesSink || childReachSink;
}
}
parentVertices.remove(currentVertex);
- if (!reachsSink) verticesToRemove.add(currentVertex);
- return reachsSink;
+ if (!reachesSink) verticesToRemove.add(currentVertex);
+ return reachesSink;
}
-
@Override
public KBestHaplotype get(int index) {
- final KBestSubHaplotypeFinder sourceNode = createNode(source);
if (index < 0 || index >= size())
throw new IndexOutOfBoundsException();
- return sourceNode.getKBest(index);
+ return topFinder.getKBest(index);
}
@Override
public Iterator iterator() {
- final KBestSubHaplotypeFinder sourceFinder = createNode(source);
-
return new Iterator() {
private int nextK = 0;
- private final int maxK = sourceFinder.getCount();
+ private final int maxK = topFinder.getCount();
@Override
@@ -158,7 +254,7 @@ public class KBestHaplotypeFinder extends AbstractList implement
@Override
public KBestHaplotype next() {
if (nextK >= maxK) throw new NoSuchElementException();
- return sourceFinder.getKBest(nextK++);
+ return topFinder.getKBest(nextK++);
}
@Override
@@ -170,7 +266,7 @@ public class KBestHaplotypeFinder extends AbstractList implement
@Override
public int size() {
- return createNode(source).getCount();
+ return topFinder.getCount();
}
/**
@@ -183,12 +279,10 @@ public class KBestHaplotypeFinder extends AbstractList implement
* @return never {@code null}, but perhaps a iterator that return no haplotype.
*/
public Iterator iterator(final int k) {
- final KBestSubHaplotypeFinder sourceFinder = createNode(source);
return new Iterator() {
private int nextK = 0;
- private final int maxK = Math.min(sourceFinder.getCount(), k);
-
+ private final int maxK = Math.min(size(), k);
@Override
public boolean hasNext() {
@@ -198,7 +292,7 @@ public class KBestHaplotypeFinder extends AbstractList implement
@Override
public KBestHaplotype next() {
if (nextK >= maxK) throw new NoSuchElementException();
- return sourceFinder.getKBest(nextK++);
+ return topFinder.getKBest(nextK++);
}
@Override
@@ -208,38 +302,51 @@ public class KBestHaplotypeFinder extends AbstractList implement
};
}
- protected KBestSubHaplotypeFinder createNode(final SeqVertex source) {
- KBestSubHaplotypeFinder node = nodeBySource.get(source);
+ /**
+ * Creates a finder from a vertex.
+ *
+ * @param source the source vertex for the finder.
+ *
+ * @return never {@code null}, perhaps a finder that returns no haplotypes though.
+ */
+ protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex source) {
+ KBestSubHaplotypeFinder node = finderByVertex.get(source);
if (node == null) {
- if (source.equals(sink))
- node = new EmptyPathHaplotypeFinderNode(graph,sink);
+ if (sinks.contains(source))
+ node = new EmptyPathHaplotypeFinderNode(graph,source);
else {
final Set outgoingEdges = graph.outgoingEdgesOf(source);
if (outgoingEdges.isEmpty())
node = DeadEndKBestSubHaplotypeFinder.INSTANCE;
else {
- final Map undeadChildren = createChildrenNodes(outgoingEdges);
- if (undeadChildren.isEmpty())
- node = DeadEndKBestSubHaplotypeFinder.INSTANCE;
- else
- node = new RecursiveSubHaplotypeFinder(graph,source,undeadChildren);
-
+ final Map undeadChildren = createChildrenFinders(outgoingEdges);
+ node = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
+ new RecursiveSubHaplotypeFinder(source,undeadChildren);
}
}
- nodeBySource.put(source,node);
+ finderByVertex.put(source, node);
}
return node;
}
- private Map createChildrenNodes(Set baseEdges) {
+ /**
+ * Creates finder for target vertices of a collection of edges.
+ *
+ * This peculiar signature is convenient for when we want to create finders for the children of a vertex.
+ *
+ *
+ * @param baseEdges target collection of edges.
+ *
+ * @return never {@code null}, perhaps an empty map if there is no children with valid paths to any sink for this
+ * finder.
+ */
+ private Map createChildrenFinders(Set baseEdges) {
final Map result = new LinkedHashMap<>(baseEdges.size());
- for (final BaseEdge edge : baseEdges)
- result.put(edge,createNode(graph.getEdgeTarget(edge)));
- final Iterator> childrenIterator = result.entrySet().iterator();
- while (childrenIterator.hasNext())
- if (childrenIterator.next().getValue().getCount() == 0)
- childrenIterator.remove();
+ for (final BaseEdge edge : baseEdges) {
+ final KBestSubHaplotypeFinder targetFinder = createVertexFinder(graph.getEdgeTarget(edge));
+ if (targetFinder.getCount() == 0) continue;
+ result.put(edge, targetFinder);
+ }
return result;
}
-
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java
index a637ff2b6..9c185b52c 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java
@@ -68,5 +68,4 @@ interface KBestSubHaplotypeFinder {
* @return never {@code null}.
*/
public abstract KBestHaplotype getKBest(int k);
-
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java
index f2106ffb9..0fbbfdc64 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java
@@ -46,18 +46,18 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Map;
-import java.util.PriorityQueue;
/**
* General recursive sub-haplotype finder.
*
-* Provides the k-best sub-haplotypes looking into the outgoing set of vertices (that contain at least one solution).
+* Provides the k-best sub-haplotypes from a vertex provided map between outgoing edges and its target finders
*
*
-* This is done efficiently by keeping an priority-queue on best subhaplotype solutions and pulling them on demand
+* This is done efficiently by keeping an priority-queue on best sub-haplotype solutions and pulling them on demand
* as needed.
-*
+*
*
* Solutions are cached for repeated retrieval so that we save compute at vertices that share sub-haplotypes
* (share descendant vertices). This aspect is controlled by {@link KBestSubHaplotypeFinder} that instantiate
@@ -67,34 +67,7 @@ import java.util.PriorityQueue;
*
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
*/
-class RecursiveSubHaplotypeFinder implements KBestSubHaplotypeFinder {
-
- private final SeqGraph graph;
-
- private final SeqVertex vertex;
-
- private final Map children;
-
- private boolean childrenWereProcessed = false;
-
- /**
- * Holds the number of possible paths from this source node vertex to the sink vertex.
- *
- *
Updated by {@link #processChildrenIfNeeded()}
- */
- private int possibleHaplotypeCount;
-
- /**
- * Holds the best {@code i} paths to the sink so far calculated where {@code i+1} is the length of rankedResults.
- *
- *
As more results are requested the array will grow. All positions and solutions are calculated up to {@code i}
.
- */
- private ArrayList rankedResults;
-
- /**
- * Priority queue with best sub-haplotype solutions that haven't been calculated and cached on {@link #rankedResults} yet.
- */
- private PriorityQueue nextChildrenKBestHaplotypePath;
+class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
/**
* Creates a recursive sub-haplotype finder give the target graph, first vertex and all possible outgoing edges
@@ -104,113 +77,73 @@ class RecursiveSubHaplotypeFinder implements KBestSubHaplotypeFinder {
* are outgoing edges from {@code vertex} on {@code graph} and that the value sub-haplotype resolver have as
* the first vertex the adjacent vertex through that key edge.
*
- * @param graph the search graph.
* @param vertex first vertex for all sub-haplotype solutions provided by this finder
* @param children map from outgoing edge to the corresponding sub-sub-haplotype finder.
*/
- public RecursiveSubHaplotypeFinder(final SeqGraph graph, final SeqVertex vertex,
+ public RecursiveSubHaplotypeFinder(final SeqVertex vertex,
final Map children) {
- if (vertex == null) throw new IllegalArgumentException("the vertex provided cannot be null");
- if (graph == null) throw new IllegalArgumentException("the graph provided cannot be null");
- this.vertex = vertex;
- this.children = children;
- this.graph = graph;
+ super(createChildFinderCollection(vertex, children));
}
- @Override
- public int getCount() {
- processChildrenIfNeeded();
- return possibleHaplotypeCount;
+ private static Collection createChildFinderCollection(final SeqVertex vertex, final Map finders) {
+ if (finders == null) throw new IllegalArgumentException("the edge to child map cannot be null");
+ final Collection result = new ArrayList<>(finders.size());
+ for (final Map.Entry e : finders.entrySet())
+ result.add(new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue()));
+ return result;
}
- /**
- * Process children and initialize structures if not done before.
- */
- private void processChildrenIfNeeded() {
- if (childrenWereProcessed) return;
- long possibleHaplotypeCount = 0;
+ private static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
- nextChildrenKBestHaplotypePath = new PriorityQueue<>(children.size());
+ private final KBestSubHaplotypeFinder childFinder;
- for (final Map.Entry entry : children.entrySet()) {
- final KBestSubHaplotypeFinder child = entry.getValue();
- final BaseEdge edge = entry.getKey();
- final int childPossibleHaplotypePathCount = child.getCount();
- if (childPossibleHaplotypePathCount != 0) // paranoia check, should not happen at this point.
- nextChildrenKBestHaplotypePath.add(new ChildKBestSubHaplotype(-1,edge,child,0));
- possibleHaplotypeCount += childPossibleHaplotypePathCount;
+ private final SeqVertex vertex;
+
+ private final BaseEdge edge;
+
+ private EdgeSubHaplotypeFinder(final SeqVertex vertex, final BaseEdge edge, final KBestSubHaplotypeFinder childFinder) {
+ this.childFinder = childFinder;
+ this.edge = edge;
+ this.vertex = vertex;
}
- // Just make sure we won't incur in overflow here for very large graphs; who is ever going to ask for more than 2G paths!!!)
- this.possibleHaplotypeCount = (int) Math.min(Integer.MAX_VALUE,possibleHaplotypeCount);
-
- // 10 is a bit arbitrary as it is difficult to anticipate what would be the number of requested
- // best sub-haplotypes for any node. It shouldn't be too large so that it does not waste space
- // but not too small so that there is no need to resize when just a few best solutions are requested.
- rankedResults = new ArrayList<>(Math.min(this.possibleHaplotypeCount,10));
-
- childrenWereProcessed = true;
- }
-
- @Override
- public KBestHaplotype getKBest(int k) {
- if (k < 0)
- throw new IllegalArgumentException("the rank requested cannot be negative");
- processChildrenIfNeeded();
- if (k >= possibleHaplotypeCount)
- throw new IllegalArgumentException("the rank requested cannot be equal or greater to the number of possible haplotypes");
- if (rankedResults.size() > k)
- return rankedResults.get(k);
-
- rankedResults.ensureCapacity(k+1);
- for (int i = rankedResults.size(); i <= k; i++) {
- // since k < possibleHaplotypeCount is guarantee no to be empty.
- if (nextChildrenKBestHaplotypePath.isEmpty())
- throw new IllegalStateException("what the heck " + k + " " + possibleHaplotypeCount);
- final ChildKBestSubHaplotype nextResult = nextChildrenKBestHaplotypePath.remove();
- nextResult.rank = i;
- rankedResults.add(nextResult);
- final int childRank = nextResult.subpath.rank();
- final KBestSubHaplotypeFinder child = nextResult.child;
-
- // if there is no further solution from the same child we cannot add another solution from that child.
- if (childRank + 1 >= nextResult.child.getCount())
- continue;
- nextChildrenKBestHaplotypePath.add(new ChildKBestSubHaplotype(-1,nextResult.edge, child, childRank + 1));
+ @Override
+ public int getCount() {
+ return childFinder.getCount();
+ }
+
+ @Override
+ public KBestHaplotype getKBest(int k) {
+ return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k));
}
- return rankedResults.get(k);
}
/**
* Custom extension of the {@link KBestHaplotype} used for solutions generated by this class.
+ *
+ *
+ * These by delegating on the encapsulated solution from outgoing edge's finder by adding
+ * the edge score and prefixing this outer finder
+ * source vertex.
+ *
*/
- private class ChildKBestSubHaplotype extends KBestHaplotype implements Comparable{
+ private static class ChildKBestSubHaplotype extends KBestHaplotype {
+
private final int score;
- private int rank;
- private final KBestSubHaplotypeFinder child;
- private final BaseEdge edge;
- private final KBestHaplotype subpath;
+ private final KBestHaplotype child;
+ private final SeqVertex vertex;
private final boolean isReference;
- public ChildKBestSubHaplotype(final int rank, final BaseEdge edge,
- final KBestSubHaplotypeFinder child, final int childRank) {
+ public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child) {
+ this.score = edge.getMultiplicity() + child.score();
+ this.vertex = vertex;
this.child = child;
- this.edge = edge;
- this.rank = rank;
- this.subpath = child.getKBest(childRank);
- this.score = edge.getMultiplicity() + subpath.score();
- this.isReference = edge.isRef() && subpath.isReference();
+ this.isReference = edge.isRef() && child.isReference();
}
@Override
public SeqGraph graph() {
- return graph;
- }
-
- @Override
- public int compareTo(final ChildKBestSubHaplotype other) {
- if (other == null) throw new IllegalArgumentException("the other object cannot be null");
- return - Integer.compare(this.score,other.score);
+ return child.graph();
}
@Override
@@ -220,10 +153,9 @@ class RecursiveSubHaplotypeFinder implements KBestSubHaplotypeFinder {
@Override
public int rank() {
- return rank;
+ return child.rank();
}
-
@Override
protected SeqVertex head() {
return vertex;
@@ -231,7 +163,7 @@ class RecursiveSubHaplotypeFinder implements KBestSubHaplotypeFinder {
@Override
protected KBestHaplotype tail() {
- return subpath;
+ return child;
}
@Override
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java
index a932f8a96..30b677fe9 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java
@@ -73,7 +73,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
private final boolean dontIncreaseKmerSizesForCycles;
private final int numPruningSamples;
- private boolean requireReasonableNumberOfPaths = false;
protected boolean removePathsNotConnectedToRef = true;
private boolean justReturnRawGraph = false;
@@ -207,24 +206,12 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
initialSeqGraph.cleanNonRefPaths(); // TODO -- I don't this is possible by construction
final AssemblyResult cleaned = cleanupSeqGraph(initialSeqGraph);
- final AssemblyResult.Status status = cleaned.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION && requireReasonableNumberOfPaths && !reasonableNumberOfPaths(cleaned.getGraph()) ? AssemblyResult.Status.FAILED : cleaned.getStatus();
+ final AssemblyResult.Status status = cleaned.getStatus();
final AssemblyResult result = new AssemblyResult(status, cleaned.getGraph());
result.setThreadingGraph(rtgraph);
return result;
}
- /**
- * Did we find a reasonable number of paths in this graph?
- * @param graph
- * @return
- */
- private boolean reasonableNumberOfPaths(final SeqGraph graph) {
- final KBestPaths pathFinder = new KBestPaths<>(false);
- final List> allPaths = pathFinder.getKBestPaths(graph, 100000);
- logger.info("Found " + allPaths.size() + " paths through " + graph + " with maximum " + maxAllowedPathsForReadThreadingAssembler);
- return allPaths.size() <= maxAllowedPathsForReadThreadingAssembler;
- }
-
@Override
public String toString() {
return "ReadThreadingAssembler{" +
diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java
index 63fd21d8f..0ddf7544d 100644
--- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java
+++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java
@@ -137,19 +137,20 @@ public class CommonSuffixMergerUnitTest extends BaseTest {
public static void assertSameHaplotypes(final String name, final SeqGraph actual, final SeqGraph original) {
try {
final Set haplotypes = new HashSet();
- final List> originalPaths = new KBestPaths().getKBestPaths(original);
- for ( final Path path : originalPaths )
- haplotypes.add(new String(path.getBases()));
+ final List originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks());
+ final List actualKBestHaplotypes = new KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks());
- final List> splitPaths = new KBestPaths().getKBestPaths(actual);
- for ( final Path path : splitPaths ) {
- final String h = new String(path.getBases());
+ for (final KBestHaplotype kbh : originalKBestHaplotypes)
+ haplotypes.add(new String(kbh.bases()));
+
+ for ( final KBestHaplotype kbh : actualKBestHaplotypes ) {
+ final String h = new String(kbh.bases());
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
}
- if ( splitPaths.size() == originalPaths.size() ) {
- for ( int i = 0; i < originalPaths.size(); i++ ) {
- Assert.assertTrue(splitPaths.get(i).equalSequence(originalPaths.get(i)), "Paths not equal " + splitPaths.get(i) + " vs. original " + originalPaths.get(i));
+ if ( actualKBestHaplotypes.size() == originalKBestHaplotypes.size() ) {
+ for ( int i = 0; i < originalKBestHaplotypes.size(); i++ ) {
+ Assert.assertTrue(actualKBestHaplotypes.get(i).haplotype().getBaseString().equals(originalKBestHaplotypes.get(i).haplotype().getBaseString()), "Paths not equal " + actualKBestHaplotypes.get(i).haplotype() + " vs. original " + originalKBestHaplotypes.get(i).haplotype());
}
}
} catch ( AssertionError e ) {
diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPathsUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinderUnitTest.java
similarity index 82%
rename from protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPathsUnitTest.java
rename to protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinderUnitTest.java
index f117f5750..6dc3d5d67 100644
--- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPathsUnitTest.java
+++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinderUnitTest.java
@@ -66,31 +66,25 @@ import java.util.*;
* Date: 1/31/13
*/
-public class KBestPathsUnitTest extends BaseTest {
- private final static boolean DEBUG = false;
+public class KBestHaplotypeFinderUnitTest extends BaseTest {
@DataProvider(name = "BasicPathFindingData")
public Object[][] makeBasicPathFindingData() {
- List