Optimizations for HC infrastructure
-- outgoingVerticesOf and incomingVerticesOf return a list not a set now, as the corresponding values must be unique since our super directed graph doesn't allow multiple edges between vertices -- Make DeBruijnGraph, SeqGraph, SeqVertex, and DeBruijnVertex all final -- Cache HashCode calculation in BaseVertex -- Better docs before the pruneGraph call
This commit is contained in:
parent
e916998784
commit
4d389a8234
|
|
@ -185,6 +185,14 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
|||
final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph();
|
||||
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor);
|
||||
|
||||
// TODO -- we need to come up with a consistent pruning algorithm. The current pruning algorithm
|
||||
// TODO -- works well but it doesn't differentiate between an isolated chain that doesn't connect
|
||||
// TODO -- to anything from one that actually has good support along the chain but just happens
|
||||
// TODO -- to have a connection in the middle that has weight of < pruneFactor. Ultimately
|
||||
// TODO -- the pruning algorithm really should be an error correction algorithm that knows more
|
||||
// TODO -- about the structure of the data and can differentiate between an infrequent path but
|
||||
// TODO -- without evidence against it (such as occurs when a region is hard to get any reads through)
|
||||
// TODO -- from an error with lots of weight going along another similar path
|
||||
// the very first thing we need to do is zip up the graph, or pruneGraph will be too aggressive
|
||||
seqGraph.zipLinearChains();
|
||||
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.zipped.dot"), pruneFactor);
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ import java.util.Arrays;
|
|||
*/
|
||||
public class BaseVertex {
|
||||
final byte[] sequence;
|
||||
int cachedHashCode = -1;
|
||||
|
||||
/**
|
||||
* Create a new sequence vertex with sequence
|
||||
|
|
@ -128,8 +129,10 @@ public class BaseVertex {
|
|||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
// TODO -- optimization, could compute upfront once and cached in debruijn graph
|
||||
return Arrays.hashCode(sequence);
|
||||
if ( cachedHashCode == -1 ) {
|
||||
cachedHashCode = Arrays.hashCode(sequence);
|
||||
}
|
||||
return cachedHashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ import java.util.Map;
|
|||
* User: rpoplin
|
||||
* Date: 2/6/13
|
||||
*/
|
||||
public class DeBruijnGraph extends BaseGraph<DeBruijnVertex> {
|
||||
public final class DeBruijnGraph extends BaseGraph<DeBruijnVertex> {
|
||||
/**
|
||||
* Create an empty DeBruijnGraph with default kmer size
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ import com.google.java.contract.Ensures;
|
|||
* User: ebanks, mdepristo
|
||||
* Date: Mar 23, 2011
|
||||
*/
|
||||
public class DeBruijnVertex extends BaseVertex {
|
||||
public final class DeBruijnVertex extends BaseVertex {
|
||||
private final static byte[][] sufficesAsByteArray = new byte[256][];
|
||||
static {
|
||||
for ( int i = 0; i < sufficesAsByteArray.length; i++ )
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ import java.util.Arrays;
|
|||
* @author: depristo
|
||||
* @since 03/2013
|
||||
*/
|
||||
public class SeqVertex extends BaseVertex {
|
||||
public final class SeqVertex extends BaseVertex {
|
||||
private static int idCounter = 0;
|
||||
public final int id;
|
||||
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ public class SharedSequenceMerger {
|
|||
if ( graph == null ) throw new IllegalArgumentException("graph cannot be null");
|
||||
if ( ! graph.vertexSet().contains(v) ) throw new IllegalArgumentException("graph doesn't contain vertex " + v);
|
||||
|
||||
final Set<SeqVertex> prevs = graph.incomingVerticesOf(v);
|
||||
final List<SeqVertex> prevs = graph.incomingVerticesOf(v);
|
||||
if ( ! canMerge(graph, v, prevs) )
|
||||
return false;
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -241,9 +241,11 @@ public class BaseGraphUnitTest extends BaseTest {
|
|||
graph.printGraph(tmp, 10);
|
||||
}
|
||||
|
||||
private void assertVertexSetEquals(final Set<SeqVertex> actual, final SeqVertex ... expected) {
|
||||
private void assertVertexSetEquals(final Collection<SeqVertex> actual, final SeqVertex ... expected) {
|
||||
final Set<SeqVertex> actualSet = new HashSet<SeqVertex>(actual);
|
||||
Assert.assertEquals(actualSet.size(), actual.size(), "Duplicate elements found in vertex list");
|
||||
final Set<SeqVertex> expectedSet = expected == null ? Collections.<SeqVertex>emptySet() : new HashSet<SeqVertex>(Arrays.asList(expected));
|
||||
Assert.assertEquals(actual, expectedSet);
|
||||
Assert.assertEquals(actualSet, expectedSet);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ import java.util.LinkedList;
|
|||
import java.util.List;
|
||||
|
||||
public class SeqGraphUnitTest extends BaseTest {
|
||||
private final static boolean DEBUG = true;
|
||||
private final static boolean DEBUG = false;
|
||||
|
||||
private class MergeNodesWithNoVariationTestProvider extends TestDataProvider {
|
||||
public byte[] sequence;
|
||||
|
|
|
|||
Loading…
Reference in New Issue