Optimizations for HC infrastructure
-- outgoingVerticesOf and incomingVerticesOf now return a list instead of a set; the returned values are still guaranteed to be unique since our super directed graph doesn't allow multiple edges between vertices -- Make DeBruijnGraph, SeqGraph, SeqVertex, and DeBruijnVertex all final -- Cache the hashCode calculation in BaseVertex -- Better docs before the pruneGraph call
This commit is contained in:
parent
e916998784
commit
4d389a8234
|
|
@ -185,6 +185,14 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph();
|
final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph();
|
||||||
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor);
|
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor);
|
||||||
|
|
||||||
|
// TODO -- we need to come up with a consistent pruning algorithm. The current pruning algorithm
|
||||||
|
// TODO -- works well but it doesn't differentiate between an isolated chain that doesn't connect
|
||||||
|
// TODO -- to anything from one that actually has good support along the chain but just happens
|
||||||
|
// TODO -- to have a connection in the middle that has weight of < pruneFactor. Ultimately
|
||||||
|
// TODO -- the pruning algorithm really should be an error correction algorithm that knows more
|
||||||
|
// TODO -- about the structure of the data and can differentiate between an infrequent path but
|
||||||
|
// TODO -- without evidence against it (such as occurs when a region is hard to get any reads through)
|
||||||
|
// TODO -- from an error with lots of weight going along another similar path
|
||||||
// the very first thing we need to do is zip up the graph, or pruneGraph will be too aggressive
|
// the very first thing we need to do is zip up the graph, or pruneGraph will be too aggressive
|
||||||
seqGraph.zipLinearChains();
|
seqGraph.zipLinearChains();
|
||||||
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.zipped.dot"), pruneFactor);
|
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.zipped.dot"), pruneFactor);
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,7 @@ import java.util.Arrays;
|
||||||
*/
|
*/
|
||||||
public class BaseVertex {
|
public class BaseVertex {
|
||||||
final byte[] sequence;
|
final byte[] sequence;
|
||||||
|
int cachedHashCode = -1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new sequence vertex with sequence
|
* Create a new sequence vertex with sequence
|
||||||
|
|
@ -128,8 +129,10 @@ public class BaseVertex {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
// TODO -- optimization, could compute upfront once and cached in debruijn graph
|
if ( cachedHashCode == -1 ) {
|
||||||
return Arrays.hashCode(sequence);
|
cachedHashCode = Arrays.hashCode(sequence);
|
||||||
|
}
|
||||||
|
return cachedHashCode;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ import java.util.Map;
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: 2/6/13
|
* Date: 2/6/13
|
||||||
*/
|
*/
|
||||||
public class DeBruijnGraph extends BaseGraph<DeBruijnVertex> {
|
public final class DeBruijnGraph extends BaseGraph<DeBruijnVertex> {
|
||||||
/**
|
/**
|
||||||
* Create an empty DeBruijnGraph with default kmer size
|
* Create an empty DeBruijnGraph with default kmer size
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ import com.google.java.contract.Ensures;
|
||||||
* User: ebanks, mdepristo
|
* User: ebanks, mdepristo
|
||||||
* Date: Mar 23, 2011
|
* Date: Mar 23, 2011
|
||||||
*/
|
*/
|
||||||
public class DeBruijnVertex extends BaseVertex {
|
public final class DeBruijnVertex extends BaseVertex {
|
||||||
private final static byte[][] sufficesAsByteArray = new byte[256][];
|
private final static byte[][] sufficesAsByteArray = new byte[256][];
|
||||||
static {
|
static {
|
||||||
for ( int i = 0; i < sufficesAsByteArray.length; i++ )
|
for ( int i = 0; i < sufficesAsByteArray.length; i++ )
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ import java.util.Arrays;
|
||||||
* @author: depristo
|
* @author: depristo
|
||||||
* @since 03/2013
|
* @since 03/2013
|
||||||
*/
|
*/
|
||||||
public class SeqVertex extends BaseVertex {
|
public final class SeqVertex extends BaseVertex {
|
||||||
private static int idCounter = 0;
|
private static int idCounter = 0;
|
||||||
public final int id;
|
public final int id;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,7 @@ public class SharedSequenceMerger {
|
||||||
if ( graph == null ) throw new IllegalArgumentException("graph cannot be null");
|
if ( graph == null ) throw new IllegalArgumentException("graph cannot be null");
|
||||||
if ( ! graph.vertexSet().contains(v) ) throw new IllegalArgumentException("graph doesn't contain vertex " + v);
|
if ( ! graph.vertexSet().contains(v) ) throw new IllegalArgumentException("graph doesn't contain vertex " + v);
|
||||||
|
|
||||||
final Set<SeqVertex> prevs = graph.incomingVerticesOf(v);
|
final List<SeqVertex> prevs = graph.incomingVerticesOf(v);
|
||||||
if ( ! canMerge(graph, v, prevs) )
|
if ( ! canMerge(graph, v, prevs) )
|
||||||
return false;
|
return false;
|
||||||
else {
|
else {
|
||||||
|
|
|
||||||
|
|
@ -241,9 +241,11 @@ public class BaseGraphUnitTest extends BaseTest {
|
||||||
graph.printGraph(tmp, 10);
|
graph.printGraph(tmp, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void assertVertexSetEquals(final Set<SeqVertex> actual, final SeqVertex ... expected) {
|
private void assertVertexSetEquals(final Collection<SeqVertex> actual, final SeqVertex ... expected) {
|
||||||
|
final Set<SeqVertex> actualSet = new HashSet<SeqVertex>(actual);
|
||||||
|
Assert.assertEquals(actualSet.size(), actual.size(), "Duplicate elements found in vertex list");
|
||||||
final Set<SeqVertex> expectedSet = expected == null ? Collections.<SeqVertex>emptySet() : new HashSet<SeqVertex>(Arrays.asList(expected));
|
final Set<SeqVertex> expectedSet = expected == null ? Collections.<SeqVertex>emptySet() : new HashSet<SeqVertex>(Arrays.asList(expected));
|
||||||
Assert.assertEquals(actual, expectedSet);
|
Assert.assertEquals(actualSet, expectedSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class SeqGraphUnitTest extends BaseTest {
|
public class SeqGraphUnitTest extends BaseTest {
|
||||||
private final static boolean DEBUG = true;
|
private final static boolean DEBUG = false;
|
||||||
|
|
||||||
private class MergeNodesWithNoVariationTestProvider extends TestDataProvider {
|
private class MergeNodesWithNoVariationTestProvider extends TestDataProvider {
|
||||||
public byte[] sequence;
|
public byte[] sequence;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue