Optimizations for HC infrastructure

-- outgoingVerticesOf and incomingVerticesOf now return a list rather than a set; the returned values are still guaranteed to be unique, since our super directed graph doesn't allow multiple edges between vertices
-- Make DeBruijnGraph, SeqGraph, SeqVertex, and DeBruijnVertex all final
-- Cache HashCode calculation in BaseVertex
-- Better docs before the pruneGraph call
This commit is contained in:
Mark DePristo 2013-03-31 16:57:36 -04:00
parent e916998784
commit 4d389a8234
8 changed files with 22 additions and 9 deletions

View File

@ -185,6 +185,14 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor);
// TODO -- we need to come up with a consistent pruning algorithm. The current pruning algorithm
// TODO -- works well but it doesn't differentiate between an isolated chain that doesn't connect
// TODO -- to anything from one that actually has good support along the chain but just happens
// TODO -- to have a connection in the middle that has weight of < pruneFactor. Ultimately
// TODO -- the pruning algorithm really should be an error correction algorithm that knows more
// TODO -- about the structure of the data and can differentiate between an infrequent path but
// TODO -- without evidence against it (such as occurs when a region is hard to get any reads through)
// TODO -- from an error with lots of weight going along another similar path
// the very first thing we need to do is zip up the graph, or pruneGraph will be too aggressive
seqGraph.zipLinearChains();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.zipped.dot"), pruneFactor);

View File

@ -58,6 +58,7 @@ import java.util.Arrays;
*/
public class BaseVertex {
final byte[] sequence;
int cachedHashCode = -1;
/**
* Create a new sequence vertex with sequence
@ -128,8 +129,10 @@ public class BaseVertex {
*/
@Override
public int hashCode() {
// TODO -- optimization, could compute upfront once and cached in debruijn graph
return Arrays.hashCode(sequence);
if ( cachedHashCode == -1 ) {
cachedHashCode = Arrays.hashCode(sequence);
}
return cachedHashCode;
}
@Override

View File

@ -59,7 +59,7 @@ import java.util.Map;
* User: rpoplin
* Date: 2/6/13
*/
public class DeBruijnGraph extends BaseGraph<DeBruijnVertex> {
public final class DeBruijnGraph extends BaseGraph<DeBruijnVertex> {
/**
* Create an empty DeBruijnGraph with default kmer size
*/

View File

@ -54,7 +54,7 @@ import com.google.java.contract.Ensures;
* User: ebanks, mdepristo
* Date: Mar 23, 2011
*/
public class DeBruijnVertex extends BaseVertex {
public final class DeBruijnVertex extends BaseVertex {
private final static byte[][] sufficesAsByteArray = new byte[256][];
static {
for ( int i = 0; i < sufficesAsByteArray.length; i++ )

View File

@ -70,7 +70,7 @@ import java.util.Arrays;
* @author: depristo
* @since 03/2013
*/
public class SeqVertex extends BaseVertex {
public final class SeqVertex extends BaseVertex {
private static int idCounter = 0;
public final int id;

View File

@ -75,7 +75,7 @@ public class SharedSequenceMerger {
if ( graph == null ) throw new IllegalArgumentException("graph cannot be null");
if ( ! graph.vertexSet().contains(v) ) throw new IllegalArgumentException("graph doesn't contain vertex " + v);
final Set<SeqVertex> prevs = graph.incomingVerticesOf(v);
final List<SeqVertex> prevs = graph.incomingVerticesOf(v);
if ( ! canMerge(graph, v, prevs) )
return false;
else {

View File

@ -241,9 +241,11 @@ public class BaseGraphUnitTest extends BaseTest {
graph.printGraph(tmp, 10);
}
private void assertVertexSetEquals(final Set<SeqVertex> actual, final SeqVertex ... expected) {
private void assertVertexSetEquals(final Collection<SeqVertex> actual, final SeqVertex ... expected) {
final Set<SeqVertex> actualSet = new HashSet<SeqVertex>(actual);
Assert.assertEquals(actualSet.size(), actual.size(), "Duplicate elements found in vertex list");
final Set<SeqVertex> expectedSet = expected == null ? Collections.<SeqVertex>emptySet() : new HashSet<SeqVertex>(Arrays.asList(expected));
Assert.assertEquals(actual, expectedSet);
Assert.assertEquals(actualSet, expectedSet);
}
@Test(enabled = true)

View File

@ -58,7 +58,7 @@ import java.util.LinkedList;
import java.util.List;
public class SeqGraphUnitTest extends BaseTest {
private final static boolean DEBUG = true;
private final static boolean DEBUG = false;
private class MergeNodesWithNoVariationTestProvider extends TestDataProvider {
public byte[] sequence;