Intermediate commit of new bubble assembly graph traversal algorithm for the HaplotypeCaller. Adding functionality for a path from an assembly graph to calculate its own cigar string from each of the bubbles instead of doing a massive Smith-Waterman alignment between the path's full base composition and the reference.

This commit is contained in:
Ryan Poplin 2013-01-31 11:32:19 -05:00
parent 495bca3d1a
commit ac033ce41a
6 changed files with 496 additions and 39 deletions

View File

@ -86,7 +86,7 @@ public class DeBruijnEdge {
multiplicity = value;
}
public boolean getIsRef() {
public boolean isRef() {
return isRef;
}

View File

@ -46,8 +46,14 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.Serializable;
@ -55,7 +61,7 @@ import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: ebanks
* User: ebanks, rpoplin
* Date: Mar 23, 2011
*/
// Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph.
@ -72,28 +78,47 @@ public class KBestPaths {
protected static class Path {
// the last vertex seen in the path
private DeBruijnVertex lastVertex;
private final DeBruijnVertex lastVertex;
// the list of edges comprising the path
private ArrayList<DeBruijnEdge> edges;
private final List<DeBruijnEdge> edges;
// the scores for the path
private int totalScore = 0, lowestEdge = -1;
private final int totalScore;
private final int lowestEdge;
public Path( final DeBruijnVertex initialVertex ) {
// the graph from which this path originated
private final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph;
// used in the bubble state machine to apply Smith-Waterman to the bubble sequence
private final double SW_MATCH = 15.0;
private final double SW_MISMATCH = -15.0;
private final double SW_GAP = -25.0;
private final double SW_GAP_EXTEND = -1.2;
public Path( final DeBruijnVertex initialVertex, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
lastVertex = initialVertex;
edges = new ArrayList<DeBruijnEdge>(0);
totalScore = 0;
lowestEdge = -1;
this.graph = graph;
}
public Path( final Path p, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
lastVertex = graph.getEdgeTarget(edge);
public Path( final Path p, final DeBruijnEdge edge ) {
graph = p.graph;
lastVertex = p.graph.getEdgeTarget(edge);
edges = new ArrayList<DeBruijnEdge>(p.edges);
edges.add(edge);
totalScore = p.totalScore + edge.getMultiplicity();
lowestEdge = ( p.lowestEdge == -1 ) ? edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity());
}
public boolean containsEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
/**
* Does this path contain the given edge
* @param edge the given edge to test
* @return true if the edge is found in this path
*/
public boolean containsEdge( final DeBruijnEdge edge ) {
final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge);
for( final DeBruijnEdge e : edges ) {
if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) {
@ -104,7 +129,7 @@ public class KBestPaths {
return false;
}
public ArrayList<DeBruijnEdge> getEdges() { return edges; }
public List<DeBruijnEdge> getEdges() { return edges; }
public int getScore() { return totalScore; }
@ -112,7 +137,12 @@ public class KBestPaths {
public DeBruijnVertex getLastVertexInPath() { return lastVertex; }
public byte[] getBases( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
/**
* The base sequence for this path. Pull the full sequence for the source of the path and then the suffix for all subsequent nodes
* @return non-null sequence of bases corresponding to this path
*/
@Ensures({"result != null"})
public byte[] getBases() {
if( edges.size() == 0 ) { return lastVertex.getSequence(); }
byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence();
@ -121,6 +151,142 @@ public class KBestPaths {
}
return bases;
}
/**
* Calculate the cigar string for this path using a bubble traversal of the assembly graph and running a Smith-Waterman alignment on each bubble
*/
@Ensures("result != null")
public Cigar calculateCigar() {
final Cigar cigar = new Cigar();
// special case for paths that start on reference but not at the reference source node
if( edges.get(0).isRef() && !isRefSource(graph, edges.get(0)) ) {
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, null, graph.getEdgeSource(edges.get(0))).getCigarElements() ) {
cigar.add(ce);
}
}
// reset the bubble state machine
final BubbleStateMachine bsm = new BubbleStateMachine(cigar);
for( final DeBruijnEdge e : edges ) {
if( e.equals(graph, edges.get(0)) ) {
advanceBubbleStateMachine( bsm, graph, graph.getEdgeSource(e), null );
}
advanceBubbleStateMachine( bsm, graph, graph.getEdgeTarget(e), e );
}
// special case for paths that don't end on reference
if( bsm.inBubble ) {
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, null).getCigarElements() ) {
bsm.cigar.add(ce);
}
} else if( edges.get(edges.size()-1).isRef() && !isRefSink(graph, edges.get(edges.size()-1)) ) { // special case for paths that end of the reference but haven't completed the entire reference circuit
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, graph.getEdgeTarget(edges.get(edges.size()-1)), null).getCigarElements() ) {
bsm.cigar.add(ce);
}
}
return AlignmentUtils.consolidateCigar(bsm.cigar);
}
private void advanceBubbleStateMachine( final BubbleStateMachine bsm, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex node, final DeBruijnEdge e ) {
if( isReferenceNode( graph, node ) ) {
if( !bsm.inBubble ) { // just add the ref bases as M's in the Cigar string, and don't do anything else
if( e !=null && !e.isRef() ) {
if( referencePathExists( graph, graph.getEdgeSource(e), node) ) {
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, graph.getEdgeSource(e), node).getCigarElements() ) {
bsm.cigar.add(ce);
}
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
} else if ( graph.getEdgeSource(e).equals(graph.getEdgeTarget(e)) ) { // alt edge at ref node points to itself
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.I) );
} else {
bsm.inBubble = true;
bsm.bubbleBytes = null;
bsm.lastSeenReferenceNode = graph.getEdgeSource(e);
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
}
} else {
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
}
} else if( bsm.lastSeenReferenceNode != null && !referencePathExists( graph, bsm.lastSeenReferenceNode, node ) ) { // add bases to the bubble string until we get back to the reference path
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
} else { // close the bubble and use a local SW to determine the Cigar string
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, node).getCigarElements() ) {
bsm.cigar.add(ce);
}
bsm.inBubble = false;
bsm.bubbleBytes = null;
bsm.lastSeenReferenceNode = null;
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
}
} else { // non-ref vertex
if( bsm.inBubble ) { // just keep accumulating until we get back to the reference path
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
} else { // open up a bubble
bsm.inBubble = true;
bsm.bubbleBytes = null;
bsm.lastSeenReferenceNode = (e != null ? graph.getEdgeSource(e) : null );
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
}
}
}
private Cigar calculateCigarForCompleteBubble( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final byte[] bubbleBytes, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
final byte[] refBytes = getReferenceBytes(graph, fromVertex, toVertex);
final Cigar cigar = new Cigar();
// add padding to anchor ref/alt bases in the SW matrix
byte[] padding = "XXXXXX".getBytes();
boolean goodAlignment = false;
SWPairwiseAlignment swConsensus = null;
while( !goodAlignment && padding.length < 1000 ) {
padding = ArrayUtils.addAll(padding, padding); // double the size of the padding each time
final byte[] reference = ArrayUtils.addAll( ArrayUtils.addAll(padding, refBytes), padding );
final byte[] alternate = ArrayUtils.addAll( ArrayUtils.addAll(padding, bubbleBytes), padding );
swConsensus = new SWPairwiseAlignment( reference, alternate, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
if( swConsensus.getAlignmentStart2wrt1() == 0 && !swConsensus.getCigar().toString().contains("S") && swConsensus.getCigar().getReferenceLength() == reference.length ) {
goodAlignment = true;
}
}
if( !goodAlignment && swConsensus != null ) {
throw new ReviewedStingException("SmithWaterman offset failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
}
if( swConsensus != null ) {
final Cigar swCigar = swConsensus.getCigar();
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
// now we need to remove the padding from the cigar string
int length = swCigar.getCigarElement(iii).getLength();
if( iii == 0 ) { length -= padding.length; }
if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; }
if( length > 0 ) {
cigar.add( new CigarElement(length, swCigar.getCigarElement(iii).getOperator()) );
}
}
if( (refBytes == null && cigar.getReferenceLength() != 0) || ( refBytes != null && cigar.getReferenceLength() != refBytes.length ) ) {
throw new ReviewedStingException("SmithWaterman cigar failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
}
}
return cigar;
}
// class to keep track of the bubble state machine
protected static class BubbleStateMachine {
public boolean inBubble = false;
public byte[] bubbleBytes = null;
public DeBruijnVertex lastSeenReferenceNode = null;
public Cigar cigar = null;
public BubbleStateMachine( final Cigar initialCigar ) {
inBubble = false;
bubbleBytes = null;
lastSeenReferenceNode = null;
cigar = initialCigar;
}
}
}
protected static class PathComparatorTotalScore implements Comparator<Path>, Serializable {
@ -130,12 +296,12 @@ public class KBestPaths {
}
}
//protected static class PathComparatorLowestEdge implements Comparator<Path>, Serializable {
// @Override
// public int compare(final Path path1, final Path path2) {
// return path2.lowestEdge - path1.lowestEdge;
// }
//}
protected static class PathComparatorLowestEdge implements Comparator<Path>, Serializable {
@Override
public int compare(final Path path1, final Path path2) {
return path2.lowestEdge - path1.lowestEdge;
}
}
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
@ -144,7 +310,7 @@ public class KBestPaths {
// run a DFS for best paths
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( graph.inDegreeOf(v) == 0 ) {
findBestPaths(graph, new Path(v), bestPaths);
findBestPaths(new Path(v, graph), bestPaths);
}
}
@ -153,14 +319,14 @@ public class KBestPaths {
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
}
private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths ) {
findBestPaths(graph, path, bestPaths, new MyInt());
private static void findBestPaths( final Path path, final List<Path> bestPaths ) {
findBestPaths(path, bestPaths, new MyInt());
}
private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths, MyInt n ) {
private static void findBestPaths( final Path path, final List<Path> bestPaths, final MyInt n ) {
// did we hit the end of a path?
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
if ( allOutgoingEdgesHaveBeenVisited(path) ) {
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
// clean out some low scoring paths
Collections.sort(bestPaths, new PathComparatorTotalScore() );
@ -172,27 +338,253 @@ public class KBestPaths {
} else {
// recursively run DFS
final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>();
edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex));
edgeArrayList.addAll(path.graph.outgoingEdgesOf(path.lastVertex));
Collections.sort(edgeArrayList, new DeBruijnEdge.EdgeWeightComparator());
Collections.reverse(edgeArrayList);
for ( final DeBruijnEdge edge : edgeArrayList ) {
// make sure the edge is not already in the path
if ( path.containsEdge(graph, edge) )
if ( path.containsEdge(edge) )
continue;
final Path newPath = new Path(path, graph, edge);
final Path newPath = new Path(path, edge);
n.val++;
findBestPaths(graph, newPath, bestPaths, n);
findBestPaths(newPath, bestPaths, n);
}
}
}
private static boolean allOutgoingEdgesHaveBeenVisited( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path ) {
for( final DeBruijnEdge edge : graph.outgoingEdgesOf(path.lastVertex) ) {
if( !path.containsEdge(graph, edge) ) {
private static boolean allOutgoingEdgesHaveBeenVisited( final Path path ) {
for( final DeBruijnEdge edge : path.graph.outgoingEdgesOf(path.lastVertex) ) {
if( !path.containsEdge(edge) ) {
return false;
}
}
return true;
}
/****************************************************************
* Collection of graph functions used by KBestPaths *
***************************************************************/
/**
* Test if the vertex is on a reference path in the graph. If so it is referred to as a reference node
* @param graph the graph from which the vertex originated
* @param v the vertex to test
* @return true if the vertex is on the reference path
*/
public static boolean isReferenceNode( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
if( e.isRef() ) { return true; }
}
return false;
}
/**
* Is this edge a source edge (the source vertex of the edge is a source node in the graph)
* @param graph the graph from which the edge originated
* @param e the edge to test
* @return true if the source vertex of the edge is a source node in the graph
*/
public static boolean isSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
return graph.inDegreeOf(graph.getEdgeSource(e)) == 0;
}
/**
* Is this vertex a source vertex
* @param graph the graph from which the vertex originated
* @param v the vertex to test
* @return true if the vertex is a source vertex
*/
public static boolean isSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
return graph.inDegreeOf(v) == 0;
}
/**
* Pull the added base sequence implied by visiting this node in a path
* @param graph the graph from which the vertex originated
* @param v the vertex whose sequence to grab
* @return non-null sequence of bases corresponding to this node in the graph
*/
@Ensures({"result != null"})
public static byte[] getAdditionalSequence( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
return ( isSource(graph, v) ? v.getSequence() : v.getSuffix() );
}
/**
* Is this edge both a reference edge and a source edge for the reference path
* @param graph the graph from which the edge originated
* @param e the edge to test
* @return true if the edge is both a reference edge and a reference path source edge
*/
public static boolean isRefSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* Is this vertex both a reference node and a source node for the reference path
* @param graph the graph from which the vertex originated
* @param v the vertex to test
* @return true if the vertex is both a reference node and a reference path source node
*/
public static boolean isRefSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* Is this edge both a reference edge and a sink edge for the reference path
* @param graph the graph from which the edge originated
* @param e the edge to test
* @return true if the edge is both a reference edge and a reference path sink edge
*/
public static boolean isRefSink( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* Is this vertex both a reference node and a sink node for the reference path
* @param graph the graph from which the node originated
* @param v the node to test
* @return true if the vertex is both a reference node and a reference path sink node
*/
public static boolean isRefSink( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
public static DeBruijnEdge getReferenceSourceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
for( final DeBruijnEdge e : graph.edgeSet() ) {
if( e.isRef() && isRefSource(graph, e) ) {
return e;
}
}
throw new ReviewedStingException("All reference graphs should have a source node");
}
public static DeBruijnVertex getReferenceSourceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( isReferenceNode(graph, v) && isRefSource(graph, v) ) {
return v;
}
}
return null;
}
public static DeBruijnEdge getReferenceSinkEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
for( final DeBruijnEdge e : graph.edgeSet() ) {
if( e.isRef() && isRefSink(graph, e) ) {
return e;
}
}
throw new ReviewedStingException("All reference graphs should have a sink node");
}
public static DeBruijnVertex getReferenceSinkVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( isReferenceNode(graph, v) && isRefSink(graph, v) ) {
return v;
}
}
throw new ReviewedStingException("All reference graphs should have a sink node");
}
public static DeBruijnEdge getNextReferenceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
if( e == null ) { return null; }
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
if( edgeToTest.isRef() ) {
return edgeToTest;
}
}
return null;
}
public static DeBruijnVertex getNextReferenceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
if( v == null ) { return null; }
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
if( edgeToTest.isRef() ) {
return graph.getEdgeTarget(edgeToTest);
}
}
return null;
}
public static DeBruijnEdge getPrevReferenceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
if( edgeToTest.isRef() ) {
return edgeToTest;
}
}
return null;
}
public static DeBruijnVertex getPrevReferenceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
if( isReferenceNode(graph, graph.getEdgeSource(edgeToTest)) ) {
return graph.getEdgeSource(edgeToTest);
}
}
return null;
}
public static boolean referencePathExists(final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge fromEdge, final DeBruijnEdge toEdge) {
DeBruijnEdge e = fromEdge;
if( e == null ) {
return false;
}
while( !e.equals(graph, toEdge) ) {
e = getNextReferenceEdge(graph, e);
if( e == null ) {
return false;
}
}
return true;
}
public static boolean referencePathExists(final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex) {
DeBruijnVertex v = fromVertex;
if( v == null ) {
return false;
}
v = getNextReferenceVertex(graph, v);
if( v == null ) {
return false;
}
while( !v.equals(toVertex) ) {
v = getNextReferenceVertex(graph, v);
if( v == null ) {
return false;
}
}
return true;
}
// fromVertex (exclusive) -> toVertex (exclusive)
public static byte[] getReferenceBytes( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
byte[] bytes = null;
if( fromVertex != null && toVertex != null && !referencePathExists(graph, fromVertex, toVertex) ) {
throw new ReviewedStingException("Asked for a reference path which doesn't exist. " + fromVertex + " --> " + toVertex);
}
DeBruijnVertex v = fromVertex;
if( v == null ) {
v = getReferenceSourceVertex(graph);
bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) );
}
v = getNextReferenceVertex(graph, v);
while( (toVertex != null && !v.equals(toVertex)) || (toVertex == null && v != null) ) {
bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) );
// advance along the reference path
v = getNextReferenceVertex(graph, v);
}
return bytes;
}
}

View File

@ -152,10 +152,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer );
graph.addVertex(addedVertex);
for( final DeBruijnEdge edge : outEdges ) {
graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.isRef(), edge.getMultiplicity()));
}
for( final DeBruijnEdge edge : inEdges ) {
graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.isRef(), edge.getMultiplicity()));
}
graph.removeVertex( incomingVertex );
@ -170,7 +170,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
final List<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
for( final DeBruijnEdge e : graph.edgeSet() ) {
if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
if( e.getMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
edgesToRemove.add(e);
}
}
@ -195,7 +195,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) {
boolean isRefNode = false;
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
if( e.getIsRef() ) {
if( e.isRef() ) {
isRefNode = true;
break;
}
@ -299,10 +299,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
if( edge.getMultiplicity() > PRUNE_FACTOR ) {
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
}
if( edge.getIsRef() ) {
if( edge.isRef() ) {
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
}
if( !edge.getIsRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
if( !edge.isRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
}
for( final DeBruijnVertex v : graph.vertexSet() ) {
final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
@ -338,7 +338,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
final Haplotype h = new Haplotype( path.getBases( graph ) );
final Haplotype h = new Haplotype( path.getBases() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
// for GGA mode, add the desired allele into the haplotype if it isn't already present

View File

@ -201,7 +201,7 @@ public class RecalDatum {
* Returns the error rate (in real space) of this interval, or 0 if there are no observations
* @return the empirical error rate ~= N errors / N obs
*/
@Ensures("result >= 0.0")
@Ensures({"result >= 0.0"})
public double getEmpiricalErrorRate() {
if ( numObservations == 0 )
return 0.0;

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
@ -499,6 +501,29 @@ public class AlignmentUtils {
return true;
}
/**
* Need a well-formed, consolidated Cigar string so that the left aligning code works properly.
* For example, 1M1M1M1D2M1M --> 3M1D3M
* If the given cigar is empty then the returned cigar will also be empty
* @param c the cigar to consolidate
* @return a cigar with consecutive matching operators merged into single operators.
*/
@Requires({"c != null"})
@Ensures({"result != null"})
public static Cigar consolidateCigar( final Cigar c ) {
if( c.isEmpty() ) { return c; }
final Cigar returnCigar = new Cigar();
int sumLength = 0;
for( int iii = 0; iii < c.numCigarElements(); iii++ ) {
sumLength += c.getCigarElement(iii).getLength();
if( iii == c.numCigarElements() - 1 || !c.getCigarElement(iii).getOperator().equals(c.getCigarElement(iii+1).getOperator())) { // at the end so finish the current element
returnCigar.add(new CigarElement(sumLength, c.getCigarElement(iii).getOperator()));
sumLength = 0;
}
}
return returnCigar;
}
/**
* Takes the alignment of the read sequence <code>readSeq</code> to the reference sequence <code>refSeq</code>
* starting at 0-based position <code>refIndex</code> on the <code>refSeq</code> and specified by its <code>cigar</code>.

View File

@ -26,12 +26,14 @@
package org.broadinstitute.sting.utils.sam;
import junit.framework.Assert;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.*;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
public class AlignmentUtilsUnitTest {
private SAMFileHeader header;
@ -121,4 +123,42 @@ public class AlignmentUtilsUnitTest {
start,
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
}
@Test
public void testConsolidateCigar() {
{
//1M1M1M1D2M1M --> 3M1D3M
List<CigarElement> list = new ArrayList<CigarElement>();
list.add( new CigarElement(1, CigarOperator.M));
list.add( new CigarElement(1, CigarOperator.M));
list.add( new CigarElement(1, CigarOperator.M));
list.add( new CigarElement(1, CigarOperator.D));
list.add( new CigarElement(2, CigarOperator.M));
list.add( new CigarElement(1, CigarOperator.M));
Cigar unconsolidatedCigar = new Cigar(list);
list.clear();
list.add( new CigarElement(3, CigarOperator.M));
list.add( new CigarElement(1, CigarOperator.D));
list.add( new CigarElement(3, CigarOperator.M));
Cigar consolidatedCigar = new Cigar(list);
Assert.assertEquals(consolidatedCigar.toString(), AlignmentUtils.consolidateCigar(unconsolidatedCigar).toString());
}
{
//6M6M6M --> 18M
List<CigarElement> list = new ArrayList<CigarElement>();
list.add( new CigarElement(6, CigarOperator.M));
list.add( new CigarElement(6, CigarOperator.M));
list.add( new CigarElement(6, CigarOperator.M));
Cigar unconsolidatedCigar = new Cigar(list);
list.clear();
list.add( new CigarElement(18, CigarOperator.M));
Cigar consolidatedCigar = new Cigar(list);
Assert.assertEquals(consolidatedCigar.toString(), AlignmentUtils.consolidateCigar(unconsolidatedCigar).toString());
}
}
}