Intermediate commit of new bubble assembly graph traversal algorithm for the HaplotypeCaller. Adding functionality for a path from an assembly graph to calculate its own cigar string from each of the bubbles instead of doing a massive Smith-Waterman alignment between the path's full base composition and the reference.
This commit is contained in:
parent
495bca3d1a
commit
ac033ce41a
|
|
@ -86,7 +86,7 @@ public class DeBruijnEdge {
|
||||||
multiplicity = value;
|
multiplicity = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean getIsRef() {
|
public boolean isRef() {
|
||||||
return isRef;
|
return isRef;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -46,8 +46,14 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
|
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
@ -55,7 +61,7 @@ import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: ebanks
|
* User: ebanks, rpoplin
|
||||||
* Date: Mar 23, 2011
|
* Date: Mar 23, 2011
|
||||||
*/
|
*/
|
||||||
// Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph.
|
// Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph.
|
||||||
|
|
@ -72,28 +78,47 @@ public class KBestPaths {
|
||||||
protected static class Path {
|
protected static class Path {
|
||||||
|
|
||||||
// the last vertex seen in the path
|
// the last vertex seen in the path
|
||||||
private DeBruijnVertex lastVertex;
|
private final DeBruijnVertex lastVertex;
|
||||||
|
|
||||||
// the list of edges comprising the path
|
// the list of edges comprising the path
|
||||||
private ArrayList<DeBruijnEdge> edges;
|
private final List<DeBruijnEdge> edges;
|
||||||
|
|
||||||
// the scores for the path
|
// the scores for the path
|
||||||
private int totalScore = 0, lowestEdge = -1;
|
private final int totalScore;
|
||||||
|
private final int lowestEdge;
|
||||||
|
|
||||||
public Path( final DeBruijnVertex initialVertex ) {
|
// the graph from which this path originated
|
||||||
|
private final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph;
|
||||||
|
|
||||||
|
// used in the bubble state machine to apply Smith-Waterman to the bubble sequence
|
||||||
|
private final double SW_MATCH = 15.0;
|
||||||
|
private final double SW_MISMATCH = -15.0;
|
||||||
|
private final double SW_GAP = -25.0;
|
||||||
|
private final double SW_GAP_EXTEND = -1.2;
|
||||||
|
|
||||||
|
public Path( final DeBruijnVertex initialVertex, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||||
lastVertex = initialVertex;
|
lastVertex = initialVertex;
|
||||||
edges = new ArrayList<DeBruijnEdge>(0);
|
edges = new ArrayList<DeBruijnEdge>(0);
|
||||||
|
totalScore = 0;
|
||||||
|
lowestEdge = -1;
|
||||||
|
this.graph = graph;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Path( final Path p, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
|
public Path( final Path p, final DeBruijnEdge edge ) {
|
||||||
lastVertex = graph.getEdgeTarget(edge);
|
graph = p.graph;
|
||||||
|
lastVertex = p.graph.getEdgeTarget(edge);
|
||||||
edges = new ArrayList<DeBruijnEdge>(p.edges);
|
edges = new ArrayList<DeBruijnEdge>(p.edges);
|
||||||
edges.add(edge);
|
edges.add(edge);
|
||||||
totalScore = p.totalScore + edge.getMultiplicity();
|
totalScore = p.totalScore + edge.getMultiplicity();
|
||||||
lowestEdge = ( p.lowestEdge == -1 ) ? edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity());
|
lowestEdge = ( p.lowestEdge == -1 ) ? edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity());
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean containsEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
|
/**
|
||||||
|
* Does this path contain the given edge
|
||||||
|
* @param edge the given edge to test
|
||||||
|
* @return true if the edge is found in this path
|
||||||
|
*/
|
||||||
|
public boolean containsEdge( final DeBruijnEdge edge ) {
|
||||||
final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge);
|
final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge);
|
||||||
for( final DeBruijnEdge e : edges ) {
|
for( final DeBruijnEdge e : edges ) {
|
||||||
if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) {
|
if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) {
|
||||||
|
|
@ -104,7 +129,7 @@ public class KBestPaths {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ArrayList<DeBruijnEdge> getEdges() { return edges; }
|
public List<DeBruijnEdge> getEdges() { return edges; }
|
||||||
|
|
||||||
public int getScore() { return totalScore; }
|
public int getScore() { return totalScore; }
|
||||||
|
|
||||||
|
|
@ -112,7 +137,12 @@ public class KBestPaths {
|
||||||
|
|
||||||
public DeBruijnVertex getLastVertexInPath() { return lastVertex; }
|
public DeBruijnVertex getLastVertexInPath() { return lastVertex; }
|
||||||
|
|
||||||
public byte[] getBases( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
/**
|
||||||
|
* The base sequence for this path. Pull the full sequence for the source of the path and then the suffix for all subsequent nodes
|
||||||
|
* @return non-null sequence of bases corresponding to this path
|
||||||
|
*/
|
||||||
|
@Ensures({"result != null"})
|
||||||
|
public byte[] getBases() {
|
||||||
if( edges.size() == 0 ) { return lastVertex.getSequence(); }
|
if( edges.size() == 0 ) { return lastVertex.getSequence(); }
|
||||||
|
|
||||||
byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence();
|
byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence();
|
||||||
|
|
@ -121,6 +151,142 @@ public class KBestPaths {
|
||||||
}
|
}
|
||||||
return bases;
|
return bases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the cigar string for this path using a bubble traversal of the assembly graph and running a Smith-Waterman alignment on each bubble
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
public Cigar calculateCigar() {
|
||||||
|
|
||||||
|
final Cigar cigar = new Cigar();
|
||||||
|
// special case for paths that start on reference but not at the reference source node
|
||||||
|
if( edges.get(0).isRef() && !isRefSource(graph, edges.get(0)) ) {
|
||||||
|
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, null, graph.getEdgeSource(edges.get(0))).getCigarElements() ) {
|
||||||
|
cigar.add(ce);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// reset the bubble state machine
|
||||||
|
final BubbleStateMachine bsm = new BubbleStateMachine(cigar);
|
||||||
|
|
||||||
|
for( final DeBruijnEdge e : edges ) {
|
||||||
|
if( e.equals(graph, edges.get(0)) ) {
|
||||||
|
advanceBubbleStateMachine( bsm, graph, graph.getEdgeSource(e), null );
|
||||||
|
}
|
||||||
|
advanceBubbleStateMachine( bsm, graph, graph.getEdgeTarget(e), e );
|
||||||
|
}
|
||||||
|
|
||||||
|
// special case for paths that don't end on reference
|
||||||
|
if( bsm.inBubble ) {
|
||||||
|
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, null).getCigarElements() ) {
|
||||||
|
bsm.cigar.add(ce);
|
||||||
|
}
|
||||||
|
} else if( edges.get(edges.size()-1).isRef() && !isRefSink(graph, edges.get(edges.size()-1)) ) { // special case for paths that end of the reference but haven't completed the entire reference circuit
|
||||||
|
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, graph.getEdgeTarget(edges.get(edges.size()-1)), null).getCigarElements() ) {
|
||||||
|
bsm.cigar.add(ce);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return AlignmentUtils.consolidateCigar(bsm.cigar);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void advanceBubbleStateMachine( final BubbleStateMachine bsm, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex node, final DeBruijnEdge e ) {
|
||||||
|
if( isReferenceNode( graph, node ) ) {
|
||||||
|
if( !bsm.inBubble ) { // just add the ref bases as M's in the Cigar string, and don't do anything else
|
||||||
|
if( e !=null && !e.isRef() ) {
|
||||||
|
if( referencePathExists( graph, graph.getEdgeSource(e), node) ) {
|
||||||
|
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, graph.getEdgeSource(e), node).getCigarElements() ) {
|
||||||
|
bsm.cigar.add(ce);
|
||||||
|
}
|
||||||
|
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
|
||||||
|
} else if ( graph.getEdgeSource(e).equals(graph.getEdgeTarget(e)) ) { // alt edge at ref node points to itself
|
||||||
|
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.I) );
|
||||||
|
} else {
|
||||||
|
bsm.inBubble = true;
|
||||||
|
bsm.bubbleBytes = null;
|
||||||
|
bsm.lastSeenReferenceNode = graph.getEdgeSource(e);
|
||||||
|
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
|
||||||
|
}
|
||||||
|
} else if( bsm.lastSeenReferenceNode != null && !referencePathExists( graph, bsm.lastSeenReferenceNode, node ) ) { // add bases to the bubble string until we get back to the reference path
|
||||||
|
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
|
||||||
|
} else { // close the bubble and use a local SW to determine the Cigar string
|
||||||
|
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, node).getCigarElements() ) {
|
||||||
|
bsm.cigar.add(ce);
|
||||||
|
}
|
||||||
|
bsm.inBubble = false;
|
||||||
|
bsm.bubbleBytes = null;
|
||||||
|
bsm.lastSeenReferenceNode = null;
|
||||||
|
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
|
||||||
|
}
|
||||||
|
} else { // non-ref vertex
|
||||||
|
if( bsm.inBubble ) { // just keep accumulating until we get back to the reference path
|
||||||
|
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
|
||||||
|
} else { // open up a bubble
|
||||||
|
bsm.inBubble = true;
|
||||||
|
bsm.bubbleBytes = null;
|
||||||
|
bsm.lastSeenReferenceNode = (e != null ? graph.getEdgeSource(e) : null );
|
||||||
|
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Cigar calculateCigarForCompleteBubble( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final byte[] bubbleBytes, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
|
||||||
|
final byte[] refBytes = getReferenceBytes(graph, fromVertex, toVertex);
|
||||||
|
final Cigar cigar = new Cigar();
|
||||||
|
|
||||||
|
// add padding to anchor ref/alt bases in the SW matrix
|
||||||
|
byte[] padding = "XXXXXX".getBytes();
|
||||||
|
boolean goodAlignment = false;
|
||||||
|
SWPairwiseAlignment swConsensus = null;
|
||||||
|
while( !goodAlignment && padding.length < 1000 ) {
|
||||||
|
padding = ArrayUtils.addAll(padding, padding); // double the size of the padding each time
|
||||||
|
final byte[] reference = ArrayUtils.addAll( ArrayUtils.addAll(padding, refBytes), padding );
|
||||||
|
final byte[] alternate = ArrayUtils.addAll( ArrayUtils.addAll(padding, bubbleBytes), padding );
|
||||||
|
swConsensus = new SWPairwiseAlignment( reference, alternate, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||||
|
if( swConsensus.getAlignmentStart2wrt1() == 0 && !swConsensus.getCigar().toString().contains("S") && swConsensus.getCigar().getReferenceLength() == reference.length ) {
|
||||||
|
goodAlignment = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( !goodAlignment && swConsensus != null ) {
|
||||||
|
throw new ReviewedStingException("SmithWaterman offset failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
|
||||||
|
}
|
||||||
|
|
||||||
|
if( swConsensus != null ) {
|
||||||
|
final Cigar swCigar = swConsensus.getCigar();
|
||||||
|
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
|
||||||
|
// now we need to remove the padding from the cigar string
|
||||||
|
int length = swCigar.getCigarElement(iii).getLength();
|
||||||
|
if( iii == 0 ) { length -= padding.length; }
|
||||||
|
if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; }
|
||||||
|
if( length > 0 ) {
|
||||||
|
cigar.add( new CigarElement(length, swCigar.getCigarElement(iii).getOperator()) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( (refBytes == null && cigar.getReferenceLength() != 0) || ( refBytes != null && cigar.getReferenceLength() != refBytes.length ) ) {
|
||||||
|
throw new ReviewedStingException("SmithWaterman cigar failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return cigar;
|
||||||
|
}
|
||||||
|
|
||||||
|
// class to keep track of the bubble state machine
|
||||||
|
protected static class BubbleStateMachine {
|
||||||
|
public boolean inBubble = false;
|
||||||
|
public byte[] bubbleBytes = null;
|
||||||
|
public DeBruijnVertex lastSeenReferenceNode = null;
|
||||||
|
public Cigar cigar = null;
|
||||||
|
|
||||||
|
public BubbleStateMachine( final Cigar initialCigar ) {
|
||||||
|
inBubble = false;
|
||||||
|
bubbleBytes = null;
|
||||||
|
lastSeenReferenceNode = null;
|
||||||
|
cigar = initialCigar;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static class PathComparatorTotalScore implements Comparator<Path>, Serializable {
|
protected static class PathComparatorTotalScore implements Comparator<Path>, Serializable {
|
||||||
|
|
@ -130,12 +296,12 @@ public class KBestPaths {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//protected static class PathComparatorLowestEdge implements Comparator<Path>, Serializable {
|
protected static class PathComparatorLowestEdge implements Comparator<Path>, Serializable {
|
||||||
// @Override
|
@Override
|
||||||
// public int compare(final Path path1, final Path path2) {
|
public int compare(final Path path1, final Path path2) {
|
||||||
// return path2.lowestEdge - path1.lowestEdge;
|
return path2.lowestEdge - path1.lowestEdge;
|
||||||
// }
|
}
|
||||||
//}
|
}
|
||||||
|
|
||||||
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
|
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
|
||||||
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
|
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
|
||||||
|
|
@ -144,7 +310,7 @@ public class KBestPaths {
|
||||||
// run a DFS for best paths
|
// run a DFS for best paths
|
||||||
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||||
if( graph.inDegreeOf(v) == 0 ) {
|
if( graph.inDegreeOf(v) == 0 ) {
|
||||||
findBestPaths(graph, new Path(v), bestPaths);
|
findBestPaths(new Path(v, graph), bestPaths);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -153,14 +319,14 @@ public class KBestPaths {
|
||||||
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
|
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths ) {
|
private static void findBestPaths( final Path path, final List<Path> bestPaths ) {
|
||||||
findBestPaths(graph, path, bestPaths, new MyInt());
|
findBestPaths(path, bestPaths, new MyInt());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths, MyInt n ) {
|
private static void findBestPaths( final Path path, final List<Path> bestPaths, final MyInt n ) {
|
||||||
|
|
||||||
// did we hit the end of a path?
|
// did we hit the end of a path?
|
||||||
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
|
if ( allOutgoingEdgesHaveBeenVisited(path) ) {
|
||||||
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
|
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
|
||||||
// clean out some low scoring paths
|
// clean out some low scoring paths
|
||||||
Collections.sort(bestPaths, new PathComparatorTotalScore() );
|
Collections.sort(bestPaths, new PathComparatorTotalScore() );
|
||||||
|
|
@ -172,27 +338,253 @@ public class KBestPaths {
|
||||||
} else {
|
} else {
|
||||||
// recursively run DFS
|
// recursively run DFS
|
||||||
final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>();
|
final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>();
|
||||||
edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex));
|
edgeArrayList.addAll(path.graph.outgoingEdgesOf(path.lastVertex));
|
||||||
Collections.sort(edgeArrayList, new DeBruijnEdge.EdgeWeightComparator());
|
Collections.sort(edgeArrayList, new DeBruijnEdge.EdgeWeightComparator());
|
||||||
Collections.reverse(edgeArrayList);
|
Collections.reverse(edgeArrayList);
|
||||||
for ( final DeBruijnEdge edge : edgeArrayList ) {
|
for ( final DeBruijnEdge edge : edgeArrayList ) {
|
||||||
// make sure the edge is not already in the path
|
// make sure the edge is not already in the path
|
||||||
if ( path.containsEdge(graph, edge) )
|
if ( path.containsEdge(edge) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final Path newPath = new Path(path, graph, edge);
|
final Path newPath = new Path(path, edge);
|
||||||
n.val++;
|
n.val++;
|
||||||
findBestPaths(graph, newPath, bestPaths, n);
|
findBestPaths(newPath, bestPaths, n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean allOutgoingEdgesHaveBeenVisited( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path ) {
|
private static boolean allOutgoingEdgesHaveBeenVisited( final Path path ) {
|
||||||
for( final DeBruijnEdge edge : graph.outgoingEdgesOf(path.lastVertex) ) {
|
for( final DeBruijnEdge edge : path.graph.outgoingEdgesOf(path.lastVertex) ) {
|
||||||
if( !path.containsEdge(graph, edge) ) {
|
if( !path.containsEdge(edge) ) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/****************************************************************
|
||||||
|
* Collection of graph functions used by KBestPaths *
|
||||||
|
***************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test if the vertex is on a reference path in the graph. If so it is referred to as a reference node
|
||||||
|
* @param graph the graph from which the vertex originated
|
||||||
|
* @param v the vertex to test
|
||||||
|
* @return true if the vertex is on the reference path
|
||||||
|
*/
|
||||||
|
public static boolean isReferenceNode( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||||
|
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
|
||||||
|
if( e.isRef() ) { return true; }
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this edge a source edge (the source vertex of the edge is a source node in the graph)
|
||||||
|
* @param graph the graph from which the edge originated
|
||||||
|
* @param e the edge to test
|
||||||
|
* @return true if the source vertex of the edge is a source node in the graph
|
||||||
|
*/
|
||||||
|
public static boolean isSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||||
|
return graph.inDegreeOf(graph.getEdgeSource(e)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this vertex a source vertex
|
||||||
|
* @param graph the graph from which the vertex originated
|
||||||
|
* @param v the vertex to test
|
||||||
|
* @return true if the vertex is a source vertex
|
||||||
|
*/
|
||||||
|
public static boolean isSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||||
|
return graph.inDegreeOf(v) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pull the added base sequence implied by visiting this node in a path
|
||||||
|
* @param graph the graph from which the vertex originated
|
||||||
|
* @param v the vertex whose sequence to grab
|
||||||
|
* @return non-null sequence of bases corresponding to this node in the graph
|
||||||
|
*/
|
||||||
|
@Ensures({"result != null"})
|
||||||
|
public static byte[] getAdditionalSequence( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||||
|
return ( isSource(graph, v) ? v.getSequence() : v.getSuffix() );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this edge both a reference edge and a source edge for the reference path
|
||||||
|
* @param graph the graph from which the edge originated
|
||||||
|
* @param e the edge to test
|
||||||
|
* @return true if the edge is both a reference edge and a reference path source edge
|
||||||
|
*/
|
||||||
|
public static boolean isRefSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||||
|
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
|
||||||
|
if( edgeToTest.isRef() ) { return false; }
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this vertex both a reference node and a source node for the reference path
|
||||||
|
* @param graph the graph from which the vertex originated
|
||||||
|
* @param v the vertex to test
|
||||||
|
* @return true if the vertex is both a reference node and a reference path source node
|
||||||
|
*/
|
||||||
|
public static boolean isRefSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||||
|
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
|
||||||
|
if( edgeToTest.isRef() ) { return false; }
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this edge both a reference edge and a sink edge for the reference path
|
||||||
|
* @param graph the graph from which the edge originated
|
||||||
|
* @param e the edge to test
|
||||||
|
* @return true if the edge is both a reference edge and a reference path sink edge
|
||||||
|
*/
|
||||||
|
public static boolean isRefSink( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||||
|
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
|
||||||
|
if( edgeToTest.isRef() ) { return false; }
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this vertex both a reference node and a sink node for the reference path
|
||||||
|
* @param graph the graph from which the node originated
|
||||||
|
* @param v the node to test
|
||||||
|
* @return true if the vertex is both a reference node and a reference path sink node
|
||||||
|
*/
|
||||||
|
public static boolean isRefSink( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||||
|
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
|
||||||
|
if( edgeToTest.isRef() ) { return false; }
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeBruijnEdge getReferenceSourceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||||
|
for( final DeBruijnEdge e : graph.edgeSet() ) {
|
||||||
|
if( e.isRef() && isRefSource(graph, e) ) {
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new ReviewedStingException("All reference graphs should have a source node");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeBruijnVertex getReferenceSourceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||||
|
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||||
|
if( isReferenceNode(graph, v) && isRefSource(graph, v) ) {
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeBruijnEdge getReferenceSinkEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||||
|
for( final DeBruijnEdge e : graph.edgeSet() ) {
|
||||||
|
if( e.isRef() && isRefSink(graph, e) ) {
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new ReviewedStingException("All reference graphs should have a sink node");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeBruijnVertex getReferenceSinkVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
|
||||||
|
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||||
|
if( isReferenceNode(graph, v) && isRefSink(graph, v) ) {
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new ReviewedStingException("All reference graphs should have a sink node");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeBruijnEdge getNextReferenceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||||
|
if( e == null ) { return null; }
|
||||||
|
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
|
||||||
|
if( edgeToTest.isRef() ) {
|
||||||
|
return edgeToTest;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeBruijnVertex getNextReferenceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||||
|
if( v == null ) { return null; }
|
||||||
|
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
|
||||||
|
if( edgeToTest.isRef() ) {
|
||||||
|
return graph.getEdgeTarget(edgeToTest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeBruijnEdge getPrevReferenceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
|
||||||
|
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
|
||||||
|
if( edgeToTest.isRef() ) {
|
||||||
|
return edgeToTest;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeBruijnVertex getPrevReferenceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
|
||||||
|
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
|
||||||
|
if( isReferenceNode(graph, graph.getEdgeSource(edgeToTest)) ) {
|
||||||
|
return graph.getEdgeSource(edgeToTest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean referencePathExists(final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge fromEdge, final DeBruijnEdge toEdge) {
|
||||||
|
DeBruijnEdge e = fromEdge;
|
||||||
|
if( e == null ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
while( !e.equals(graph, toEdge) ) {
|
||||||
|
e = getNextReferenceEdge(graph, e);
|
||||||
|
if( e == null ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean referencePathExists(final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex) {
|
||||||
|
DeBruijnVertex v = fromVertex;
|
||||||
|
if( v == null ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
v = getNextReferenceVertex(graph, v);
|
||||||
|
if( v == null ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
while( !v.equals(toVertex) ) {
|
||||||
|
v = getNextReferenceVertex(graph, v);
|
||||||
|
if( v == null ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// fromVertex (exclusive) -> toVertex (exclusive)
|
||||||
|
public static byte[] getReferenceBytes( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
|
||||||
|
byte[] bytes = null;
|
||||||
|
if( fromVertex != null && toVertex != null && !referencePathExists(graph, fromVertex, toVertex) ) {
|
||||||
|
throw new ReviewedStingException("Asked for a reference path which doesn't exist. " + fromVertex + " --> " + toVertex);
|
||||||
|
}
|
||||||
|
DeBruijnVertex v = fromVertex;
|
||||||
|
if( v == null ) {
|
||||||
|
v = getReferenceSourceVertex(graph);
|
||||||
|
bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) );
|
||||||
|
}
|
||||||
|
v = getNextReferenceVertex(graph, v);
|
||||||
|
while( (toVertex != null && !v.equals(toVertex)) || (toVertex == null && v != null) ) {
|
||||||
|
bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) );
|
||||||
|
// advance along the reference path
|
||||||
|
v = getNextReferenceVertex(graph, v);
|
||||||
|
}
|
||||||
|
return bytes;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -152,10 +152,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer );
|
final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer );
|
||||||
graph.addVertex(addedVertex);
|
graph.addVertex(addedVertex);
|
||||||
for( final DeBruijnEdge edge : outEdges ) {
|
for( final DeBruijnEdge edge : outEdges ) {
|
||||||
graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
|
graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.isRef(), edge.getMultiplicity()));
|
||||||
}
|
}
|
||||||
for( final DeBruijnEdge edge : inEdges ) {
|
for( final DeBruijnEdge edge : inEdges ) {
|
||||||
graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
|
graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.isRef(), edge.getMultiplicity()));
|
||||||
}
|
}
|
||||||
|
|
||||||
graph.removeVertex( incomingVertex );
|
graph.removeVertex( incomingVertex );
|
||||||
|
|
@ -170,7 +170,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
|
protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
|
||||||
final List<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
|
final List<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
|
||||||
for( final DeBruijnEdge e : graph.edgeSet() ) {
|
for( final DeBruijnEdge e : graph.edgeSet() ) {
|
||||||
if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
|
if( e.getMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
|
||||||
edgesToRemove.add(e);
|
edgesToRemove.add(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -195,7 +195,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) {
|
if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) {
|
||||||
boolean isRefNode = false;
|
boolean isRefNode = false;
|
||||||
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
|
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
|
||||||
if( e.getIsRef() ) {
|
if( e.isRef() ) {
|
||||||
isRefNode = true;
|
isRefNode = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -299,10 +299,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
if( edge.getMultiplicity() > PRUNE_FACTOR ) {
|
if( edge.getMultiplicity() > PRUNE_FACTOR ) {
|
||||||
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
|
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
|
||||||
}
|
}
|
||||||
if( edge.getIsRef() ) {
|
if( edge.isRef() ) {
|
||||||
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
|
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
|
||||||
}
|
}
|
||||||
if( !edge.getIsRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
|
if( !edge.isRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
|
||||||
}
|
}
|
||||||
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||||
final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
|
final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
|
||||||
|
|
@ -338,7 +338,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
|
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
|
||||||
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
|
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
|
||||||
|
|
||||||
final Haplotype h = new Haplotype( path.getBases( graph ) );
|
final Haplotype h = new Haplotype( path.getBases() );
|
||||||
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
|
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
|
||||||
|
|
||||||
// for GGA mode, add the desired allele into the haplotype if it isn't already present
|
// for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||||
|
|
|
||||||
|
|
@ -201,7 +201,7 @@ public class RecalDatum {
|
||||||
* Returns the error rate (in real space) of this interval, or 0 if there are no observations
|
* Returns the error rate (in real space) of this interval, or 0 if there are no observations
|
||||||
* @return the empirical error rate ~= N errors / N obs
|
* @return the empirical error rate ~= N errors / N obs
|
||||||
*/
|
*/
|
||||||
@Ensures("result >= 0.0")
|
@Ensures({"result >= 0.0"})
|
||||||
public double getEmpiricalErrorRate() {
|
public double getEmpiricalErrorRate() {
|
||||||
if ( numObservations == 0 )
|
if ( numObservations == 0 )
|
||||||
return 0.0;
|
return 0.0;
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
|
|
@ -499,6 +501,29 @@ public class AlignmentUtils {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Need a well-formed, consolidated Cigar string so that the left aligning code works properly.
|
||||||
|
* For example, 1M1M1M1D2M1M --> 3M1D3M
|
||||||
|
* If the given cigar is empty then the returned cigar will also be empty
|
||||||
|
* @param c the cigar to consolidate
|
||||||
|
* @return a cigar with consecutive matching operators merged into single operators.
|
||||||
|
*/
|
||||||
|
@Requires({"c != null"})
|
||||||
|
@Ensures({"result != null"})
|
||||||
|
public static Cigar consolidateCigar( final Cigar c ) {
|
||||||
|
if( c.isEmpty() ) { return c; }
|
||||||
|
final Cigar returnCigar = new Cigar();
|
||||||
|
int sumLength = 0;
|
||||||
|
for( int iii = 0; iii < c.numCigarElements(); iii++ ) {
|
||||||
|
sumLength += c.getCigarElement(iii).getLength();
|
||||||
|
if( iii == c.numCigarElements() - 1 || !c.getCigarElement(iii).getOperator().equals(c.getCigarElement(iii+1).getOperator())) { // at the end so finish the current element
|
||||||
|
returnCigar.add(new CigarElement(sumLength, c.getCigarElement(iii).getOperator()));
|
||||||
|
sumLength = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return returnCigar;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes the alignment of the read sequence <code>readSeq</code> to the reference sequence <code>refSeq</code>
|
* Takes the alignment of the read sequence <code>readSeq</code> to the reference sequence <code>refSeq</code>
|
||||||
* starting at 0-based position <code>refIndex</code> on the <code>refSeq</code> and specified by its <code>cigar</code>.
|
* starting at 0-based position <code>refIndex</code> on the <code>refSeq</code> and specified by its <code>cigar</code>.
|
||||||
|
|
|
||||||
|
|
@ -26,12 +26,14 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import junit.framework.Assert;
|
import junit.framework.Assert;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class AlignmentUtilsUnitTest {
|
public class AlignmentUtilsUnitTest {
|
||||||
private SAMFileHeader header;
|
private SAMFileHeader header;
|
||||||
|
|
||||||
|
|
@ -121,4 +123,42 @@ public class AlignmentUtilsUnitTest {
|
||||||
start,
|
start,
|
||||||
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
|
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testConsolidateCigar() {
|
||||||
|
{
|
||||||
|
//1M1M1M1D2M1M --> 3M1D3M
|
||||||
|
List<CigarElement> list = new ArrayList<CigarElement>();
|
||||||
|
list.add( new CigarElement(1, CigarOperator.M));
|
||||||
|
list.add( new CigarElement(1, CigarOperator.M));
|
||||||
|
list.add( new CigarElement(1, CigarOperator.M));
|
||||||
|
list.add( new CigarElement(1, CigarOperator.D));
|
||||||
|
list.add( new CigarElement(2, CigarOperator.M));
|
||||||
|
list.add( new CigarElement(1, CigarOperator.M));
|
||||||
|
Cigar unconsolidatedCigar = new Cigar(list);
|
||||||
|
|
||||||
|
list.clear();
|
||||||
|
list.add( new CigarElement(3, CigarOperator.M));
|
||||||
|
list.add( new CigarElement(1, CigarOperator.D));
|
||||||
|
list.add( new CigarElement(3, CigarOperator.M));
|
||||||
|
Cigar consolidatedCigar = new Cigar(list);
|
||||||
|
|
||||||
|
Assert.assertEquals(consolidatedCigar.toString(), AlignmentUtils.consolidateCigar(unconsolidatedCigar).toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
//6M6M6M --> 18M
|
||||||
|
List<CigarElement> list = new ArrayList<CigarElement>();
|
||||||
|
list.add( new CigarElement(6, CigarOperator.M));
|
||||||
|
list.add( new CigarElement(6, CigarOperator.M));
|
||||||
|
list.add( new CigarElement(6, CigarOperator.M));
|
||||||
|
Cigar unconsolidatedCigar = new Cigar(list);
|
||||||
|
|
||||||
|
list.clear();
|
||||||
|
list.add( new CigarElement(18, CigarOperator.M));
|
||||||
|
Cigar consolidatedCigar = new Cigar(list);
|
||||||
|
|
||||||
|
Assert.assertEquals(consolidatedCigar.toString(), AlignmentUtils.consolidateCigar(unconsolidatedCigar).toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue