diff --git a/build.xml b/build.xml
index 1e88bb400..e92e41c10 100644
--- a/build.xml
+++ b/build.xml
@@ -708,9 +708,6 @@
-
-
-
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
index 95b31b732..ddca5e0b8 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
@@ -107,7 +107,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
continue;
- int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 );
+ int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 );
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
if (readPos > numAlignedBases / 2)
readPos = numAlignedBases - (readPos + 1);
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java
index 74a95db37..881fe4204 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java
@@ -86,7 +86,7 @@ public class DeBruijnEdge {
multiplicity = value;
}
- public boolean getIsRef() {
+ public boolean isRef() {
return isRef;
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java
index 7d79edf93..edfe8254d 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java
@@ -46,8 +46,14 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+import com.google.java.contract.Ensures;
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.CigarOperator;
import org.apache.commons.lang.ArrayUtils;
+import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.Serializable;
@@ -55,7 +61,7 @@ import java.util.*;
/**
* Created by IntelliJ IDEA.
- * User: ebanks
+ * User: ebanks, rpoplin
* Date: Mar 23, 2011
*/
// Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph.
@@ -72,28 +78,47 @@ public class KBestPaths {
protected static class Path {
// the last vertex seen in the path
- private DeBruijnVertex lastVertex;
+ private final DeBruijnVertex lastVertex;
// the list of edges comprising the path
- private ArrayList edges;
+ private final List edges;
// the scores for the path
- private int totalScore = 0, lowestEdge = -1;
+ private final int totalScore;
+ private final int lowestEdge;
- public Path( final DeBruijnVertex initialVertex ) {
+ // the graph from which this path originated
+ private final DefaultDirectedGraph graph;
+
+ // used in the bubble state machine to apply Smith-Waterman to the bubble sequence
+ private final double SW_MATCH = 15.0;
+ private final double SW_MISMATCH = -15.0;
+ private final double SW_GAP = -25.0;
+ private final double SW_GAP_EXTEND = -1.2;
+
+ public Path( final DeBruijnVertex initialVertex, final DefaultDirectedGraph graph ) {
lastVertex = initialVertex;
edges = new ArrayList(0);
+ totalScore = 0;
+ lowestEdge = -1;
+ this.graph = graph;
}
- public Path( final Path p, final DefaultDirectedGraph graph, final DeBruijnEdge edge ) {
- lastVertex = graph.getEdgeTarget(edge);
+ public Path( final Path p, final DeBruijnEdge edge ) {
+ graph = p.graph;
+ lastVertex = p.graph.getEdgeTarget(edge);
edges = new ArrayList(p.edges);
edges.add(edge);
totalScore = p.totalScore + edge.getMultiplicity();
lowestEdge = ( p.lowestEdge == -1 ) ? edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity());
}
- public boolean containsEdge( final DefaultDirectedGraph graph, final DeBruijnEdge edge ) {
+ /**
+ * Does this path contain the given edge
+ * @param edge the given edge to test
+ * @return true if the edge is found in this path
+ */
+ public boolean containsEdge( final DeBruijnEdge edge ) {
final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge);
for( final DeBruijnEdge e : edges ) {
if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) {
@@ -104,7 +129,7 @@ public class KBestPaths {
return false;
}
- public ArrayList getEdges() { return edges; }
+ public List getEdges() { return edges; }
public int getScore() { return totalScore; }
@@ -112,7 +137,12 @@ public class KBestPaths {
public DeBruijnVertex getLastVertexInPath() { return lastVertex; }
- public byte[] getBases( final DefaultDirectedGraph graph ) {
+ /**
+ * The base sequence for this path. Pull the full sequence for the source of the path and then the suffix for all subsequent nodes
+ * @return non-null sequence of bases corresponding to this path
+ */
+ @Ensures({"result != null"})
+ public byte[] getBases() {
if( edges.size() == 0 ) { return lastVertex.getSequence(); }
byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence();
@@ -121,6 +151,142 @@ public class KBestPaths {
}
return bases;
}
+
+ /**
+ * Calculate the cigar string for this path using a bubble traversal of the assembly graph and running a Smith-Waterman alignment on each bubble
+ */
+ @Ensures("result != null")
+ public Cigar calculateCigar() {
+
+ final Cigar cigar = new Cigar();
+ // special case for paths that start on reference but not at the reference source node
+ if( edges.get(0).isRef() && !isRefSource(graph, edges.get(0)) ) {
+ for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, null, graph.getEdgeSource(edges.get(0))).getCigarElements() ) {
+ cigar.add(ce);
+ }
+ }
+
+ // reset the bubble state machine
+ final BubbleStateMachine bsm = new BubbleStateMachine(cigar);
+
+ for( final DeBruijnEdge e : edges ) {
+ if( e.equals(graph, edges.get(0)) ) {
+ advanceBubbleStateMachine( bsm, graph, graph.getEdgeSource(e), null );
+ }
+ advanceBubbleStateMachine( bsm, graph, graph.getEdgeTarget(e), e );
+ }
+
+ // special case for paths that don't end on reference
+ if( bsm.inBubble ) {
+ for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, null).getCigarElements() ) {
+ bsm.cigar.add(ce);
+ }
+ } else if( edges.get(edges.size()-1).isRef() && !isRefSink(graph, edges.get(edges.size()-1)) ) { // special case for paths that end of the reference but haven't completed the entire reference circuit
+ for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, graph.getEdgeTarget(edges.get(edges.size()-1)), null).getCigarElements() ) {
+ bsm.cigar.add(ce);
+ }
+ }
+
+ return AlignmentUtils.consolidateCigar(bsm.cigar);
+ }
+
+ private void advanceBubbleStateMachine( final BubbleStateMachine bsm, final DefaultDirectedGraph graph, final DeBruijnVertex node, final DeBruijnEdge e ) {
+ if( isReferenceNode( graph, node ) ) {
+ if( !bsm.inBubble ) { // just add the ref bases as M's in the Cigar string, and don't do anything else
+ if( e !=null && !e.isRef() ) {
+ if( referencePathExists( graph, graph.getEdgeSource(e), node) ) {
+ for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, graph.getEdgeSource(e), node).getCigarElements() ) {
+ bsm.cigar.add(ce);
+ }
+ bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
+ } else if ( graph.getEdgeSource(e).equals(graph.getEdgeTarget(e)) ) { // alt edge at ref node points to itself
+ bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.I) );
+ } else {
+ bsm.inBubble = true;
+ bsm.bubbleBytes = null;
+ bsm.lastSeenReferenceNode = graph.getEdgeSource(e);
+ bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
+ }
+ } else {
+ bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
+ }
+ } else if( bsm.lastSeenReferenceNode != null && !referencePathExists( graph, bsm.lastSeenReferenceNode, node ) ) { // add bases to the bubble string until we get back to the reference path
+ bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
+ } else { // close the bubble and use a local SW to determine the Cigar string
+ for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, node).getCigarElements() ) {
+ bsm.cigar.add(ce);
+ }
+ bsm.inBubble = false;
+ bsm.bubbleBytes = null;
+ bsm.lastSeenReferenceNode = null;
+ bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
+ }
+ } else { // non-ref vertex
+ if( bsm.inBubble ) { // just keep accumulating until we get back to the reference path
+ bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
+ } else { // open up a bubble
+ bsm.inBubble = true;
+ bsm.bubbleBytes = null;
+ bsm.lastSeenReferenceNode = (e != null ? graph.getEdgeSource(e) : null );
+ bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
+ }
+ }
+ }
+
+ private Cigar calculateCigarForCompleteBubble( final DefaultDirectedGraph graph, final byte[] bubbleBytes, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
+ final byte[] refBytes = getReferenceBytes(graph, fromVertex, toVertex);
+ final Cigar cigar = new Cigar();
+
+ // add padding to anchor ref/alt bases in the SW matrix
+ byte[] padding = "XXXXXX".getBytes();
+ boolean goodAlignment = false;
+ SWPairwiseAlignment swConsensus = null;
+ while( !goodAlignment && padding.length < 1000 ) {
+ padding = ArrayUtils.addAll(padding, padding); // double the size of the padding each time
+ final byte[] reference = ArrayUtils.addAll( ArrayUtils.addAll(padding, refBytes), padding );
+ final byte[] alternate = ArrayUtils.addAll( ArrayUtils.addAll(padding, bubbleBytes), padding );
+ swConsensus = new SWPairwiseAlignment( reference, alternate, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
+ if( swConsensus.getAlignmentStart2wrt1() == 0 && !swConsensus.getCigar().toString().contains("S") && swConsensus.getCigar().getReferenceLength() == reference.length ) {
+ goodAlignment = true;
+ }
+ }
+ if( !goodAlignment && swConsensus != null ) {
+ throw new ReviewedStingException("SmithWaterman offset failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
+ }
+
+ if( swConsensus != null ) {
+ final Cigar swCigar = swConsensus.getCigar();
+ for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
+ // now we need to remove the padding from the cigar string
+ int length = swCigar.getCigarElement(iii).getLength();
+ if( iii == 0 ) { length -= padding.length; }
+ if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; }
+ if( length > 0 ) {
+ cigar.add( new CigarElement(length, swCigar.getCigarElement(iii).getOperator()) );
+ }
+ }
+ if( (refBytes == null && cigar.getReferenceLength() != 0) || ( refBytes != null && cigar.getReferenceLength() != refBytes.length ) ) {
+ throw new ReviewedStingException("SmithWaterman cigar failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
+ }
+ }
+
+ return cigar;
+ }
+
+ // class to keep track of the bubble state machine
+ protected static class BubbleStateMachine {
+ public boolean inBubble = false;
+ public byte[] bubbleBytes = null;
+ public DeBruijnVertex lastSeenReferenceNode = null;
+ public Cigar cigar = null;
+
+ public BubbleStateMachine( final Cigar initialCigar ) {
+ inBubble = false;
+ bubbleBytes = null;
+ lastSeenReferenceNode = null;
+ cigar = initialCigar;
+ }
+ }
}
protected static class PathComparatorTotalScore implements Comparator, Serializable {
@@ -130,12 +296,12 @@ public class KBestPaths {
}
}
- //protected static class PathComparatorLowestEdge implements Comparator, Serializable {
- // @Override
- // public int compare(final Path path1, final Path path2) {
- // return path2.lowestEdge - path1.lowestEdge;
- // }
- //}
+ protected static class PathComparatorLowestEdge implements Comparator, Serializable {
+ @Override
+ public int compare(final Path path1, final Path path2) {
+ return path2.lowestEdge - path1.lowestEdge;
+ }
+ }
public static List getKBestPaths( final DefaultDirectedGraph graph, final int k ) {
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
@@ -144,7 +310,7 @@ public class KBestPaths {
// run a DFS for best paths
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( graph.inDegreeOf(v) == 0 ) {
- findBestPaths(graph, new Path(v), bestPaths);
+ findBestPaths(new Path(v, graph), bestPaths);
}
}
@@ -153,14 +319,14 @@ public class KBestPaths {
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
}
- private static void findBestPaths( final DefaultDirectedGraph graph, final Path path, final List bestPaths ) {
- findBestPaths(graph, path, bestPaths, new MyInt());
+ private static void findBestPaths( final Path path, final List bestPaths ) {
+ findBestPaths(path, bestPaths, new MyInt());
}
- private static void findBestPaths( final DefaultDirectedGraph graph, final Path path, final List bestPaths, MyInt n ) {
+ private static void findBestPaths( final Path path, final List bestPaths, final MyInt n ) {
// did we hit the end of a path?
- if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
+ if ( allOutgoingEdgesHaveBeenVisited(path) ) {
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
// clean out some low scoring paths
Collections.sort(bestPaths, new PathComparatorTotalScore() );
@@ -172,27 +338,253 @@ public class KBestPaths {
} else {
// recursively run DFS
final ArrayList edgeArrayList = new ArrayList();
- edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex));
+ edgeArrayList.addAll(path.graph.outgoingEdgesOf(path.lastVertex));
Collections.sort(edgeArrayList, new DeBruijnEdge.EdgeWeightComparator());
Collections.reverse(edgeArrayList);
for ( final DeBruijnEdge edge : edgeArrayList ) {
// make sure the edge is not already in the path
- if ( path.containsEdge(graph, edge) )
+ if ( path.containsEdge(edge) )
continue;
- final Path newPath = new Path(path, graph, edge);
+ final Path newPath = new Path(path, edge);
n.val++;
- findBestPaths(graph, newPath, bestPaths, n);
+ findBestPaths(newPath, bestPaths, n);
}
}
}
- private static boolean allOutgoingEdgesHaveBeenVisited( final DefaultDirectedGraph graph, final Path path ) {
- for( final DeBruijnEdge edge : graph.outgoingEdgesOf(path.lastVertex) ) {
- if( !path.containsEdge(graph, edge) ) {
+ private static boolean allOutgoingEdgesHaveBeenVisited( final Path path ) {
+ for( final DeBruijnEdge edge : path.graph.outgoingEdgesOf(path.lastVertex) ) {
+ if( !path.containsEdge(edge) ) {
return false;
}
}
return true;
}
+
+ /****************************************************************
+ * Collection of graph functions used by KBestPaths *
+ ***************************************************************/
+
+ /**
+ * Test if the vertex is on a reference path in the graph. If so it is referred to as a reference node
+ * @param graph the graph from which the vertex originated
+ * @param v the vertex to test
+ * @return true if the vertex is on the reference path
+ */
+ public static boolean isReferenceNode( final DefaultDirectedGraph graph, final DeBruijnVertex v ) {
+ for( final DeBruijnEdge e : graph.edgesOf(v) ) {
+ if( e.isRef() ) { return true; }
+ }
+ return false;
+ }
+
+ /**
+ * Is this edge a source edge (the source vertex of the edge is a source node in the graph)
+ * @param graph the graph from which the edge originated
+ * @param e the edge to test
+ * @return true if the source vertex of the edge is a source node in the graph
+ */
+ public static boolean isSource( final DefaultDirectedGraph graph, final DeBruijnEdge e ) {
+ return graph.inDegreeOf(graph.getEdgeSource(e)) == 0;
+ }
+
+ /**
+ * Is this vertex a source vertex
+ * @param graph the graph from which the vertex originated
+ * @param v the vertex to test
+ * @return true if the vertex is a source vertex
+ */
+ public static boolean isSource( final DefaultDirectedGraph graph, final DeBruijnVertex v ) {
+ return graph.inDegreeOf(v) == 0;
+ }
+
+ /**
+ * Pull the added base sequence implied by visiting this node in a path
+ * @param graph the graph from which the vertex originated
+ * @param v the vertex whose sequence to grab
+ * @return non-null sequence of bases corresponding to this node in the graph
+ */
+ @Ensures({"result != null"})
+ public static byte[] getAdditionalSequence( final DefaultDirectedGraph graph, final DeBruijnVertex v ) {
+ return ( isSource(graph, v) ? v.getSequence() : v.getSuffix() );
+ }
+
+ /**
+ * Is this edge both a reference edge and a source edge for the reference path
+ * @param graph the graph from which the edge originated
+ * @param e the edge to test
+ * @return true if the edge is both a reference edge and a reference path source edge
+ */
+ public static boolean isRefSource( final DefaultDirectedGraph graph, final DeBruijnEdge e ) {
+ for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
+ if( edgeToTest.isRef() ) { return false; }
+ }
+ return true;
+ }
+
+ /**
+ * Is this vertex both a reference node and a source node for the reference path
+ * @param graph the graph from which the vertex originated
+ * @param v the vertex to test
+ * @return true if the vertex is both a reference node and a reference path source node
+ */
+ public static boolean isRefSource( final DefaultDirectedGraph graph, final DeBruijnVertex v ) {
+ for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
+ if( edgeToTest.isRef() ) { return false; }
+ }
+ return true;
+ }
+
+ /**
+ * Is this edge both a reference edge and a sink edge for the reference path
+ * @param graph the graph from which the edge originated
+ * @param e the edge to test
+ * @return true if the edge is both a reference edge and a reference path sink edge
+ */
+ public static boolean isRefSink( final DefaultDirectedGraph graph, final DeBruijnEdge e ) {
+ for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
+ if( edgeToTest.isRef() ) { return false; }
+ }
+ return true;
+ }
+
+ /**
+ * Is this vertex both a reference node and a sink node for the reference path
+ * @param graph the graph from which the node originated
+ * @param v the node to test
+ * @return true if the vertex is both a reference node and a reference path sink node
+ */
+ public static boolean isRefSink( final DefaultDirectedGraph graph, final DeBruijnVertex v ) {
+ for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
+ if( edgeToTest.isRef() ) { return false; }
+ }
+ return true;
+ }
+
+ public static DeBruijnEdge getReferenceSourceEdge( final DefaultDirectedGraph graph ) {
+ for( final DeBruijnEdge e : graph.edgeSet() ) {
+ if( e.isRef() && isRefSource(graph, e) ) {
+ return e;
+ }
+ }
+ throw new ReviewedStingException("All reference graphs should have a source node");
+ }
+
+ public static DeBruijnVertex getReferenceSourceVertex( final DefaultDirectedGraph graph ) {
+ for( final DeBruijnVertex v : graph.vertexSet() ) {
+ if( isReferenceNode(graph, v) && isRefSource(graph, v) ) {
+ return v;
+ }
+ }
+ return null;
+ }
+
+ public static DeBruijnEdge getReferenceSinkEdge( final DefaultDirectedGraph graph ) {
+ for( final DeBruijnEdge e : graph.edgeSet() ) {
+ if( e.isRef() && isRefSink(graph, e) ) {
+ return e;
+ }
+ }
+ throw new ReviewedStingException("All reference graphs should have a sink node");
+ }
+
+ public static DeBruijnVertex getReferenceSinkVertex( final DefaultDirectedGraph graph ) {
+ for( final DeBruijnVertex v : graph.vertexSet() ) {
+ if( isReferenceNode(graph, v) && isRefSink(graph, v) ) {
+ return v;
+ }
+ }
+ throw new ReviewedStingException("All reference graphs should have a sink node");
+ }
+
+ public static DeBruijnEdge getNextReferenceEdge( final DefaultDirectedGraph graph, final DeBruijnEdge e ) {
+ if( e == null ) { return null; }
+ for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
+ if( edgeToTest.isRef() ) {
+ return edgeToTest;
+ }
+ }
+ return null;
+ }
+
+ public static DeBruijnVertex getNextReferenceVertex( final DefaultDirectedGraph graph, final DeBruijnVertex v ) {
+ if( v == null ) { return null; }
+ for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
+ if( edgeToTest.isRef() ) {
+ return graph.getEdgeTarget(edgeToTest);
+ }
+ }
+ return null;
+ }
+
+ public static DeBruijnEdge getPrevReferenceEdge( final DefaultDirectedGraph graph, final DeBruijnEdge e ) {
+ for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
+ if( edgeToTest.isRef() ) {
+ return edgeToTest;
+ }
+ }
+ return null;
+ }
+
+ public static DeBruijnVertex getPrevReferenceVertex( final DefaultDirectedGraph graph, final DeBruijnVertex v ) {
+ for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
+ if( isReferenceNode(graph, graph.getEdgeSource(edgeToTest)) ) {
+ return graph.getEdgeSource(edgeToTest);
+ }
+ }
+ return null;
+ }
+
+ public static boolean referencePathExists(final DefaultDirectedGraph graph, final DeBruijnEdge fromEdge, final DeBruijnEdge toEdge) {
+ DeBruijnEdge e = fromEdge;
+ if( e == null ) {
+ return false;
+ }
+ while( !e.equals(graph, toEdge) ) {
+ e = getNextReferenceEdge(graph, e);
+ if( e == null ) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public static boolean referencePathExists(final DefaultDirectedGraph graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex) {
+ DeBruijnVertex v = fromVertex;
+ if( v == null ) {
+ return false;
+ }
+ v = getNextReferenceVertex(graph, v);
+ if( v == null ) {
+ return false;
+ }
+ while( !v.equals(toVertex) ) {
+ v = getNextReferenceVertex(graph, v);
+ if( v == null ) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // fromVertex (exclusive) -> toVertex (exclusive)
+ public static byte[] getReferenceBytes( final DefaultDirectedGraph graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
+ byte[] bytes = null;
+ if( fromVertex != null && toVertex != null && !referencePathExists(graph, fromVertex, toVertex) ) {
+ throw new ReviewedStingException("Asked for a reference path which doesn't exist. " + fromVertex + " --> " + toVertex);
+ }
+ DeBruijnVertex v = fromVertex;
+ if( v == null ) {
+ v = getReferenceSourceVertex(graph);
+ bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) );
+ }
+ v = getNextReferenceVertex(graph, v);
+ while( (toVertex != null && !v.equals(toVertex)) || (toVertex == null && v != null) ) {
+ bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) );
+ // advance along the reference path
+ v = getNextReferenceVertex(graph, v);
+ }
+ return bytes;
+ }
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
index a9768557d..a007bfa0c 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
@@ -152,10 +152,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer );
graph.addVertex(addedVertex);
for( final DeBruijnEdge edge : outEdges ) {
- graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
+ graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.isRef(), edge.getMultiplicity()));
}
for( final DeBruijnEdge edge : inEdges ) {
- graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
+ graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.isRef(), edge.getMultiplicity()));
}
graph.removeVertex( incomingVertex );
@@ -170,7 +170,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
protected static void pruneGraph( final DefaultDirectedGraph graph, final int pruneFactor ) {
final List edgesToRemove = new ArrayList();
for( final DeBruijnEdge e : graph.edgeSet() ) {
- if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
+ if( e.getMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
edgesToRemove.add(e);
}
}
@@ -195,7 +195,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) {
boolean isRefNode = false;
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
- if( e.getIsRef() ) {
+ if( e.isRef() ) {
isRefNode = true;
break;
}
@@ -299,10 +299,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
if( edge.getMultiplicity() > PRUNE_FACTOR ) {
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
}
- if( edge.getIsRef() ) {
+ if( edge.isRef() ) {
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
}
- if( !edge.getIsRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
+ if( !edge.isRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
}
for( final DeBruijnVertex v : graph.vertexSet() ) {
final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
@@ -338,7 +338,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final DefaultDirectedGraph graph : graphs ) {
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
- final Haplotype h = new Haplotype( path.getBases( graph ) );
+ final Haplotype h = new Haplotype( path.getBases() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
// for GGA mode, add the desired allele into the haplotype if it isn't already present
diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
index bd0063a7d..be537f294 100644
--- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
@@ -201,7 +201,7 @@ public class RecalDatum {
* Returns the error rate (in real space) of this interval, or 0 if there are no observations
* @return the empirical error rate ~= N errors / N obs
*/
- @Ensures("result >= 0.0")
+ @Ensures({"result >= 0.0"})
public double getEmpiricalErrorRate() {
if ( numObservations == 0 )
return 0.0;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java
index f82c0a8ba..8b09e91ae 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java
@@ -93,7 +93,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
haplotypeAlleles.add( Allele.create("AACA", false) );
haplotypeAlleles.add( Allele.create("CATA", false) );
haplotypeAlleles.add( Allele.create("CACA", false) );
- final ArrayList haplotypes = new ArrayList();
+ final List haplotypes = new ArrayList();
haplotypes.add(new Haplotype("AATA".getBytes()));
haplotypes.add(new Haplotype("AACA".getBytes()));
haplotypes.add(new Haplotype("CATA".getBytes()));
@@ -101,11 +101,11 @@ public class GenotypingEngineUnitTest extends BaseTest {
final List haplotypeAllelesForSample = new ArrayList();
haplotypeAllelesForSample.add( Allele.create("CATA", false) );
haplotypeAllelesForSample.add( Allele.create("CACA", false) );
- final ArrayList> alleleMapper = new ArrayList>();
- ArrayList Aallele = new ArrayList();
+ final List> alleleMapper = new ArrayList>();
+ List Aallele = new ArrayList();
Aallele.add(haplotypes.get(0));
Aallele.add(haplotypes.get(1));
- ArrayList Callele = new ArrayList();
+ List Callele = new ArrayList();
Callele.add(haplotypes.get(2));
Callele.add(haplotypes.get(3));
alleleMapper.add(Aallele);
@@ -135,7 +135,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
haplotypeAlleles.add( Allele.create("TACA", false) );
haplotypeAlleles.add( Allele.create("TTCA", false) );
haplotypeAlleles.add( Allele.create("TTTA", false) );
- final ArrayList haplotypes = new ArrayList();
+ final List haplotypes = new ArrayList();
haplotypes.add(new Haplotype("AATA".getBytes()));
haplotypes.add(new Haplotype("AACA".getBytes()));
haplotypes.add(new Haplotype("CATA".getBytes()));
@@ -146,14 +146,14 @@ public class GenotypingEngineUnitTest extends BaseTest {
final List haplotypeAllelesForSample = new ArrayList();
haplotypeAllelesForSample.add( Allele.create("TTTA", false) );
haplotypeAllelesForSample.add( Allele.create("AATA", true) );
- final ArrayList> alleleMapper = new ArrayList>();
- ArrayList Aallele = new ArrayList();
+ final List> alleleMapper = new ArrayList>();
+ List Aallele = new ArrayList();
Aallele.add(haplotypes.get(0));
Aallele.add(haplotypes.get(1));
- ArrayList Callele = new ArrayList();
+ List Callele = new ArrayList();
Callele.add(haplotypes.get(2));
Callele.add(haplotypes.get(3));
- ArrayList Tallele = new ArrayList();
+ List Tallele = new ArrayList();
Tallele.add(haplotypes.get(4));
Tallele.add(haplotypes.get(5));
Tallele.add(haplotypes.get(6));
@@ -187,16 +187,16 @@ public class GenotypingEngineUnitTest extends BaseTest {
private class BasicGenotypingTestProvider extends TestDataProvider {
byte[] ref;
byte[] hap;
- HashMap expected;
+ Map expected;
- public BasicGenotypingTestProvider(String refString, String hapString, HashMap expected) {
+ public BasicGenotypingTestProvider(String refString, String hapString, Map expected) {
super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString));
ref = refString.getBytes();
hap = hapString.getBytes();
this.expected = expected;
}
- public HashMap calcAlignment() {
+ public Map calcAlignment() {
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap);
return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name");
}
@@ -206,14 +206,14 @@ public class GenotypingEngineUnitTest extends BaseTest {
public Object[][] makeBasicGenotypingTests() {
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
- HashMap map = new HashMap();
+ Map map = new HashMap();
map.put(1 + contextSize, (byte)'M');
final String context = Utils.dupString('G', contextSize);
new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACTAGCCGATAG", map);
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
- HashMap map = new HashMap();
+ Map map = new HashMap();
map.put(2 + contextSize, (byte)'M');
map.put(21 + contextSize, (byte)'M');
final String context = Utils.dupString('G', contextSize);
@@ -221,7 +221,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
- HashMap map = new HashMap();
+ Map map = new HashMap();
map.put(1 + contextSize, (byte)'M');
map.put(20 + contextSize, (byte)'I');
final String context = Utils.dupString('G', contextSize);
@@ -229,7 +229,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
- HashMap map = new HashMap();
+ Map map = new HashMap();
map.put(1 + contextSize, (byte)'M');
map.put(20 + contextSize, (byte)'D');
final String context = Utils.dupString('G', contextSize);
@@ -237,7 +237,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{1,5,9,24,36} ) {
- HashMap map = new HashMap();
+ Map map = new HashMap();
map.put(1, (byte)'M');
map.put(20, (byte)'D');
final String context = Utils.dupString('G', contextSize);
@@ -245,7 +245,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
- HashMap map = new HashMap();
+ Map map = new HashMap();
map.put(2 + contextSize, (byte)'M');
map.put(20 + contextSize, (byte)'I');
map.put(30 + contextSize, (byte)'D');
@@ -254,7 +254,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
- HashMap map = new HashMap();
+ Map map = new HashMap();
map.put(1 + contextSize, (byte)'M');
map.put(20 + contextSize, (byte)'D');
map.put(28 + contextSize, (byte)'M');
@@ -267,8 +267,8 @@ public class GenotypingEngineUnitTest extends BaseTest {
@Test(dataProvider = "BasicGenotypingTestProvider", enabled = true)
public void testHaplotypeToVCF(BasicGenotypingTestProvider cfg) {
- HashMap calculatedMap = cfg.calcAlignment();
- HashMap expectedMap = cfg.expected;
+ Map calculatedMap = cfg.calcAlignment();
+ Map expectedMap = cfg.expected;
logger.warn(String.format("Test: %s", cfg.toString()));
if(!compareVCMaps(calculatedMap, expectedMap)) {
logger.warn("calc map = " + calculatedMap);
@@ -420,9 +420,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
/**
- * Private function to compare HashMap of VCs, it only checks the types and start locations of the VariantContext
+ * Private function to compare Map of VCs, it only checks the types and start locations of the VariantContext
*/
- private boolean compareVCMaps(HashMap calc, HashMap expected) {
+ private boolean compareVCMaps(Map calc, Map expected) {
if( !calc.keySet().equals(expected.keySet()) ) { return false; } // sanity check
for( Integer loc : expected.keySet() ) {
Byte type = expected.get(loc);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java
index 428b7355c..72b778ec9 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java
@@ -25,7 +25,7 @@
package org.broadinstitute.sting.gatk.datasources.reads;
-import org.broad.tribble.util.BlockCompressedStreamConstants;
+import net.sf.samtools.util.BlockCompressedStreamConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.FileInputStream;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
index 9cf0a9493..1cfb527cd 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
@@ -25,8 +25,8 @@
package org.broadinstitute.sting.gatk.datasources.reads;
-import org.broad.tribble.util.SeekableBufferedStream;
-import org.broad.tribble.util.SeekableFileStream;
+import net.sf.samtools.seekablestream.SeekableBufferedStream;
+import net.sf.samtools.seekablestream.SeekableFileStream;
import net.sf.samtools.*;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
index 8ef24d373..12e87f613 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
@@ -350,7 +350,7 @@ public class GATKRunReport {
* @return a non-null AWS access key for the GATK user
*/
protected static String getAWSAccessKey() {
- return getAWSKey("GATK_AWS_access.key");
+ return getAWSKey("resources/GATK_AWS_access.key");
}
/**
@@ -358,7 +358,7 @@ public class GATKRunReport {
* @return a non-null AWS secret key for the GATK user
*/
protected static String getAWSSecretKey() {
- return getAWSKey("GATK_AWS_secret.key");
+ return getAWSKey("resources/GATK_AWS_secret.key");
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_access.key b/public/java/src/org/broadinstitute/sting/gatk/phonehome/resources/GATK_AWS_access.key
similarity index 100%
rename from public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_access.key
rename to public/java/src/org/broadinstitute/sting/gatk/phonehome/resources/GATK_AWS_access.key
diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_secret.key b/public/java/src/org/broadinstitute/sting/gatk/phonehome/resources/GATK_AWS_secret.key
similarity index 100%
rename from public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_secret.key
rename to public/java/src/org/broadinstitute/sting/gatk/phonehome/resources/GATK_AWS_secret.key
diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java
index eb287abd8..77f3a84c3 100644
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -240,6 +240,13 @@ public class Utils {
}
}
+ /**
+ * Create a new list that contains the elements of left along with elements elts
+ * @param left a non-null list of elements
+ * @param elts a varargs vector for elts to append in order to left
+ * @param
+ * @return A newly allocated linked list containing left followed by elts
+ */
public static List append(final List left, T ... elts) {
final List l = new LinkedList(left);
l.addAll(Arrays.asList(elts));
@@ -329,15 +336,6 @@ public class Utils {
return str.substring(start, end+1);
}
- public static byte listMaxByte(List quals) {
- if (quals.size() == 0) return 0;
- byte m = quals.get(0);
- for (byte b : quals) {
- m = b > m ? b : m;
- }
- return m;
- }
-
/**
* Splits expressions in command args by spaces and returns the array of expressions.
* Expressions may use single or double quotes to group any individual expression, but not both.
@@ -400,173 +398,6 @@ public class Utils {
return concatArrays(A, B);
}
- /**
- * Returns indices of all occurrences of the specified symbol in the string
- * @param s Search string
- * @param ch Character to search for
- * @return Indices of all occurrences of the specified symbol
- */
- public static int[] indexOfAll(String s, int ch) {
- int[] pos = new int[64];
- int z = 0;
-
- for (int i = 0; i < s.length(); i++) {
- if (s.charAt(i) == ch) pos[z++] = i;
- }
- return reallocate(pos, z);
- }
-
- public static int countSetBits(boolean[] array) {
- int counter = 0;
- for ( int i = 0; i < array.length; i++ ) {
- if ( array[i] )
- counter++;
- }
- return counter;
- }
-
- /**
- * Returns new (reallocated) integer array of the specified size, with content
- * of the original array orig copied into it. If newSize is
- * less than the size of the original array, only first newSize elements will be copied.
- * If new size is greater than the size of the original array, the content of the original array will be padded
- * with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation
- * will be performed and the original array will be returned instead.
- *
- * @param orig Original size.
- * @param newSize New Size.
- *
- * @return New array with length equal to newSize.
- */
- public static int[] reallocate(int[] orig, int newSize) {
- if (orig.length == newSize) return orig;
- int[] new_array = new int[newSize];
- int L = (newSize > orig.length ? orig.length : newSize);
- for (int i = 0; i < L; i++) new_array[i] = orig[i];
- return new_array;
- }
-
-
- /**
- * Returns a copy of array a, extended with additional n elements to the right (if n > 0 ) or -n elements to the
- * left (if n<0), copying the values form the original array. Newly added elements are filled with value v. Note that
- * if array a is being padded to the left, first (-n) elements of the returned array are v's, followed by the content of
- * array a.
- * @param a original array
- * @param n number of (v-filled) elements to append to a on the right (n>0) or on the left (n<0)
- * @param v element value
- * @return the extended copy of array a with additional n elements
- */
- public static byte [] extend(final byte[] a, int n, byte v) {
-
- byte [] newA;
-
- if ( n > 0 ) {
- newA = Arrays.copyOf(a, a.length+n);
- if ( v != 0) { // java pads with 0's for us, so there is nothing to do if v==0
- for ( int i = a.length; i < newA.length ; i++ ) newA[i] = v;
- }
- return newA;
- }
-
- // we are here only if n < 0:
- n = (-n);
- newA = new byte[ a.length + n ];
- int i;
- if ( v!= 0 ) {
- i = 0;
- for( ; i < n; i++ ) newA[i] = v;
- } else {
- i = n;
- }
- for ( int j = 0 ; j < a.length ; i++, j++) newA[i]=a[j];
- return newA;
- }
-
-
- /**
- * Returns a copy of array a, extended with additional n elements to the right (if n > 0 ) or -n elements to the
- * left (if n<0), copying the values form the original array. Newly added elements are filled with value v. Note that
- * if array a is padded to the left, first (-n) elements of the returned array are v's, followed by the content of
- * array a.
- * @param a original array
- * @param n number of (v-filled) elements to append to a on the right (n>0) or on the left (n<0)
- * @param v element value
- * @return the extended copy of array a with additional n elements
- */
- public static short [] extend(final short[] a, int n, short v) {
-
- short [] newA;
-
- if ( n > 0 ) {
- newA = Arrays.copyOf(a, a.length+n);
- if ( v != 0) { // java pads with 0's for us, so there is nothing to do if v==0
- for ( int i = a.length; i < newA.length ; i++ ) newA[i] = v;
- }
- return newA;
- }
-
- // we are here only if n < 0:
- n = (-n);
- newA = new short[ a.length + n ];
- int i;
- if ( v!= 0 ) {
- i = 0;
- for( ; i < n; i++ ) newA[i] = v;
- } else {
- i = n;
- }
- for ( int j = 0 ; j < a.length ; i++, j++) newA[i]=a[j];
- return newA;
- }
-
- /* TEST ME
- public static void main(String[] argv) {
- List l1 = new LinkedList();
- List l2 = new ArrayList();
-
- l1.add(1);
- l1.add(5);
- l1.add(3);
- l1.add(10);
- l1.add(4);
- l1.add(2);
- l2.add(1);
- l2.add(5);
- l2.add(3);
- l2.add(10);
- l2.add(4);
- l2.add(2);
-
- Predicate p = new Predicate() {
- public boolean apply(Integer i) {
- return i > 2;
- }
- };
- filterInPlace(p, l1);
- filterInPlace(p, l2);
-
- for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
- System.out.println();
- for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
- System.out.println();
-
- }
-
- */
-
- /**
- * a helper method. Turns a single character string into a char.
- *
- * @param str the string
- *
- * @return a char
- */
- public static char stringToChar(String str) {
- if (str.length() != 1) throw new IllegalArgumentException("String length must be one");
- return str.charAt(0);
- }
-
public static > List sorted(Collection c) {
return sorted(c, false);
}
@@ -594,18 +425,6 @@ public class Utils {
return l;
}
- public static , V> String sortedString(Map c) {
- List t = new ArrayList(c.keySet());
- Collections.sort(t);
-
- List pairs = new ArrayList();
- for ( T k : t ) {
- pairs.add(k + "=" + c.get(k));
- }
-
- return "{" + join(", ", pairs) + "}";
- }
-
/**
* Reverse a byte array of bases
*
@@ -654,14 +473,6 @@ public class Utils {
return new String( reverse( bases.getBytes() )) ;
}
- public static byte[] charSeq2byteSeq(char[] seqIn) {
- byte[] seqOut = new byte[seqIn.length];
- for ( int i = 0; i < seqIn.length; i++ ) {
- seqOut[i] = (byte)seqIn[i];
- }
- return seqOut;
- }
-
public static boolean isFlagSet(int value, int flag) {
return ((value & flag) == flag);
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
index 8c7bce6ac..73e129105 100644
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
@@ -673,7 +673,9 @@ public class BAQ {
} else if ( excludeReadFromBAQ(read) ) {
; // just fall through
} else {
- if ( calculationType == CalculationMode.RECALCULATE || ! hasBAQTag(read) ) {
+ final boolean readHasBAQTag = hasBAQTag(read);
+
+ if ( calculationType == CalculationMode.RECALCULATE || ! readHasBAQTag ) {
if ( DEBUG ) System.out.printf(" Calculating BAQ on the fly%n");
BAQCalculationResult hmmResult = calcBAQFromHMM(read, refReader);
if ( hmmResult != null ) {
@@ -683,6 +685,9 @@ public class BAQ {
case DONT_MODIFY: BAQQuals = hmmResult.bq; break;
default: throw new ReviewedStingException("BUG: unexpected qmode " + qmode);
}
+ } else if ( readHasBAQTag ) {
+ // remove the BAQ tag if it's there because we cannot trust it
+ read.setAttribute(BAQ_TAG, null);
}
} else if ( qmode == QualityMode.OVERWRITE_QUALS ) { // only makes sense if we are overwriting quals
if ( DEBUG ) System.out.printf(" Taking BAQ from tag%n");
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
index 9c5f95472..1607db8af 100644
--- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
@@ -25,6 +25,8 @@
package org.broadinstitute.sting.utils.sam;
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
@@ -38,9 +40,16 @@ import org.broadinstitute.sting.utils.recalibration.EventType;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.EnumSet;
+import java.util.List;
-public class AlignmentUtils {
+public final class AlignmentUtils {
+ private final static EnumSet ALIGNED_TO_GENOME_OPERATORS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X);
+ private final static EnumSet ALIGNED_TO_GENOME_PLUS_SOFTCLIPS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.S);
+
+ // cannot be instantiated
+ private AlignmentUtils() { }
public static class MismatchCount {
public int numMismatches = 0;
@@ -116,103 +125,6 @@ public class AlignmentUtils {
return mc;
}
- /**
- * Returns the number of mismatches in the pileup within the given reference context.
- *
- * @param pileup the pileup with reads
- * @param ref the reference context
- * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
- * @return the number of mismatches
- */
- public static int mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) {
- int mismatches = 0;
- for (PileupElement p : pileup)
- mismatches += mismatchesInRefWindow(p, ref, ignoreTargetSite);
- return mismatches;
- }
-
- /**
- * Returns the number of mismatches in the pileup element within the given reference context.
- *
- * @param p the pileup element
- * @param ref the reference context
- * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
- * @return the number of mismatches
- */
- public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) {
- return mismatchesInRefWindow(p, ref, ignoreTargetSite, false);
- }
-
- /**
- * Returns the number of mismatches in the pileup element within the given reference context.
- *
- * @param p the pileup element
- * @param ref the reference context
- * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
- * @param qualitySumInsteadOfMismatchCount
- * if true, return the quality score sum of the mismatches rather than the count
- * @return the number of mismatches
- */
- public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite, boolean qualitySumInsteadOfMismatchCount) {
- int sum = 0;
-
- int windowStart = ref.getWindow().getStart();
- int windowStop = ref.getWindow().getStop();
- byte[] refBases = ref.getBases();
- byte[] readBases = p.getRead().getReadBases();
- byte[] readQualities = p.getRead().getBaseQualities();
- Cigar c = p.getRead().getCigar();
-
- int readIndex = 0;
- int currentPos = p.getRead().getAlignmentStart();
- int refIndex = Math.max(0, currentPos - windowStart);
-
- for (int i = 0; i < c.numCigarElements(); i++) {
- CigarElement ce = c.getCigarElement(i);
- int cigarElementLength = ce.getLength();
- switch (ce.getOperator()) {
- case EQ:
- case X:
- case M:
- for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) {
- // are we past the ref window?
- if (currentPos > windowStop)
- break;
-
- // are we before the ref window?
- if (currentPos < windowStart)
- continue;
-
- byte refChr = refBases[refIndex++];
-
- // do we need to skip the target site?
- if (ignoreTargetSite && ref.getLocus().getStart() == currentPos)
- continue;
-
- byte readChr = readBases[readIndex];
- if (readChr != refChr)
- sum += (qualitySumInsteadOfMismatchCount) ? readQualities[readIndex] : 1;
- }
- break;
- case I:
- case S:
- readIndex += cigarElementLength;
- break;
- case D:
- case N:
- currentPos += cigarElementLength;
- if (currentPos > windowStart)
- refIndex += Math.min(cigarElementLength, currentPos - windowStart);
- break;
- case H:
- case P:
- break;
- }
- }
-
- return sum;
- }
-
/**
* Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment.
* This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but
@@ -223,31 +135,54 @@ public class AlignmentUtils {
* @param r alignment
* @return number of continuous alignment blocks (i.e. 'M' elements of the cigar; all indel and clipping elements are ignored).
*/
+ @Ensures("result >= 0")
public static int getNumAlignmentBlocks(final SAMRecord r) {
- int n = 0;
+ if ( r == null ) throw new IllegalArgumentException("read cannot be null");
final Cigar cigar = r.getCigar();
if (cigar == null) return 0;
+ int n = 0;
for (final CigarElement e : cigar.getCigarElements()) {
- if (e.getOperator() == CigarOperator.M) n++;
+ if (ALIGNED_TO_GENOME_OPERATORS.contains(e.getOperator()))
+ n++;
}
return n;
}
- public static int getNumAlignedBasesCountingSoftClips(final SAMRecord r) {
+
+ /**
+ * Get the number of bases aligned to the genome, including soft clips
+ *
+ * If read is not mapped (i.e., doesn't have a cigar) returns 0
+ *
+ * @param r a non-null GATKSAMRecord
+ * @return the number of bases aligned to the genome in R, including soft clipped bases
+ */
+ public static int getNumAlignedBasesCountingSoftClips(final GATKSAMRecord r) {
int n = 0;
final Cigar cigar = r.getCigar();
if (cigar == null) return 0;
for (final CigarElement e : cigar.getCigarElements())
- if (e.getOperator() == CigarOperator.M || e.getOperator() == CigarOperator.S)
+ if (ALIGNED_TO_GENOME_PLUS_SOFTCLIPS.contains(e.getOperator()))
n += e.getLength();
return n;
}
+ /**
+ * Count the number of bases hard clipped from read
+ *
+ * If read's cigar is null, return 0
+ *
+ * @param r a non-null read
+ * @return a positive integer
+ */
+ @Ensures("result >= 0")
public static int getNumHardClippedBases(final SAMRecord r) {
+ if ( r == null ) throw new IllegalArgumentException("Read cannot be null");
+
int n = 0;
final Cigar cigar = r.getCigar();
if (cigar == null) return 0;
@@ -259,16 +194,28 @@ public class AlignmentUtils {
return n;
}
+ /**
+ * Calculate the number of bases that are soft clipped in read with quality score greater than threshold
+ *
+ * Handles the case where the cigar is null (i.e., the read is unmapped), returning 0
+ *
+ * @param read a non-null GATKSAMRecord.
+ * @param qualThreshold consider bases with quals > this value as high quality. Must be >= 0
+ * @return positive integer
+ */
+ @Ensures("result >= 0")
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
+ if ( read == null ) throw new IllegalArgumentException("Read cannot be null");
+ if ( qualThreshold < 0 ) throw new IllegalArgumentException("Expected qualThreshold to be a positive byte but saw " + qualThreshold);
+
+ if ( read.getCigar() == null ) // the read is unmapped
+ return 0;
+
+ final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
int numHQSoftClips = 0;
int alignPos = 0;
- final Cigar cigar = read.getCigar();
- final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
-
- for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) {
-
- final CigarElement ce = cigar.getCigarElement(iii);
+ for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
@@ -277,35 +224,26 @@ public class AlignmentUtils {
if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; }
}
break;
- case M:
- case I:
- case EQ:
- case X:
+ case M: case I: case EQ: case X:
alignPos += elementLength;
break;
- case H:
- case P:
- case D:
- case N:
+ case H: case P: case D: case N:
break;
default:
- throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
+ throw new IllegalStateException("Unsupported cigar operator: " + ce.getOperator());
}
}
+
return numHQSoftClips;
}
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
- return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), false, pileupElement.isDeletion(), alignmentStart, refLocus );
+ return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isDeletion(), alignmentStart, refLocus );
}
- public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isInsertionAtBeginningOfRead, final boolean isDeletion, final int alignmentStart, final int refLocus) {
+ public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isDeletion, final int alignmentStart, final int refLocus) {
int pileupOffset = offset;
- // Special case for reads starting with insertion
- if (isInsertionAtBeginningOfRead)
- return 0;
-
// Reassign the offset if we are in the middle of a deletion because of the modified representation of the read bases
if (isDeletion) {
pileupOffset = refLocus - alignmentStart;
@@ -458,18 +396,21 @@ public class AlignmentUtils {
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and
* alignment reference index/start.
*
- * @param r record
+ * Our life would be so much easier if all sam files followed the specs. In reality,
+ * sam files (including those generated by maq or bwa) miss headers altogether. When
+ * reading such a SAM file, reference name is set, but since there is no sequence dictionary,
+ * null is always returned for referenceIndex. Let's be paranoid here, and make sure that
+ * we do not call the read "unmapped" when it has only reference name set with ref. index missing
+ * or vice versa.
+ *
+ * @param r a non-null record
* @return true if read is unmapped
*/
public static boolean isReadUnmapped(final SAMRecord r) {
+ if ( r == null ) throw new IllegalArgumentException("Read cannot be null");
+
if (r.getReadUnmappedFlag()) return true;
- // our life would be so much easier if all sam files followed the specs. In reality,
- // sam files (including those generated by maq or bwa) miss headers altogether. When
- // reading such a SAM file, reference name is set, but since there is no sequence dictionary,
- // null is always returned for referenceIndex. Let's be paranoid here, and make sure that
- // we do not call the read "unmapped" when it has only reference name set with ref. index missing
- // or vice versa.
if ((r.getReferenceIndex() != null && r.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
|| r.getReferenceName() != null && !r.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME))
&& r.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) return false;
@@ -477,26 +418,26 @@ public class AlignmentUtils {
}
/**
- * Due to (unfortunate) multiple ways to indicate that read/mate is unmapped allowed by SAM format
- * specification, one may need this convenience shortcut. Checks both 'mate unmapped' flag and
- * alignment reference index/start of the mate.
- *
- * @param r sam record for the read
- * @return true if read's mate is unmapped
+ * Need a well-formed, consolidated Cigar string so that the left aligning code works properly.
+ * For example, 1M1M1M1D2M1M --> 3M1D3M
+ * If the given cigar is empty then the returned cigar will also be empty
+ * @param c the cigar to consolidate
+ * @return a cigar with consecutive matching operators merged into single operators.
*/
- public static boolean isMateUnmapped(final SAMRecord r) {
- if (r.getMateUnmappedFlag()) return true;
-
- // our life would be so much easier if all sam files followed the specs. In reality,
- // sam files (including those generated by maq or bwa) miss headers altogether. When
- // reading such a SAM file, reference name is set, but since there is no sequence dictionary,
- // null is always returned for referenceIndex. Let's be paranoid here, and make sure that
- // we do not call the read "unmapped" when it has only reference name set with ref. index missing
- // or vice versa.
- if ((r.getMateReferenceIndex() != null && r.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
- || r.getMateReferenceName() != null && !r.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME))
- && r.getMateAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) return false;
- return true;
+ @Requires({"c != null"})
+ @Ensures({"result != null"})
+ public static Cigar consolidateCigar( final Cigar c ) {
+ if( c.isEmpty() ) { return c; }
+ final Cigar returnCigar = new Cigar();
+ int sumLength = 0;
+ for( int iii = 0; iii < c.numCigarElements(); iii++ ) {
+ sumLength += c.getCigarElement(iii).getLength();
+ if( iii == c.numCigarElements() - 1 || !c.getCigarElement(iii).getOperator().equals(c.getCigarElement(iii+1).getOperator())) { // at the end so finish the current element
+ returnCigar.add(new CigarElement(sumLength, c.getCigarElement(iii).getOperator()));
+ sumLength = 0;
+ }
+ }
+ return returnCigar;
}
/**
@@ -565,22 +506,41 @@ public class AlignmentUtils {
return cigar;
}
- private static boolean cigarHasZeroSizeElement(Cigar c) {
- for (CigarElement ce : c.getCigarElements()) {
+ /**
+ * Does one of the elements in cigar have a 0 length?
+ *
+ * @param c a non-null cigar
+ * @return true if any element has 0 size
+ */
+ @Requires("c != null")
+ protected static boolean cigarHasZeroSizeElement(final Cigar c) {
+ for (final CigarElement ce : c.getCigarElements()) {
if (ce.getLength() == 0)
return true;
}
return false;
}
- private static Cigar cleanUpCigar(Cigar c) {
- ArrayList elements = new ArrayList(c.numCigarElements() - 1);
- for (CigarElement ce : c.getCigarElements()) {
- if (ce.getLength() != 0 &&
- (elements.size() != 0 || ce.getOperator() != CigarOperator.D)) {
+ /**
+ * Clean up the incoming cigar
+ *
+ * Removes elements with zero size
+ * Clips away beginning deletion operators
+ *
+ * @param c the cigar string we want to clean up
+ * @return a newly allocated, cleaned up Cigar
+ */
+ @Requires("c != null")
+ @Ensures("result != null")
+ private static Cigar cleanUpCigar(final Cigar c) {
+ final List elements = new ArrayList(c.numCigarElements() - 1);
+
+ for (final CigarElement ce : c.getCigarElements()) {
+ if (ce.getLength() != 0 && (! elements.isEmpty() || ce.getOperator() != CigarOperator.D)) {
elements.add(ce);
}
}
+
return new Cigar(elements);
}
diff --git a/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java
index 11d152e65..a4ccd050a 100644
--- a/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java
@@ -30,7 +30,7 @@ import org.broad.tribble.Feature;
import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
-import org.broad.tribble.util.BlockCompressedInputStream;
+import net.sf.samtools.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.variantcontext.*;
diff --git a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java
index 5d6ecd0f9..29c643153 100644
--- a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java
@@ -32,8 +32,7 @@ import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.Test;
import java.io.File;
-import java.util.LinkedHashMap;
-import java.util.Map;
+import java.util.*;
/**
* Testing framework for general purpose utilities class.
@@ -43,6 +42,25 @@ import java.util.Map;
*/
public class UtilsUnitTest extends BaseTest {
+ @Test
+ public void testAppend() {
+ for ( int leftSize : Arrays.asList(0, 1, 2, 3) ) {
+ for ( final int rightSize : Arrays.asList(0, 1, 2) ) {
+ final List left = new LinkedList();
+ for ( int i = 0; i < leftSize; i++ ) left.add(i);
+ final List total = new LinkedList();
+ for ( int i = 0; i < leftSize + rightSize; i++ ) total.add(i);
+
+ if ( rightSize == 0 )
+ Assert.assertEquals(Utils.append(left), total);
+ if ( rightSize == 1 )
+ Assert.assertEquals(Utils.append(left, leftSize), total);
+ if ( rightSize == 2 )
+ Assert.assertEquals(Utils.append(left, leftSize, leftSize + 1), total);
+ }
+ }
+
+ }
@Test
public void testDupStringNoChars() {
diff --git a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java
index 1e3386426..da82e9de5 100644
--- a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java
@@ -203,6 +203,23 @@ public class BAQUnitTest extends BaseTest {
Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range");
}
+ @Test(enabled = true)
+ public void testBAQOverwritesExistingTagWithNull() {
+
+ // create a read with a single base off the end of the contig, which cannot be BAQed
+ final SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, fasta.getSequenceDictionary().getSequence("chr1").getSequenceLength() + 1, 1);
+ read.setReadBases(new byte[] {(byte) 'A'});
+ read.setBaseQualities(new byte[] {(byte) 20});
+ read.setCigarString("1M");
+ read.setAttribute("BQ", "A");
+
+ // try to BAQ and tell it to RECALCULATE AND ADD_TAG
+ BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false);
+ baq.baqRead(read, fasta, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);
+
+ // did we remove the existing tag?
+ Assert.assertTrue(read.getAttribute("BQ") == null);
+ }
public void testBAQ(BAQTest test, boolean lookupWithFasta) {
BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters
diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java
index f088574d7..d9f514593 100644
--- a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java
@@ -25,13 +25,16 @@
package org.broadinstitute.sting.utils.sam;
-import junit.framework.Assert;
-import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMRecord;
+import net.sf.samtools.*;
+import org.apache.commons.lang.ArrayUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.util.*;
+
public class AlignmentUtilsUnitTest {
private SAMFileHeader header;
@@ -121,4 +124,205 @@ public class AlignmentUtilsUnitTest {
start,
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
}
+
+ private final List> makeCigarElementCombinations() {
+ // this functionality can be adapted to provide input data for whatever you might want in your data
+ final List cigarElements = new LinkedList();
+ for ( final int size : Arrays.asList(0, 10) ) {
+ for ( final CigarOperator op : CigarOperator.values() ) {
+ cigarElements.add(new CigarElement(size, op));
+ }
+ }
+
+ final List> combinations = new LinkedList>();
+ for ( final int nElements : Arrays.asList(1, 2, 3) ) {
+ combinations.addAll(Utils.makePermutations(cigarElements, nElements, true));
+ }
+
+ return combinations;
+ }
+
+
+ @DataProvider(name = "NumAlignedBasesCountingSoftClips")
+ public Object[][] makeNumAlignedBasesCountingSoftClips() {
+ List