diff --git a/build.xml b/build.xml index 1e88bb400..e92e41c10 100644 --- a/build.xml +++ b/build.xml @@ -708,9 +708,6 @@ - - - diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 95b31b732..ddca5e0b8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -107,7 +107,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true ); if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) continue; - int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 ); + int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 ); final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java index 74a95db37..881fe4204 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java @@ -86,7 +86,7 @@ public class DeBruijnEdge { multiplicity = value; } - public boolean getIsRef() { + public boolean isRef() { return isRef; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java index 7d79edf93..edfe8254d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java @@ -46,8 +46,14 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; +import com.google.java.contract.Ensures; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.SWPairwiseAlignment; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.jgrapht.graph.DefaultDirectedGraph; import java.io.Serializable; @@ -55,7 +61,7 @@ import java.util.*; /** * Created by IntelliJ IDEA. - * User: ebanks + * User: ebanks, rpoplin * Date: Mar 23, 2011 */ // Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph. @@ -72,28 +78,47 @@ public class KBestPaths { protected static class Path { // the last vertex seen in the path - private DeBruijnVertex lastVertex; + private final DeBruijnVertex lastVertex; // the list of edges comprising the path - private ArrayList edges; + private final List edges; // the scores for the path - private int totalScore = 0, lowestEdge = -1; + private final int totalScore; + private final int lowestEdge; - public Path( final DeBruijnVertex initialVertex ) { + // the graph from which this path originated + private final DefaultDirectedGraph graph; + + // used in the bubble state machine to apply Smith-Waterman to the bubble sequence + private final double SW_MATCH = 15.0; + private final double SW_MISMATCH = -15.0; + private final double SW_GAP = -25.0; + private final double SW_GAP_EXTEND = -1.2; + + public Path( final DeBruijnVertex initialVertex, final DefaultDirectedGraph graph ) { lastVertex = initialVertex; edges = new ArrayList(0); + totalScore = 0; + lowestEdge = -1; + this.graph = graph; } - public Path( final Path p, final DefaultDirectedGraph graph, final DeBruijnEdge edge ) { - lastVertex = graph.getEdgeTarget(edge); + public Path( final Path p, final DeBruijnEdge edge ) { + graph = p.graph; + lastVertex = p.graph.getEdgeTarget(edge); edges = new ArrayList(p.edges); edges.add(edge); totalScore = p.totalScore + edge.getMultiplicity(); lowestEdge = ( p.lowestEdge == -1 ) ? edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity()); } - public boolean containsEdge( final DefaultDirectedGraph graph, final DeBruijnEdge edge ) { + /** + * Does this path contain the given edge + * @param edge the given edge to test + * @return true if the edge is found in this path + */ + public boolean containsEdge( final DeBruijnEdge edge ) { final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge); for( final DeBruijnEdge e : edges ) { if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) { @@ -104,7 +129,7 @@ public class KBestPaths { return false; } - public ArrayList getEdges() { return edges; } + public List getEdges() { return edges; } public int getScore() { return totalScore; } @@ -112,7 +137,12 @@ public class KBestPaths { public DeBruijnVertex getLastVertexInPath() { return lastVertex; } - public byte[] getBases( final DefaultDirectedGraph graph ) { + /** + * The base sequence for this path. Pull the full sequence for the source of the path and then the suffix for all subsequent nodes + * @return non-null sequence of bases corresponding to this path + */ + @Ensures({"result != null"}) + public byte[] getBases() { if( edges.size() == 0 ) { return lastVertex.getSequence(); } byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence(); @@ -121,6 +151,142 @@ public class KBestPaths { } return bases; } + + /** + * Calculate the cigar string for this path using a bubble traversal of the assembly graph and running a Smith-Waterman alignment on each bubble + */ + @Ensures("result != null") + public Cigar calculateCigar() { + + final Cigar cigar = new Cigar(); + // special case for paths that start on reference but not at the reference source node + if( edges.get(0).isRef() && !isRefSource(graph, edges.get(0)) ) { + for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, null, graph.getEdgeSource(edges.get(0))).getCigarElements() ) { + cigar.add(ce); + } + } + + // reset the bubble state machine + final BubbleStateMachine bsm = new BubbleStateMachine(cigar); + + for( final DeBruijnEdge e : edges ) { + if( e.equals(graph, edges.get(0)) ) { + advanceBubbleStateMachine( bsm, graph, graph.getEdgeSource(e), null ); + } + advanceBubbleStateMachine( bsm, graph, graph.getEdgeTarget(e), e ); + } + + // special case for paths that don't end on reference + if( bsm.inBubble ) { + for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, null).getCigarElements() ) { + bsm.cigar.add(ce); + } + } else if( edges.get(edges.size()-1).isRef() && !isRefSink(graph, edges.get(edges.size()-1)) ) { // special case for paths that end of the reference but haven't completed the entire reference circuit + for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, graph.getEdgeTarget(edges.get(edges.size()-1)), null).getCigarElements() ) { + bsm.cigar.add(ce); + } + } + + return AlignmentUtils.consolidateCigar(bsm.cigar); + } + + private void advanceBubbleStateMachine( final BubbleStateMachine bsm, final DefaultDirectedGraph graph, final DeBruijnVertex node, final DeBruijnEdge e ) { + if( isReferenceNode( graph, node ) ) { + if( !bsm.inBubble ) { // just add the ref bases as M's in the Cigar string, and don't do anything else + if( e !=null && !e.isRef() ) { + if( referencePathExists( graph, graph.getEdgeSource(e), node) ) { + for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, graph.getEdgeSource(e), node).getCigarElements() ) { + bsm.cigar.add(ce); + } + bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) ); + } else if ( graph.getEdgeSource(e).equals(graph.getEdgeTarget(e)) ) { // alt edge at ref node points to itself + bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.I) ); + } else { + bsm.inBubble = true; + bsm.bubbleBytes = null; + bsm.lastSeenReferenceNode = graph.getEdgeSource(e); + bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) ); + } + } else { + bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) ); + } + } else if( bsm.lastSeenReferenceNode != null && !referencePathExists( graph, bsm.lastSeenReferenceNode, node ) ) { // add bases to the bubble string until we get back to the reference path + bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) ); + } else { // close the bubble and use a local SW to determine the Cigar string + for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, node).getCigarElements() ) { + bsm.cigar.add(ce); + } + bsm.inBubble = false; + bsm.bubbleBytes = null; + bsm.lastSeenReferenceNode = null; + bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) ); + } + } else { // non-ref vertex + if( bsm.inBubble ) { // just keep accumulating until we get back to the reference path + bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) ); + } else { // open up a bubble + bsm.inBubble = true; + bsm.bubbleBytes = null; + bsm.lastSeenReferenceNode = (e != null ? graph.getEdgeSource(e) : null ); + bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) ); + } + } + } + + private Cigar calculateCigarForCompleteBubble( final DefaultDirectedGraph graph, final byte[] bubbleBytes, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) { + final byte[] refBytes = getReferenceBytes(graph, fromVertex, toVertex); + final Cigar cigar = new Cigar(); + + // add padding to anchor ref/alt bases in the SW matrix + byte[] padding = "XXXXXX".getBytes(); + boolean goodAlignment = false; + SWPairwiseAlignment swConsensus = null; + while( !goodAlignment && padding.length < 1000 ) { + padding = ArrayUtils.addAll(padding, padding); // double the size of the padding each time + final byte[] reference = ArrayUtils.addAll( ArrayUtils.addAll(padding, refBytes), padding ); + final byte[] alternate = ArrayUtils.addAll( ArrayUtils.addAll(padding, bubbleBytes), padding ); + swConsensus = new SWPairwiseAlignment( reference, alternate, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); + if( swConsensus.getAlignmentStart2wrt1() == 0 && !swConsensus.getCigar().toString().contains("S") && swConsensus.getCigar().getReferenceLength() == reference.length ) { + goodAlignment = true; + } + } + if( !goodAlignment && swConsensus != null ) { + throw new ReviewedStingException("SmithWaterman offset failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar()); + } + + if( swConsensus != null ) { + final Cigar swCigar = swConsensus.getCigar(); + for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) { + // now we need to remove the padding from the cigar string + int length = swCigar.getCigarElement(iii).getLength(); + if( iii == 0 ) { length -= padding.length; } + if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; } + if( length > 0 ) { + cigar.add( new CigarElement(length, swCigar.getCigarElement(iii).getOperator()) ); + } + } + if( (refBytes == null && cigar.getReferenceLength() != 0) || ( refBytes != null && cigar.getReferenceLength() != refBytes.length ) ) { + throw new ReviewedStingException("SmithWaterman cigar failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar()); + } + } + + return cigar; + } + + // class to keep track of the bubble state machine + protected static class BubbleStateMachine { + public boolean inBubble = false; + public byte[] bubbleBytes = null; + public DeBruijnVertex lastSeenReferenceNode = null; + public Cigar cigar = null; + + public BubbleStateMachine( final Cigar initialCigar ) { + inBubble = false; + bubbleBytes = null; + lastSeenReferenceNode = null; + cigar = initialCigar; + } + } } protected static class PathComparatorTotalScore implements Comparator, Serializable { @@ -130,12 +296,12 @@ public class KBestPaths { } } - //protected static class PathComparatorLowestEdge implements Comparator, Serializable { - // @Override - // public int compare(final Path path1, final Path path2) { - // return path2.lowestEdge - path1.lowestEdge; - // } - //} + protected static class PathComparatorLowestEdge implements Comparator, Serializable { + @Override + public int compare(final Path path1, final Path path2) { + return path2.lowestEdge - path1.lowestEdge; + } + } public static List getKBestPaths( final DefaultDirectedGraph graph, final int k ) { if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); } @@ -144,7 +310,7 @@ public class KBestPaths { // run a DFS for best paths for( final DeBruijnVertex v : graph.vertexSet() ) { if( graph.inDegreeOf(v) == 0 ) { - findBestPaths(graph, new Path(v), bestPaths); + findBestPaths(new Path(v, graph), bestPaths); } } @@ -153,14 +319,14 @@ public class KBestPaths { return bestPaths.subList(0, Math.min(k, bestPaths.size())); } - private static void findBestPaths( final DefaultDirectedGraph graph, final Path path, final List bestPaths ) { - findBestPaths(graph, path, bestPaths, new MyInt()); + private static void findBestPaths( final Path path, final List bestPaths ) { + findBestPaths(path, bestPaths, new MyInt()); } - private static void findBestPaths( final DefaultDirectedGraph graph, final Path path, final List bestPaths, MyInt n ) { + private static void findBestPaths( final Path path, final List bestPaths, final MyInt n ) { // did we hit the end of a path? - if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) { + if ( allOutgoingEdgesHaveBeenVisited(path) ) { if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) { // clean out some low scoring paths Collections.sort(bestPaths, new PathComparatorTotalScore() ); @@ -172,27 +338,253 @@ public class KBestPaths { } else { // recursively run DFS final ArrayList edgeArrayList = new ArrayList(); - edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex)); + edgeArrayList.addAll(path.graph.outgoingEdgesOf(path.lastVertex)); Collections.sort(edgeArrayList, new DeBruijnEdge.EdgeWeightComparator()); Collections.reverse(edgeArrayList); for ( final DeBruijnEdge edge : edgeArrayList ) { // make sure the edge is not already in the path - if ( path.containsEdge(graph, edge) ) + if ( path.containsEdge(edge) ) continue; - final Path newPath = new Path(path, graph, edge); + final Path newPath = new Path(path, edge); n.val++; - findBestPaths(graph, newPath, bestPaths, n); + findBestPaths(newPath, bestPaths, n); } } } - private static boolean allOutgoingEdgesHaveBeenVisited( final DefaultDirectedGraph graph, final Path path ) { - for( final DeBruijnEdge edge : graph.outgoingEdgesOf(path.lastVertex) ) { - if( !path.containsEdge(graph, edge) ) { + private static boolean allOutgoingEdgesHaveBeenVisited( final Path path ) { + for( final DeBruijnEdge edge : path.graph.outgoingEdgesOf(path.lastVertex) ) { + if( !path.containsEdge(edge) ) { return false; } } return true; } + + /**************************************************************** + * Collection of graph functions used by KBestPaths * + ***************************************************************/ + + /** + * Test if the vertex is on a reference path in the graph. If so it is referred to as a reference node + * @param graph the graph from which the vertex originated + * @param v the vertex to test + * @return true if the vertex is on the reference path + */ + public static boolean isReferenceNode( final DefaultDirectedGraph graph, final DeBruijnVertex v ) { + for( final DeBruijnEdge e : graph.edgesOf(v) ) { + if( e.isRef() ) { return true; } + } + return false; + } + + /** + * Is this edge a source edge (the source vertex of the edge is a source node in the graph) + * @param graph the graph from which the edge originated + * @param e the edge to test + * @return true if the source vertex of the edge is a source node in the graph + */ + public static boolean isSource( final DefaultDirectedGraph graph, final DeBruijnEdge e ) { + return graph.inDegreeOf(graph.getEdgeSource(e)) == 0; + } + + /** + * Is this vertex a source vertex + * @param graph the graph from which the vertex originated + * @param v the vertex to test + * @return true if the vertex is a source vertex + */ + public static boolean isSource( final DefaultDirectedGraph graph, final DeBruijnVertex v ) { + return graph.inDegreeOf(v) == 0; + } + + /** + * Pull the added base sequence implied by visiting this node in a path + * @param graph the graph from which the vertex originated + * @param v the vertex whose sequence to grab + * @return non-null sequence of bases corresponding to this node in the graph + */ + @Ensures({"result != null"}) + public static byte[] getAdditionalSequence( final DefaultDirectedGraph graph, final DeBruijnVertex v ) { + return ( isSource(graph, v) ? v.getSequence() : v.getSuffix() ); + } + + /** + * Is this edge both a reference edge and a source edge for the reference path + * @param graph the graph from which the edge originated + * @param e the edge to test + * @return true if the edge is both a reference edge and a reference path source edge + */ + public static boolean isRefSource( final DefaultDirectedGraph graph, final DeBruijnEdge e ) { + for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) { + if( edgeToTest.isRef() ) { return false; } + } + return true; + } + + /** + * Is this vertex both a reference node and a source node for the reference path + * @param graph the graph from which the vertex originated + * @param v the vertex to test + * @return true if the vertex is both a reference node and a reference path source node + */ + public static boolean isRefSource( final DefaultDirectedGraph graph, final DeBruijnVertex v ) { + for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) { + if( edgeToTest.isRef() ) { return false; } + } + return true; + } + + /** + * Is this edge both a reference edge and a sink edge for the reference path + * @param graph the graph from which the edge originated + * @param e the edge to test + * @return true if the edge is both a reference edge and a reference path sink edge + */ + public static boolean isRefSink( final DefaultDirectedGraph graph, final DeBruijnEdge e ) { + for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) { + if( edgeToTest.isRef() ) { return false; } + } + return true; + } + + /** + * Is this vertex both a reference node and a sink node for the reference path + * @param graph the graph from which the node originated + * @param v the node to test + * @return true if the vertex is both a reference node and a reference path sink node + */ + public static boolean isRefSink( final DefaultDirectedGraph graph, final DeBruijnVertex v ) { + for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) { + if( edgeToTest.isRef() ) { return false; } + } + return true; + } + + public static DeBruijnEdge getReferenceSourceEdge( final DefaultDirectedGraph graph ) { + for( final DeBruijnEdge e : graph.edgeSet() ) { + if( e.isRef() && isRefSource(graph, e) ) { + return e; + } + } + throw new ReviewedStingException("All reference graphs should have a source node"); + } + + public static DeBruijnVertex getReferenceSourceVertex( final DefaultDirectedGraph graph ) { + for( final DeBruijnVertex v : graph.vertexSet() ) { + if( isReferenceNode(graph, v) && isRefSource(graph, v) ) { + return v; + } + } + return null; + } + + public static DeBruijnEdge getReferenceSinkEdge( final DefaultDirectedGraph graph ) { + for( final DeBruijnEdge e : graph.edgeSet() ) { + if( e.isRef() && isRefSink(graph, e) ) { + return e; + } + } + throw new ReviewedStingException("All reference graphs should have a sink node"); + } + + public static DeBruijnVertex getReferenceSinkVertex( final DefaultDirectedGraph graph ) { + for( final DeBruijnVertex v : graph.vertexSet() ) { + if( isReferenceNode(graph, v) && isRefSink(graph, v) ) { + return v; + } + } + throw new ReviewedStingException("All reference graphs should have a sink node"); + } + + public static DeBruijnEdge getNextReferenceEdge( final DefaultDirectedGraph graph, final DeBruijnEdge e ) { + if( e == null ) { return null; } + for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) { + if( edgeToTest.isRef() ) { + return edgeToTest; + } + } + return null; + } + + public static DeBruijnVertex getNextReferenceVertex( final DefaultDirectedGraph graph, final DeBruijnVertex v ) { + if( v == null ) { return null; } + for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) { + if( edgeToTest.isRef() ) { + return graph.getEdgeTarget(edgeToTest); + } + } + return null; + } + + public static DeBruijnEdge getPrevReferenceEdge( final DefaultDirectedGraph graph, final DeBruijnEdge e ) { + for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) { + if( edgeToTest.isRef() ) { + return edgeToTest; + } + } + return null; + } + + public static DeBruijnVertex getPrevReferenceVertex( final DefaultDirectedGraph graph, final DeBruijnVertex v ) { + for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) { + if( isReferenceNode(graph, graph.getEdgeSource(edgeToTest)) ) { + return graph.getEdgeSource(edgeToTest); + } + } + return null; + } + + public static boolean referencePathExists(final DefaultDirectedGraph graph, final DeBruijnEdge fromEdge, final DeBruijnEdge toEdge) { + DeBruijnEdge e = fromEdge; + if( e == null ) { + return false; + } + while( !e.equals(graph, toEdge) ) { + e = getNextReferenceEdge(graph, e); + if( e == null ) { + return false; + } + } + return true; + } + + public static boolean referencePathExists(final DefaultDirectedGraph graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex) { + DeBruijnVertex v = fromVertex; + if( v == null ) { + return false; + } + v = getNextReferenceVertex(graph, v); + if( v == null ) { + return false; + } + while( !v.equals(toVertex) ) { + v = getNextReferenceVertex(graph, v); + if( v == null ) { + return false; + } + } + return true; + } + + // fromVertex (exclusive) -> toVertex (exclusive) + public static byte[] getReferenceBytes( final DefaultDirectedGraph graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) { + byte[] bytes = null; + if( fromVertex != null && toVertex != null && !referencePathExists(graph, fromVertex, toVertex) ) { + throw new ReviewedStingException("Asked for a reference path which doesn't exist. " + fromVertex + " --> " + toVertex); + } + DeBruijnVertex v = fromVertex; + if( v == null ) { + v = getReferenceSourceVertex(graph); + bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) ); + } + v = getNextReferenceVertex(graph, v); + while( (toVertex != null && !v.equals(toVertex)) || (toVertex == null && v != null) ) { + bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) ); + // advance along the reference path + v = getNextReferenceVertex(graph, v); + } + return bytes; + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index a9768557d..a007bfa0c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -152,10 +152,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer ); graph.addVertex(addedVertex); for( final DeBruijnEdge edge : outEdges ) { - graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity())); + graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.isRef(), edge.getMultiplicity())); } for( final DeBruijnEdge edge : inEdges ) { - graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity())); + graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.isRef(), edge.getMultiplicity())); } graph.removeVertex( incomingVertex ); @@ -170,7 +170,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { protected static void pruneGraph( final DefaultDirectedGraph graph, final int pruneFactor ) { final List edgesToRemove = new ArrayList(); for( final DeBruijnEdge e : graph.edgeSet() ) { - if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor + if( e.getMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor edgesToRemove.add(e); } } @@ -195,7 +195,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) { boolean isRefNode = false; for( final DeBruijnEdge e : graph.edgesOf(v) ) { - if( e.getIsRef() ) { + if( e.isRef() ) { isRefNode = true; break; } @@ -299,10 +299,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { if( edge.getMultiplicity() > PRUNE_FACTOR ) { GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];"); } - if( edge.getIsRef() ) { + if( edge.isRef() ) { GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];"); } - if( !edge.getIsRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); } + if( !edge.isRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); } } for( final DeBruijnVertex v : graph.vertexSet() ) { final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() ); @@ -338,7 +338,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { for( final DefaultDirectedGraph graph : graphs ) { for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) { - final Haplotype h = new Haplotype( path.getBases( graph ) ); + final Haplotype h = new Haplotype( path.getBases() ); if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index bd0063a7d..be537f294 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -201,7 +201,7 @@ public class RecalDatum { * Returns the error rate (in real space) of this interval, or 0 if there are no observations * @return the empirical error rate ~= N errors / N obs */ - @Ensures("result >= 0.0") + @Ensures({"result >= 0.0"}) public double getEmpiricalErrorRate() { if ( numObservations == 0 ) return 0.0; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index f82c0a8ba..8b09e91ae 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -93,7 +93,7 @@ public class GenotypingEngineUnitTest extends BaseTest { haplotypeAlleles.add( Allele.create("AACA", false) ); haplotypeAlleles.add( Allele.create("CATA", false) ); haplotypeAlleles.add( Allele.create("CACA", false) ); - final ArrayList haplotypes = new ArrayList(); + final List haplotypes = new ArrayList(); haplotypes.add(new Haplotype("AATA".getBytes())); haplotypes.add(new Haplotype("AACA".getBytes())); haplotypes.add(new Haplotype("CATA".getBytes())); @@ -101,11 +101,11 @@ public class GenotypingEngineUnitTest extends BaseTest { final List haplotypeAllelesForSample = new ArrayList(); haplotypeAllelesForSample.add( Allele.create("CATA", false) ); haplotypeAllelesForSample.add( Allele.create("CACA", false) ); - final ArrayList> alleleMapper = new ArrayList>(); - ArrayList Aallele = new ArrayList(); + final List> alleleMapper = new ArrayList>(); + List Aallele = new ArrayList(); Aallele.add(haplotypes.get(0)); Aallele.add(haplotypes.get(1)); - ArrayList Callele = new ArrayList(); + List Callele = new ArrayList(); Callele.add(haplotypes.get(2)); Callele.add(haplotypes.get(3)); alleleMapper.add(Aallele); @@ -135,7 +135,7 @@ public class GenotypingEngineUnitTest extends BaseTest { haplotypeAlleles.add( Allele.create("TACA", false) ); haplotypeAlleles.add( Allele.create("TTCA", false) ); haplotypeAlleles.add( Allele.create("TTTA", false) ); - final ArrayList haplotypes = new ArrayList(); + final List haplotypes = new ArrayList(); haplotypes.add(new Haplotype("AATA".getBytes())); haplotypes.add(new Haplotype("AACA".getBytes())); haplotypes.add(new Haplotype("CATA".getBytes())); @@ -146,14 +146,14 @@ public class GenotypingEngineUnitTest extends BaseTest { final List haplotypeAllelesForSample = new ArrayList(); haplotypeAllelesForSample.add( Allele.create("TTTA", false) ); haplotypeAllelesForSample.add( Allele.create("AATA", true) ); - final ArrayList> alleleMapper = new ArrayList>(); - ArrayList Aallele = new ArrayList(); + final List> alleleMapper = new ArrayList>(); + List Aallele = new ArrayList(); Aallele.add(haplotypes.get(0)); Aallele.add(haplotypes.get(1)); - ArrayList Callele = new ArrayList(); + List Callele = new ArrayList(); Callele.add(haplotypes.get(2)); Callele.add(haplotypes.get(3)); - ArrayList Tallele = new ArrayList(); + List Tallele = new ArrayList(); Tallele.add(haplotypes.get(4)); Tallele.add(haplotypes.get(5)); Tallele.add(haplotypes.get(6)); @@ -187,16 +187,16 @@ public class GenotypingEngineUnitTest extends BaseTest { private class BasicGenotypingTestProvider extends TestDataProvider { byte[] ref; byte[] hap; - HashMap expected; + Map expected; - public BasicGenotypingTestProvider(String refString, String hapString, HashMap expected) { + public BasicGenotypingTestProvider(String refString, String hapString, Map expected) { super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString)); ref = refString.getBytes(); hap = hapString.getBytes(); this.expected = expected; } - public HashMap calcAlignment() { + public Map calcAlignment() { final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap); return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name"); } @@ -206,14 +206,14 @@ public class GenotypingEngineUnitTest extends BaseTest { public Object[][] makeBasicGenotypingTests() { for( int contextSize : new int[]{0,1,5,9,24,36} ) { - HashMap map = new HashMap(); + Map map = new HashMap(); map.put(1 + contextSize, (byte)'M'); final String context = Utils.dupString('G', contextSize); new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACTAGCCGATAG", map); } for( int contextSize : new int[]{0,1,5,9,24,36} ) { - HashMap map = new HashMap(); + Map map = new HashMap(); map.put(2 + contextSize, (byte)'M'); map.put(21 + contextSize, (byte)'M'); final String context = Utils.dupString('G', contextSize); @@ -221,7 +221,7 @@ public class GenotypingEngineUnitTest extends BaseTest { } for( int contextSize : new int[]{0,1,5,9,24,36} ) { - HashMap map = new HashMap(); + Map map = new HashMap(); map.put(1 + contextSize, (byte)'M'); map.put(20 + contextSize, (byte)'I'); final String context = Utils.dupString('G', contextSize); @@ -229,7 +229,7 @@ public class GenotypingEngineUnitTest extends BaseTest { } for( int contextSize : new int[]{0,1,5,9,24,36} ) { - HashMap map = new HashMap(); + Map map = new HashMap(); map.put(1 + contextSize, (byte)'M'); map.put(20 + contextSize, (byte)'D'); final String context = Utils.dupString('G', contextSize); @@ -237,7 +237,7 @@ public class GenotypingEngineUnitTest extends BaseTest { } for( int contextSize : new int[]{1,5,9,24,36} ) { - HashMap map = new HashMap(); + Map map = new HashMap(); map.put(1, (byte)'M'); map.put(20, (byte)'D'); final String context = Utils.dupString('G', contextSize); @@ -245,7 +245,7 @@ public class GenotypingEngineUnitTest extends BaseTest { } for( int contextSize : new int[]{0,1,5,9,24,36} ) { - HashMap map = new HashMap(); + Map map = new HashMap(); map.put(2 + contextSize, (byte)'M'); map.put(20 + contextSize, (byte)'I'); map.put(30 + contextSize, (byte)'D'); @@ -254,7 +254,7 @@ public class GenotypingEngineUnitTest extends BaseTest { } for( int contextSize : new int[]{0,1,5,9,24,36} ) { - HashMap map = new HashMap(); + Map map = new HashMap(); map.put(1 + contextSize, (byte)'M'); map.put(20 + contextSize, (byte)'D'); map.put(28 + contextSize, (byte)'M'); @@ -267,8 +267,8 @@ public class GenotypingEngineUnitTest extends BaseTest { @Test(dataProvider = "BasicGenotypingTestProvider", enabled = true) public void testHaplotypeToVCF(BasicGenotypingTestProvider cfg) { - HashMap calculatedMap = cfg.calcAlignment(); - HashMap expectedMap = cfg.expected; + Map calculatedMap = cfg.calcAlignment(); + Map expectedMap = cfg.expected; logger.warn(String.format("Test: %s", cfg.toString())); if(!compareVCMaps(calculatedMap, expectedMap)) { logger.warn("calc map = " + calculatedMap); @@ -420,9 +420,9 @@ public class GenotypingEngineUnitTest extends BaseTest { } /** - * Private function to compare HashMap of VCs, it only checks the types and start locations of the VariantContext + * Private function to compare Map of VCs, it only checks the types and start locations of the VariantContext */ - private boolean compareVCMaps(HashMap calc, HashMap expected) { + private boolean compareVCMaps(Map calc, Map expected) { if( !calc.keySet().equals(expected.keySet()) ) { return false; } // sanity check for( Integer loc : expected.keySet() ) { Byte type = expected.get(loc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java index 428b7355c..72b778ec9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import org.broad.tribble.util.BlockCompressedStreamConstants; +import net.sf.samtools.util.BlockCompressedStreamConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.FileInputStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index 9cf0a9493..1cfb527cd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import org.broad.tribble.util.SeekableBufferedStream; -import org.broad.tribble.util.SeekableFileStream; +import net.sf.samtools.seekablestream.SeekableBufferedStream; +import net.sf.samtools.seekablestream.SeekableFileStream; import net.sf.samtools.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index 8ef24d373..12e87f613 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -350,7 +350,7 @@ public class GATKRunReport { * @return a non-null AWS access key for the GATK user */ protected static String getAWSAccessKey() { - return getAWSKey("GATK_AWS_access.key"); + return getAWSKey("resources/GATK_AWS_access.key"); } /** @@ -358,7 +358,7 @@ public class GATKRunReport { * @return a non-null AWS secret key for the GATK user */ protected static String getAWSSecretKey() { - return getAWSKey("GATK_AWS_secret.key"); + return getAWSKey("resources/GATK_AWS_secret.key"); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_access.key b/public/java/src/org/broadinstitute/sting/gatk/phonehome/resources/GATK_AWS_access.key similarity index 100% rename from public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_access.key rename to public/java/src/org/broadinstitute/sting/gatk/phonehome/resources/GATK_AWS_access.key diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_secret.key b/public/java/src/org/broadinstitute/sting/gatk/phonehome/resources/GATK_AWS_secret.key similarity index 100% rename from public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_secret.key rename to public/java/src/org/broadinstitute/sting/gatk/phonehome/resources/GATK_AWS_secret.key diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index eb287abd8..77f3a84c3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -240,6 +240,13 @@ public class Utils { } } + /** + * Create a new list that contains the elements of left along with elements elts + * @param left a non-null list of elements + * @param elts a varargs vector for elts to append in order to left + * @param + * @return A newly allocated linked list containing left followed by elts + */ public static List append(final List left, T ... elts) { final List l = new LinkedList(left); l.addAll(Arrays.asList(elts)); @@ -329,15 +336,6 @@ public class Utils { return str.substring(start, end+1); } - public static byte listMaxByte(List quals) { - if (quals.size() == 0) return 0; - byte m = quals.get(0); - for (byte b : quals) { - m = b > m ? b : m; - } - return m; - } - /** * Splits expressions in command args by spaces and returns the array of expressions. * Expressions may use single or double quotes to group any individual expression, but not both. @@ -400,173 +398,6 @@ public class Utils { return concatArrays(A, B); } - /** - * Returns indices of all occurrences of the specified symbol in the string - * @param s Search string - * @param ch Character to search for - * @return Indices of all occurrences of the specified symbol - */ - public static int[] indexOfAll(String s, int ch) { - int[] pos = new int[64]; - int z = 0; - - for (int i = 0; i < s.length(); i++) { - if (s.charAt(i) == ch) pos[z++] = i; - } - return reallocate(pos, z); - } - - public static int countSetBits(boolean[] array) { - int counter = 0; - for ( int i = 0; i < array.length; i++ ) { - if ( array[i] ) - counter++; - } - return counter; - } - - /** - * Returns new (reallocated) integer array of the specified size, with content - * of the original array orig copied into it. If newSize is - * less than the size of the original array, only first newSize elements will be copied. - * If new size is greater than the size of the original array, the content of the original array will be padded - * with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation - * will be performed and the original array will be returned instead. - * - * @param orig Original size. - * @param newSize New Size. - * - * @return New array with length equal to newSize. - */ - public static int[] reallocate(int[] orig, int newSize) { - if (orig.length == newSize) return orig; - int[] new_array = new int[newSize]; - int L = (newSize > orig.length ? orig.length : newSize); - for (int i = 0; i < L; i++) new_array[i] = orig[i]; - return new_array; - } - - - /** - * Returns a copy of array a, extended with additional n elements to the right (if n > 0 ) or -n elements to the - * left (if n<0), copying the values form the original array. Newly added elements are filled with value v. Note that - * if array a is being padded to the left, first (-n) elements of the returned array are v's, followed by the content of - * array a. - * @param a original array - * @param n number of (v-filled) elements to append to a on the right (n>0) or on the left (n<0) - * @param v element value - * @return the extended copy of array a with additional n elements - */ - public static byte [] extend(final byte[] a, int n, byte v) { - - byte [] newA; - - if ( n > 0 ) { - newA = Arrays.copyOf(a, a.length+n); - if ( v != 0) { // java pads with 0's for us, so there is nothing to do if v==0 - for ( int i = a.length; i < newA.length ; i++ ) newA[i] = v; - } - return newA; - } - - // we are here only if n < 0: - n = (-n); - newA = new byte[ a.length + n ]; - int i; - if ( v!= 0 ) { - i = 0; - for( ; i < n; i++ ) newA[i] = v; - } else { - i = n; - } - for ( int j = 0 ; j < a.length ; i++, j++) newA[i]=a[j]; - return newA; - } - - - /** - * Returns a copy of array a, extended with additional n elements to the right (if n > 0 ) or -n elements to the - * left (if n<0), copying the values form the original array. Newly added elements are filled with value v. Note that - * if array a is padded to the left, first (-n) elements of the returned array are v's, followed by the content of - * array a. - * @param a original array - * @param n number of (v-filled) elements to append to a on the right (n>0) or on the left (n<0) - * @param v element value - * @return the extended copy of array a with additional n elements - */ - public static short [] extend(final short[] a, int n, short v) { - - short [] newA; - - if ( n > 0 ) { - newA = Arrays.copyOf(a, a.length+n); - if ( v != 0) { // java pads with 0's for us, so there is nothing to do if v==0 - for ( int i = a.length; i < newA.length ; i++ ) newA[i] = v; - } - return newA; - } - - // we are here only if n < 0: - n = (-n); - newA = new short[ a.length + n ]; - int i; - if ( v!= 0 ) { - i = 0; - for( ; i < n; i++ ) newA[i] = v; - } else { - i = n; - } - for ( int j = 0 ; j < a.length ; i++, j++) newA[i]=a[j]; - return newA; - } - - /* TEST ME - public static void main(String[] argv) { - List l1 = new LinkedList(); - List l2 = new ArrayList(); - - l1.add(1); - l1.add(5); - l1.add(3); - l1.add(10); - l1.add(4); - l1.add(2); - l2.add(1); - l2.add(5); - l2.add(3); - l2.add(10); - l2.add(4); - l2.add(2); - - Predicate p = new Predicate() { - public boolean apply(Integer i) { - return i > 2; - } - }; - filterInPlace(p, l1); - filterInPlace(p, l2); - - for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i)); - System.out.println(); - for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i)); - System.out.println(); - - } - - */ - - /** - * a helper method. Turns a single character string into a char. - * - * @param str the string - * - * @return a char - */ - public static char stringToChar(String str) { - if (str.length() != 1) throw new IllegalArgumentException("String length must be one"); - return str.charAt(0); - } - public static > List sorted(Collection c) { return sorted(c, false); } @@ -594,18 +425,6 @@ public class Utils { return l; } - public static , V> String sortedString(Map c) { - List t = new ArrayList(c.keySet()); - Collections.sort(t); - - List pairs = new ArrayList(); - for ( T k : t ) { - pairs.add(k + "=" + c.get(k)); - } - - return "{" + join(", ", pairs) + "}"; - } - /** * Reverse a byte array of bases * @@ -654,14 +473,6 @@ public class Utils { return new String( reverse( bases.getBytes() )) ; } - public static byte[] charSeq2byteSeq(char[] seqIn) { - byte[] seqOut = new byte[seqIn.length]; - for ( int i = 0; i < seqIn.length; i++ ) { - seqOut[i] = (byte)seqIn[i]; - } - return seqOut; - } - public static boolean isFlagSet(int value, int flag) { return ((value & flag) == flag); } diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 8c7bce6ac..73e129105 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -673,7 +673,9 @@ public class BAQ { } else if ( excludeReadFromBAQ(read) ) { ; // just fall through } else { - if ( calculationType == CalculationMode.RECALCULATE || ! hasBAQTag(read) ) { + final boolean readHasBAQTag = hasBAQTag(read); + + if ( calculationType == CalculationMode.RECALCULATE || ! readHasBAQTag ) { if ( DEBUG ) System.out.printf(" Calculating BAQ on the fly%n"); BAQCalculationResult hmmResult = calcBAQFromHMM(read, refReader); if ( hmmResult != null ) { @@ -683,6 +685,9 @@ public class BAQ { case DONT_MODIFY: BAQQuals = hmmResult.bq; break; default: throw new ReviewedStingException("BUG: unexpected qmode " + qmode); } + } else if ( readHasBAQTag ) { + // remove the BAQ tag if it's there because we cannot trust it + read.setAttribute(BAQ_TAG, null); } } else if ( qmode == QualityMode.OVERWRITE_QUALS ) { // only makes sense if we are overwriting quals if ( DEBUG ) System.out.printf(" Taking BAQ from tag%n"); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 9c5f95472..1607db8af 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.utils.sam; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; @@ -38,9 +40,16 @@ import org.broadinstitute.sting.utils.recalibration.EventType; import java.util.ArrayList; import java.util.Arrays; +import java.util.EnumSet; +import java.util.List; -public class AlignmentUtils { +public final class AlignmentUtils { + private final static EnumSet ALIGNED_TO_GENOME_OPERATORS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X); + private final static EnumSet ALIGNED_TO_GENOME_PLUS_SOFTCLIPS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.S); + + // cannot be instantiated + private AlignmentUtils() { } public static class MismatchCount { public int numMismatches = 0; @@ -116,103 +125,6 @@ public class AlignmentUtils { return mc; } - /** - * Returns the number of mismatches in the pileup within the given reference context. - * - * @param pileup the pileup with reads - * @param ref the reference context - * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) - * @return the number of mismatches - */ - public static int mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) { - int mismatches = 0; - for (PileupElement p : pileup) - mismatches += mismatchesInRefWindow(p, ref, ignoreTargetSite); - return mismatches; - } - - /** - * Returns the number of mismatches in the pileup element within the given reference context. - * - * @param p the pileup element - * @param ref the reference context - * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) - * @return the number of mismatches - */ - public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) { - return mismatchesInRefWindow(p, ref, ignoreTargetSite, false); - } - - /** - * Returns the number of mismatches in the pileup element within the given reference context. - * - * @param p the pileup element - * @param ref the reference context - * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) - * @param qualitySumInsteadOfMismatchCount - * if true, return the quality score sum of the mismatches rather than the count - * @return the number of mismatches - */ - public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite, boolean qualitySumInsteadOfMismatchCount) { - int sum = 0; - - int windowStart = ref.getWindow().getStart(); - int windowStop = ref.getWindow().getStop(); - byte[] refBases = ref.getBases(); - byte[] readBases = p.getRead().getReadBases(); - byte[] readQualities = p.getRead().getBaseQualities(); - Cigar c = p.getRead().getCigar(); - - int readIndex = 0; - int currentPos = p.getRead().getAlignmentStart(); - int refIndex = Math.max(0, currentPos - windowStart); - - for (int i = 0; i < c.numCigarElements(); i++) { - CigarElement ce = c.getCigarElement(i); - int cigarElementLength = ce.getLength(); - switch (ce.getOperator()) { - case EQ: - case X: - case M: - for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) { - // are we past the ref window? - if (currentPos > windowStop) - break; - - // are we before the ref window? - if (currentPos < windowStart) - continue; - - byte refChr = refBases[refIndex++]; - - // do we need to skip the target site? - if (ignoreTargetSite && ref.getLocus().getStart() == currentPos) - continue; - - byte readChr = readBases[readIndex]; - if (readChr != refChr) - sum += (qualitySumInsteadOfMismatchCount) ? readQualities[readIndex] : 1; - } - break; - case I: - case S: - readIndex += cigarElementLength; - break; - case D: - case N: - currentPos += cigarElementLength; - if (currentPos > windowStart) - refIndex += Math.min(cigarElementLength, currentPos - windowStart); - break; - case H: - case P: - break; - } - } - - return sum; - } - /** * Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment. * This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but @@ -223,31 +135,54 @@ public class AlignmentUtils { * @param r alignment * @return number of continuous alignment blocks (i.e. 'M' elements of the cigar; all indel and clipping elements are ignored). */ + @Ensures("result >= 0") public static int getNumAlignmentBlocks(final SAMRecord r) { - int n = 0; + if ( r == null ) throw new IllegalArgumentException("read cannot be null"); final Cigar cigar = r.getCigar(); if (cigar == null) return 0; + int n = 0; for (final CigarElement e : cigar.getCigarElements()) { - if (e.getOperator() == CigarOperator.M) n++; + if (ALIGNED_TO_GENOME_OPERATORS.contains(e.getOperator())) + n++; } return n; } - public static int getNumAlignedBasesCountingSoftClips(final SAMRecord r) { + + /** + * Get the number of bases aligned to the genome, including soft clips + * + * If read is not mapped (i.e., doesn't have a cigar) returns 0 + * + * @param r a non-null GATKSAMRecord + * @return the number of bases aligned to the genome in R, including soft clipped bases + */ + public static int getNumAlignedBasesCountingSoftClips(final GATKSAMRecord r) { int n = 0; final Cigar cigar = r.getCigar(); if (cigar == null) return 0; for (final CigarElement e : cigar.getCigarElements()) - if (e.getOperator() == CigarOperator.M || e.getOperator() == CigarOperator.S) + if (ALIGNED_TO_GENOME_PLUS_SOFTCLIPS.contains(e.getOperator())) n += e.getLength(); return n; } + /** + * Count the number of bases hard clipped from read + * + * If read's cigar is null, return 0 + * + * @param r a non-null read + * @return a positive integer + */ + @Ensures("result >= 0") public static int getNumHardClippedBases(final SAMRecord r) { + if ( r == null ) throw new IllegalArgumentException("Read cannot be null"); + int n = 0; final Cigar cigar = r.getCigar(); if (cigar == null) return 0; @@ -259,16 +194,28 @@ public class AlignmentUtils { return n; } + /** + * Calculate the number of bases that are soft clipped in read with quality score greater than threshold + * + * Handles the case where the cigar is null (i.e., the read is unmapped), returning 0 + * + * @param read a non-null GATKSAMRecord. + * @param qualThreshold consider bases with quals > this value as high quality. Must be >= 0 + * @return positive integer + */ + @Ensures("result >= 0") public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) { + if ( read == null ) throw new IllegalArgumentException("Read cannot be null"); + if ( qualThreshold < 0 ) throw new IllegalArgumentException("Expected qualThreshold to be a positive byte but saw " + qualThreshold); + + if ( read.getCigar() == null ) // the read is unmapped + return 0; + + final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION ); int numHQSoftClips = 0; int alignPos = 0; - final Cigar cigar = read.getCigar(); - final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION ); - - for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) { - - final CigarElement ce = cigar.getCigarElement(iii); + for ( final CigarElement ce : read.getCigar().getCigarElements() ) { final int elementLength = ce.getLength(); switch( ce.getOperator() ) { @@ -277,35 +224,26 @@ public class AlignmentUtils { if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; } } break; - case M: - case I: - case EQ: - case X: + case M: case I: case EQ: case X: alignPos += elementLength; break; - case H: - case P: - case D: - case N: + case H: case P: case D: case N: break; default: - throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + throw new IllegalStateException("Unsupported cigar operator: " + ce.getOperator()); } } + return numHQSoftClips; } public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) { - return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), false, pileupElement.isDeletion(), alignmentStart, refLocus ); + return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isDeletion(), alignmentStart, refLocus ); } - public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isInsertionAtBeginningOfRead, final boolean isDeletion, final int alignmentStart, final int refLocus) { + public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isDeletion, final int alignmentStart, final int refLocus) { int pileupOffset = offset; - // Special case for reads starting with insertion - if (isInsertionAtBeginningOfRead) - return 0; - // Reassign the offset if we are in the middle of a deletion because of the modified representation of the read bases if (isDeletion) { pileupOffset = refLocus - alignmentStart; @@ -458,18 +396,21 @@ public class AlignmentUtils { * specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and * alignment reference index/start. * - * @param r record + * Our life would be so much easier if all sam files followed the specs. In reality, + * sam files (including those generated by maq or bwa) miss headers altogether. When + * reading such a SAM file, reference name is set, but since there is no sequence dictionary, + * null is always returned for referenceIndex. Let's be paranoid here, and make sure that + * we do not call the read "unmapped" when it has only reference name set with ref. index missing + * or vice versa. + * + * @param r a non-null record * @return true if read is unmapped */ public static boolean isReadUnmapped(final SAMRecord r) { + if ( r == null ) throw new IllegalArgumentException("Read cannot be null"); + if (r.getReadUnmappedFlag()) return true; - // our life would be so much easier if all sam files followed the specs. In reality, - // sam files (including those generated by maq or bwa) miss headers altogether. When - // reading such a SAM file, reference name is set, but since there is no sequence dictionary, - // null is always returned for referenceIndex. Let's be paranoid here, and make sure that - // we do not call the read "unmapped" when it has only reference name set with ref. index missing - // or vice versa. if ((r.getReferenceIndex() != null && r.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX || r.getReferenceName() != null && !r.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) && r.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) return false; @@ -477,26 +418,26 @@ public class AlignmentUtils { } /** - * Due to (unfortunate) multiple ways to indicate that read/mate is unmapped allowed by SAM format - * specification, one may need this convenience shortcut. Checks both 'mate unmapped' flag and - * alignment reference index/start of the mate. - * - * @param r sam record for the read - * @return true if read's mate is unmapped + * Need a well-formed, consolidated Cigar string so that the left aligning code works properly. + * For example, 1M1M1M1D2M1M --> 3M1D3M + * If the given cigar is empty then the returned cigar will also be empty + * @param c the cigar to consolidate + * @return a cigar with consecutive matching operators merged into single operators. */ - public static boolean isMateUnmapped(final SAMRecord r) { - if (r.getMateUnmappedFlag()) return true; - - // our life would be so much easier if all sam files followed the specs. In reality, - // sam files (including those generated by maq or bwa) miss headers altogether. When - // reading such a SAM file, reference name is set, but since there is no sequence dictionary, - // null is always returned for referenceIndex. Let's be paranoid here, and make sure that - // we do not call the read "unmapped" when it has only reference name set with ref. index missing - // or vice versa. - if ((r.getMateReferenceIndex() != null && r.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX - || r.getMateReferenceName() != null && !r.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) - && r.getMateAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) return false; - return true; + @Requires({"c != null"}) + @Ensures({"result != null"}) + public static Cigar consolidateCigar( final Cigar c ) { + if( c.isEmpty() ) { return c; } + final Cigar returnCigar = new Cigar(); + int sumLength = 0; + for( int iii = 0; iii < c.numCigarElements(); iii++ ) { + sumLength += c.getCigarElement(iii).getLength(); + if( iii == c.numCigarElements() - 1 || !c.getCigarElement(iii).getOperator().equals(c.getCigarElement(iii+1).getOperator())) { // at the end so finish the current element + returnCigar.add(new CigarElement(sumLength, c.getCigarElement(iii).getOperator())); + sumLength = 0; + } + } + return returnCigar; } /** @@ -565,22 +506,41 @@ public class AlignmentUtils { return cigar; } - private static boolean cigarHasZeroSizeElement(Cigar c) { - for (CigarElement ce : c.getCigarElements()) { + /** + * Does one of the elements in cigar have a 0 length? + * + * @param c a non-null cigar + * @return true if any element has 0 size + */ + @Requires("c != null") + protected static boolean cigarHasZeroSizeElement(final Cigar c) { + for (final CigarElement ce : c.getCigarElements()) { if (ce.getLength() == 0) return true; } return false; } - private static Cigar cleanUpCigar(Cigar c) { - ArrayList elements = new ArrayList(c.numCigarElements() - 1); - for (CigarElement ce : c.getCigarElements()) { - if (ce.getLength() != 0 && - (elements.size() != 0 || ce.getOperator() != CigarOperator.D)) { + /** + * Clean up the incoming cigar + * + * Removes elements with zero size + * Clips away beginning deletion operators + * + * @param c the cigar string we want to clean up + * @return a newly allocated, cleaned up Cigar + */ + @Requires("c != null") + @Ensures("result != null") + private static Cigar cleanUpCigar(final Cigar c) { + final List elements = new ArrayList(c.numCigarElements() - 1); + + for (final CigarElement ce : c.getCigarElements()) { + if (ce.getLength() != 0 && (! elements.isEmpty() || ce.getOperator() != CigarOperator.D)) { elements.add(ce); } } + return new Cigar(elements); } diff --git a/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java index 11d152e65..a4ccd050a 100644 --- a/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java @@ -30,7 +30,7 @@ import org.broad.tribble.Feature; import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; -import org.broad.tribble.util.BlockCompressedInputStream; +import net.sf.samtools.util.BlockCompressedInputStream; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.variant.utils.GeneralUtils; import org.broadinstitute.variant.variantcontext.*; diff --git a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java index 5d6ecd0f9..29c643153 100644 --- a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java @@ -32,8 +32,7 @@ import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; import java.io.File; -import java.util.LinkedHashMap; -import java.util.Map; +import java.util.*; /** * Testing framework for general purpose utilities class. @@ -43,6 +42,25 @@ import java.util.Map; */ public class UtilsUnitTest extends BaseTest { + @Test + public void testAppend() { + for ( int leftSize : Arrays.asList(0, 1, 2, 3) ) { + for ( final int rightSize : Arrays.asList(0, 1, 2) ) { + final List left = new LinkedList(); + for ( int i = 0; i < leftSize; i++ ) left.add(i); + final List total = new LinkedList(); + for ( int i = 0; i < leftSize + rightSize; i++ ) total.add(i); + + if ( rightSize == 0 ) + Assert.assertEquals(Utils.append(left), total); + if ( rightSize == 1 ) + Assert.assertEquals(Utils.append(left, leftSize), total); + if ( rightSize == 2 ) + Assert.assertEquals(Utils.append(left, leftSize, leftSize + 1), total); + } + } + + } @Test public void testDupStringNoChars() { diff --git a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java index 1e3386426..da82e9de5 100644 --- a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java @@ -203,6 +203,23 @@ public class BAQUnitTest extends BaseTest { Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range"); } + @Test(enabled = true) + public void testBAQOverwritesExistingTagWithNull() { + + // create a read with a single base off the end of the contig, which cannot be BAQed + final SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, fasta.getSequenceDictionary().getSequence("chr1").getSequenceLength() + 1, 1); + read.setReadBases(new byte[] {(byte) 'A'}); + read.setBaseQualities(new byte[] {(byte) 20}); + read.setCigarString("1M"); + read.setAttribute("BQ", "A"); + + // try to BAQ and tell it to RECALCULATE AND ADD_TAG + BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false); + baq.baqRead(read, fasta, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG); + + // did we remove the existing tag? + Assert.assertTrue(read.getAttribute("BQ") == null); + } public void testBAQ(BAQTest test, boolean lookupWithFasta) { BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java index f088574d7..d9f514593 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java @@ -25,13 +25,16 @@ package org.broadinstitute.sting.utils.sam; -import junit.framework.Assert; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; +import net.sf.samtools.*; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.Utils; +import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.*; + public class AlignmentUtilsUnitTest { private SAMFileHeader header; @@ -121,4 +124,205 @@ public class AlignmentUtilsUnitTest { start, ArtificialSAMUtils.DEFAULT_READ_LENGTH); } + + private final List> makeCigarElementCombinations() { + // this functionality can be adapted to provide input data for whatever you might want in your data + final List cigarElements = new LinkedList(); + for ( final int size : Arrays.asList(0, 10) ) { + for ( final CigarOperator op : CigarOperator.values() ) { + cigarElements.add(new CigarElement(size, op)); + } + } + + final List> combinations = new LinkedList>(); + for ( final int nElements : Arrays.asList(1, 2, 3) ) { + combinations.addAll(Utils.makePermutations(cigarElements, nElements, true)); + } + + return combinations; + } + + + @DataProvider(name = "NumAlignedBasesCountingSoftClips") + public Object[][] makeNumAlignedBasesCountingSoftClips() { + List tests = new ArrayList(); + + final EnumSet alignedToGenome = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.S); + for ( final List elements : makeCigarElementCombinations() ) { + int n = 0; + for ( final CigarElement elt : elements ) n += alignedToGenome.contains(elt.getOperator()) ? elt.getLength() : 0; + tests.add(new Object[]{new Cigar(elements), n}); + } + + tests.add(new Object[]{null, 0}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "NumAlignedBasesCountingSoftClips") + public void testNumAlignedBasesCountingSoftClips(final Cigar cigar, final int expected) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, cigar == null ? 10 : cigar.getReadLength()); + read.setCigar(cigar); + Assert.assertEquals(AlignmentUtils.getNumAlignedBasesCountingSoftClips(read), expected, "Cigar " + cigar + " failed NumAlignedBasesCountingSoftClips"); + } + + @DataProvider(name = "CigarHasZeroElement") + public Object[][] makeCigarHasZeroElement() { + List tests = new ArrayList(); + + for ( final List elements : makeCigarElementCombinations() ) { + boolean hasZero = false; + for ( final CigarElement elt : elements ) hasZero = hasZero || elt.getLength() == 0; + tests.add(new Object[]{new Cigar(elements), hasZero}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "CigarHasZeroElement") + public void testCigarHasZeroSize(final Cigar cigar, final boolean hasZero) { + Assert.assertEquals(AlignmentUtils.cigarHasZeroSizeElement(cigar), hasZero, "Cigar " + cigar.toString() + " failed cigarHasZeroSizeElement"); + } + + @DataProvider(name = "NumHardClipped") + public Object[][] makeNumHardClipped() { + List tests = new ArrayList(); + + for ( final List elements : makeCigarElementCombinations() ) { + int n = 0; + for ( final CigarElement elt : elements ) n += elt.getOperator() == CigarOperator.H ? elt.getLength() : 0; + tests.add(new Object[]{new Cigar(elements), n}); + } + + tests.add(new Object[]{null, 0}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "NumHardClipped") + public void testNumHardClipped(final Cigar cigar, final int expected) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, cigar == null ? 10 : cigar.getReadLength()); + read.setCigar(cigar); + Assert.assertEquals(AlignmentUtils.getNumHardClippedBases(read), expected, "Cigar " + cigar + " failed num hard clips"); + } + + @DataProvider(name = "NumAlignedBlocks") + public Object[][] makeNumAlignedBlocks() { + List tests = new ArrayList(); + + for ( final List elements : makeCigarElementCombinations() ) { + int n = 0; + for ( final CigarElement elt : elements ) { + switch ( elt.getOperator() ) { + case M:case X:case EQ: n++; break; + default: break; + } + } + tests.add(new Object[]{new Cigar(elements), n}); + } + + tests.add(new Object[]{null, 0}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "NumAlignedBlocks") + public void testNumAlignedBlocks(final Cigar cigar, final int expected) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, cigar == null ? 10 : cigar.getReadLength()); + read.setCigar(cigar); + Assert.assertEquals(AlignmentUtils.getNumAlignmentBlocks(read), expected, "Cigar " + cigar + " failed NumAlignedBlocks"); + } + + @Test + public void testConsolidateCigar() { + { + //1M1M1M1D2M1M --> 3M1D3M + List list = new ArrayList(); + list.add( new CigarElement(1, CigarOperator.M)); + list.add( new CigarElement(1, CigarOperator.M)); + list.add( new CigarElement(1, CigarOperator.M)); + list.add( new CigarElement(1, CigarOperator.D)); + list.add( new CigarElement(2, CigarOperator.M)); + list.add( new CigarElement(1, CigarOperator.M)); + Cigar unconsolidatedCigar = new Cigar(list); + + list.clear(); + list.add( new CigarElement(3, CigarOperator.M)); + list.add( new CigarElement(1, CigarOperator.D)); + list.add( new CigarElement(3, CigarOperator.M)); + Cigar consolidatedCigar = new Cigar(list); + + Assert.assertEquals(consolidatedCigar.toString(), AlignmentUtils.consolidateCigar(unconsolidatedCigar).toString()); + } + + { + //6M6M6M --> 18M + List list = new ArrayList(); + list.add( new CigarElement(6, CigarOperator.M)); + list.add( new CigarElement(6, CigarOperator.M)); + list.add( new CigarElement(6, CigarOperator.M)); + Cigar unconsolidatedCigar = new Cigar(list); + + list.clear(); + list.add( new CigarElement(18, CigarOperator.M)); + Cigar consolidatedCigar = new Cigar(list); + + Assert.assertEquals(consolidatedCigar.toString(), AlignmentUtils.consolidateCigar(unconsolidatedCigar).toString()); + } + } + + @DataProvider(name = "SoftClipsDataProvider") + public Object[][] makeSoftClipsDataProvider() { + List tests = new ArrayList(); + + // this functionality can be adapted to provide input data for whatever you might want in your data + for ( final int lengthOfLeftClip : Arrays.asList(0, 1, 10) ) { + for ( final int lengthOfRightClip : Arrays.asList(0, 1, 10) ) { + for ( final int qualThres : Arrays.asList(10, 20, 30) ) { + for ( final String middleOp : Arrays.asList("M", "D") ) { + for ( final int matchSize : Arrays.asList(0, 1, 10) ) { + final byte[] left = makeQualArray(lengthOfLeftClip, qualThres); + final byte[] right = makeQualArray(lengthOfRightClip, qualThres); + int n = 0; + for ( int i = 0; i < left.length; i++ ) n += left[i] > qualThres ? 1 : 0; + for ( int i = 0; i < right.length; i++ ) n += right[i] > qualThres ? 1 : 0; + tests.add(new Object[]{left, matchSize, middleOp, right, qualThres, n}); + } + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + private byte[] makeQualArray(final int length, final int qualThreshold) { + final byte[] array = new byte[length]; + for ( int i = 0; i < array.length; i++ ) + array[i] = (byte)(qualThreshold + ( i % 2 == 0 ? 1 : - 1 )); + return array; + } + + @Test(dataProvider = "SoftClipsDataProvider") + public void testSoftClipsData(final byte[] qualsOfSoftClipsOnLeft, final int middleSize, final String middleOp, final byte[] qualOfSoftClipsOnRight, final int qualThreshold, final int numExpected) { + final int readLength = (middleOp.equals("D") ? 0 : middleSize) + qualOfSoftClipsOnRight.length + qualsOfSoftClipsOnLeft.length; + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength); + final byte[] bases = Utils.dupBytes((byte) 'A', readLength); + final byte[] matchBytes = middleOp.equals("D") ? new byte[]{} : Utils.dupBytes((byte)30, middleSize); + final byte[] quals = ArrayUtils.addAll(ArrayUtils.addAll(qualsOfSoftClipsOnLeft, matchBytes), qualOfSoftClipsOnRight); + + // set the read's bases and quals + read.setReadBases(bases); + read.setBaseQualities(quals); + + final StringBuilder cigar = new StringBuilder(); + if (qualsOfSoftClipsOnLeft.length > 0 ) cigar.append(qualsOfSoftClipsOnLeft.length + "S"); + if (middleSize > 0 ) cigar.append(middleSize + middleOp); + if (qualOfSoftClipsOnRight.length > 0 ) cigar.append(qualOfSoftClipsOnRight.length + "S"); + + read.setCigarString(cigar.toString()); + + final int actual = AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) qualThreshold); + Assert.assertEquals(actual, numExpected, "Wrong number of soft clips detected for read " + read.getSAMString()); + } } diff --git a/public/packages/PicardPrivate.xml b/public/packages/PicardPrivate.xml index a800294d6..d898a5d07 100644 --- a/public/packages/PicardPrivate.xml +++ b/public/packages/PicardPrivate.xml @@ -2,26 +2,9 @@ - - - - - - - - - - - - - - - - - diff --git a/settings/repository/edu.mit.broad/picard-private-parts-2375.jar b/settings/repository/edu.mit.broad/picard-private-parts-2375.jar deleted file mode 100644 index bfa2f65ad..000000000 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-2375.jar and /dev/null differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-2662.jar b/settings/repository/edu.mit.broad/picard-private-parts-2662.jar new file mode 100644 index 000000000..54ef6d5e2 Binary files /dev/null and b/settings/repository/edu.mit.broad/picard-private-parts-2662.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-2375.xml b/settings/repository/edu.mit.broad/picard-private-parts-2662.xml similarity index 58% rename from settings/repository/edu.mit.broad/picard-private-parts-2375.xml rename to settings/repository/edu.mit.broad/picard-private-parts-2662.xml index b467f934a..119255e8d 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-2375.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-2662.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.67.1197.xml b/settings/repository/net.sf/picard-1.67.1197.xml deleted file mode 100644 index 7d9042d6b..000000000 --- a/settings/repository/net.sf/picard-1.67.1197.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.67.1197.jar b/settings/repository/net.sf/picard-1.84.1337.jar similarity index 61% rename from settings/repository/net.sf/picard-1.67.1197.jar rename to settings/repository/net.sf/picard-1.84.1337.jar index 9243c02df..68db41848 100644 Binary files a/settings/repository/net.sf/picard-1.67.1197.jar and b/settings/repository/net.sf/picard-1.84.1337.jar differ diff --git a/settings/repository/net.sf/picard-1.84.1337.xml b/settings/repository/net.sf/picard-1.84.1337.xml new file mode 100644 index 000000000..99f746ff6 --- /dev/null +++ b/settings/repository/net.sf/picard-1.84.1337.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.67.1197.xml b/settings/repository/net.sf/sam-1.67.1197.xml deleted file mode 100644 index d43aba4ed..000000000 --- a/settings/repository/net.sf/sam-1.67.1197.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.67.1197.jar b/settings/repository/net.sf/sam-1.84.1337.jar similarity index 67% rename from settings/repository/net.sf/sam-1.67.1197.jar rename to settings/repository/net.sf/sam-1.84.1337.jar index 8a8343cfa..3d28e1928 100644 Binary files a/settings/repository/net.sf/sam-1.67.1197.jar and b/settings/repository/net.sf/sam-1.84.1337.jar differ diff --git a/settings/repository/net.sf/sam-1.84.1337.xml b/settings/repository/net.sf/sam-1.84.1337.xml new file mode 100644 index 000000000..4d31fe250 --- /dev/null +++ b/settings/repository/net.sf/sam-1.84.1337.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-119.jar b/settings/repository/org.broad/tribble-1.84.1337.jar similarity index 51% rename from settings/repository/org.broad/tribble-119.jar rename to settings/repository/org.broad/tribble-1.84.1337.jar index c74bea398..a4c336101 100644 Binary files a/settings/repository/org.broad/tribble-119.jar and b/settings/repository/org.broad/tribble-1.84.1337.jar differ diff --git a/settings/repository/org.broad/tribble-1.84.1337.xml b/settings/repository/org.broad/tribble-1.84.1337.xml new file mode 100644 index 000000000..f14af794e --- /dev/null +++ b/settings/repository/org.broad/tribble-1.84.1337.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-119.xml b/settings/repository/org.broad/tribble-119.xml deleted file mode 100644 index 08037b20e..000000000 --- a/settings/repository/org.broad/tribble-119.xml +++ /dev/null @@ -1,3 +0,0 @@ - - -