Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Guillermo del Angel 2013-02-01 11:15:01 -05:00
commit 2f118c85ed
32 changed files with 842 additions and 455 deletions

View File

@ -708,9 +708,6 @@
<include name="**/sting/gatk/**/*.R"/>
<include name="**/sting/alignment/**/*.R"/>
</fileset>
<fileset dir="${java.public.source.dir}">
<include name="**/phonehome/*.key"/>
</fileset>
<fileset dir="${key.dir}">
<include name="**/*.key"/>
</fileset>

View File

@ -107,7 +107,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
continue;
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 );
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 );
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
if (readPos > numAlignedBases / 2)
readPos = numAlignedBases - (readPos + 1);

View File

@ -86,7 +86,7 @@ public class DeBruijnEdge {
multiplicity = value;
}
public boolean getIsRef() {
public boolean isRef() {
return isRef;
}

View File

@ -46,8 +46,14 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.Serializable;
@ -55,7 +61,7 @@ import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: ebanks
* User: ebanks, rpoplin
* Date: Mar 23, 2011
*/
// Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph.
@ -72,28 +78,47 @@ public class KBestPaths {
protected static class Path {
// the last vertex seen in the path
private DeBruijnVertex lastVertex;
private final DeBruijnVertex lastVertex;
// the list of edges comprising the path
private ArrayList<DeBruijnEdge> edges;
private final List<DeBruijnEdge> edges;
// the scores for the path
private int totalScore = 0, lowestEdge = -1;
private final int totalScore;
private final int lowestEdge;
public Path( final DeBruijnVertex initialVertex ) {
// the graph from which this path originated
private final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph;
// used in the bubble state machine to apply Smith-Waterman to the bubble sequence
private final double SW_MATCH = 15.0;
private final double SW_MISMATCH = -15.0;
private final double SW_GAP = -25.0;
private final double SW_GAP_EXTEND = -1.2;
public Path( final DeBruijnVertex initialVertex, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
lastVertex = initialVertex;
edges = new ArrayList<DeBruijnEdge>(0);
totalScore = 0;
lowestEdge = -1;
this.graph = graph;
}
public Path( final Path p, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
lastVertex = graph.getEdgeTarget(edge);
public Path( final Path p, final DeBruijnEdge edge ) {
graph = p.graph;
lastVertex = p.graph.getEdgeTarget(edge);
edges = new ArrayList<DeBruijnEdge>(p.edges);
edges.add(edge);
totalScore = p.totalScore + edge.getMultiplicity();
lowestEdge = ( p.lowestEdge == -1 ) ? edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity());
}
public boolean containsEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
/**
* Does this path contain the given edge
* @param edge the given edge to test
* @return true if the edge is found in this path
*/
public boolean containsEdge( final DeBruijnEdge edge ) {
final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge);
for( final DeBruijnEdge e : edges ) {
if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) {
@ -104,7 +129,7 @@ public class KBestPaths {
return false;
}
public ArrayList<DeBruijnEdge> getEdges() { return edges; }
public List<DeBruijnEdge> getEdges() { return edges; }
public int getScore() { return totalScore; }
@ -112,7 +137,12 @@ public class KBestPaths {
public DeBruijnVertex getLastVertexInPath() { return lastVertex; }
public byte[] getBases( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
/**
* The base sequence for this path. Pull the full sequence for the source of the path and then the suffix for all subsequent nodes
* @return non-null sequence of bases corresponding to this path
*/
@Ensures({"result != null"})
public byte[] getBases() {
if( edges.size() == 0 ) { return lastVertex.getSequence(); }
byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence();
@ -121,6 +151,142 @@ public class KBestPaths {
}
return bases;
}
/**
* Calculate the cigar string for this path using a bubble traversal of the assembly graph and running a Smith-Waterman alignment on each bubble
*/
@Ensures("result != null")
public Cigar calculateCigar() {
// Walks the path's edges with a bubble state machine: stretches on the reference
// path become M elements directly, while off-reference detours ("bubbles") are
// accumulated and resolved via Smith-Waterman against the enclosed reference bases.
// NOTE(review): edges.get(0) throws IndexOutOfBoundsException for an empty path —
// presumably callers only invoke this on non-trivial paths; confirm at call sites.
final Cigar cigar = new Cigar();
// special case for paths that start on reference but not at the reference source node
if( edges.get(0).isRef() && !isRefSource(graph, edges.get(0)) ) {
// align the implied leading reference prefix (fromVertex == null means "from the ref source")
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, null, graph.getEdgeSource(edges.get(0))).getCigarElements() ) {
cigar.add(ce);
}
}
// reset the bubble state machine
final BubbleStateMachine bsm = new BubbleStateMachine(cigar);
for( final DeBruijnEdge e : edges ) {
// the very first edge also contributes its source vertex (no incoming edge for it)
if( e.equals(graph, edges.get(0)) ) {
advanceBubbleStateMachine( bsm, graph, graph.getEdgeSource(e), null );
}
advanceBubbleStateMachine( bsm, graph, graph.getEdgeTarget(e), e );
}
// special case for paths that don't end on reference
if( bsm.inBubble ) {
// close the trailing bubble against the remainder of the reference (toVertex == null)
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, null).getCigarElements() ) {
bsm.cigar.add(ce);
}
} else if( edges.get(edges.size()-1).isRef() && !isRefSink(graph, edges.get(edges.size()-1)) ) { // special case for paths that end of the reference but haven't completed the entire reference circuit
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, graph.getEdgeTarget(edges.get(edges.size()-1)), null).getCigarElements() ) {
bsm.cigar.add(ce);
}
}
// merge adjacent same-operator elements before returning
return AlignmentUtils.consolidateCigar(bsm.cigar);
}
/**
 * Advance the bubble state machine by one vertex of the path.
 * Reference vertices outside a bubble emit M cigar elements directly; non-reference
 * vertices open/extend a bubble whose bases are later resolved by Smith-Waterman.
 * @param bsm   the mutable state machine being advanced (cigar, bubble bytes, flags)
 * @param graph the graph the path was built from
 * @param node  the vertex being visited
 * @param e     the edge used to reach {@code node}, or null for the path's first vertex
 */
private void advanceBubbleStateMachine( final BubbleStateMachine bsm, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex node, final DeBruijnEdge e ) {
if( isReferenceNode( graph, node ) ) {
if( !bsm.inBubble ) { // just add the ref bases as M's in the Cigar string, and don't do anything else
if( e !=null && !e.isRef() ) {
// reached a ref node via a non-ref edge: this is a deletion-like jump if a
// reference path connects the two endpoints, an insertion if it is a self-loop,
// otherwise we must open a bubble because we jumped "backwards" off the ref path
if( referencePathExists( graph, graph.getEdgeSource(e), node) ) {
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, null, graph.getEdgeSource(e), node).getCigarElements() ) {
bsm.cigar.add(ce);
}
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
} else if ( graph.getEdgeSource(e).equals(graph.getEdgeTarget(e)) ) { // alt edge at ref node points to itself
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.I) );
} else {
bsm.inBubble = true;
bsm.bubbleBytes = null;
bsm.lastSeenReferenceNode = graph.getEdgeSource(e);
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
}
} else {
// on-reference travel: straight match bases
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
}
} else if( bsm.lastSeenReferenceNode != null && !referencePathExists( graph, bsm.lastSeenReferenceNode, node ) ) { // add bases to the bubble string until we get back to the reference path
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
} else { // close the bubble and use a local SW to determine the Cigar string
for( final CigarElement ce : calculateCigarForCompleteBubble(graph, bsm.bubbleBytes, bsm.lastSeenReferenceNode, node).getCigarElements() ) {
bsm.cigar.add(ce);
}
bsm.inBubble = false;
bsm.bubbleBytes = null;
bsm.lastSeenReferenceNode = null;
bsm.cigar.add( new CigarElement( getAdditionalSequence(graph, node).length, CigarOperator.M) );
}
} else { // non-ref vertex
if( bsm.inBubble ) { // just keep accumulating until we get back to the reference path
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
} else { // open up a bubble
bsm.inBubble = true;
bsm.bubbleBytes = null;
// null when this is the first vertex of the path (no incoming edge)
bsm.lastSeenReferenceNode = (e != null ? graph.getEdgeSource(e) : null );
bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, getAdditionalSequence(graph, node) );
}
}
}
/**
 * Resolve one closed bubble into cigar elements by Smith-Waterman aligning the
 * bubble's bases against the reference bases spanned between the two anchor vertices.
 * Both sequences are padded with 'X' runs so the alignment is anchored end-to-end;
 * the padding is doubled until the alignment consumes the full padded reference.
 * @param graph       the graph the path was built from
 * @param bubbleBytes the accumulated off-reference bases, may be null for a pure deletion
 * @param fromVertex  reference anchor before the bubble (exclusive), null means ref source
 * @param toVertex    reference anchor after the bubble (exclusive), null means ref sink
 * @return cigar for the bubble with the padding stripped back out
 */
private Cigar calculateCigarForCompleteBubble( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final byte[] bubbleBytes, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
final byte[] refBytes = getReferenceBytes(graph, fromVertex, toVertex);
final Cigar cigar = new Cigar();
// add padding to anchor ref/alt bases in the SW matrix
byte[] padding = "XXXXXX".getBytes();
boolean goodAlignment = false;
SWPairwiseAlignment swConsensus = null;
while( !goodAlignment && padding.length < 1000 ) {
padding = ArrayUtils.addAll(padding, padding); // double the size of the padding each time
// ArrayUtils.addAll tolerates null refBytes/bubbleBytes (treated as empty)
final byte[] reference = ArrayUtils.addAll( ArrayUtils.addAll(padding, refBytes), padding );
final byte[] alternate = ArrayUtils.addAll( ArrayUtils.addAll(padding, bubbleBytes), padding );
swConsensus = new SWPairwiseAlignment( reference, alternate, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
// "good" = alignment starts at 0, has no soft clips, and spans the whole padded reference
if( swConsensus.getAlignmentStart2wrt1() == 0 && !swConsensus.getCigar().toString().contains("S") && swConsensus.getCigar().getReferenceLength() == reference.length ) {
goodAlignment = true;
}
}
if( !goodAlignment && swConsensus != null ) {
// NOTE(review): new String(bubbleBytes) here will NPE if bubbleBytes is null
// (refBytes gets a null guard but bubbleBytes does not) — verify this failure
// path is unreachable for null-bubble (pure deletion) calls.
throw new ReviewedStingException("SmithWaterman offset failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
}
if( swConsensus != null ) {
final Cigar swCigar = swConsensus.getCigar();
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
// now we need to remove the padding from the cigar string
int length = swCigar.getCigarElement(iii).getLength();
// the padding can only appear in the first and last elements (alignment is anchored)
if( iii == 0 ) { length -= padding.length; }
if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; }
if( length > 0 ) {
cigar.add( new CigarElement(length, swCigar.getCigarElement(iii).getOperator()) );
}
}
// sanity check: the de-padded cigar must consume exactly the unpadded reference
if( (refBytes == null && cigar.getReferenceLength() != 0) || ( refBytes != null && cigar.getReferenceLength() != refBytes.length ) ) {
throw new ReviewedStingException("SmithWaterman cigar failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar());
}
}
return cigar;
}
// class to keep track of the bubble state machine
// class to keep track of the bubble state machine
// Mutable holder for the in-flight state of calculateCigar()'s bubble traversal:
// whether we are currently inside an off-reference bubble, the bases collected so
// far, the last reference vertex seen before entering the bubble, and the cigar
// being built. Fields are public and mutated directly by advanceBubbleStateMachine.
protected static class BubbleStateMachine {
public boolean inBubble = false;
public byte[] bubbleBytes = null;
public DeBruijnVertex lastSeenReferenceNode = null;
public Cigar cigar = null;
// NOTE(review): the constructor re-assigns the same defaults the field
// initializers already set — the only meaningful assignment is the cigar.
public BubbleStateMachine( final Cigar initialCigar ) {
inBubble = false;
bubbleBytes = null;
lastSeenReferenceNode = null;
cigar = initialCigar;
}
}
}
protected static class PathComparatorTotalScore implements Comparator<Path>, Serializable {
@ -130,12 +296,12 @@ public class KBestPaths {
}
}
//protected static class PathComparatorLowestEdge implements Comparator<Path>, Serializable {
// @Override
// public int compare(final Path path1, final Path path2) {
// return path2.lowestEdge - path1.lowestEdge;
// }
//}
/**
 * Orders paths by their lowest-multiplicity edge, descending (highest minimum first).
 * NOTE(review): uses int subtraction for the comparison — fine while edge
 * multiplicities stay small, but Integer.compare would be overflow-safe.
 */
protected static class PathComparatorLowestEdge implements Comparator<Path>, Serializable {
@Override
public int compare(final Path path1, final Path path2) {
return path2.lowestEdge - path1.lowestEdge;
}
}
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
@ -144,7 +310,7 @@ public class KBestPaths {
// run a DFS for best paths
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( graph.inDegreeOf(v) == 0 ) {
findBestPaths(graph, new Path(v), bestPaths);
findBestPaths(new Path(v, graph), bestPaths);
}
}
@ -153,14 +319,14 @@ public class KBestPaths {
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
}
private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths ) {
findBestPaths(graph, path, bestPaths, new MyInt());
private static void findBestPaths( final Path path, final List<Path> bestPaths ) {
findBestPaths(path, bestPaths, new MyInt());
}
private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths, MyInt n ) {
private static void findBestPaths( final Path path, final List<Path> bestPaths, final MyInt n ) {
// did we hit the end of a path?
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
if ( allOutgoingEdgesHaveBeenVisited(path) ) {
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
// clean out some low scoring paths
Collections.sort(bestPaths, new PathComparatorTotalScore() );
@ -172,27 +338,253 @@ public class KBestPaths {
} else {
// recursively run DFS
final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>();
edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex));
edgeArrayList.addAll(path.graph.outgoingEdgesOf(path.lastVertex));
Collections.sort(edgeArrayList, new DeBruijnEdge.EdgeWeightComparator());
Collections.reverse(edgeArrayList);
for ( final DeBruijnEdge edge : edgeArrayList ) {
// make sure the edge is not already in the path
if ( path.containsEdge(graph, edge) )
if ( path.containsEdge(edge) )
continue;
final Path newPath = new Path(path, graph, edge);
final Path newPath = new Path(path, edge);
n.val++;
findBestPaths(graph, newPath, bestPaths, n);
findBestPaths(newPath, bestPaths, n);
}
}
}
private static boolean allOutgoingEdgesHaveBeenVisited( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path ) {
for( final DeBruijnEdge edge : graph.outgoingEdgesOf(path.lastVertex) ) {
if( !path.containsEdge(graph, edge) ) {
private static boolean allOutgoingEdgesHaveBeenVisited( final Path path ) {
for( final DeBruijnEdge edge : path.graph.outgoingEdgesOf(path.lastVertex) ) {
if( !path.containsEdge(edge) ) {
return false;
}
}
return true;
}
/****************************************************************
* Collection of graph functions used by KBestPaths *
***************************************************************/
/**
* Test if the vertex is on a reference path in the graph. If so it is referred to as a reference node
* @param graph the graph from which the vertex originated
* @param v the vertex to test
* @return true if the vertex is on the reference path
*/
public static boolean isReferenceNode( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
// a vertex is on the reference path iff any incident edge (in or out) is a ref edge
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
if( e.isRef() ) { return true; }
}
return false;
}
/**
* Is this edge a source edge (the source vertex of the edge is a source node in the graph)
* @param graph the graph from which the edge originated
* @param e the edge to test
* @return true if the source vertex of the edge is a source node in the graph
*/
public static boolean isSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
// source edge == its source vertex has no incoming edges
return graph.inDegreeOf(graph.getEdgeSource(e)) == 0;
}
/**
* Is this vertex a source vertex
* @param graph the graph from which the vertex originated
* @param v the vertex to test
* @return true if the vertex is a source vertex
*/
public static boolean isSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
// source vertex == no incoming edges
return graph.inDegreeOf(v) == 0;
}
/**
* Pull the added base sequence implied by visiting this node in a path
* @param graph the graph from which the vertex originated
* @param v the vertex whose sequence to grab
* @return non-null sequence of bases corresponding to this node in the graph
*/
@Ensures({"result != null"})
public static byte[] getAdditionalSequence( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
// a source vertex contributes its full kmer; any later vertex only contributes
// its suffix (the bases not already covered by the preceding kmer's overlap)
return ( isSource(graph, v) ? v.getSequence() : v.getSuffix() );
}
/**
* Is this edge both a reference edge and a source edge for the reference path
* @param graph the graph from which the edge originated
* @param e the edge to test
* @return true if the edge is both a reference edge and a reference path source edge
*/
public static boolean isRefSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
// NOTE(review): despite the javadoc ("both a reference edge and ..."), this only
// verifies that no reference edge ENTERS e's source vertex; it never checks
// e.isRef() itself. Callers in this file pre-check e.isRef() — confirm all do.
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* Is this vertex both a reference node and a source node for the reference path
* @param graph the graph from which the vertex originated
* @param v the vertex to test
* @return true if the vertex is both a reference node and a reference path source node
*/
public static boolean isRefSource( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
// v is a reference-path source iff no incoming edge is a reference edge
// (callers pair this with isReferenceNode to assert v is on the ref path at all)
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* Is this edge both a reference edge and a sink edge for the reference path
* @param graph the graph from which the edge originated
* @param e the edge to test
* @return true if the edge is both a reference edge and a reference path sink edge
*/
public static boolean isRefSink( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
// mirror of isRefSource(edge): no reference edge may LEAVE e's target vertex
// (e.isRef() itself is not checked here; callers pre-check it)
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
* Is this vertex both a reference node and a sink node for the reference path
* @param graph the graph from which the node originated
* @param v the node to test
* @return true if the vertex is both a reference node and a reference path sink node
*/
public static boolean isRefSink( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
// v is a reference-path sink iff no outgoing edge is a reference edge
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
if( edgeToTest.isRef() ) { return false; }
}
return true;
}
/**
 * Find the unique reference edge that starts the reference path.
 * @param graph the graph to search
 * @return the first ref edge whose source has no incoming ref edge
 * @throws ReviewedStingException if the graph has no reference source edge
 */
public static DeBruijnEdge getReferenceSourceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
for( final DeBruijnEdge e : graph.edgeSet() ) {
if( e.isRef() && isRefSource(graph, e) ) {
return e;
}
}
throw new ReviewedStingException("All reference graphs should have a source node");
}
/**
 * Find the vertex that starts the reference path.
 * NOTE(review): returns null when not found, whereas the edge variant and
 * getReferenceSinkVertex throw — callers (e.g. getReferenceBytes) should
 * be audited for a null result here.
 * @param graph the graph to search
 * @return the reference source vertex, or null if none exists
 */
public static DeBruijnVertex getReferenceSourceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( isReferenceNode(graph, v) && isRefSource(graph, v) ) {
return v;
}
}
return null;
}
/**
 * Find the unique reference edge that ends the reference path.
 * @param graph the graph to search
 * @return the first ref edge whose target has no outgoing ref edge
 * @throws ReviewedStingException if the graph has no reference sink edge
 */
public static DeBruijnEdge getReferenceSinkEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
for( final DeBruijnEdge e : graph.edgeSet() ) {
if( e.isRef() && isRefSink(graph, e) ) {
return e;
}
}
throw new ReviewedStingException("All reference graphs should have a sink node");
}
/**
 * Find the vertex that ends the reference path.
 * @param graph the graph to search
 * @return the reference sink vertex
 * @throws ReviewedStingException if the graph has no reference sink vertex
 */
public static DeBruijnVertex getReferenceSinkVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( isReferenceNode(graph, v) && isRefSink(graph, v) ) {
return v;
}
}
throw new ReviewedStingException("All reference graphs should have a sink node");
}
/**
 * Step forward one edge along the reference path.
 * @param graph the graph to walk
 * @param e     the current reference edge, may be null
 * @return the ref edge leaving e's target, or null if e is null or none exists
 */
public static DeBruijnEdge getNextReferenceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
if( e == null ) { return null; }
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(graph.getEdgeTarget(e)) ) {
if( edgeToTest.isRef() ) {
return edgeToTest;
}
}
return null;
}
/**
 * Step forward one vertex along the reference path.
 * @param graph the graph to walk
 * @param v     the current vertex, may be null
 * @return the target of v's outgoing ref edge, or null if v is null or none exists
 */
public static DeBruijnVertex getNextReferenceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
if( v == null ) { return null; }
for( final DeBruijnEdge edgeToTest : graph.outgoingEdgesOf(v) ) {
if( edgeToTest.isRef() ) {
return graph.getEdgeTarget(edgeToTest);
}
}
return null;
}
/**
 * Step backward one edge along the reference path.
 * NOTE(review): unlike getNextReferenceEdge there is no null guard on e here.
 * @param graph the graph to walk
 * @param e     the current reference edge (must be non-null)
 * @return the ref edge entering e's source, or null if none exists
 */
public static DeBruijnEdge getPrevReferenceEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge e ) {
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(graph.getEdgeSource(e)) ) {
if( edgeToTest.isRef() ) {
return edgeToTest;
}
}
return null;
}
/**
 * Step backward one vertex toward the reference path.
 * NOTE(review): asymmetric with getNextReferenceVertex — this tests whether the
 * predecessor VERTEX is a reference node (via any incoming edge), not whether the
 * incoming EDGE itself is a ref edge; confirm that asymmetry is intentional.
 * @param graph the graph to walk
 * @param v     the current vertex (must be non-null)
 * @return the source of an incoming edge whose source is a reference node, or null
 */
public static DeBruijnVertex getPrevReferenceVertex( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex v ) {
for( final DeBruijnEdge edgeToTest : graph.incomingEdgesOf(v) ) {
if( isReferenceNode(graph, graph.getEdgeSource(edgeToTest)) ) {
return graph.getEdgeSource(edgeToTest);
}
}
return null;
}
/**
 * Test whether toEdge is reachable from fromEdge by walking only reference edges forward.
 * @param graph    the graph to walk
 * @param fromEdge starting edge (inclusive), may be null (returns false)
 * @param toEdge   target edge
 * @return true if a forward reference walk from fromEdge reaches toEdge
 */
public static boolean referencePathExists(final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge fromEdge, final DeBruijnEdge toEdge) {
DeBruijnEdge e = fromEdge;
if( e == null ) {
return false;
}
// walk forward along ref edges until we hit toEdge or run off the end
while( !e.equals(graph, toEdge) ) {
e = getNextReferenceEdge(graph, e);
if( e == null ) {
return false;
}
}
return true;
}
/**
 * Test whether toVertex is reachable from fromVertex by walking only reference edges forward.
 * Note the walk advances one step BEFORE the comparison, so fromVertex == toVertex is
 * only true if the reference path loops back to it.
 * @param graph      the graph to walk
 * @param fromVertex starting vertex (exclusive), may be null (returns false)
 * @param toVertex   target vertex
 * @return true if a forward reference walk from fromVertex reaches toVertex
 */
public static boolean referencePathExists(final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex) {
DeBruijnVertex v = fromVertex;
if( v == null ) {
return false;
}
// take the first step off fromVertex before comparing
v = getNextReferenceVertex(graph, v);
if( v == null ) {
return false;
}
while( !v.equals(toVertex) ) {
v = getNextReferenceVertex(graph, v);
if( v == null ) {
return false;
}
}
return true;
}
// fromVertex (exclusive) -> toVertex (exclusive)
/**
 * Collect the reference base sequence strictly between two reference-path vertices.
 * A null fromVertex means "start at the reference source (that vertex inclusive)";
 * a null toVertex means "run to the end of the reference path".
 * NOTE(review): when fromVertex is null and toVertex is non-null the up-front
 * referencePathExists check is skipped, and v.equals(toVertex) in the loop guard
 * would NPE if the walk runs off the path — confirm callers can never hit that.
 * @param graph      the graph to walk
 * @param fromVertex start anchor (exclusive unless null, see above), may be null
 * @param toVertex   end anchor (exclusive), may be null
 * @return the concatenated reference bases, or null if the span is empty
 * @throws ReviewedStingException if both anchors are given but no ref path connects them
 */
public static byte[] getReferenceBytes( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnVertex fromVertex, final DeBruijnVertex toVertex ) {
byte[] bytes = null;
if( fromVertex != null && toVertex != null && !referencePathExists(graph, fromVertex, toVertex) ) {
throw new ReviewedStingException("Asked for a reference path which doesn't exist. " + fromVertex + " --> " + toVertex);
}
DeBruijnVertex v = fromVertex;
if( v == null ) {
// no start anchor: begin at the reference source and include its full sequence
v = getReferenceSourceVertex(graph);
bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) );
}
v = getNextReferenceVertex(graph, v);
while( (toVertex != null && !v.equals(toVertex)) || (toVertex == null && v != null) ) {
bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(graph, v) );
// advance along the reference path
v = getNextReferenceVertex(graph, v);
}
return bytes;
}
}

View File

@ -152,10 +152,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer );
graph.addVertex(addedVertex);
for( final DeBruijnEdge edge : outEdges ) {
graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.isRef(), edge.getMultiplicity()));
}
for( final DeBruijnEdge edge : inEdges ) {
graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.isRef(), edge.getMultiplicity()));
}
graph.removeVertex( incomingVertex );
@ -170,7 +170,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
final List<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
for( final DeBruijnEdge e : graph.edgeSet() ) {
if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
if( e.getMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
edgesToRemove.add(e);
}
}
@ -195,7 +195,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) {
boolean isRefNode = false;
for( final DeBruijnEdge e : graph.edgesOf(v) ) {
if( e.getIsRef() ) {
if( e.isRef() ) {
isRefNode = true;
break;
}
@ -299,10 +299,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
if( edge.getMultiplicity() > PRUNE_FACTOR ) {
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
}
if( edge.getIsRef() ) {
if( edge.isRef() ) {
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
}
if( !edge.getIsRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
if( !edge.isRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
}
for( final DeBruijnVertex v : graph.vertexSet() ) {
final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
@ -338,7 +338,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
final Haplotype h = new Haplotype( path.getBases( graph ) );
final Haplotype h = new Haplotype( path.getBases() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
// for GGA mode, add the desired allele into the haplotype if it isn't already present

View File

@ -201,7 +201,7 @@ public class RecalDatum {
* Returns the error rate (in real space) of this interval, or 0 if there are no observations
* @return the empirical error rate ~= N errors / N obs
*/
@Ensures("result >= 0.0")
@Ensures({"result >= 0.0"})
public double getEmpiricalErrorRate() {
if ( numObservations == 0 )
return 0.0;

View File

@ -93,7 +93,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
haplotypeAlleles.add( Allele.create("AACA", false) );
haplotypeAlleles.add( Allele.create("CATA", false) );
haplotypeAlleles.add( Allele.create("CACA", false) );
final ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
final List<Haplotype> haplotypes = new ArrayList<Haplotype>();
haplotypes.add(new Haplotype("AATA".getBytes()));
haplotypes.add(new Haplotype("AACA".getBytes()));
haplotypes.add(new Haplotype("CATA".getBytes()));
@ -101,11 +101,11 @@ public class GenotypingEngineUnitTest extends BaseTest {
final List<Allele> haplotypeAllelesForSample = new ArrayList<Allele>();
haplotypeAllelesForSample.add( Allele.create("CATA", false) );
haplotypeAllelesForSample.add( Allele.create("CACA", false) );
final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
ArrayList<Haplotype> Aallele = new ArrayList<Haplotype>();
final List<List<Haplotype>> alleleMapper = new ArrayList<List<Haplotype>>();
List<Haplotype> Aallele = new ArrayList<Haplotype>();
Aallele.add(haplotypes.get(0));
Aallele.add(haplotypes.get(1));
ArrayList<Haplotype> Callele = new ArrayList<Haplotype>();
List<Haplotype> Callele = new ArrayList<Haplotype>();
Callele.add(haplotypes.get(2));
Callele.add(haplotypes.get(3));
alleleMapper.add(Aallele);
@ -135,7 +135,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
haplotypeAlleles.add( Allele.create("TACA", false) );
haplotypeAlleles.add( Allele.create("TTCA", false) );
haplotypeAlleles.add( Allele.create("TTTA", false) );
final ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
final List<Haplotype> haplotypes = new ArrayList<Haplotype>();
haplotypes.add(new Haplotype("AATA".getBytes()));
haplotypes.add(new Haplotype("AACA".getBytes()));
haplotypes.add(new Haplotype("CATA".getBytes()));
@ -146,14 +146,14 @@ public class GenotypingEngineUnitTest extends BaseTest {
final List<Allele> haplotypeAllelesForSample = new ArrayList<Allele>();
haplotypeAllelesForSample.add( Allele.create("TTTA", false) );
haplotypeAllelesForSample.add( Allele.create("AATA", true) );
final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
ArrayList<Haplotype> Aallele = new ArrayList<Haplotype>();
final List<List<Haplotype>> alleleMapper = new ArrayList<List<Haplotype>>();
List<Haplotype> Aallele = new ArrayList<Haplotype>();
Aallele.add(haplotypes.get(0));
Aallele.add(haplotypes.get(1));
ArrayList<Haplotype> Callele = new ArrayList<Haplotype>();
List<Haplotype> Callele = new ArrayList<Haplotype>();
Callele.add(haplotypes.get(2));
Callele.add(haplotypes.get(3));
ArrayList<Haplotype> Tallele = new ArrayList<Haplotype>();
List<Haplotype> Tallele = new ArrayList<Haplotype>();
Tallele.add(haplotypes.get(4));
Tallele.add(haplotypes.get(5));
Tallele.add(haplotypes.get(6));
@ -187,16 +187,16 @@ public class GenotypingEngineUnitTest extends BaseTest {
private class BasicGenotypingTestProvider extends TestDataProvider {
byte[] ref;
byte[] hap;
HashMap<Integer,Byte> expected;
Map<Integer,Byte> expected;
public BasicGenotypingTestProvider(String refString, String hapString, HashMap<Integer, Byte> expected) {
public BasicGenotypingTestProvider(String refString, String hapString, Map<Integer, Byte> expected) {
super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString));
ref = refString.getBytes();
hap = hapString.getBytes();
this.expected = expected;
}
public HashMap<Integer,VariantContext> calcAlignment() {
public Map<Integer,VariantContext> calcAlignment() {
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap);
return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name");
}
@ -206,14 +206,14 @@ public class GenotypingEngineUnitTest extends BaseTest {
public Object[][] makeBasicGenotypingTests() {
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
Map<Integer, Byte> map = new HashMap<Integer, Byte>();
map.put(1 + contextSize, (byte)'M');
final String context = Utils.dupString('G', contextSize);
new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACTAGCCGATAG", map);
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
Map<Integer, Byte> map = new HashMap<Integer, Byte>();
map.put(2 + contextSize, (byte)'M');
map.put(21 + contextSize, (byte)'M');
final String context = Utils.dupString('G', contextSize);
@ -221,7 +221,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
Map<Integer, Byte> map = new HashMap<Integer, Byte>();
map.put(1 + contextSize, (byte)'M');
map.put(20 + contextSize, (byte)'I');
final String context = Utils.dupString('G', contextSize);
@ -229,7 +229,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
Map<Integer, Byte> map = new HashMap<Integer, Byte>();
map.put(1 + contextSize, (byte)'M');
map.put(20 + contextSize, (byte)'D');
final String context = Utils.dupString('G', contextSize);
@ -237,7 +237,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{1,5,9,24,36} ) {
HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
Map<Integer, Byte> map = new HashMap<Integer, Byte>();
map.put(1, (byte)'M');
map.put(20, (byte)'D');
final String context = Utils.dupString('G', contextSize);
@ -245,7 +245,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
Map<Integer, Byte> map = new HashMap<Integer, Byte>();
map.put(2 + contextSize, (byte)'M');
map.put(20 + contextSize, (byte)'I');
map.put(30 + contextSize, (byte)'D');
@ -254,7 +254,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
for( int contextSize : new int[]{0,1,5,9,24,36} ) {
HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
Map<Integer, Byte> map = new HashMap<Integer, Byte>();
map.put(1 + contextSize, (byte)'M');
map.put(20 + contextSize, (byte)'D');
map.put(28 + contextSize, (byte)'M');
@ -267,8 +267,8 @@ public class GenotypingEngineUnitTest extends BaseTest {
@Test(dataProvider = "BasicGenotypingTestProvider", enabled = true)
public void testHaplotypeToVCF(BasicGenotypingTestProvider cfg) {
HashMap<Integer,VariantContext> calculatedMap = cfg.calcAlignment();
HashMap<Integer,Byte> expectedMap = cfg.expected;
Map<Integer,VariantContext> calculatedMap = cfg.calcAlignment();
Map<Integer,Byte> expectedMap = cfg.expected;
logger.warn(String.format("Test: %s", cfg.toString()));
if(!compareVCMaps(calculatedMap, expectedMap)) {
logger.warn("calc map = " + calculatedMap);
@ -420,9 +420,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
}
/**
* Private function to compare HashMap of VCs, it only checks the types and start locations of the VariantContext
* Private function to compare Map of VCs, it only checks the types and start locations of the VariantContext
*/
private boolean compareVCMaps(HashMap<Integer, VariantContext> calc, HashMap<Integer, Byte> expected) {
private boolean compareVCMaps(Map<Integer, VariantContext> calc, Map<Integer, Byte> expected) {
if( !calc.keySet().equals(expected.keySet()) ) { return false; } // sanity check
for( Integer loc : expected.keySet() ) {
Byte type = expected.get(loc);

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broad.tribble.util.BlockCompressedStreamConstants;
import net.sf.samtools.util.BlockCompressedStreamConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.FileInputStream;

View File

@ -25,8 +25,8 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broad.tribble.util.SeekableBufferedStream;
import org.broad.tribble.util.SeekableFileStream;
import net.sf.samtools.seekablestream.SeekableBufferedStream;
import net.sf.samtools.seekablestream.SeekableFileStream;
import net.sf.samtools.*;

View File

@ -350,7 +350,7 @@ public class GATKRunReport {
* @return a non-null AWS access key for the GATK user
*/
protected static String getAWSAccessKey() {
return getAWSKey("GATK_AWS_access.key");
return getAWSKey("resources/GATK_AWS_access.key");
}
/**
@ -358,7 +358,7 @@ public class GATKRunReport {
* @return a non-null AWS secret key for the GATK user
*/
protected static String getAWSSecretKey() {
return getAWSKey("GATK_AWS_secret.key");
return getAWSKey("resources/GATK_AWS_secret.key");
}
/**

View File

@ -240,6 +240,13 @@ public class Utils {
}
}
/**
* Create a new list that contains the elements of left along with elements elts
* @param left a non-null list of elements
* @param elts a varargs vector for elts to append in order to left
* @param <T> the type of elements held in both left and elts
* @return A newly allocated linked list containing left followed by elts
*/
public static <T> List<T> append(final List<T> left, T ... elts) {
final List<T> l = new LinkedList<T>(left);
l.addAll(Arrays.asList(elts));
@ -329,15 +336,6 @@ public class Utils {
return str.substring(start, end+1);
}
public static byte listMaxByte(List<Byte> quals) {
if (quals.size() == 0) return 0;
byte m = quals.get(0);
for (byte b : quals) {
m = b > m ? b : m;
}
return m;
}
/**
* Splits expressions in command args by spaces and returns the array of expressions.
* Expressions may use single or double quotes to group any individual expression, but not both.
@ -400,173 +398,6 @@ public class Utils {
return concatArrays(A, B);
}
/**
* Returns indices of all occurrences of the specified symbol in the string
* @param s Search string
* @param ch Character to search for
* @return Indices of all occurrences of the specified symbol
*/
public static int[] indexOfAll(String s, int ch) {
int[] pos = new int[64];
int z = 0;
for (int i = 0; i < s.length(); i++) {
if (s.charAt(i) == ch) pos[z++] = i;
}
return reallocate(pos, z);
}
public static int countSetBits(boolean[] array) {
int counter = 0;
for ( int i = 0; i < array.length; i++ ) {
if ( array[i] )
counter++;
}
return counter;
}
/**
* Returns new (reallocated) integer array of the specified size, with content
* of the original array <code>orig</code> copied into it. If <code>newSize</code> is
* less than the size of the original array, only first <code>newSize</code> elements will be copied.
* If new size is greater than the size of the original array, the content of the original array will be padded
* with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation
* will be performed and the original array will be returned instead.
*
* @param orig Original size.
* @param newSize New Size.
*
* @return New array with length equal to newSize.
*/
public static int[] reallocate(int[] orig, int newSize) {
if (orig.length == newSize) return orig;
int[] new_array = new int[newSize];
int L = (newSize > orig.length ? orig.length : newSize);
for (int i = 0; i < L; i++) new_array[i] = orig[i];
return new_array;
}
/**
* Returns a copy of array a, extended with additional n elements to the right (if n > 0 ) or -n elements to the
* left (if n<0), copying the values form the original array. Newly added elements are filled with value v. Note that
* if array a is being padded to the left, first (-n) elements of the returned array are v's, followed by the content of
* array a.
* @param a original array
* @param n number of (v-filled) elements to append to a on the right (n>0) or on the left (n<0)
* @param v element value
* @return the extended copy of array a with additional n elements
*/
public static byte [] extend(final byte[] a, int n, byte v) {
byte [] newA;
if ( n > 0 ) {
newA = Arrays.copyOf(a, a.length+n);
if ( v != 0) { // java pads with 0's for us, so there is nothing to do if v==0
for ( int i = a.length; i < newA.length ; i++ ) newA[i] = v;
}
return newA;
}
// we are here only if n < 0:
n = (-n);
newA = new byte[ a.length + n ];
int i;
if ( v!= 0 ) {
i = 0;
for( ; i < n; i++ ) newA[i] = v;
} else {
i = n;
}
for ( int j = 0 ; j < a.length ; i++, j++) newA[i]=a[j];
return newA;
}
/**
* Returns a copy of array a, extended with additional n elements to the right (if n > 0 ) or -n elements to the
* left (if n<0), copying the values form the original array. Newly added elements are filled with value v. Note that
* if array a is padded to the left, first (-n) elements of the returned array are v's, followed by the content of
* array a.
* @param a original array
* @param n number of (v-filled) elements to append to a on the right (n>0) or on the left (n<0)
* @param v element value
* @return the extended copy of array a with additional n elements
*/
public static short [] extend(final short[] a, int n, short v) {
short [] newA;
if ( n > 0 ) {
newA = Arrays.copyOf(a, a.length+n);
if ( v != 0) { // java pads with 0's for us, so there is nothing to do if v==0
for ( int i = a.length; i < newA.length ; i++ ) newA[i] = v;
}
return newA;
}
// we are here only if n < 0:
n = (-n);
newA = new short[ a.length + n ];
int i;
if ( v!= 0 ) {
i = 0;
for( ; i < n; i++ ) newA[i] = v;
} else {
i = n;
}
for ( int j = 0 ; j < a.length ; i++, j++) newA[i]=a[j];
return newA;
}
/* TEST ME
public static void main(String[] argv) {
List<Integer> l1 = new LinkedList<Integer>();
List<Integer> l2 = new ArrayList<Integer>();
l1.add(1);
l1.add(5);
l1.add(3);
l1.add(10);
l1.add(4);
l1.add(2);
l2.add(1);
l2.add(5);
l2.add(3);
l2.add(10);
l2.add(4);
l2.add(2);
Predicate<Integer> p = new Predicate<Integer>() {
public boolean apply(Integer i) {
return i > 2;
}
};
filterInPlace(p, l1);
filterInPlace(p, l2);
for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
System.out.println();
for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
System.out.println();
}
*/
/**
* a helper method. Turns a single character string into a char.
*
* @param str the string
*
* @return a char
*/
public static char stringToChar(String str) {
if (str.length() != 1) throw new IllegalArgumentException("String length must be one");
return str.charAt(0);
}
public static <T extends Comparable<T>> List<T> sorted(Collection<T> c) {
return sorted(c, false);
}
@ -594,18 +425,6 @@ public class Utils {
return l;
}
public static <T extends Comparable<T>, V> String sortedString(Map<T,V> c) {
List<T> t = new ArrayList<T>(c.keySet());
Collections.sort(t);
List<String> pairs = new ArrayList<String>();
for ( T k : t ) {
pairs.add(k + "=" + c.get(k));
}
return "{" + join(", ", pairs) + "}";
}
/**
* Reverse a byte array of bases
*
@ -654,14 +473,6 @@ public class Utils {
return new String( reverse( bases.getBytes() )) ;
}
public static byte[] charSeq2byteSeq(char[] seqIn) {
byte[] seqOut = new byte[seqIn.length];
for ( int i = 0; i < seqIn.length; i++ ) {
seqOut[i] = (byte)seqIn[i];
}
return seqOut;
}
public static boolean isFlagSet(int value, int flag) {
return ((value & flag) == flag);
}

View File

@ -673,7 +673,9 @@ public class BAQ {
} else if ( excludeReadFromBAQ(read) ) {
; // just fall through
} else {
if ( calculationType == CalculationMode.RECALCULATE || ! hasBAQTag(read) ) {
final boolean readHasBAQTag = hasBAQTag(read);
if ( calculationType == CalculationMode.RECALCULATE || ! readHasBAQTag ) {
if ( DEBUG ) System.out.printf(" Calculating BAQ on the fly%n");
BAQCalculationResult hmmResult = calcBAQFromHMM(read, refReader);
if ( hmmResult != null ) {
@ -683,6 +685,9 @@ public class BAQ {
case DONT_MODIFY: BAQQuals = hmmResult.bq; break;
default: throw new ReviewedStingException("BUG: unexpected qmode " + qmode);
}
} else if ( readHasBAQTag ) {
// remove the BAQ tag if it's there because we cannot trust it
read.setAttribute(BAQ_TAG, null);
}
} else if ( qmode == QualityMode.OVERWRITE_QUALS ) { // only makes sense if we are overwriting quals
if ( DEBUG ) System.out.printf(" Taking BAQ from tag%n");

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
@ -38,9 +40,16 @@ import org.broadinstitute.sting.utils.recalibration.EventType;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
public class AlignmentUtils {
public final class AlignmentUtils {
private final static EnumSet<CigarOperator> ALIGNED_TO_GENOME_OPERATORS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X);
private final static EnumSet<CigarOperator> ALIGNED_TO_GENOME_PLUS_SOFTCLIPS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.S);
// cannot be instantiated
private AlignmentUtils() { }
public static class MismatchCount {
public int numMismatches = 0;
@ -116,103 +125,6 @@ public class AlignmentUtils {
return mc;
}
/**
* Returns the number of mismatches in the pileup within the given reference context.
*
* @param pileup the pileup with reads
* @param ref the reference context
* @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
* @return the number of mismatches
*/
public static int mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) {
int mismatches = 0;
for (PileupElement p : pileup)
mismatches += mismatchesInRefWindow(p, ref, ignoreTargetSite);
return mismatches;
}
/**
* Returns the number of mismatches in the pileup element within the given reference context.
*
* @param p the pileup element
* @param ref the reference context
* @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
* @return the number of mismatches
*/
public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) {
return mismatchesInRefWindow(p, ref, ignoreTargetSite, false);
}
/**
* Returns the number of mismatches in the pileup element within the given reference context.
*
* @param p the pileup element
* @param ref the reference context
* @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
* @param qualitySumInsteadOfMismatchCount
* if true, return the quality score sum of the mismatches rather than the count
* @return the number of mismatches
*/
public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite, boolean qualitySumInsteadOfMismatchCount) {
int sum = 0;
int windowStart = ref.getWindow().getStart();
int windowStop = ref.getWindow().getStop();
byte[] refBases = ref.getBases();
byte[] readBases = p.getRead().getReadBases();
byte[] readQualities = p.getRead().getBaseQualities();
Cigar c = p.getRead().getCigar();
int readIndex = 0;
int currentPos = p.getRead().getAlignmentStart();
int refIndex = Math.max(0, currentPos - windowStart);
for (int i = 0; i < c.numCigarElements(); i++) {
CigarElement ce = c.getCigarElement(i);
int cigarElementLength = ce.getLength();
switch (ce.getOperator()) {
case EQ:
case X:
case M:
for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) {
// are we past the ref window?
if (currentPos > windowStop)
break;
// are we before the ref window?
if (currentPos < windowStart)
continue;
byte refChr = refBases[refIndex++];
// do we need to skip the target site?
if (ignoreTargetSite && ref.getLocus().getStart() == currentPos)
continue;
byte readChr = readBases[readIndex];
if (readChr != refChr)
sum += (qualitySumInsteadOfMismatchCount) ? readQualities[readIndex] : 1;
}
break;
case I:
case S:
readIndex += cigarElementLength;
break;
case D:
case N:
currentPos += cigarElementLength;
if (currentPos > windowStart)
refIndex += Math.min(cigarElementLength, currentPos - windowStart);
break;
case H:
case P:
break;
}
}
return sum;
}
/**
* Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment.
* This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but
@ -223,31 +135,54 @@ public class AlignmentUtils {
* @param r alignment
* @return number of continuous alignment blocks (i.e. 'M' elements of the cigar; all indel and clipping elements are ignored).
*/
@Ensures("result >= 0")
public static int getNumAlignmentBlocks(final SAMRecord r) {
int n = 0;
if ( r == null ) throw new IllegalArgumentException("read cannot be null");
final Cigar cigar = r.getCigar();
if (cigar == null) return 0;
int n = 0;
for (final CigarElement e : cigar.getCigarElements()) {
if (e.getOperator() == CigarOperator.M) n++;
if (ALIGNED_TO_GENOME_OPERATORS.contains(e.getOperator()))
n++;
}
return n;
}
public static int getNumAlignedBasesCountingSoftClips(final SAMRecord r) {
/**
* Get the number of bases aligned to the genome, including soft clips
*
* If read is not mapped (i.e., doesn't have a cigar) returns 0
*
* @param r a non-null GATKSAMRecord
* @return the number of bases aligned to the genome in r, including soft clipped bases
*/
public static int getNumAlignedBasesCountingSoftClips(final GATKSAMRecord r) {
int n = 0;
final Cigar cigar = r.getCigar();
if (cigar == null) return 0;
for (final CigarElement e : cigar.getCigarElements())
if (e.getOperator() == CigarOperator.M || e.getOperator() == CigarOperator.S)
if (ALIGNED_TO_GENOME_PLUS_SOFTCLIPS.contains(e.getOperator()))
n += e.getLength();
return n;
}
/**
* Count the number of bases hard clipped from read
*
* If read's cigar is null, return 0
*
* @param r a non-null read
* @return a non-negative integer (0 if the cigar is null or contains no hard clips)
*/
@Ensures("result >= 0")
public static int getNumHardClippedBases(final SAMRecord r) {
if ( r == null ) throw new IllegalArgumentException("Read cannot be null");
int n = 0;
final Cigar cigar = r.getCigar();
if (cigar == null) return 0;
@ -259,16 +194,28 @@ public class AlignmentUtils {
return n;
}
/**
* Calculate the number of bases that are soft clipped in read with quality score greater than threshold
*
* Handles the case where the cigar is null (i.e., the read is unmapped), returning 0
*
* @param read a non-null GATKSAMRecord.
* @param qualThreshold consider bases with quals > this value as high quality. Must be >= 0
* @return a non-negative count of high-quality soft-clipped bases (0 if the read is unmapped)
*/
@Ensures("result >= 0")
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
if ( read == null ) throw new IllegalArgumentException("Read cannot be null");
if ( qualThreshold < 0 ) throw new IllegalArgumentException("Expected qualThreshold to be a positive byte but saw " + qualThreshold);
if ( read.getCigar() == null ) // the read is unmapped
return 0;
final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
int numHQSoftClips = 0;
int alignPos = 0;
final Cigar cigar = read.getCigar();
final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) {
final CigarElement ce = cigar.getCigarElement(iii);
for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
@ -277,35 +224,26 @@ public class AlignmentUtils {
if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; }
}
break;
case M:
case I:
case EQ:
case X:
case M: case I: case EQ: case X:
alignPos += elementLength;
break;
case H:
case P:
case D:
case N:
case H: case P: case D: case N:
break;
default:
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
throw new IllegalStateException("Unsupported cigar operator: " + ce.getOperator());
}
}
return numHQSoftClips;
}
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), false, pileupElement.isDeletion(), alignmentStart, refLocus );
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isDeletion(), alignmentStart, refLocus );
}
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isInsertionAtBeginningOfRead, final boolean isDeletion, final int alignmentStart, final int refLocus) {
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isDeletion, final int alignmentStart, final int refLocus) {
int pileupOffset = offset;
// Special case for reads starting with insertion
if (isInsertionAtBeginningOfRead)
return 0;
// Reassign the offset if we are in the middle of a deletion because of the modified representation of the read bases
if (isDeletion) {
pileupOffset = refLocus - alignmentStart;
@ -458,18 +396,21 @@ public class AlignmentUtils {
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and
* alignment reference index/start.
*
* @param r record
* Our life would be so much easier if all sam files followed the specs. In reality,
* sam files (including those generated by maq or bwa) miss headers altogether. When
* reading such a SAM file, reference name is set, but since there is no sequence dictionary,
* null is always returned for referenceIndex. Let's be paranoid here, and make sure that
* we do not call the read "unmapped" when it has only reference name set with ref. index missing
* or vice versa.
*
* @param r a non-null record
* @return true if read is unmapped
*/
public static boolean isReadUnmapped(final SAMRecord r) {
if ( r == null ) throw new IllegalArgumentException("Read cannot be null");
if (r.getReadUnmappedFlag()) return true;
// our life would be so much easier if all sam files followed the specs. In reality,
// sam files (including those generated by maq or bwa) miss headers altogether. When
// reading such a SAM file, reference name is set, but since there is no sequence dictionary,
// null is always returned for referenceIndex. Let's be paranoid here, and make sure that
// we do not call the read "unmapped" when it has only reference name set with ref. index missing
// or vice versa.
if ((r.getReferenceIndex() != null && r.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
|| r.getReferenceName() != null && !r.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME))
&& r.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) return false;
@ -477,26 +418,26 @@ public class AlignmentUtils {
}
/**
* Due to (unfortunate) multiple ways to indicate that read/mate is unmapped allowed by SAM format
* specification, one may need this convenience shortcut. Checks both 'mate unmapped' flag and
* alignment reference index/start of the mate.
*
* @param r sam record for the read
* @return true if read's mate is unmapped
* Need a well-formed, consolidated Cigar string so that the left aligning code works properly.
* For example, 1M1M1M1D2M1M --> 3M1D3M
* If the given cigar is empty then the returned cigar will also be empty
* @param c the cigar to consolidate
* @return a cigar with consecutive matching operators merged into single operators.
*/
public static boolean isMateUnmapped(final SAMRecord r) {
if (r.getMateUnmappedFlag()) return true;
// our life would be so much easier if all sam files followed the specs. In reality,
// sam files (including those generated by maq or bwa) miss headers altogether. When
// reading such a SAM file, reference name is set, but since there is no sequence dictionary,
// null is always returned for referenceIndex. Let's be paranoid here, and make sure that
// we do not call the read "unmapped" when it has only reference name set with ref. index missing
// or vice versa.
if ((r.getMateReferenceIndex() != null && r.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
|| r.getMateReferenceName() != null && !r.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME))
&& r.getMateAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) return false;
return true;
@Requires({"c != null"})
@Ensures({"result != null"})
public static Cigar consolidateCigar( final Cigar c ) {
if( c.isEmpty() ) { return c; }
final Cigar returnCigar = new Cigar();
int sumLength = 0;
for( int iii = 0; iii < c.numCigarElements(); iii++ ) {
sumLength += c.getCigarElement(iii).getLength();
if( iii == c.numCigarElements() - 1 || !c.getCigarElement(iii).getOperator().equals(c.getCigarElement(iii+1).getOperator())) { // at the end so finish the current element
returnCigar.add(new CigarElement(sumLength, c.getCigarElement(iii).getOperator()));
sumLength = 0;
}
}
return returnCigar;
}
/**
@ -565,22 +506,41 @@ public class AlignmentUtils {
return cigar;
}
private static boolean cigarHasZeroSizeElement(Cigar c) {
for (CigarElement ce : c.getCigarElements()) {
/**
* Does one of the elements in cigar have a 0 length?
*
* @param c a non-null cigar
* @return true if any element has 0 size
*/
@Requires("c != null")
protected static boolean cigarHasZeroSizeElement(final Cigar c) {
for (final CigarElement ce : c.getCigarElements()) {
if (ce.getLength() == 0)
return true;
}
return false;
}
private static Cigar cleanUpCigar(Cigar c) {
ArrayList<CigarElement> elements = new ArrayList<CigarElement>(c.numCigarElements() - 1);
for (CigarElement ce : c.getCigarElements()) {
if (ce.getLength() != 0 &&
(elements.size() != 0 || ce.getOperator() != CigarOperator.D)) {
/**
* Clean up the incoming cigar
*
* Removes elements with zero size
* Clips away beginning deletion operators
*
* @param c the cigar string we want to clean up
* @return a newly allocated, cleaned up Cigar
*/
@Requires("c != null")
@Ensures("result != null")
private static Cigar cleanUpCigar(final Cigar c) {
final List<CigarElement> elements = new ArrayList<CigarElement>(c.numCigarElements() - 1);
for (final CigarElement ce : c.getCigarElements()) {
if (ce.getLength() != 0 && (! elements.isEmpty() || ce.getOperator() != CigarOperator.D)) {
elements.add(ce);
}
}
return new Cigar(elements);
}

View File

@ -30,7 +30,7 @@ import org.broad.tribble.Feature;
import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.BlockCompressedInputStream;
import net.sf.samtools.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.variantcontext.*;

View File

@ -32,8 +32,7 @@ import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.Test;
import java.io.File;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.*;
/**
* Testing framework for general purpose utilities class.
@ -43,6 +42,25 @@ import java.util.Map;
*/
public class UtilsUnitTest extends BaseTest {
@Test
public void testAppend() {
for ( int leftSize : Arrays.asList(0, 1, 2, 3) ) {
for ( final int rightSize : Arrays.asList(0, 1, 2) ) {
final List<Integer> left = new LinkedList<Integer>();
for ( int i = 0; i < leftSize; i++ ) left.add(i);
final List<Integer> total = new LinkedList<Integer>();
for ( int i = 0; i < leftSize + rightSize; i++ ) total.add(i);
if ( rightSize == 0 )
Assert.assertEquals(Utils.append(left), total);
if ( rightSize == 1 )
Assert.assertEquals(Utils.append(left, leftSize), total);
if ( rightSize == 2 )
Assert.assertEquals(Utils.append(left, leftSize, leftSize + 1), total);
}
}
}
@Test
public void testDupStringNoChars() {

View File

@ -203,6 +203,23 @@ public class BAQUnitTest extends BaseTest {
Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range");
}
@Test(enabled = true)
public void testBAQOverwritesExistingTagWithNull() {
// create a read with a single base off the end of the contig, which cannot be BAQed
final SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, fasta.getSequenceDictionary().getSequence("chr1").getSequenceLength() + 1, 1);
read.setReadBases(new byte[] {(byte) 'A'});
read.setBaseQualities(new byte[] {(byte) 20});
read.setCigarString("1M");
read.setAttribute("BQ", "A");
// try to BAQ and tell it to RECALCULATE AND ADD_TAG
BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false);
baq.baqRead(read, fasta, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);
// did we remove the existing tag?
Assert.assertTrue(read.getAttribute("BQ") == null);
}
public void testBAQ(BAQTest test, boolean lookupWithFasta) {
BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters

View File

@ -25,13 +25,16 @@
package org.broadinstitute.sting.utils.sam;
import junit.framework.Assert;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.*;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
public class AlignmentUtilsUnitTest {
private SAMFileHeader header;
@ -121,4 +124,205 @@ public class AlignmentUtilsUnitTest {
start,
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
}
private final List<List<CigarElement>> makeCigarElementCombinations() {
// this functionality can be adapted to provide input data for whatever you might want in your data
final List<CigarElement> cigarElements = new LinkedList<CigarElement>();
for ( final int size : Arrays.asList(0, 10) ) {
for ( final CigarOperator op : CigarOperator.values() ) {
cigarElements.add(new CigarElement(size, op));
}
}
final List<List<CigarElement>> combinations = new LinkedList<List<CigarElement>>();
for ( final int nElements : Arrays.asList(1, 2, 3) ) {
combinations.addAll(Utils.makePermutations(cigarElements, nElements, true));
}
return combinations;
}
@DataProvider(name = "NumAlignedBasesCountingSoftClips")
public Object[][] makeNumAlignedBasesCountingSoftClips() {
List<Object[]> tests = new ArrayList<Object[]>();
final EnumSet<CigarOperator> alignedToGenome = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.S);
for ( final List<CigarElement> elements : makeCigarElementCombinations() ) {
int n = 0;
for ( final CigarElement elt : elements ) n += alignedToGenome.contains(elt.getOperator()) ? elt.getLength() : 0;
tests.add(new Object[]{new Cigar(elements), n});
}
tests.add(new Object[]{null, 0});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "NumAlignedBasesCountingSoftClips")
public void testNumAlignedBasesCountingSoftClips(final Cigar cigar, final int expected) {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, cigar == null ? 10 : cigar.getReadLength());
read.setCigar(cigar);
Assert.assertEquals(AlignmentUtils.getNumAlignedBasesCountingSoftClips(read), expected, "Cigar " + cigar + " failed NumAlignedBasesCountingSoftClips");
}
@DataProvider(name = "CigarHasZeroElement")
public Object[][] makeCigarHasZeroElement() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final List<CigarElement> elements : makeCigarElementCombinations() ) {
boolean hasZero = false;
for ( final CigarElement elt : elements ) hasZero = hasZero || elt.getLength() == 0;
tests.add(new Object[]{new Cigar(elements), hasZero});
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "CigarHasZeroElement")
public void testCigarHasZeroSize(final Cigar cigar, final boolean hasZero) {
Assert.assertEquals(AlignmentUtils.cigarHasZeroSizeElement(cigar), hasZero, "Cigar " + cigar.toString() + " failed cigarHasZeroSizeElement");
}
@DataProvider(name = "NumHardClipped")
public Object[][] makeNumHardClipped() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final List<CigarElement> elements : makeCigarElementCombinations() ) {
int n = 0;
for ( final CigarElement elt : elements ) n += elt.getOperator() == CigarOperator.H ? elt.getLength() : 0;
tests.add(new Object[]{new Cigar(elements), n});
}
tests.add(new Object[]{null, 0});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "NumHardClipped")
public void testNumHardClipped(final Cigar cigar, final int expected) {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, cigar == null ? 10 : cigar.getReadLength());
read.setCigar(cigar);
Assert.assertEquals(AlignmentUtils.getNumHardClippedBases(read), expected, "Cigar " + cigar + " failed num hard clips");
}
@DataProvider(name = "NumAlignedBlocks")
public Object[][] makeNumAlignedBlocks() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final List<CigarElement> elements : makeCigarElementCombinations() ) {
int n = 0;
for ( final CigarElement elt : elements ) {
switch ( elt.getOperator() ) {
case M:case X:case EQ: n++; break;
default: break;
}
}
tests.add(new Object[]{new Cigar(elements), n});
}
tests.add(new Object[]{null, 0});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "NumAlignedBlocks")
public void testNumAlignedBlocks(final Cigar cigar, final int expected) {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, cigar == null ? 10 : cigar.getReadLength());
read.setCigar(cigar);
Assert.assertEquals(AlignmentUtils.getNumAlignmentBlocks(read), expected, "Cigar " + cigar + " failed NumAlignedBlocks");
}
@Test
public void testConsolidateCigar() {
    // NOTE: the original called Assert.assertEquals(expected, actual), but TestNG's
    // signature is assertEquals(actual, expected) — the rest of this file uses
    // actual-first, so the argument order is fixed here for consistent failure messages.

    // 1M1M1M1D2M1M --> 3M1D3M
    assertConsolidatesTo(
            makeCigar(new CigarElement(1, CigarOperator.M),
                      new CigarElement(1, CigarOperator.M),
                      new CigarElement(1, CigarOperator.M),
                      new CigarElement(1, CigarOperator.D),
                      new CigarElement(2, CigarOperator.M),
                      new CigarElement(1, CigarOperator.M)),
            makeCigar(new CigarElement(3, CigarOperator.M),
                      new CigarElement(1, CigarOperator.D),
                      new CigarElement(3, CigarOperator.M)));

    // 6M6M6M --> 18M
    assertConsolidatesTo(
            makeCigar(new CigarElement(6, CigarOperator.M),
                      new CigarElement(6, CigarOperator.M),
                      new CigarElement(6, CigarOperator.M)),
            makeCigar(new CigarElement(18, CigarOperator.M)));
}

/** Builds a Cigar from the given elements, in order. */
private static Cigar makeCigar(final CigarElement... elements) {
    return new Cigar(Arrays.asList(elements));
}

/** Asserts that consolidating {@code unconsolidated} yields {@code expected} (actual first, per TestNG). */
private static void assertConsolidatesTo(final Cigar unconsolidated, final Cigar expected) {
    Assert.assertEquals(AlignmentUtils.consolidateCigar(unconsolidated).toString(), expected.toString());
}
@DataProvider(name = "SoftClipsDataProvider")
public Object[][] makeSoftClipsDataProvider() {
    // Enumerate combinations of left/right soft-clip lengths, quality thresholds,
    // middle operators, and middle sizes, computing the expected count of
    // soft-clipped bases whose quality exceeds the threshold.
    final List<Object[]> cases = new ArrayList<Object[]>();
    for ( final int lengthOfLeftClip : Arrays.asList(0, 1, 10) ) {
        for ( final int lengthOfRightClip : Arrays.asList(0, 1, 10) ) {
            for ( final int qualThres : Arrays.asList(10, 20, 30) ) {
                for ( final String middleOp : Arrays.asList("M", "D") ) {
                    for ( final int matchSize : Arrays.asList(0, 1, 10) ) {
                        final byte[] leftQuals = makeQualArray(lengthOfLeftClip, qualThres);
                        final byte[] rightQuals = makeQualArray(lengthOfRightClip, qualThres);
                        int expectedHighQual = 0;
                        for ( final byte qual : leftQuals )
                            if ( qual > qualThres ) expectedHighQual++;
                        for ( final byte qual : rightQuals )
                            if ( qual > qualThres ) expectedHighQual++;
                        cases.add(new Object[]{leftQuals, matchSize, middleOp, rightQuals, qualThres, expectedHighQual});
                    }
                }
            }
        }
    }
    return cases.toArray(new Object[][]{});
}
/** Creates a quality array of the given length whose values alternate one above / one below the threshold. */
private byte[] makeQualArray(final int length, final int qualThreshold) {
    final byte[] quals = new byte[length];
    for ( int i = 0; i < quals.length; i++ ) {
        final int offset = (i % 2 == 0) ? 1 : -1;
        quals[i] = (byte) (qualThreshold + offset);
    }
    return quals;
}
@Test(dataProvider = "SoftClipsDataProvider")
public void testSoftClipsData(final byte[] qualsOfSoftClipsOnLeft, final int middleSize, final String middleOp, final byte[] qualOfSoftClipsOnRight, final int qualThreshold, final int numExpected) {
    // Deletions consume no read bases, so the middle element contributes to read length only for "M".
    final int middleReadBases = middleOp.equals("D") ? 0 : middleSize;
    final int readLength = qualsOfSoftClipsOnLeft.length + middleReadBases + qualOfSoftClipsOnRight.length;
    final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength);

    // Bases are all 'A'; quals are left clip + middle (all 30) + right clip.
    read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
    final byte[] middleQuals = middleOp.equals("D") ? new byte[]{} : Utils.dupBytes((byte) 30, middleSize);
    read.setBaseQualities(ArrayUtils.addAll(ArrayUtils.addAll(qualsOfSoftClipsOnLeft, middleQuals), qualOfSoftClipsOnRight));

    // Build the cigar string, e.g. "10S5M1S", skipping any zero-length pieces.
    final StringBuilder cigar = new StringBuilder();
    if ( qualsOfSoftClipsOnLeft.length > 0 ) cigar.append(qualsOfSoftClipsOnLeft.length).append("S");
    if ( middleSize > 0 ) cigar.append(middleSize).append(middleOp);
    if ( qualOfSoftClipsOnRight.length > 0 ) cigar.append(qualOfSoftClipsOnRight.length).append("S");
    read.setCigarString(cigar.toString());

    final int actual = AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) qualThreshold);
    Assert.assertEquals(actual, numExpected, "Wrong number of soft clips detected for read " + read.getSAMString());
}
}

View File

@ -2,26 +2,9 @@
<package name="picard-private-parts">
<executable name="picard-private-parts">
<dependencies>
<class name="edu.mit.broad.picard.genotype.DiploidGenotype" />
<class name="edu.mit.broad.picard.genotype.geli.GeliFileReader" />
<class name="edu.mit.broad.picard.genotype.geli.GeliFileWriter" />
<class name="edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods" />
<class name="edu.mit.broad.picard.util.PasteParser" />
<class name="edu.mit.broad.picard.util.PicardAggregationFsUtil" />
<class name="edu.mit.broad.picard.util.PicardFileSystemUtil" />
<class name="edu.mit.broad.picard.variation.KnownVariantCodecV2" />
<class name="edu.mit.broad.picard.variation.KnownVariantCodec" />
<class name="edu.mit.broad.picard.variation.KnownVariantFileHeader" />
<class name="edu.mit.broad.picard.variation.VariantType" />
<class name="edu.mit.broad.picard.variation.ObsoleteKnownVariantCodecV1" />
<class name="edu.mit.broad.picard.variation.AbstractKnownVariantCodec" />
<class name="edu.mit.broad.picard.variation.KnownVariantIterator" />
<class name="edu.mit.broad.picard.variation.KnownVariantCodecFactory" />
<class name="edu.mit.broad.picard.variation.KnownVariant" />
<class name="net.sf.picard.util.IlluminaUtil" />
<class name="edu.mit.broad.picard.genotype.fingerprint.v2.FingerprintingSummaryMetrics" />
<class name="edu.mit.broad.picard.genotype.concordance.DbSnpMatchMetrics" />
</dependencies>
</executable>
</package>

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="edu.mit.broad" module="picard-private-parts" revision="2375" status="integration" publication="20120502094400" />
<info organisation="edu.mit.broad" module="picard-private-parts" revision="2662" status="integration" publication="" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.67.1197" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.84.1337" status="release" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.67.1197" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.84.1337" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="1.84.1337" status="integration" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="119" status="integration" />
</ivy-module>