Move SomaticIndelDetector and associated tools and libraries into private/andrey package

-- Intermediate commit on the way to archiving SomaticIndelDetector and other tools.
-- SomaticIndelDetector, PairMaker and RemapAlignments tools have been refactored into the private andrey package.  All utility classes refactored into here as well.  At this point, the SomaticIndelDetector builds in this version of the GATK.
-- Subsequent commit will put this code into the archive so it no longer builds in the GATK
This commit is contained in:
Mark DePristo 2012-12-29 14:34:08 -05:00
parent 5f84a4ad82
commit 38cc496de8
4 changed files with 0 additions and 2770 deletions

View File

@ -1,231 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.collections;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/** This class, closely resembling a deque (except that it is not dynamically grown),
* provides an object with array-like interface and efficient
* implementation of shift operation. Use this class when some kind of sliding window is required:
* e.g. an array (window) is populated from some stream of data, and then the window is shifted.
* If most of the data in the window remains the same so that only a few old elements sjould be popped from
* and a few new elements pushed onto the array, both re-populating the whole array from the data and
* shifting a regular array would be grossly inefficient. Instead, shiftData(int N) method of circular array
* efficiently pops out N first elements and makes last N elements available.
*
* Consider an example of reading a character stream A,B,C,D,....,Z into an array with requirement of keeping
* last 5 letters. First, we would read first 5 letters same way as we would with a regular array:<br><br>
*
* <code>
* CircularArray a(5);<br>
* for ( int i = 0; i < 5; i++ ) a.set(i, readChar());<br>
* </code>
* <br>
* and then on the arrival of each next character we shift the array:<br><br>
*
* <code>
* a.shiftData(1); a.set(4, readChar() );<br>
* </code>
* <br>
* After the lines from the above example are executed, the array will <i>logically</i> look as:<br>
*
* B,C,D,E,F,<br><br>
*
* e.g. as if we had a regular array, shifted it one element down and added new element on the top.
*
*
* @author asivache
*
*/
public class CircularArray <T> {
private Object[] data ;
private int offset;
/** Creates an array of fixed length */
public CircularArray(int length) {
if ( length <= 0 ) throw new ReviewedStingException("CircularArray length must be positive. Passed: "+length);
data = new Object[length];
offset = 0;
}
/** Returns length of the array */
public int length() {
return data.length;
}
/** Gets i-th element of the array
*
* @throws IndexOutOfBoundsException if value of i is illegal
*/
@SuppressWarnings("unchecked")
public T get(int i) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; element requested: "+i);
return (T)(data [ ( offset + i ) % data.length ]);
}
/** Sets i-th element of the array to the specified value.
*
* @throws IndexOutOfBoundsException if value of i is illegal
*/
public void set(int i, T value) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; set element request at: "+i);
data [ ( offset + i ) % data.length ] = value;
}
/** Set all elements to null.
*
*/
public void clear() {
for ( int i = 0 ; i < data.length ; i++ ) data[i] = null;
offset = 0;
}
/** Efficient shift-down of the array data. After this operation, array.get(0), array.get(1), etc will
* be returning what array.get(shift), array.get(shift+1),... were returning before the shift was performed,
* and last shift elements of the array will be reset to 0.
* @param shift
*/
public void shiftData(int shift) {
if ( shift >= data.length ) {
// if we shift by more than the length of stored data, we lose
// all that data completely, so we just re-initialize the array.
// This is not the operating mode CircularArray is intended for
// but we can handle it, just in case.
for ( int i = 0 ; i < data.length ; i++ ) data[i] = null;
offset = 0;
return;
}
// shift < data.length, so at least some data should be preserved
final int newOffset = ( offset+shift ) % data.length;
if ( newOffset < offset ) {
// wrap-around!
for ( int i = offset ; i < data.length ; i++ ) data[i] = null;
for ( int i = 0; i < newOffset ; i++ ) data[i] = null;
} else {
for ( int i = offset ; i < newOffset ; i++ ) data[i] = null;
}
offset = newOffset;
}
/** Implements primitive int type-based circular array. See CircularArray for details.
*
* @author asivache
*
*/
public static class Int {
private int [] data ;
private int offset;
/** Creates an array of fixed length */
public Int(int length) {
if ( length <= 0 ) throw new ReviewedStingException("CircularArray length must be positive. Passed: "+length);
data = new int[length]; // automaticaly initialized to zeros
offset = 0;
}
/** Returns length of the array */
public int length() {
return data.length;
}
/** Gets i-th element of the array
*
* @throws IndexOutOfBoundsException if value of i is illegal
*/
public int get(int i) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; element requested: "+i);
return data [ ( offset + i ) % data.length ];
}
/** Sets i-th element of the array to the specified value.
*
* @throws IndexOutOfBoundsException if value of i is illegal
*/
public void set(int i, int value) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; set element request at: "+i);
data [ ( offset + i ) % data.length ] = value;
}
/** Increments i-th element of the array by the specified value (value can be negative).
*
* @throws IndexOutOfBoundsException if i is illegal
*/
public void increment(int i, int value) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; increment element request at: "+i);
data [ ( offset + i ) % data.length ] += value;
}
/** Set all elements to 0.
*
*/
public void clear() {
for ( int i = 0 ; i < data.length ; i++ ) data[i] = 0;
offset = 0;
}
/** Efficient shift-down of the array data. After this operation, array.get(0), array.get(1), etc will
* be returning what array.get(shift), array.get(shift+1),... were returning before the shift was performed,
* and last shift elements of the array will be reset to 0.
* @param shift
*/
public void shiftData(int shift) {
if ( shift >= data.length ) {
// if we shift by more than the length of stored data, we lose
// all that data completely, so we just re-initialize the array.
// This is not the operating mode CircularArray is intended for
// but we can handle it, just in case.
for ( int i = 0 ; i < data.length ; i++ ) data[i] = 0;
offset = 0;
return;
}
// shift < data.length, so at least some data should be preserved
final int newOffset = ( offset+shift ) % data.length;
if ( newOffset < offset ) {
// wrap-around!
for ( int i = offset ; i < data.length ; i++ ) data[i] = 0;
for ( int i = 0; i < newOffset ; i++ ) data[i] = 0;
} else {
for ( int i = offset ; i < newOffset ; i++ ) data[i] = 0;
}
offset = newOffset;
}
}
}

View File

@ -1,195 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.interval;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Iterator;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Oct 7, 2010
* Time: 2:40:02 PM
* To change this template use File | Settings | File Templates.
*/
/** This class provides an adapter to Iterator<GenomeLoc> that returns only (parts of) underlying iterator's
* intervals overlapping with specified "master set" of bounding intervals. The underlying iterator must return
* NON-overlapping intervals in coordinate-sorted order, otherwise the behavior is unspecified. If the master set is represented by
* another interval iterator, it should return sorted and NON-overlapping intervals.
*
*/
public class OverlappingIntervalIterator implements Iterator<GenomeLoc> {
PushbackIterator<GenomeLoc> iter = null;
PushbackIterator<GenomeLoc> boundBy = null;
GenomeLoc prefetchedOverlap = null;
GenomeLoc currentBound = null;
GenomeLoc currentInterval = null;
/** Creates new overlapping iterator that will internally traverse <code>intervals</code> and return only
* overlaps of those with set of intervals returned by <code>boundBy</code>.
* @param intervals
* @param boundBy
*/
public OverlappingIntervalIterator(Iterator<GenomeLoc> intervals, Iterator<GenomeLoc> boundBy) {
this.iter = new PushbackIterator<GenomeLoc>(intervals);
this.boundBy = new PushbackIterator<GenomeLoc>(boundBy);
if ( iter.hasNext() && boundBy.hasNext() ) {
currentInterval = iter.next(); // load first interval
currentBound = boundBy.next(); // load first bounding interval
fetchNextOverlap();
}
}
/** Traverses both iterators in sync, until the first overlap between the two is reached. If no overlap is found
* until the end of the either of the two streams, leaves prefetchedOverlap set to null
*/
private void fetchNextOverlap() {
prefetchedOverlap = null;
// System.out.println("Fetching... (interval="+currentInterval+"; bound="+currentBound+")");
while ( currentInterval != null && currentBound != null ) {
if ( currentInterval.isBefore(currentBound) ) {
// System.out.println(currentInterval +" is before "+currentBound );
if ( ! iter.hasNext() ) currentInterval = null;
else currentInterval = iter.next();
continue;
}
if ( currentInterval.isPast(currentBound) ) {
// System.out.println(currentInterval +" is past "+currentBound );
if ( ! boundBy.hasNext() ) currentBound = null;
else currentBound = boundBy.next();
continue;
}
// we are at this point only if currentInterval overlaps with currentBound
prefetchedOverlap = currentInterval.intersect(currentBound);
// System.out.println("Fetched next overlap: "+prefetchedOverlap);
// now we need to advance at least one of the iterators, so that we would not
// call the same overlap again
// however we still do not know if we are done with either current interval or current bound, because
// two special situations are possible:
//
// 1) next interval overlaps with 2) current interval also overlaps with
// the same bounding interval; next bounding interval; note that
// note that in this case next in this case next bound necessarily
// interval necessarily starts before starts before the next interval
// the next bound
//
// curr. int next int. curr. int
// ----- ------ --------------------------
// ------------------- --------- -------------
// curr. bound curr. bound next bound
// To solve this issue we update either only currentInterval or only currentBound to their next value,
// whichever of those next values (intervals) comes first on the reference genome;
// the rest of the traversal to the next overlap will be performed on the next invocation of
// fetchNextOverlap().
advanceToNearest();
break; // now that we computed the overlap and advanced (at least one of) the intervals/bounds to
// the next location, we are done - bail out from the loop.
}
}
private void advanceToNearest() {
if ( ! iter.hasNext() ) {
currentBound = boundBy.hasNext() ? boundBy.next() : null;
} else {
if ( ! boundBy.hasNext() ) currentInterval = iter.hasNext() ? iter.next() : null;
else {
// both intervals and bounds have next value available; let's check which comes first:
GenomeLoc nextInterval = iter.next();
GenomeLoc nextBound = boundBy.next();
if ( nextInterval.compareTo(nextBound) < 0 ) {
currentInterval = nextInterval;
boundBy.pushback(nextBound);
} else {
currentBound = nextBound;
iter.pushback(nextInterval);
}
}
}
}
/**
* Returns <tt>true</tt> if the iteration has more elements. (In other
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
* rather than throwing an exception.)
*
* @return <tt>true</tt> if the iterator has more elements.
*/
public boolean hasNext() {
return prefetchedOverlap != null;
}
/**
* Returns the next element in the iteration.
*
* @return the next element in the iteration.
* @throws java.util.NoSuchElementException
* iteration has no more elements.
*/
public GenomeLoc next() {
if ( prefetchedOverlap == null )
throw new java.util.NoSuchElementException("Illegal call to next(): Overlapping iterator has no more overlaps");
GenomeLoc ret = prefetchedOverlap; // cache current prefetched overlap
fetchNextOverlap(); // prefetch next overlap
return ret ;
}
/**
* Removes from the underlying collection the last element returned by the
* iterator (optional operation). This method can be called only once per
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
* the underlying collection is modified while the iteration is in
* progress in any way other than by calling this method.
*
* @throws UnsupportedOperationException if the <tt>remove</tt>
* operation is not supported by this Iterator.
* @throws IllegalStateException if the <tt>next</tt> method has not
* yet been called, or the <tt>remove</tt> method has already
* been called after the last call to the <tt>next</tt>
* method.
*/
public void remove() {
throw new UnsupportedOperationException("remove() method is not supported by OverlappingIntervalIterator");
//To change body of implemented methods use File | Settings | File Templates.
}
}

View File

@ -236,18 +236,6 @@ public class AlignmentUtils {
return n;
}
public static int getNumAlignedBases(final SAMRecord r) {
int n = 0;
final Cigar cigar = r.getCigar();
if (cigar == null) return 0;
for (final CigarElement e : cigar.getCigarElements())
if (e.getOperator() == CigarOperator.M)
n += e.getLength();
return n;
}
public static int getNumAlignedBasesCountingSoftClips(final SAMRecord r) {
int n = 0;
final Cigar cigar = r.getCigar();
@ -512,47 +500,6 @@ public class AlignmentUtils {
return true;
}
/**
* Returns true is read is mapped and mapped uniquely (Q>0).
*
* @param read
* @return
*/
public static boolean isReadUniquelyMapped(SAMRecord read) {
return (!AlignmentUtils.isReadUnmapped(read)) && read.getMappingQuality() > 0;
}
/**
* Returns the array of base qualitites in the order the bases were read on the machine (i.e. always starting from
* cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base
* qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array
* of read's base qualitites is inverted (in this case new array is allocated and returned).
*
* @param read
* @return
*/
public static byte[] getQualsInCycleOrder(SAMRecord read) {
if (isReadUnmapped(read) || !read.getReadNegativeStrandFlag()) return read.getBaseQualities();
return Utils.reverse(read.getBaseQualities());
}
/**
* Returns the array of original base qualitites (before recalibration) in the order the bases were read on the machine (i.e. always starting from
* cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base
* qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array
* of read's base qualitites is inverted (in this case new array is allocated and returned). If no original base qualities
* are available this method will throw a runtime exception.
*
* @param read
* @return
*/
public static byte[] getOriginalQualsInCycleOrder(SAMRecord read) {
if (isReadUnmapped(read) || !read.getReadNegativeStrandFlag()) return read.getOriginalBaseQualities();
return Utils.reverse(read.getOriginalBaseQualities());
}
/**
* Takes the alignment of the read sequence <code>readSeq</code> to the reference sequence <code>refSeq</code>
* starting at 0-based position <code>refIndex</code> on the <code>refSeq</code> and specified by its <code>cigar</code>.