Separated out the DoC calculations from the XHMM pipeline, so that CalcDepthOfCoverage can be used for calculating joint coverage on a per-base accounting over multiple samples (e.g., family samples)
This commit is contained in:
parent
449b89bd34
commit
0b717e2e2e
|
|
@ -1,143 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;

import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;

import java.util.Collection;
import java.util.LinkedList;
import java.util.ListIterator;

/**
 * Key algorithmic helper for ReadBasedReferenceOrderedData.
 *
 * Takes a single iterator of features, and provides a single capability that returns
 * the list of RODs that overlap an interval.  Allows sequential getOverlapping() calls
 * from intervals provided that these intervals always have increasing getStart() values.
 */
class IntervalOverlappingRODsFromStream {
    /**
     * The last interval passed to getOverlapping().  Held only for QC purposes: it lets
     * getOverlapping() reject queries that violate the increasing-start-position contract.
     */
    GenomeLoc lastQuery = null;

    /** The name attached to every RODRecordList returned by getOverlapping(). */
    private final String name;

    /**
     * Features already read off the stream that may still overlap this or a future query.
     * A LinkedList so that trimCurrentFeaturesToLoc() can remove stale entries in O(1) each.
     */
    private final LinkedList<GATKFeature> currentFeatures = new LinkedList<GATKFeature>();

    /** The not-yet-consumed portion of the feature stream, assumed to be in coordinate order. */
    private final PeekableIterator<RODRecordList> futureFeatures;

    /**
     * Create a new IntervalOverlappingRODsFromStream that reads elements from futureFeatures and
     * returns RODRecordLists having name.
     *
     * @param name the name to attach to every returned RODRecordList
     * @param futureFeatures the underlying stream of features; must not be null
     */
    IntervalOverlappingRODsFromStream(final String name, final PeekableIterator<RODRecordList> futureFeatures) {
        if ( futureFeatures == null ) throw new IllegalArgumentException("futureFeatures cannot be null");

        this.name = name;
        this.futureFeatures = futureFeatures;
    }

    /**
     * Get the list of RODs overlapping loc from this stream of RODs.
     *
     * Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart
     *
     * @param loc the interval to query
     * @return a non-null RODRecordList containing the overlapping RODs, which may be empty
     */
    @Ensures({"overlaps(loc, result)",
            "! futureFeatures.hasNext() || futureFeatures.peek().getLocation().isPast(loc)",
            "result != null"})
    public RODRecordList getOverlapping(final GenomeLoc loc) {
        if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
            throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));

        // BUGFIX: lastQuery was declared and checked but never assigned, so the
        // out-of-order-query guard above could never fire.  Record this query for QC.
        lastQuery = loc;

        trimCurrentFeaturesToLoc(loc);
        readOverlappingFutureFeatures(loc);
        return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
    }

    /**
     * For contract assurance.  Checks that all bindings overlap loc.
     *
     * @param loc the query interval
     * @param bindings the bindings returned for loc
     * @return true iff every feature in bindings overlaps loc
     */
    @Requires({"loc != null", "bindings != null"})
    private boolean overlaps(final GenomeLoc loc, final RODRecordList bindings) {
        for ( final GATKFeature feature : bindings )
            if ( ! feature.getLocation().overlapsP(loc) )
                return false;
        return true;
    }

    /**
     * Subset the features in all to those that overlap with loc.
     *
     * The current features list contains everything read that cannot be thrown away yet, but not
     * everything in there necessarily overlaps with loc.  Subset to just those that do overlap.
     *
     * @param loc the location that features must overlap
     * @param all the list of all features
     * @return a subset of all that overlaps with loc
     */
    @Requires({"loc != null", "all != null"})
    @Ensures("result.size() <= all.size()")
    private Collection<GATKFeature> subsetToOverlapping(final GenomeLoc loc, final Collection<GATKFeature> all) {
        final LinkedList<GATKFeature> overlapping = new LinkedList<GATKFeature>();
        for ( final GATKFeature feature : all )
            if ( feature.getLocation().overlapsP(loc) )
                overlapping.add(feature);
        return overlapping;
    }

    /**
     * Update function.  Remove all elements of currentFeatures that end before loc.
     *
     * @param loc the location to use
     */
    @Requires("loc != null")
    @Ensures("currentFeatures.size() <= old(currentFeatures.size())")
    private void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
        final ListIterator<GATKFeature> it = currentFeatures.listIterator();
        while ( it.hasNext() ) {
            final GATKFeature feature = it.next();
            if ( feature.getLocation().isBefore(loc) )
                it.remove();
        }
    }

    /**
     * Update function: Read all elements from futureFeatures that overlap with loc.
     *
     * Stops at the first element that starts past the end of loc, or when the stream empties.
     *
     * @param loc the query interval
     */
    @Requires("loc != null")
    @Ensures("currentFeatures.size() >= old(currentFeatures.size())")
    private void readOverlappingFutureFeatures(final GenomeLoc loc) {
        while ( futureFeatures.hasNext() ) {
            final GenomeLoc nextLoc = futureFeatures.peek().getLocation();
            if ( nextLoc.isBefore(loc) ) {
                futureFeatures.next(); // next rod element is before loc, throw it away and keep looking
            } else if ( nextLoc.isPast(loc) ) {
                break; // next element is past loc, stop looking but don't pop it
            } else if ( nextLoc.overlapsP(loc) ) {
                // add overlapping elements to our current features, removing from stream
                for ( final GATKFeature feature : futureFeatures.next() ) {
                    currentFeatures.add(feature);
                }
            }
        }
    }
}
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
/**
|
||||
* Type of downsampling method to invoke.
|
||||
*
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public enum DownsampleType {
|
||||
NONE,
|
||||
ALL_READS,
|
||||
BY_SAMPLE
|
||||
}
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
/**
|
||||
* Describes the method for downsampling reads at a given locus.
|
||||
*/
|
||||
|
||||
public class DownsamplingMethod {
|
||||
/**
|
||||
* Type of downsampling to perform.
|
||||
*/
|
||||
public final DownsampleType type;
|
||||
|
||||
/**
|
||||
* Actual downsampling target is specified as an integer number of reads.
|
||||
*/
|
||||
public final Integer toCoverage;
|
||||
|
||||
/**
|
||||
* Actual downsampling target is specified as a fraction of total available reads.
|
||||
*/
|
||||
public final Double toFraction;
|
||||
|
||||
/**
|
||||
* Use the new experimental downsampling?
|
||||
*/
|
||||
public final boolean useExperimentalDownsampling;
|
||||
|
||||
/**
|
||||
* Expresses no downsampling applied at all.
|
||||
*/
|
||||
public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null,false);
|
||||
|
||||
/**
|
||||
* Default type to use if no type is specified
|
||||
*/
|
||||
public static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
|
||||
|
||||
/**
|
||||
* Default target coverage for locus-based traversals
|
||||
*/
|
||||
public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
|
||||
|
||||
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useExperimentalDownsampling ) {
|
||||
this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
|
||||
this.toCoverage = toCoverage;
|
||||
this.toFraction = toFraction;
|
||||
this.useExperimentalDownsampling = useExperimentalDownsampling;
|
||||
|
||||
if ( type == DownsampleType.NONE ) {
|
||||
toCoverage = null;
|
||||
toFraction = null;
|
||||
}
|
||||
|
||||
validate();
|
||||
}
|
||||
|
||||
private void validate() {
|
||||
// Can't leave toFraction and toCoverage null unless type is NONE
|
||||
if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null )
|
||||
throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling.");
|
||||
|
||||
// Fraction and coverage cannot both be specified.
|
||||
if ( toFraction != null && toCoverage != null )
|
||||
throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified. Please choose only one.");
|
||||
|
||||
// toCoverage must be > 0 when specified
|
||||
if ( toCoverage != null && toCoverage <= 0 ) {
|
||||
throw new UserException.CommandLineException("toCoverage must be > 0 when downsampling to coverage");
|
||||
}
|
||||
|
||||
// toFraction must be >= 0.0 and <= 1.0 when specified
|
||||
if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
|
||||
throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
|
||||
}
|
||||
|
||||
// Some restrictions only exist for the old downsampling implementation:
|
||||
if ( ! useExperimentalDownsampling ) {
|
||||
// By sample downsampling does not work with a fraction of reads in the old downsampling implementation
|
||||
if( type == DownsampleType.BY_SAMPLE && toFraction != null )
|
||||
throw new UserException.CommandLineException("Cannot downsample to fraction with the BY_SAMPLE method");
|
||||
}
|
||||
|
||||
// Some restrictions only exist for the new downsampling implementation:
|
||||
if ( useExperimentalDownsampling ) {
|
||||
if ( type == DownsampleType.ALL_READS && toCoverage != null ) {
|
||||
throw new UserException.CommandLineException("Cannot downsample to coverage with the ALL_READS method in the experimental downsampling implementation");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder("Downsampling Settings: ");
|
||||
|
||||
if ( type == DownsampleType.NONE ) {
|
||||
builder.append("No downsampling");
|
||||
}
|
||||
else {
|
||||
builder.append(String.format("Method: %s ", type));
|
||||
|
||||
if ( toCoverage != null ) {
|
||||
builder.append(String.format("Target Coverage: %d ", toCoverage));
|
||||
}
|
||||
else {
|
||||
builder.append(String.format("Target Fraction: %.2f ", toFraction));
|
||||
}
|
||||
|
||||
if ( useExperimentalDownsampling ) {
|
||||
builder.append("Using Experimental Downsampling");
|
||||
}
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useExperimentalDownsampling ) {
|
||||
if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
|
||||
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
|
||||
null, useExperimentalDownsampling);
|
||||
}
|
||||
else {
|
||||
return new DownsamplingMethod(DownsampleType.NONE, null, null, useExperimentalDownsampling);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Factory for creating FractionalDownsamplers on demand
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class FractionalDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {
|
||||
|
||||
private double fraction;
|
||||
|
||||
public FractionalDownsamplerFactory( double fraction ) {
|
||||
this.fraction = fraction;
|
||||
}
|
||||
|
||||
public ReadsDownsampler<T> newInstance() {
|
||||
return new FractionalDownsampler<T>(fraction);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,212 +0,0 @@
|
|||
/*
 * Copyright (c) 2012, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.downsampling;

import org.broadinstitute.sting.utils.MathUtils;

import java.util.*;

/**
 * Leveling Downsampler: Given a set of Lists of arbitrary items and a target size, removes items from
 * the Lists in an even fashion until the total size of all Lists is <= the target size. Leveling
 * does not occur until all Lists have been submitted and signalEndOfInput() is called.
 *
 * The Lists should be LinkedLists for maximum efficiency during item removal, however other
 * kinds of Lists are also accepted (albeit at a slight performance penalty).
 *
 * Since this downsampler extends the Downsampler interface rather than the ReadsDownsampler interface,
 * the Lists need not contain reads. However this downsampler may not be wrapped within one of the
 * DownsamplingReadsIterators
 *
 * @param <T> the List type representing the stacks to be leveled
 * @param <E> the type of the elements of each List
 *
 * @author David Roazen
 */
public class LevelingDownsampler<T extends List<E>, E> implements Downsampler<T> {

    // The combined size of all groups may not exceed this value after leveling
    private int targetSize;

    // Groups submitted so far; replaced wholesale by clear()/consumeFinalizedItems()
    private List<T> groups;

    // True once signalEndOfInput() has leveled the groups
    private boolean groupsAreFinalized;

    // Running count of items removed by leveling; survives clear(), reset by reset()
    private int numDiscardedItems;

    /**
     * Construct a LevelingDownsampler
     *
     * @param targetSize the sum of the sizes of all individual Lists this downsampler is fed may not exceed
     *                   this value -- if it does, items are removed from Lists evenly until the total size
     *                   is <= this value
     */
    public LevelingDownsampler( int targetSize ) {
        this.targetSize = targetSize;
        clear();
        reset();
    }

    public void submit( T item ) {
        groups.add(item);
    }

    public void submit( Collection<T> items ){
        groups.addAll(items);
    }

    public boolean hasFinalizedItems() {
        return groupsAreFinalized && groups.size() > 0;
    }

    public List<T> consumeFinalizedItems() {
        if ( ! hasFinalizedItems() ) {
            return new ArrayList<T>();
        }

        // pass by reference rather than make a copy, for speed
        List<T> toReturn = groups;
        clear();
        return toReturn;
    }

    public boolean hasPendingItems() {
        return ! groupsAreFinalized && groups.size() > 0;
    }

    public T peekFinalized() {
        return hasFinalizedItems() ? groups.get(0) : null;
    }

    public T peekPending() {
        return hasPendingItems() ? groups.get(0) : null;
    }

    public int getNumberOfDiscardedItems() {
        return numDiscardedItems;
    }

    public void signalEndOfInput() {
        levelGroups();
        groupsAreFinalized = true;
    }

    public void clear() {
        groups = new ArrayList<T>();
        groupsAreFinalized = false;
    }

    public void reset() {
        numDiscardedItems = 0;
    }

    /**
     * Remove items round-robin from the groups until the total size is <= targetSize,
     * never letting any group fall below one item.
     */
    private void levelGroups() {
        // Guard: with no groups there is nothing to level, and the round-robin modulo
        // below would divide by zero (reachable when targetSize < 0).
        if ( groups.isEmpty() ) {
            return;
        }

        int totalSize = 0;
        int[] groupSizes = new int[groups.size()];
        int currentGroupIndex = 0;

        for ( T group : groups ) {
            groupSizes[currentGroupIndex] = group.size();
            totalSize += groupSizes[currentGroupIndex];
            currentGroupIndex++;
        }

        if ( totalSize <= targetSize ) {
            return; // no need to eliminate any items
        }

        // We will try to remove exactly this many items, however we will refuse to allow any
        // one group to fall below size 1, and so might end up removing fewer items than this
        int numItemsToRemove = totalSize - targetSize;

        currentGroupIndex = 0;
        int numConsecutiveUnmodifiableGroups = 0;

        // Continue until we've either removed all the items we wanted to, or we can't
        // remove any more items without violating the constraint that all groups must
        // be left with at least one item
        while ( numItemsToRemove > 0 && numConsecutiveUnmodifiableGroups < groupSizes.length ) {
            if ( groupSizes[currentGroupIndex] > 1 ) {
                groupSizes[currentGroupIndex]--;
                numItemsToRemove--;
                numConsecutiveUnmodifiableGroups = 0;
            }
            else {
                numConsecutiveUnmodifiableGroups++;
            }

            currentGroupIndex = (currentGroupIndex + 1) % groupSizes.length;
        }

        // Now we actually go through and reduce each group to its new count as specified in groupSizes
        currentGroupIndex = 0;
        for ( T group : groups ) {
            downsampleOneGroup(group, groupSizes[currentGroupIndex]);
            currentGroupIndex++;
        }
    }

    /**
     * Reduce one group to numItemsToKeep randomly-selected items, updating numDiscardedItems.
     *
     * @param group the group to shrink in place
     * @param numItemsToKeep how many of the group's items survive; no-op if >= group.size()
     */
    private void downsampleOneGroup( T group, int numItemsToKeep ) {
        if ( numItemsToKeep >= group.size() ) {
            return;
        }

        numDiscardedItems += group.size() - numItemsToKeep;

        BitSet itemsToKeep = new BitSet(group.size());
        for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(group.size(), numItemsToKeep) ) {
            itemsToKeep.set(selectedIndex);
        }

        int currentIndex = 0;

        // If our group is a linked list, we can remove the desired items in a single O(n) pass with an iterator
        if ( group instanceof LinkedList ) {
            // Parameterized iterator (was a raw Iterator, which generated an unchecked warning)
            Iterator<E> iter = group.iterator();
            while ( iter.hasNext() ) {
                iter.next();

                if ( ! itemsToKeep.get(currentIndex) ) {
                    iter.remove();
                }

                currentIndex++;
            }
        }
        // If it's not a linked list, it's more efficient to copy the desired items into a new list and back rather
        // than suffer O(n^2) of item shifting
        else {
            List<E> keptItems = new ArrayList<E>(numItemsToKeep);

            for ( E item : group ) {
                if ( itemsToKeep.get(currentIndex) ) {
                    keptItems.add(item);
                }
                currentIndex++;
            }
            group.clear();
            group.addAll(keptItems);
        }
    }
}
|
||||
|
|
@ -1,202 +0,0 @@
|
|||
/*
 * Copyright (c) 2012, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.downsampling;

import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordComparator;
import net.sf.samtools.SAMRecordCoordinateComparator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;

import java.util.*;

/**
 * StingSAMIterator wrapper around our generic reads downsampler interface
 * that downsamples reads for each sample independently, and then re-assembles
 * the reads back into a single merged stream.
 *
 * @author David Roazen
 */
public class PerSampleDownsamplingReadsIterator implements StingSAMIterator {

    // The wrapped source of (coordinate-sorted) reads
    private StingSAMIterator nestedSAMIterator;
    // Creates one downsampler per sample, all with identical settings
    private ReadsDownsamplerFactory<SAMRecord> downsamplerFactory;
    // sample name (null allowed for reads without a read group) -> that sample's downsampler
    private Map<String, ReadsDownsampler<SAMRecord>> perSampleDownsamplers;
    // Finalized reads from all samples, merged back into coordinate order
    private PriorityQueue<SAMRecord> orderedDownsampledReadsCache;
    // The read next() will return; null means the iterator is exhausted
    private SAMRecord nextRead = null;
    private SAMRecordComparator readComparator = new SAMRecordCoordinateComparator();
    // Earliest read still pending inside any downsampler, and which downsampler holds it;
    // nothing positioned at or after this read may be released yet
    private SAMRecord earliestPendingRead = null;
    private ReadsDownsampler<SAMRecord> earliestPendingDownsampler = null;

    // Initial size of our cache of finalized reads
    private static final int DOWNSAMPLED_READS_INITIAL_CACHE_SIZE = 4096;

    // The number of positional changes that can occur in the read stream before all downsamplers
    // should be informed of the current position (guards against samples with relatively sparse reads
    // getting stuck in a pending state):
    private static final int DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL = 3; // TODO: experiment with this value

    /**
     * @param iter wrapped iterator from which this iterator will pull reads
     * @param downsamplerFactory factory used to create new downsamplers as needed
     */
    public PerSampleDownsamplingReadsIterator( StingSAMIterator iter, ReadsDownsamplerFactory<SAMRecord> downsamplerFactory ) {
        nestedSAMIterator = iter;
        this.downsamplerFactory = downsamplerFactory;
        perSampleDownsamplers = new HashMap<String, ReadsDownsampler<SAMRecord>>();
        orderedDownsampledReadsCache = new PriorityQueue<SAMRecord>(DOWNSAMPLED_READS_INITIAL_CACHE_SIZE, readComparator);

        advanceToNextRead();
    }

    public boolean hasNext() {
        return nextRead != null;
    }

    public SAMRecord next() {
        if ( nextRead == null ) {
            throw new NoSuchElementException("next() called when there are no more items");
        }

        SAMRecord currentRead = nextRead;
        advanceToNextRead();

        return currentRead;
    }

    /** Load nextRead with the next releasable read, or null if the stream is exhausted. */
    private void advanceToNextRead() {
        if ( ! readyToReleaseReads() && ! fillDownsampledReadsCache() ) {
            nextRead = null;
        }
        else {
            nextRead = orderedDownsampledReadsCache.poll();
        }
    }

    /**
     * A finalized read may be released only if doing so cannot violate global sort order --
     * i.e., it must not come after the earliest read still pending in any downsampler.
     */
    private boolean readyToReleaseReads() {
        if ( orderedDownsampledReadsCache.isEmpty() ) {
            return false;
        }

        return earliestPendingRead == null ||
               readComparator.compare(orderedDownsampledReadsCache.peek(), earliestPendingRead) <= 0;
    }

    /**
     * Refresh earliestPendingRead/earliestPendingDownsampler after currentDownsampler's
     * pending state may have changed.
     */
    private void updateEarliestPendingRead( ReadsDownsampler<SAMRecord> currentDownsampler ) {
        // If there is no recorded earliest pending read and this downsampler has pending items,
        // then this downsampler's first pending item becomes the new earliest pending read:
        if ( earliestPendingRead == null && currentDownsampler.hasPendingItems() ) {
            earliestPendingRead = currentDownsampler.peekPending();
            earliestPendingDownsampler = currentDownsampler;
        }
        // In all other cases, we only need to update the earliest pending read when the downsampler
        // associated with it experiences a change in its pending reads, since by assuming a sorted
        // read stream we're assured that each downsampler's earliest pending read will only increase
        // in genomic position over time.
        //
        // TODO: An occasional O(samples) linear search seems like a better option than keeping the downsamplers
        // TODO: sorted by earliest pending read, which would cost at least O(total_reads * (samples + log(samples))),
        // TODO: but need to verify this empirically.
        else if ( currentDownsampler == earliestPendingDownsampler &&
                  (! currentDownsampler.hasPendingItems() || readComparator.compare(currentDownsampler.peekPending(), earliestPendingRead) != 0) ) {

            earliestPendingRead = null;
            earliestPendingDownsampler = null;
            for ( ReadsDownsampler<SAMRecord> candidate : perSampleDownsamplers.values() ) {
                if ( candidate.hasPendingItems() &&
                     (earliestPendingRead == null || readComparator.compare(candidate.peekPending(), earliestPendingRead) < 0) ) {

                    earliestPendingRead = candidate.peekPending();
                    earliestPendingDownsampler = candidate;
                }
            }
        }
    }

    /**
     * Pull reads from the nested iterator into the per-sample downsamplers until a read can be
     * released, harvesting finalized reads into the ordered cache along the way.
     *
     * @return true if at least one read is now releasable
     */
    private boolean fillDownsampledReadsCache() {
        SAMRecord previousRead = null;
        int numPositionalChanges = 0;

        // Continue submitting reads to the per-sample downsamplers until the read at the top of the priority queue
        // can be released without violating global sort order
        while ( nestedSAMIterator.hasNext() && ! readyToReleaseReads() ) {
            SAMRecord read = nestedSAMIterator.next();
            String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;

            ReadsDownsampler<SAMRecord> sampleDownsampler = perSampleDownsamplers.get(sampleName);
            if ( sampleDownsampler == null ) {
                sampleDownsampler = downsamplerFactory.newInstance();
                perSampleDownsamplers.put(sampleName, sampleDownsampler);
            }

            sampleDownsampler.submit(read);
            updateEarliestPendingRead(sampleDownsampler);

            if ( previousRead != null && previousRead.getAlignmentStart() != read.getAlignmentStart() ) {
                numPositionalChanges++;
            }

            // If the number of times we've changed position exceeds a certain threshold, inform all
            // downsamplers of the current position in the read stream. This is to prevent downsamplers
            // for samples with sparser reads than others from getting stuck too long in a pending state.
            if ( numPositionalChanges > DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL ) {
                for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
                    perSampleDownsampler.signalNoMoreReadsBefore(read);
                    updateEarliestPendingRead(perSampleDownsampler);
                }
            }

            previousRead = read;
        }

        if ( ! nestedSAMIterator.hasNext() ) {
            for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
                perSampleDownsampler.signalEndOfInput();
            }
            earliestPendingRead = null;
            earliestPendingDownsampler = null;
        }

        for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
            if ( perSampleDownsampler.hasFinalizedItems() ) {
                orderedDownsampledReadsCache.addAll(perSampleDownsampler.consumeFinalizedItems());
            }
        }

        return readyToReleaseReads();
    }

    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
    }

    public void close() {
        nestedSAMIterator.close();
    }

    public Iterator<SAMRecord> iterator() {
        return this;
    }
}
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
 * A ReadsDownsamplerFactory can be used to create an arbitrary number of instances of a particular
 * downsampler, all sharing the same construction parameters.
 *
 * @author David Roazen
 */
public interface ReadsDownsamplerFactory<T extends SAMRecord> {

    /**
     * Create a fresh downsampler instance configured with this factory's construction parameters.
     *
     * @return a newly constructed ReadsDownsampler
     */
    public ReadsDownsampler<T> newInstance();
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Factory for creating ReservoirDownsamplers on demand
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class ReservoirDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {
|
||||
|
||||
private int targetSampleSize;
|
||||
|
||||
public ReservoirDownsamplerFactory( int targetSampleSize ) {
|
||||
this.targetSampleSize = targetSampleSize;
|
||||
}
|
||||
|
||||
public ReadsDownsampler<T> newInstance() {
|
||||
return new ReservoirDownsampler<T>(targetSampleSize);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,169 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Simple Positional Downsampler: Downsample each stack of reads at each alignment start to a size <= a target coverage
 * using a Reservoir downsampler. Stores only O(target coverage) reads in memory at any given time.
 *
 * Reads for the current alignment start accumulate in the reservoir; when a read past the current
 * position arrives, the reservoir's survivors are moved to the finalized list and the reservoir is
 * re-armed for the new position. Unmapped reads at the end of a coordinate-sorted stream bypass
 * downsampling entirely.
 *
 * @author David Roazen
 */
public class SimplePositionalDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {

    // Maximum number of reads retained per alignment start position
    private int targetCoverage;

    // Reservoir holding the candidate reads for the current alignment start
    private ReservoirDownsampler<T> reservoir;

    // Reference index of the position currently being accumulated
    private int currentContigIndex;

    // Alignment start of the position currently being accumulated
    private int currentAlignmentStart;

    // False until the first read establishes a position
    private boolean positionEstablished;

    // Once true, all subsequent reads are passed through without downsampling
    private boolean unmappedReadsReached;

    // Reads that have survived downsampling and are ready for consumption
    private ArrayList<T> finalizedReads;

    // Running count of reads dropped by the reservoir since the last reset()
    private int numDiscardedItems;

    /**
     * Construct a SimplePositionalDownsampler
     *
     * @param targetCoverage Maximum number of reads that may share any given alignment start position
     */
    public SimplePositionalDownsampler( int targetCoverage ) {
        this.targetCoverage = targetCoverage;
        reservoir = new ReservoirDownsampler<T>(targetCoverage);
        finalizedReads = new ArrayList<T>();
        clear();
        reset();
    }

    /**
     * Submit one read. May finalize the previous position's reservoir as a side effect
     * if the read starts at a new position.
     *
     * @param newRead the read to submit
     */
    public void submit( T newRead ) {
        updatePositionalState(newRead);

        if ( unmappedReadsReached ) { // don't downsample the unmapped reads at the end of the stream
            finalizedReads.add(newRead);
        }
        else {
            // track how many reads the reservoir dropped as a result of this submission
            int reservoirPreviouslyDiscardedItems = reservoir.getNumberOfDiscardedItems();
            reservoir.submit(newRead);
            numDiscardedItems += reservoir.getNumberOfDiscardedItems() - reservoirPreviouslyDiscardedItems;
        }
    }

    /**
     * Submit each read in the collection, in iteration order.
     *
     * @param newReads reads to submit
     */
    public void submit( Collection<T> newReads ) {
        for ( T read : newReads ) {
            submit(read);
        }
    }

    /** @return true if downsampled reads are ready for consumption */
    public boolean hasFinalizedItems() {
        return finalizedReads.size() > 0;
    }

    /**
     * Hand over all finalized reads; this downsampler's finalized list is emptied.
     *
     * @return the reads that survived downsampling so far
     */
    public List<T> consumeFinalizedItems() {
        // pass by reference rather than make a copy, for speed
        List<T> toReturn = finalizedReads;
        finalizedReads = new ArrayList<T>();
        return toReturn;
    }

    /** @return true if the reservoir still holds reads for the in-progress position */
    public boolean hasPendingItems() {
        return reservoir.hasFinalizedItems();
    }

    /** @return the first finalized read without removing it, or null if none */
    public T peekFinalized() {
        return finalizedReads.isEmpty() ? null : finalizedReads.get(0);
    }

    /** @return a read still pending in the reservoir without removing it, or null if none */
    public T peekPending() {
        return reservoir.peekFinalized();
    }

    /** @return number of reads discarded since the last reset() */
    public int getNumberOfDiscardedItems() {
        return numDiscardedItems;
    }

    /** Flush the in-progress position: move the reservoir's survivors to the finalized list. */
    public void signalEndOfInput() {
        finalizeReservoir();
    }

    /** Discard all buffered reads and positional state (does not reset the discard counter). */
    public void clear() {
        reservoir.clear();
        reservoir.reset();
        finalizedReads.clear();
        positionEstablished = false;
        unmappedReadsReached = false;
    }

    /** Reset the discard counter to zero. */
    public void reset() {
        numDiscardedItems = 0;
    }

    /** @return true: positional downsampling only makes sense on coordinate-sorted input */
    public boolean requiresCoordinateSortOrder() {
        return true;
    }

    /**
     * Promise that no future read will start before the given read, allowing the current
     * position to be finalized early if the given read is past it.
     *
     * @param read sentinel read establishing a lower bound on future alignment starts
     */
    public void signalNoMoreReadsBefore( T read ) {
        updatePositionalState(read);
    }

    // If newRead is past the current position, finalize the reservoir and advance to
    // newRead's position; flip into pass-through mode once unmapped reads are seen.
    private void updatePositionalState( T newRead ) {
        if ( readIsPastCurrentPosition(newRead) ) {
            if ( reservoir.hasFinalizedItems() ) {
                finalizeReservoir();
            }

            setCurrentPosition(newRead);

            if ( newRead.getReadUnmappedFlag() ) {
                unmappedReadsReached = true;
            }
        }
    }

    // Record the read's contig/start as the position currently being accumulated.
    private void setCurrentPosition( T read ) {
        currentContigIndex = read.getReferenceIndex();
        currentAlignmentStart = read.getAlignmentStart();
        positionEstablished = true;
    }

    // A read is "past" the current position if no position is established yet, it lies on a
    // later contig or start, or it is the first unmapped read at the end of the stream.
    private boolean readIsPastCurrentPosition( T read ) {
        return ! positionEstablished ||
               read.getReferenceIndex() > currentContigIndex ||
               read.getAlignmentStart() > currentAlignmentStart ||
               (read.getReadUnmappedFlag() && ! unmappedReadsReached);
    }

    // Move the reservoir's survivors into the finalized list and re-arm the reservoir.
    private void finalizeReservoir() {
        finalizedReads.addAll(reservoir.consumeFinalizedItems());
        reservoir.reset();
    }
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Factory for creating SimplePositionalDownsamplers on demand
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SimplePositionalDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {
|
||||
|
||||
private int targetCoverage;
|
||||
|
||||
public SimplePositionalDownsamplerFactory( int targetCoverage ) {
|
||||
this.targetCoverage = targetCoverage;
|
||||
}
|
||||
|
||||
public ReadsDownsampler<T> newInstance() {
|
||||
return new SimplePositionalDownsampler<T>(targetCoverage);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/**
 * Iterator that downsamples a SAM read stream by keeping each read independently with
 * probability approximately equal to the supplied fraction (quantized to 1/10000ths).
 * One read of lookahead is buffered so hasNext() can be answered accurately.
 */
public class LegacyDownsampleIterator implements StingSAMIterator {

    // The underlying read stream being downsampled
    StingSAMIterator it;
    // A read survives when a uniform draw from [0, 10000) is below this cutoff
    int cutoff;
    // One-read lookahead buffer; null means the underlying stream is exhausted
    SAMRecord next;

    /**
     * @param it       underlying read iterator to downsample
     * @param fraction approximate fraction of reads to keep, quantized to 1/10000ths
     */
    public LegacyDownsampleIterator(StingSAMIterator it, double fraction) {
        this.it = it;
        cutoff = (int)(fraction * 10000);
        next = getNextRecord();
    }

    /** @return true if at least one more surviving read is available */
    public boolean hasNext() {
        return next != null;
    }

    /** @return the buffered read, refilling the lookahead from the underlying stream */
    public SAMRecord next() {
        SAMRecord result = next;
        next = getNextRecord();
        return result;
    }

    /**
     * Unsupported: reads cannot be removed from the underlying SAM stream.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
    }

    // Draw reads from the underlying stream until one survives the random cutoff,
    // or return null when the stream is exhausted.
    private SAMRecord getNextRecord() {
        while ( true ) {
            if ( !it.hasNext() )
                return null;
            SAMRecord rec = it.next();
            // uses the engine-wide random generator so runs are reproducible with a fixed seed
            if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoff )
                return rec;
        }
    }

    /** Closes the underlying iterator. */
    public void close() {
        it.close();
    }

    /** @return this — the object serves as its own iterator */
    public Iterator<SAMRecord> iterator() {
        return this;
    }
}
|
||||
|
|
@ -1,649 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
||||
*/
|
||||
public class LocusIteratorByStateExperimental extends LocusIterator {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// member fields
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Used to create new GenomeLocs.
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
private final ArrayList<String> samples;
|
||||
private final ReadStateManager readStates;
|
||||
|
||||
    /**
     * Tracks a single read's traversal along the reference: which read base and which
     * genome offset the iteration currently points at, driven by the read's CIGAR.
     */
    protected static class SAMRecordState {
        SAMRecord read;
        int readOffset = -1;     // how far are we offset from the start of the read bases?
        int genomeOffset = -1;   // how far are we offset from the alignment start on the genome?

        Cigar cigar = null;              // the read's CIGAR, cached at construction
        int cigarOffset = -1;            // index of the CIGAR element currently being traversed
        CigarElement curElement = null;  // the CIGAR element currently being traversed
        int nCigarElements = 0;          // total number of CIGAR elements in the read

        int cigarElementCounter = -1;   // how far are we into a single cigarElement

        // The logical model for generating extended events is as follows: the "record state" implements the traversal
        // along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This
        // can be a (mis)match or a deletion (in the latter case, we still return on every individual reference base the
        // deletion spans). In the extended events mode, the record state also remembers if there was an insertion, or
        // if the deletion just started *right before* the current reference base the record state is pointing to upon the return from
        // stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended
        // events immediately preceding the current reference base).

        /**
         * @param read the read whose reference traversal this object will track
         */
        public SAMRecordState(SAMRecord read) {
            this.read = read;
            cigar = read.getCigar();
            nCigarElements = cigar.numCigarElements();
        }

        /** @return the read being tracked */
        public SAMRecord getRead() {
            return read;
        }

        /**
         * What is our current offset in the read's bases that aligns us with the reference genome?
         *
         * @return current offset into the read's bases (-1 before the first step)
         */
        public int getReadOffset() {
            return readOffset;
        }

        /**
         * What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
         *
         * @return current offset from the read's alignment start on the genome (-1 before the first step)
         */
        public int getGenomeOffset() {
            return genomeOffset;
        }

        /** @return the absolute genome position currently pointed at (alignment start + genome offset) */
        public int getGenomePosition() {
            return read.getAlignmentStart() + getGenomeOffset();
        }

        /** @return a single-base GenomeLoc for the current genome position on the read's contig */
        public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
            return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
        }

        /** @return the operator of the CIGAR element currently being traversed */
        public CigarOperator getCurrentCigarOperator() {
            return curElement.getOperator();
        }

        /** @return a debug summary of this state's offsets and current CIGAR element */
        public String toString() {
            return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
        }

        /**
         * @return the next CIGAR element if the current one is about to be exhausted,
         *         otherwise the current element
         */
        public CigarElement peekForwardOnGenome() {
            return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement );
        }

        /**
         * @return the previous CIGAR element if we are on the first base of the current one,
         *         otherwise the current element
         */
        public CigarElement peekBackwardOnGenome() {
            return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 > 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement );
        }


        /**
         * Advance the state by exactly one reference base.
         *
         * @return the CIGAR operator covering the new reference base, or null once the
         *         traversal has stepped past the end of the read
         * @throws UserException.MalformedBAM if the read starts or ends with a deletion
         */
        public CigarOperator stepForwardOnGenome() {
            // we enter this method with readOffset = index of the last processed base on the read
            // (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion

            if (curElement == null || ++cigarElementCounter > curElement.getLength()) {
                cigarOffset++;
                if (cigarOffset < nCigarElements) {
                    curElement = cigar.getCigarElement(cigarOffset);
                    cigarElementCounter = 0;
                    // next line: guards against cigar elements of length 0; when new cigar element is retrieved,
                    // we reenter in order to re-check cigarElementCounter against curElement's length
                    return stepForwardOnGenome();
                } else {
                    if (curElement != null && curElement.getOperator() == CigarOperator.D)
                        throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");

                    // Reads that contain indels model the genomeOffset as the following base in the reference. Because
                    // we fall into this else block only when indels end the read, increment genomeOffset such that the
                    // current offset of this read is the next ref base after the end of the indel. This position will
                    // model a point on the reference somewhere after the end of the read.
                    genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
                    // we do step forward on the ref, and by returning null we also indicate that we are past the read end.

                    return null;
                }
            }

            boolean done = false;
            switch (curElement.getOperator()) {
                case H: // ignore hard clips
                case P: // ignore pads
                    cigarElementCounter = curElement.getLength();
                    break;
                case I: // insertion w.r.t. the reference
                case S: // soft clip
                    cigarElementCounter = curElement.getLength();
                    readOffset += curElement.getLength();
                    break;
                case D: // deletion w.r.t. the reference
                    if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
                        throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
                    // should be the same as N case
                    genomeOffset++;
                    done = true;
                    break;
                case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
                    genomeOffset++;
                    done = true;
                    break;
                case M:
                case EQ:
                case X:
                    readOffset++;
                    genomeOffset++;
                    done = true;
                    break;
                default:
                    throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
            }

            // H/P/I/S do not consume a reference base, so recurse until one is consumed
            return done ? curElement.getOperator() : stepForwardOnGenome();
        }
    }
|
||||
|
||||
//final boolean DEBUG = false;
|
||||
//final boolean DEBUG2 = false && DEBUG;
|
||||
private ReadProperties readInfo;
|
||||
private AlignmentContext nextAlignmentContext;
|
||||
private boolean performLevelingDownsampling;
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors and other basic operations
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
    /**
     * Construct a locus iterator over the given read stream.
     *
     * @param samIterator     coordinate-sorted read stream to traverse
     * @param readInformation read-source properties, including the downsampling method
     * @param genomeLocParser used to create GenomeLocs for each locus
     * @param samples         the samples whose reads appear in the stream; may be empty
     *                        only when the stream itself is empty
     * @throws IllegalArgumentException if samples is empty while reads are present
     */
    public LocusIteratorByStateExperimental(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
        this.readInfo = readInformation;
        this.genomeLocParser = genomeLocParser;
        this.samples = new ArrayList<String>(samples);
        this.readStates = new ReadStateManager(samIterator);

        // leveling downsampling applies only to per-sample, to-coverage downsampling requests
        this.performLevelingDownsampling = readInfo.getDownsamplingMethod() != null &&
                                           readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
                                           readInfo.getDownsamplingMethod().toCoverage != null;

        // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
        // there's no read data. So we need to throw this error only when samIterator.hasNext() is true
        if (this.samples.isEmpty() && samIterator.hasNext()) {
            throw new IllegalArgumentException("samples list must not be empty");
        }
    }
|
||||
|
||||
/**
|
||||
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
|
||||
* for the system.
|
||||
*/
|
||||
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
|
||||
List<String> samples = new ArrayList<String>();
|
||||
samples.add(null);
|
||||
return samples;
|
||||
}
|
||||
|
||||
    /**
     * This object serves as its own iterator.
     *
     * @return this
     */
    public Iterator<AlignmentContext> iterator() {
        return this;
    }
|
||||
|
||||
    /**
     * No-op: this iterator does not own the underlying read stream and so does not close it.
     */
    public void close() {
    }
|
||||
|
||||
    /**
     * Lazily advances to the next alignment context, if needed.
     *
     * @return true if another alignment context is available
     */
    public boolean hasNext() {
        lazyLoadNextAlignmentContext();
        return (nextAlignmentContext != null);
    }
|
||||
|
||||
    /**
     * @return the single-base location of the first tracked read state, or null when no
     *         read states are buffered
     */
    private GenomeLoc getLocation() {
        return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
    }
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// next() routine and associated collection operations
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
public AlignmentContext next() {
|
||||
lazyLoadNextAlignmentContext();
|
||||
if (!hasNext())
|
||||
throw new NoSuchElementException("LocusIteratorByState: out of elements.");
|
||||
AlignmentContext currentAlignmentContext = nextAlignmentContext;
|
||||
nextAlignmentContext = null;
|
||||
return currentAlignmentContext;
|
||||
}
|
||||
|
||||
    /**
     * Creates the next alignment context from the given state. Note that this is implemented as a lazy load method.
     * nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
     *
     * Builds one per-sample pileup over the current locus from the buffered read states,
     * then steps every read state forward one reference base. Loci covered only by
     * deletions/skips (empty pileups) are passed over silently by the enclosing loop.
     */
    private void lazyLoadNextAlignmentContext() {
        while (nextAlignmentContext == null && readStates.hasNext()) {
            readStates.collectPendingReads();

            final GenomeLoc location = getLocation();
            final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();

            // TODO: How can you determine here whether the current pileup has been downsampled?
            boolean hasBeenSampled = false;

            for (final String sample : samples) {
                final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
                final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));

                int size = 0;       // number of elements in this sample's pileup
                int nDeletions = 0; // number of deletions in this sample's pileup
                int nMQ0Reads = 0;  // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)

                while (iterator.hasNext()) {
                    final SAMRecordState state = iterator.next();                   // state object with the read/offset information
                    final GATKSAMRecord read = (GATKSAMRecord) state.getRead();     // the actual read
                    final CigarOperator op = state.getCurrentCigarOperator();       // current cigar operator
                    final CigarElement nextElement = state.peekForwardOnGenome();   // next cigar element
                    final CigarElement lastElement = state.peekBackwardOnGenome();  // last cigar element
                    final boolean isSingleElementCigar = nextElement == lastElement;
                    final CigarOperator nextOp = nextElement.getOperator();         // next cigar operator
                    final CigarOperator lastOp = lastElement.getOperator();         // last cigar operator
                    int readOffset = state.getReadOffset();                         // the base offset on this read

                    // positional context flags passed through to the PileupElement
                    final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
                    final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
                    final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
                    final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION && !isSingleElementCigar;
                    final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());

                    int nextElementLength = nextElement.getLength();

                    if (op == CigarOperator.N) // N's are never added to any pileup
                        continue;

                    if (op == CigarOperator.D) {
                        // TODO -- LIBS is totally busted for deletions so that reads with Ds right before Is in their CIGAR are broken; must fix
                        if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
                            pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
                            size++;
                            nDeletions++;
                            if (read.getMappingQuality() == 0)
                                nMQ0Reads++;
                        }
                    }
                    else {
                        if (!filterBaseInRead(read, location.getStart())) {
                            String insertedBaseString = null;
                            if (nextOp == CigarOperator.I) {
                                final int insertionOffset = isSingleElementCigar ? 0 : 1;
                                // TODO -- someone please implement a better fix for the single element insertion CIGAR!
                                if (isSingleElementCigar)
                                    readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases!
                                insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength()));
                            }

                            pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
                            size++;
                            if (read.getMappingQuality() == 0)
                                nMQ0Reads++;
                        }
                    }
                }

                if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
                    fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
            }

            updateReadStates(); // critical - must be called after we get the current state offsets and location
            if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
                nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
        }
    }
|
||||
|
||||
// fast testing of position
|
||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||
if (readStates.isEmpty())
|
||||
return false;
|
||||
else {
|
||||
SAMRecordState state = readStates.getFirst();
|
||||
SAMRecord ourRead = state.getRead();
|
||||
return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition();
|
||||
}
|
||||
}
|
||||
|
||||
    /**
     * Generic place to put per-base filters appropriate to LocusIteratorByState.
     *
     * Currently excludes bases that fall inside the read's adaptor sequence.
     *
     * @param rec the read under consideration
     * @param pos the genomic position of the base
     * @return true if the base at pos should be excluded from the pileup
     */
    private static boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
        return ReadUtils.isBaseInsideAdaptor(rec, pos);
    }
|
||||
|
||||
    /**
     * Step every buffered read state forward one reference base, discarding states that
     * have walked off the end of their read. Must be called after the current locus'
     * offsets and location have been consumed.
     */
    private void updateReadStates() {
        for (final String sample : samples) {
            Iterator<SAMRecordState> it = readStates.iterator(sample);
            while (it.hasNext()) {
                SAMRecordState state = it.next();
                CigarOperator op = state.stepForwardOnGenome();
                if (op == null) {
                    // we discard the read only when we are past its end AND indel at the end of the read (if any) was
                    // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
                    // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
                    it.remove(); // we've stepped off the end of the object
                }
            }
        }
    }
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
protected class ReadStateManager {
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
|
||||
private int totalReadStates = 0;
|
||||
|
||||
public ReadStateManager(Iterator<SAMRecord> source) {
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager());
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented
|
||||
* for this iterator; if present, total read states will be decremented.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Iterator over the reads associated with that sample.
|
||||
*/
|
||||
public Iterator<SAMRecordState> iterator(final String sample) {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
|
||||
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecordState next() {
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
wrappedIterator.remove();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return totalReadStates == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager across all samples.
|
||||
*
|
||||
* @return Total number of reads over all samples.
|
||||
*/
|
||||
public int size() {
|
||||
return totalReadStates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager in the given sample.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Total number of reads in the given sample.
|
||||
*/
|
||||
public int size(final String sample) {
|
||||
return readStatesBySample.get(sample).size();
|
||||
}
|
||||
|
||||
public SAMRecordState getFirst() {
|
||||
for (final String sample : samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
if (!reads.isEmpty())
|
||||
return reads.peek();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return totalReadStates > 0 || iterator.hasNext();
|
||||
}
|
||||
|
||||
public void collectPendingReads() {
|
||||
if (!iterator.hasNext())
|
||||
return;
|
||||
|
||||
if (readStates.size() == 0) {
|
||||
int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||
int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||
while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||
samplePartitioner.submitRead(iterator.next());
|
||||
}
|
||||
} else {
|
||||
// Fast fail in the case that the read is past the current position.
|
||||
if (readIsPastCurrentPosition(iterator.peek()))
|
||||
return;
|
||||
|
||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||
samplePartitioner.submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
|
||||
for (final String sample : samples) {
|
||||
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
addReadsToSample(statesBySample, newReads);
|
||||
}
|
||||
|
||||
samplePartitioner.reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add reads with the given sample name to the given hanger entry.
|
||||
*
|
||||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
*/
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
|
||||
if (reads.isEmpty())
|
||||
return;
|
||||
|
||||
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
|
||||
|
||||
for (SAMRecord read : reads) {
|
||||
SAMRecordState state = new SAMRecordState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
}
|
||||
|
||||
readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||
}
|
||||
|
||||
protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
|
||||
private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
|
||||
private int thisSampleReadStates = 0;
|
||||
private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
|
||||
performLevelingDownsampling ?
|
||||
new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
|
||||
null;
|
||||
|
||||
public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
|
||||
if ( states.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
|
||||
thisSampleReadStates += states.size();
|
||||
totalReadStates += states.size();
|
||||
|
||||
if ( levelingDownsampler != null ) {
|
||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||
levelingDownsampler.signalEndOfInput();
|
||||
|
||||
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
|
||||
// use returned List directly rather than make a copy, for efficiency's sake
|
||||
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
|
||||
levelingDownsampler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return readStatesByAlignmentStart.isEmpty();
|
||||
}
|
||||
|
||||
public SAMRecordState peek() {
|
||||
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return thisSampleReadStates;
|
||||
}
|
||||
|
||||
public Iterator<SAMRecordState> iterator() {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||
private LinkedList<SAMRecordState> currentPositionReadStates = null;
|
||||
private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
|
||||
|
||||
public boolean hasNext() {
|
||||
return alignmentStartIterator.hasNext() ||
|
||||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
|
||||
}
|
||||
|
||||
public SAMRecordState next() {
|
||||
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
|
||||
currentPositionReadStates = alignmentStartIterator.next();
|
||||
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
|
||||
}
|
||||
|
||||
return currentPositionReadStatesIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
currentPositionReadStatesIterator.remove();
|
||||
thisSampleReadStates--;
|
||||
totalReadStates--;
|
||||
|
||||
if ( currentPositionReadStates.isEmpty() ) {
|
||||
alignmentStartIterator.remove();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
*/
|
||||
private class SamplePartitioner {
|
||||
private Map<String, Collection<SAMRecord>> readsBySample;
|
||||
private long readsSeen = 0;
|
||||
|
||||
public SamplePartitioner() {
|
||||
readsBySample = new HashMap<String, Collection<SAMRecord>>();
|
||||
|
||||
for ( String sample : samples ) {
|
||||
readsBySample.put(sample, new ArrayList<SAMRecord>());
|
||||
}
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).add(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getReadsForSample(String sampleName) {
|
||||
if ( ! readsBySample.containsKey(sampleName) )
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
return readsBySample.get(sampleName);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for ( Collection<SAMRecord> perSampleReads : readsBySample.values() )
|
||||
perSampleReads.clear();
|
||||
readsSeen = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,144 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Baseclass used to describe a read transformer like BAQ and BQSR
|
||||
*
|
||||
* Read transformers are plugable infrastructure that modify read state
|
||||
* either on input, on output, or within walkers themselves.
|
||||
*
|
||||
* The function apply() is called on each read seen by the GATK (after passing
|
||||
* all ReadFilters) and it can do as it sees fit (without modifying the alignment)
|
||||
* to the read to change qualities, add tags, etc.
|
||||
*
|
||||
* Initialize is called once right before the GATK traversal begins providing
|
||||
* the ReadTransformer with the ability to collect and initialize data from the
|
||||
* engine.
|
||||
*
|
||||
* Note that all ReadTransformers within the classpath are created and initialized. If one
|
||||
* shouldn't be run it should look at the command line options of the engine and override
|
||||
* the enabled.
|
||||
*
|
||||
* @since 8/31/12
|
||||
* @author depristo
|
||||
*/
|
||||
abstract public class ReadTransformer {
|
||||
/**
|
||||
* When should this read transform be applied?
|
||||
*/
|
||||
private ApplicationTime applicationTime;
|
||||
|
||||
/**
|
||||
* Keep track of whether we've been initialized already, and ensure it's not called more than once.
|
||||
*/
|
||||
private boolean initialized = false;
|
||||
|
||||
protected ReadTransformer() {}
|
||||
|
||||
/**
|
||||
* Master initialization routine. Called to setup a ReadTransform, using it's overloaded initialialSub routine.
|
||||
*
|
||||
* @param overrideTime if not null, we will run this ReadTransform at the time provided, regardless of the timing of this read transformer itself
|
||||
* @param engine the engine, for initializing values
|
||||
* @param walker the walker we intend to run
|
||||
*/
|
||||
@Requires({"initialized == false", "engine != null", "walker != null"})
|
||||
@Ensures("initialized == true")
|
||||
public final void initialize(final ApplicationTime overrideTime, final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
if ( engine == null ) throw new IllegalArgumentException("engine cannot be null");
|
||||
if ( walker == null ) throw new IllegalArgumentException("walker cannot be null");
|
||||
|
||||
this.applicationTime = initializeSub(engine, walker);
|
||||
if ( overrideTime != null ) this.applicationTime = overrideTime;
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses must override this to initialize themeselves
|
||||
*
|
||||
* @param engine the engine, for initializing values
|
||||
* @param walker the walker we intend to run
|
||||
* @return the point of time we'd like this read transform to be run
|
||||
*/
|
||||
@Requires({"engine != null", "walker != null"})
|
||||
@Ensures("result != null")
|
||||
protected abstract ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker);
|
||||
|
||||
/**
|
||||
* Should this ReadTransformer be activated? Called after initialize, which allows this
|
||||
* read transformer to look at its arguments and decide if it should be active. All
|
||||
* ReadTransformers must override this, as by default they are not enabled.
|
||||
*
|
||||
* @return true if this ReadTransformer should be used on the read stream
|
||||
*/
|
||||
public boolean enabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Has this transformer been initialized?
|
||||
*
|
||||
* @return true if it has
|
||||
*/
|
||||
public final boolean isInitialized() {
|
||||
return initialized;
|
||||
}
|
||||
|
||||
/**
|
||||
* When should we apply this read transformer?
|
||||
*
|
||||
* @return true if yes
|
||||
*/
|
||||
public final ApplicationTime getApplicationTime() {
|
||||
return applicationTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Primary interface function for a read transform to actually do some work
|
||||
*
|
||||
* The function apply() is called on each read seen by the GATK (after passing
|
||||
* all ReadFilters) and it can do as it sees fit (without modifying the alignment)
|
||||
* to the read to change qualities, add tags, etc.
|
||||
*
|
||||
* @param read the read to transform
|
||||
* @return the transformed read
|
||||
*/
|
||||
@Requires("read != null")
|
||||
@Ensures("result != null")
|
||||
abstract public GATKSAMRecord apply(final GATKSAMRecord read);
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName();
|
||||
}
|
||||
|
||||
/**
|
||||
* When should a read transformer be applied?
|
||||
*/
|
||||
public static enum ApplicationTime {
|
||||
/**
|
||||
* Walker does not tolerate this read transformer
|
||||
*/
|
||||
FORBIDDEN,
|
||||
|
||||
/**
|
||||
* apply the transformation to the incoming reads, the default
|
||||
*/
|
||||
ON_INPUT,
|
||||
|
||||
/**
|
||||
* apply the transformation to the outgoing read stream
|
||||
*/
|
||||
ON_OUTPUT,
|
||||
|
||||
/**
|
||||
* the walker will deal with the calculation itself
|
||||
*/
|
||||
HANDLED_IN_WALKER
|
||||
}
|
||||
}
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 1:51:22 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allows the walker to indicate what type of data it wants to consume.
|
||||
*/
|
||||
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface ReadTransformersMode {
|
||||
public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
|
||||
* A class for imposing a trio structure on three samples; a common paradigm
|
||||
*
|
||||
* todo -- there should probably be an interface or abstract class "Pedigree" that generalizes the notion of
|
||||
* -- imposing structure on samples. But given how complex pedigrees can quickly become, it's not
|
||||
* -- clear the best way to do this.
|
||||
*/
|
||||
public class Trio {
|
||||
private Sample mother;
|
||||
private Sample father;
|
||||
private Sample child;
|
||||
|
||||
public Trio(Sample mom, Sample dad, Sample spawn) {
|
||||
assert mom.getID().equals(spawn.getMaternalID()) && dad.getID().equals(spawn.getPaternalID()) : "Samples passed to trio constructor do not form a trio";
|
||||
mother = mom;
|
||||
father = dad;
|
||||
child = spawn;
|
||||
}
|
||||
|
||||
public Sample getMother() {
|
||||
return mother;
|
||||
}
|
||||
|
||||
public String getMaternalID() {
|
||||
return mother.getID();
|
||||
}
|
||||
|
||||
public Sample getFather() {
|
||||
return father;
|
||||
}
|
||||
|
||||
public String getPaternalID() {
|
||||
return father.getID();
|
||||
}
|
||||
|
||||
public Sample getChild() {
|
||||
return child;
|
||||
}
|
||||
|
||||
public String getChildID() {
|
||||
return child.getID();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
||||
/**
|
||||
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||
*/
|
||||
public abstract class TraverseLociBase<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
@Override
|
||||
protected final String getTraversalType() {
|
||||
return "sites";
|
||||
}
|
||||
|
||||
protected static class TraverseResults<T> {
|
||||
final int numIterations;
|
||||
final T reduceResult;
|
||||
|
||||
public TraverseResults(int numIterations, T reduceResult) {
|
||||
this.numIterations = numIterations;
|
||||
this.reduceResult = reduceResult;
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract TraverseResults<T> traverse( final LocusWalker<M,T> walker,
|
||||
final LocusView locusView,
|
||||
final LocusReferenceView referenceView,
|
||||
final ReferenceOrderedView referenceOrderedDataView,
|
||||
final T sum);
|
||||
|
||||
@Override
|
||||
public T traverse( LocusWalker<M,T> walker,
|
||||
LocusShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseLociBase.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final LocusView locusView = getLocusView( walker, dataProvider );
|
||||
|
||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = null;
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
referenceOrderedDataView = (RodLocusView)locusView;
|
||||
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
|
||||
final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
|
||||
sum = result.reduceResult;
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
}
|
||||
|
||||
// We have a final map call to execute here to clean up the skipped based from the
|
||||
// last position in the ROD to that in the interval
|
||||
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
|
||||
// only do this if the walker isn't done!
|
||||
final RodLocusView rodLocusView = (RodLocusView)locusView;
|
||||
final long nSkipped = rodLocusView.getLastSkippedBases();
|
||||
if ( nSkipped > 0 ) {
|
||||
final GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
||||
final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
|
||||
final M x = walker.map(null, null, ac);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the best view of loci for this walker given the available data. The view will function as a 'trigger track'
|
||||
* of sorts, providing a consistent interface so that TraverseLociBase doesn't need to be reimplemented for any new datatype
|
||||
* that comes along.
|
||||
* @param walker walker to interrogate.
|
||||
* @param dataProvider Data which which to drive the locus view.
|
||||
* @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
|
||||
*/
|
||||
private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
|
||||
final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||
if( dataSource == DataSource.READS )
|
||||
return new CoveredLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
|
||||
return new AllLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new RodLocusView(dataProvider);
|
||||
else
|
||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||
*/
|
||||
public class TraverseLociLinear<M,T> extends TraverseLociBase<M,T> {
|
||||
|
||||
@Override
|
||||
protected TraverseResults<T> traverse(LocusWalker<M, T> walker, LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView, T sum) {
|
||||
// We keep processing while the next reference location is within the interval
|
||||
boolean done = false;
|
||||
int numIterations = 0;
|
||||
|
||||
while( locusView.hasNext() && ! done ) {
|
||||
numIterations++;
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
final boolean keepMeP = walker.filter(tracker, refContext, locus);
|
||||
if (keepMeP) {
|
||||
final M x = walker.map(tracker, refContext, locus);
|
||||
sum = walker.reduce(x, sum);
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
return new TraverseResults<T>(numIterations, sum);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,205 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NSProgressFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||
*/
|
||||
public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static final boolean DEBUG = false;
|
||||
private static final int BUFFER_SIZE = 1000;
|
||||
|
||||
final NanoScheduler<MapData, MapResult, T> nanoScheduler;
|
||||
|
||||
public TraverseLociNano(int nThreads) {
|
||||
nanoScheduler = new NanoScheduler<MapData, MapResult, T>(BUFFER_SIZE, nThreads);
|
||||
nanoScheduler.setProgressFunction(new TraverseLociProgress());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TraverseResults<T> traverse(final LocusWalker<M, T> walker,
|
||||
final LocusView locusView,
|
||||
final LocusReferenceView referenceView,
|
||||
final ReferenceOrderedView referenceOrderedDataView,
|
||||
final T sum) {
|
||||
nanoScheduler.setDebug(DEBUG);
|
||||
final TraverseLociMap myMap = new TraverseLociMap(walker);
|
||||
final TraverseLociReduce myReduce = new TraverseLociReduce(walker);
|
||||
|
||||
final MapDataIterator inputIterator = new MapDataIterator(locusView, referenceView, referenceOrderedDataView);
|
||||
final T result = nanoScheduler.execute(inputIterator, myMap, sum, myReduce);
|
||||
|
||||
return new TraverseResults<T>(inputIterator.numIterations, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create iterator that provides inputs for all map calls into MapData, to be provided
|
||||
* to NanoScheduler for Map/Reduce
|
||||
*/
|
||||
private class MapDataIterator implements Iterator<MapData> {
|
||||
final LocusView locusView;
|
||||
final LocusReferenceView referenceView;
|
||||
final ReferenceOrderedView referenceOrderedDataView;
|
||||
int numIterations = 0;
|
||||
|
||||
private MapDataIterator(LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView) {
|
||||
this.locusView = locusView;
|
||||
this.referenceView = referenceView;
|
||||
this.referenceOrderedDataView = referenceOrderedDataView;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return locusView.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MapData next() {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
//logger.info("Pulling data from MapDataIterator at " + location);
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location, refContext);
|
||||
|
||||
numIterations++;
|
||||
return new MapData(locus, refContext, tracker);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Cannot remove elements from MapDataIterator");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void printOnTraversalDone() {
|
||||
nanoScheduler.shutdown();
|
||||
super.printOnTraversalDone();
|
||||
}
|
||||
|
||||
/**
|
||||
* The input data needed for each map call. The read, the reference, and the RODs
|
||||
*/
|
||||
private class MapData {
|
||||
final AlignmentContext alignmentContext;
|
||||
final ReferenceContext refContext;
|
||||
final RefMetaDataTracker tracker;
|
||||
|
||||
private MapData(final AlignmentContext alignmentContext, ReferenceContext refContext, RefMetaDataTracker tracker) {
|
||||
this.alignmentContext = alignmentContext;
|
||||
this.refContext = refContext;
|
||||
this.tracker = tracker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "MapData " + alignmentContext.getLocation();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Contains the results of a map call, indicating whether the call was good, filtered, or done
|
||||
*/
|
||||
private class MapResult {
|
||||
final M value;
|
||||
final boolean reduceMe;
|
||||
|
||||
/**
|
||||
* Create a MapResult with value that should be reduced
|
||||
*
|
||||
* @param value the value to reduce
|
||||
*/
|
||||
private MapResult(final M value) {
|
||||
this.value = value;
|
||||
this.reduceMe = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a MapResult that shouldn't be reduced
|
||||
*/
|
||||
private MapResult() {
|
||||
this.value = null;
|
||||
this.reduceMe = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A static object that tells reduce that the result of map should be skipped (filtered or done)
|
||||
*/
|
||||
private final MapResult SKIP_REDUCE = new MapResult();
|
||||
|
||||
/**
|
||||
* MapFunction for TraverseReads meeting NanoScheduler interface requirements
|
||||
*
|
||||
* Applies walker.map to MapData, returning a MapResult object containing the result
|
||||
*/
|
||||
private class TraverseLociMap implements NSMapFunction<MapData, MapResult> {
|
||||
final LocusWalker<M,T> walker;
|
||||
|
||||
private TraverseLociMap(LocusWalker<M, T> walker) {
|
||||
this.walker = walker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MapResult apply(final MapData data) {
|
||||
if ( ! walker.isDone() ) {
|
||||
final boolean keepMeP = walker.filter(data.tracker, data.refContext, data.alignmentContext);
|
||||
if (keepMeP) {
|
||||
final M x = walker.map(data.tracker, data.refContext, data.alignmentContext);
|
||||
return new MapResult(x);
|
||||
}
|
||||
}
|
||||
return SKIP_REDUCE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* NSReduceFunction for TraverseReads meeting NanoScheduler interface requirements
|
||||
*
|
||||
* Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
|
||||
*/
|
||||
private class TraverseLociReduce implements NSReduceFunction<MapResult, T> {
|
||||
final LocusWalker<M,T> walker;
|
||||
|
||||
private TraverseLociReduce(LocusWalker<M, T> walker) {
|
||||
this.walker = walker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T apply(MapResult one, T sum) {
|
||||
if ( one.reduceMe )
|
||||
// only run reduce on values that aren't DONE or FAILED
|
||||
return walker.reduce(one.value, sum);
|
||||
else
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
private class TraverseLociProgress implements NSProgressFunction<MapData> {
|
||||
@Override
|
||||
public void progress(MapData lastProcessedMap) {
|
||||
if (lastProcessedMap.alignmentContext != null)
|
||||
printProgress(lastProcessedMap.alignmentContext.getLocation());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,234 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A nano-scheduling version of TraverseReads.
|
||||
*
|
||||
* Implements the traversal of a walker that accepts individual reads, the reference, and
|
||||
* RODs per map call. Directly supports shared memory parallelism via NanoScheduler
|
||||
*
|
||||
* @author depristo
|
||||
* @version 1.0
|
||||
* @date 9/2/2012
|
||||
*/
|
||||
public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
|
||||
private static final boolean DEBUG = false;
|
||||
final NanoScheduler<MapData, MapResult, T> nanoScheduler;
|
||||
|
||||
public TraverseReadsNano(int nThreads) {
|
||||
final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
|
||||
nanoScheduler = new NanoScheduler<MapData, MapResult, T>(bufferSize, nThreads);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getTraversalType() {
|
||||
return "reads";
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverse by reads, given the data and the walker
|
||||
*
|
||||
* @param walker the walker to traverse with
|
||||
* @param dataProvider the provider of the reads data
|
||||
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
|
||||
* @return the reduce variable of the read walker
|
||||
*/
|
||||
public T traverse(ReadWalker<M,T> walker,
|
||||
ReadShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseReadsNano.traverse Covered dataset is %s", dataProvider));
|
||||
|
||||
if( !dataProvider.hasReads() )
|
||||
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
|
||||
|
||||
nanoScheduler.setDebug(DEBUG);
|
||||
final TraverseReadsMap myMap = new TraverseReadsMap(walker);
|
||||
final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
|
||||
|
||||
final List<MapData> aggregatedInputs = aggregateMapData(dataProvider);
|
||||
final T result = nanoScheduler.execute(aggregatedInputs.iterator(), myMap, sum, myReduce);
|
||||
|
||||
final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read;
|
||||
final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
printProgress(locus);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate all of the inputs for all map calls into MapData, to be provided
|
||||
* to NanoScheduler for Map/Reduce
|
||||
*
|
||||
* @param dataProvider the source of our data
|
||||
* @return a linked list of MapData objects holding the read, ref, and ROD info for every map/reduce
|
||||
* should execute
|
||||
*/
|
||||
private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
|
||||
final ReadView reads = new ReadView(dataProvider);
|
||||
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
||||
|
||||
final List<MapData> mapData = new LinkedList<MapData>();
|
||||
for ( final SAMRecord read : reads ) {
|
||||
final ReferenceContext refContext = ! read.getReadUnmappedFlag()
|
||||
? reference.getReferenceContext(read)
|
||||
: null;
|
||||
|
||||
// if the read is mapped, create a metadata tracker
|
||||
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0
|
||||
? rodView.getReferenceOrderedDataForRead(read)
|
||||
: null;
|
||||
|
||||
// update the number of reads we've seen
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker));
|
||||
}
|
||||
|
||||
return mapData;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void printOnTraversalDone() {
|
||||
nanoScheduler.shutdown();
|
||||
super.printOnTraversalDone();
|
||||
}
|
||||
|
||||
/**
|
||||
* The input data needed for each map call. The read, the reference, and the RODs
|
||||
*/
|
||||
private class MapData {
|
||||
final GATKSAMRecord read;
|
||||
final ReferenceContext refContext;
|
||||
final RefMetaDataTracker tracker;
|
||||
|
||||
private MapData(GATKSAMRecord read, ReferenceContext refContext, RefMetaDataTracker tracker) {
|
||||
this.read = read;
|
||||
this.refContext = refContext;
|
||||
this.tracker = tracker;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Contains the results of a map call, indicating whether the call was good, filtered, or done
|
||||
*/
|
||||
private class MapResult {
|
||||
final M value;
|
||||
final boolean reduceMe;
|
||||
|
||||
/**
|
||||
* Create a MapResult with value that should be reduced
|
||||
*
|
||||
* @param value the value to reduce
|
||||
*/
|
||||
private MapResult(final M value) {
|
||||
this.value = value;
|
||||
this.reduceMe = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a MapResult that shouldn't be reduced
|
||||
*/
|
||||
private MapResult() {
|
||||
this.value = null;
|
||||
this.reduceMe = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A static object that tells reduce that the result of map should be skipped (filtered or done)
|
||||
*/
|
||||
private final MapResult SKIP_REDUCE = new MapResult();
|
||||
|
||||
/**
|
||||
* MapFunction for TraverseReads meeting NanoScheduler interface requirements
|
||||
*
|
||||
* Applies walker.map to MapData, returning a MapResult object containing the result
|
||||
*/
|
||||
private class TraverseReadsMap implements NSMapFunction<MapData, MapResult> {
|
||||
final ReadWalker<M,T> walker;
|
||||
|
||||
private TraverseReadsMap(ReadWalker<M, T> walker) {
|
||||
this.walker = walker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MapResult apply(final MapData data) {
|
||||
if ( ! walker.isDone() ) {
|
||||
final boolean keepMeP = walker.filter(data.refContext, data.read);
|
||||
if (keepMeP)
|
||||
return new MapResult(walker.map(data.refContext, data.read, data.tracker));
|
||||
}
|
||||
|
||||
return SKIP_REDUCE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* NSReduceFunction for TraverseReads meeting NanoScheduler interface requirements
|
||||
*
|
||||
* Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
|
||||
*/
|
||||
private class TraverseReadsReduce implements NSReduceFunction<MapResult, T> {
|
||||
final ReadWalker<M,T> walker;
|
||||
|
||||
private TraverseReadsReduce(ReadWalker<M, T> walker) {
|
||||
this.walker = walker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T apply(MapResult one, T sum) {
|
||||
if ( one.reduceMe )
|
||||
// only run reduce on values that aren't DONE or FAILED
|
||||
return walker.reduce(one.value, sum);
|
||||
else
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
/**
 * Root parallelism interface. Walkers that implement this
 * declare that their map function is thread-safe and so multiple
 * map calls can be run in parallel in the same JVM instance.
 *
 * This is a pure marker interface: it declares no methods, and is
 * checked by the engine (via instanceof) to decide whether a walker
 * may be driven by a multi-threaded traversal.
 */
public interface NanoSchedulable {
}
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* Generates an alternative reference sequence over the specified interval.
|
||||
*
|
||||
* <p>
|
||||
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
|
||||
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
|
||||
* Several important notes:
|
||||
 * 1) if there are multiple variants that start at a site, the first unfiltered one encountered is used.
|
||||
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
|
||||
* 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
|
||||
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The reference, requested intervals, and any number of variant rod files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A fasta file representing the requested intervals.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T FastaAlternateReferenceMaker \
|
||||
* -o output.fasta \
|
||||
* -L input.intervals \
|
||||
* --variant input.vcf \
|
||||
* [--snpmask mask.vcf]
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Companion Utilities", extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-1,stop=50))
@Requires(value={DataSource.REFERENCE})
public class FastaAlternateReferenceMaker extends FastaReferenceMaker {

    /**
     * Variants from these input files are used by this tool to construct an alternate reference.
     */
    @Input(fullName = "variant", shortName = "V", doc="variants to model", required=false)
    public List<RodBinding<VariantContext>> variants = Collections.emptyList();

    /**
     * Snps from this file are used as a mask when constructing the alternate reference.
     */
    @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
    public RodBinding<VariantContext> snpmask;

    // number of upcoming reference bases to suppress because a simple deletion
    // was seen at an earlier locus; carried as state across map() calls, which
    // relies on map() being invoked on consecutive loci in order
    private int deletionBasesRemaining = 0;

    /**
     * For the current locus, returns the string of bases that should appear in the
     * alternate reference: "" while consuming a previously-seen deletion, the alt
     * allele for a SNP or insertion, "N" for a masked site, or the reference base.
     *
     * @param tracker bound reference-ordered data at this locus
     * @param ref     the reference context (provides the base and locus)
     * @param context the alignment context (provides the location)
     * @return pair of this locus's location and its replacement sequence
     */
    public Pair<GenomeLoc, String> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

        // we are inside a deletion started at an earlier locus: emit nothing here
        if (deletionBasesRemaining > 0) {
            deletionBasesRemaining--;
            return new Pair<GenomeLoc, String>(context.getLocation(), "");
        }

        String refBase = String.valueOf((char)ref.getBase());

        // Check to see if we have a called snp
        for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
            if ( vc.isFiltered() )
                continue;

            if ( vc.isSimpleDeletion()) {
                // arm the counter so the following ref-allele bases are dropped
                deletionBasesRemaining = vc.getReference().length() - 1;
                // delete the next n bases, not this one
                return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
            } else if ( vc.isSimpleInsertion()) {
                return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
            } else if (vc.isSNP()) {
                return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
            }
        }

        // if we don't have a called site, and we have a mask at this site, mask it
        for ( VariantContext vc : tracker.getValues(snpmask) ) {
            if ( vc.isSNP()) {
                return new Pair<GenomeLoc, String>(context.getLocation(), "N");
            }
        }

        // if we got here then we're just ref
        return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
    }
}
|
||||
|
|
@ -1,127 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Renders a new reference in FASTA format consisting of only those loci provided in the input data set.
|
||||
*
|
||||
* <p>
|
||||
* The output format can be partially controlled using the provided command-line arguments.
|
||||
* Specify intervals with the usual -L argument to output only the reference bases within your intervals.
|
||||
* Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
|
||||
* separate fasta sequence (named numerically in order).
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The reference and requested intervals.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A fasta file representing the requested intervals.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T FastaReferenceMaker \
|
||||
* -o output.fasta \
|
||||
* -L input.intervals
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Companion Utilities", extraDocs = {CommandLineGATK.class} )
|
||||
public class FastaReferenceMaker extends RefWalker<Pair<GenomeLoc, String>, GenomeLoc> {
|
||||
|
||||
@Output PrintStream out;
|
||||
|
||||
@Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false)
|
||||
public int fastaLineWidth=60;
|
||||
|
||||
/**
|
||||
* Please note that when using this argument adjacent intervals will automatically be merged.
|
||||
*/
|
||||
@Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity)", required=false)
|
||||
public boolean fastaRawSeqs=false;
|
||||
|
||||
protected FastaSequence fasta;
|
||||
|
||||
public void initialize() {
|
||||
if (fastaRawSeqs) fastaLineWidth = Integer.MAX_VALUE;
|
||||
fasta = new FastaSequence(out, fastaLineWidth, fastaRawSeqs);
|
||||
}
|
||||
|
||||
public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), String.valueOf((char)ref.getBase()));
|
||||
}
|
||||
|
||||
public GenomeLoc reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public GenomeLoc reduce(Pair<GenomeLoc, String> value, GenomeLoc sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
// if there is no interval to the left, then this is the first one
|
||||
if ( sum == null ) {
|
||||
sum = value.first;
|
||||
fasta.append(value.second);
|
||||
}
|
||||
// if the intervals don't overlap, print out the leftmost one and start a new one
|
||||
// (end of contig or new interval)
|
||||
else if ( value.first.getStart() != sum.getStop() + 1 ) {
|
||||
fasta.flush();
|
||||
sum = value.first;
|
||||
fasta.append(value.second);
|
||||
}
|
||||
// otherwise, merge them
|
||||
else {
|
||||
sum = getToolkit().getGenomeLocParser().setStop(sum, value.first.getStop());
|
||||
fasta.append(value.second);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(GenomeLoc sum) {
|
||||
fasta.flush();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
 * Applies Heng's BAQ calculation to a stream of incoming reads
 */
public class BAQReadTransformer extends ReadTransformer {
    // the HMM that performs the BAQ computation
    private BAQ baqHMM;
    // reference sequence the BAQ calculation is computed against
    private IndexedFastaSequenceFile refReader;
    // how BAQ should be calculated; OFF means this transformer is disabled
    private BAQ.CalculationMode cmode;
    // how the computed BAQ values are applied to the read's base qualities
    private BAQ.QualityMode qmode;

    @Override
    public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
        // the walker's @BAQMode annotation supplies the quality mode and application time
        final BAQMode mode = WalkerManager.getWalkerAnnotation(walker, BAQMode.class);
        this.refReader = engine.getReferenceDataSource().getReference();
        // calculation mode comes from the command line arguments
        this.cmode = engine.getArguments().BAQMode;
        this.qmode = mode.QualityMode();
        baqHMM = new BAQ(engine.getArguments().BAQGOP);

        // a transformer that never modifies qualities should never have been constructed
        if ( qmode == BAQ.QualityMode.DONT_MODIFY )
            throw new ReviewedStingException("BUG: shouldn't create BAQ transformer with quality mode DONT_MODIFY");

        // user asked for BAQ but the walker forbids transformed qualities
        if ( mode.ApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN && enabled() )
            throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + cmode + " was requested.");

        return mode.ApplicationTime();
    }

    @Override
    public boolean enabled() {
        // active only when a BAQ calculation mode was requested
        return cmode != BAQ.CalculationMode.OFF;
    }

    @Override
    public GATKSAMRecord apply(final GATKSAMRecord read) {
        // runs the BAQ HMM on the read; the same read object is returned,
        // presumably updated in place by baqRead -- confirm against BAQ.baqRead
        baqHMM.baqRead(read, refReader, cmode, qmode);
        return read;
    }
}
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Iterator that applies a ReadTransformer to a stream of reads
|
||||
*/
|
||||
public class ReadTransformingIterator implements StingSAMIterator {
|
||||
private final StingSAMIterator it;
|
||||
private final ReadTransformer transformer;
|
||||
|
||||
/**
|
||||
* Creates a new ReadTransforming iterator
|
||||
*/
|
||||
@Requires({"it != null", "transformer != null", "transformer.isInitialized()"})
|
||||
public ReadTransformingIterator(final StingSAMIterator it, final ReadTransformer transformer) {
|
||||
if ( ! transformer.isInitialized() )
|
||||
throw new IllegalStateException("Creating a read transformer stream for an uninitialized read transformer: " + transformer);
|
||||
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN )
|
||||
throw new IllegalStateException("Creating a read transformer stream for a forbidden transformer " + transformer);
|
||||
|
||||
this.it = it;
|
||||
this.transformer = transformer;
|
||||
}
|
||||
|
||||
@Requires("hasNext()")
|
||||
@Ensures("result != null")
|
||||
public SAMRecord next() {
|
||||
final GATKSAMRecord read = (GATKSAMRecord)it.next();
|
||||
return transformer.apply(read);
|
||||
}
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
}
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import com.google.java.contract.Invariant;
|
||||
|
||||
/**
|
||||
* Wrapper to hold data for a blocking queue, distinguishing an EOF marker from a real object
|
||||
*
|
||||
* The only way to tell in a consumer thread that a blocking queue has no more data ever
|
||||
* coming down the pipe is to pass in a "poison" or EOF object. This class provides
|
||||
* a generic capacity for that...
|
||||
*
|
||||
* The use case looks like this:
|
||||
*
|
||||
* BlockingQueue q
|
||||
* producer:
|
||||
* while ( x has items )
|
||||
* q.put(new BlockingQueueValue(x))
|
||||
* q.put(new BlockingQueueValue())
|
||||
*
|
||||
* Consumer:
|
||||
* while ( true )
|
||||
* value = q.take()
|
||||
* if ( value.isLast() )
|
||||
* break
|
||||
* else
|
||||
* do something useful with value
|
||||
*
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 9/6/12
|
||||
* Time: 3:08 PM
|
||||
*/
|
||||
@Invariant("! isLast || value == null")
|
||||
class BlockingQueueValue<T> {
|
||||
/**
|
||||
* True if this is the EOF marker object
|
||||
*/
|
||||
final private boolean isLast;
|
||||
|
||||
/**
|
||||
* Our value, if we aren't the EOF marker
|
||||
*/
|
||||
final private T value;
|
||||
|
||||
/**
|
||||
* Create a new BlockingQueueValue containing a real value, where last is false
|
||||
* @param value
|
||||
*/
|
||||
BlockingQueueValue(final T value) {
|
||||
isLast = false;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new BlockingQueueValue that is the last item
|
||||
*/
|
||||
BlockingQueueValue() {
|
||||
isLast = true;
|
||||
this.value = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this the EOF marker?
|
||||
*
|
||||
* @return true if so, else false
|
||||
*/
|
||||
public boolean isLast() {
|
||||
return isLast;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the value held by this BlockingQueueValue
|
||||
*
|
||||
* @return the value
|
||||
* @throws IllegalStateException if this is the last item
|
||||
*/
|
||||
public T getValue() {
|
||||
if ( isLast() )
|
||||
throw new IllegalStateException("Cannot get value for last object");
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
/**
|
||||
* Create a future that simply returns a given value
|
||||
*
|
||||
* The only standard way to create a future in java is via the ExecutorService interface.
|
||||
* If you have a data structure holding futures of value T, and you want to add a
|
||||
* value to it for some reason (to add a EOF marker, for instance) you can use this
|
||||
* class to create a dummy Future<T> that simply returns a value.
|
||||
*
|
||||
* @author depristo
|
||||
* @since 09/12
|
||||
*/
|
||||
class FutureValue<V> implements Future<V> {
    /** the fixed value this future always returns */
    final V value;

    /**
     * Create a future that is already complete, holding value.
     *
     * @param value the value get() will return (may be null)
     */
    FutureValue(final V value) {
        this.value = value;
    }

    /**
     * FIX: a FutureValue is complete upon construction, so per the
     * java.util.concurrent.Future contract cancel() must fail (the original
     * returned true, falsely reporting a successful cancellation of a
     * completed task).
     *
     * @return false, always
     */
    @Override public boolean cancel(boolean mayInterruptIfRunning) {
        return false;
    }

    /** @return false: this future can never be cancelled */
    @Override public boolean isCancelled() {
        return false;
    }

    /** @return true: the value is available immediately */
    @Override public boolean isDone() {
        return true;
    }

    /** @return the wrapped value, without blocking */
    @Override public V get() throws InterruptedException, ExecutionException {
        return value;
    }

    /** @return the wrapped value, without blocking; the timeout is irrelevant */
    @Override public V get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
        return get();
    }
}
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
/**
|
||||
* Producer Thread that reads input values from an inputReads and puts them into a BlockingQueue
|
||||
*/
|
||||
class InputProducer<InputType> implements Runnable {
|
||||
/**
|
||||
* The iterator we are using to get data from
|
||||
*/
|
||||
final Iterator<InputType> inputReader;
|
||||
|
||||
/**
|
||||
* Our timer (may be null) that we use to track our input costs
|
||||
*/
|
||||
final SimpleTimer inputTimer;
|
||||
|
||||
/**
|
||||
* Where we put our input values for consumption
|
||||
*/
|
||||
final BlockingQueue<InputValue> outputQueue;
|
||||
|
||||
public InputProducer(final Iterator<InputType> inputReader,
|
||||
final SimpleTimer inputTimer,
|
||||
final BlockingQueue<InputValue> outputQueue) {
|
||||
if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
|
||||
if ( outputQueue == null ) throw new IllegalArgumentException("OutputQueue cannot be null");
|
||||
|
||||
this.inputReader = inputReader;
|
||||
this.inputTimer = inputTimer;
|
||||
this.outputQueue = outputQueue;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
while ( inputReader.hasNext() ) {
|
||||
if ( inputTimer != null ) inputTimer.restart();
|
||||
final InputType input = inputReader.next();
|
||||
if ( inputTimer != null ) inputTimer.stop();
|
||||
outputQueue.put(new InputValue(input));
|
||||
}
|
||||
|
||||
// add the EOF object so our consumer knows we are done in all inputs
|
||||
outputQueue.put(new InputValue());
|
||||
} catch (InterruptedException ex) {
|
||||
throw new ReviewedStingException("got execution exception", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper class that contains a read value suitable for EOF marking in a BlockingQueue
|
||||
*/
|
||||
class InputValue extends BlockingQueueValue<InputType> {
|
||||
private InputValue(InputType datum) { super(datum); }
|
||||
private InputValue() { }
|
||||
}
|
||||
}
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
/**
|
||||
* Holds the results of a map job suitable for producer/consumer threading
|
||||
* via a BlockingQueue
|
||||
*/
|
||||
class MapResult<MapType> extends BlockingQueueValue<MapType> {
|
||||
final int jobID;
|
||||
|
||||
/**
|
||||
* Create a new MapResult with value datum and jod jobID ID
|
||||
*
|
||||
* @param datum the value produced by the map job
|
||||
* @param jobID the id of the map job (for correctness testing)
|
||||
*/
|
||||
MapResult(final MapType datum, final int jobID) {
|
||||
super(datum);
|
||||
this.jobID = jobID;
|
||||
if ( jobID < 0 ) throw new IllegalArgumentException("JobID must be >= 0");
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the EOF marker version of MapResult
|
||||
*/
|
||||
MapResult() {
|
||||
super();
|
||||
this.jobID = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the job ID of the map job that produced this MapResult
|
||||
*/
|
||||
public int getJobID() {
|
||||
return jobID;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;

/**
 * A function that maps from InputType -> ResultType
 *
 * For use with the NanoScheduler.  The NanoScheduler invokes apply on multiple
 * worker threads, so implementations should be thread-safe.
 *
 * User: depristo
 * Date: 8/24/12
 * Time: 9:49 AM
 */
public interface NSMapFunction<InputType, ResultType> {
    /**
     * Return function on input, returning a value of ResultType
     *
     * @param input the input element to transform
     * @return the result of applying this function to input
     */
    public ResultType apply(final InputType input);
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;

/**
 * A progress callback for the NanoScheduler, invoked periodically (once per input
 * buffer's worth of map jobs) with the most recently processed input element.
 *
 * User: depristo
 * Date: 9/4/12
 * Time: 2:10 PM
 */
public interface NSProgressFunction<InputType> {
    /**
     * Called by the NanoScheduler to report progress.
     *
     * @param lastMapInput the last input element handed to the map function
     */
    public void progress(final InputType lastMapInput);
}
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;

/**
 * A function that combines a value of MapType with an existing ReduceValue into a new ResultType
 *
 * For use with the NanoScheduler, which calls apply on map results in input order.
 *
 * User: depristo
 * Date: 8/24/12
 * Time: 9:49 AM
 */
public interface NSReduceFunction<MapType, ReduceType> {
    /**
     * Combine one with sum into a new ReduceType
     *
     * @param one the result of a map call on an input element
     * @param sum the cumulative reduce result over all previous map calls
     * @return the new cumulative reduce result incorporating one
     */
    public ReduceType apply(MapType one, ReduceType sum);
}
|
||||
|
|
@ -1,392 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.threading.NamedThreadFactory;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
/**
|
||||
* Framework for very fine grained MapReduce parallelism
|
||||
*
|
||||
* The overall framework works like this
|
||||
*
|
||||
* nano <- new Nanoschedule(inputBufferSize, numberOfMapElementsToProcessTogether, nThreads)
|
||||
* List[Input] outerData : outerDataLoop )
|
||||
* result = nano.execute(outerData.iterator(), map, reduce)
|
||||
*
|
||||
* inputBufferSize determines how many elements from the input stream are read in one go by the
|
||||
* nanoscheduler. The scheduler may hold up to inputBufferSize in memory at one time, as well
|
||||
* as up to inputBufferSize map results as well.
|
||||
*
|
||||
* numberOfMapElementsToProcessTogether determines how many input elements are processed
|
||||
* together each thread cycle. For example, if this value is 10, then the input data
|
||||
* is grouped together in units of 10 elements each, and map called on each in term. The more
|
||||
* heavy-weight the map function is, in terms of CPU costs, the more it makes sense to
|
||||
* have this number be small. The lighter the CPU cost per element, though, the more this
|
||||
* parameter introduces overhead due to need to context switch among threads to process
|
||||
* each input element. A value of -1 lets the nanoscheduler guess at a reasonable trade-off value.
|
||||
*
|
||||
* nThreads is a bit obvious yes? Note though that the nanoscheduler assumes that it gets 1 thread
|
||||
* from its client during the execute call, as this call blocks until all work is done. The caller
|
||||
* thread is put to work by execute to help with the processing of the data. So in reality the
|
||||
* nanoScheduler only spawn nThreads - 1 additional workers (if this is > 1).
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/24/12
|
||||
* Time: 9:47 AM
|
||||
*/
|
||||
public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||
private final static Logger logger = Logger.getLogger(NanoScheduler.class);
|
||||
private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
|
||||
private final static boolean LOG_MAP_TIMES = false;
|
||||
private final static boolean TIME_CALLS = true;
|
||||
|
||||
private final static int MAP_BUFFER_SIZE_SCALE_FACTOR = 100;
|
||||
|
||||
final int inputBufferSize;
|
||||
final int mapBufferSize;
|
||||
final int nThreads;
|
||||
final ExecutorService inputExecutor;
|
||||
final ExecutorService reduceExecutor;
|
||||
final ThreadPoolExecutor mapExecutor;
|
||||
|
||||
boolean shutdown = false;
|
||||
boolean debug = false;
|
||||
private NSProgressFunction<InputType> progressFunction = null;
|
||||
|
||||
final SimpleTimer outsideSchedulerTimer = TIME_CALLS ? new SimpleTimer("outside") : null;
|
||||
final SimpleTimer inputTimer = TIME_CALLS ? new SimpleTimer("input") : null;
|
||||
final SimpleTimer mapTimer = TIME_CALLS ? new SimpleTimer("map") : null;
|
||||
final SimpleTimer reduceTimer = TIME_CALLS ? new SimpleTimer("reduce") : null;
|
||||
|
||||
/**
|
||||
* Create a new nanoscheduler with the desire characteristics requested by the argument
|
||||
*
|
||||
* @param inputBufferSize the number of input elements to read in each scheduling cycle.
|
||||
* @param nThreads the number of threads to use to get work done, in addition to the
|
||||
* thread calling execute
|
||||
*/
|
||||
public NanoScheduler(final int inputBufferSize, final int nThreads) {
|
||||
if ( inputBufferSize < 1 ) throw new IllegalArgumentException("inputBufferSize must be >= 1, got " + inputBufferSize);
|
||||
if ( nThreads < 1 ) throw new IllegalArgumentException("nThreads must be >= 1, got " + nThreads);
|
||||
|
||||
this.inputBufferSize = inputBufferSize;
|
||||
this.mapBufferSize = inputBufferSize * MAP_BUFFER_SIZE_SCALE_FACTOR;
|
||||
this.nThreads = nThreads;
|
||||
|
||||
if ( nThreads == 1 ) {
|
||||
this.mapExecutor = null;
|
||||
this.inputExecutor = this.reduceExecutor = null;
|
||||
} else {
|
||||
this.mapExecutor = (ThreadPoolExecutor)Executors.newFixedThreadPool(nThreads-1, new NamedThreadFactory("NS-map-thread-%d"));
|
||||
this.mapExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
|
||||
this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
||||
this.reduceExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-reduce-thread-%d"));
|
||||
}
|
||||
|
||||
// start timing the time spent outside of the nanoScheduler
|
||||
outsideSchedulerTimer.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of parallel map threads in use with this NanoScheduler
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result > 0")
|
||||
public int getnThreads() {
|
||||
return nThreads;
|
||||
}
|
||||
|
||||
/**
|
||||
* The input buffer size used by this NanoScheduler
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result > 0")
|
||||
public int getInputBufferSize() {
|
||||
return inputBufferSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this nanoScheduler to shutdown immediately, releasing all its resources.
|
||||
*
|
||||
* After this call, execute cannot be invoked without throwing an error
|
||||
*/
|
||||
public void shutdown() {
|
||||
outsideSchedulerTimer.stop();
|
||||
|
||||
if ( nThreads > 1 ) {
|
||||
shutdownExecutor("inputExecutor", inputExecutor);
|
||||
shutdownExecutor("mapExecutor", mapExecutor);
|
||||
shutdownExecutor("reduceExecutor", reduceExecutor);
|
||||
}
|
||||
shutdown = true;
|
||||
|
||||
if (TIME_CALLS) {
|
||||
printTimerInfo("Input time", inputTimer);
|
||||
printTimerInfo("Map time", mapTimer);
|
||||
printTimerInfo("Reduce time", reduceTimer);
|
||||
printTimerInfo("Outside time", outsideSchedulerTimer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to cleanly shutdown an execution service, checking that the execution
|
||||
* state is clean when it's done.
|
||||
*
|
||||
* @param name a string name for error messages for the executorService we are shutting down
|
||||
* @param executorService the executorService to shut down
|
||||
*/
|
||||
@Requires({"name != null", "executorService != null"})
|
||||
@Ensures("executorService.isShutdown()")
|
||||
private void shutdownExecutor(final String name, final ExecutorService executorService) {
|
||||
if ( executorService.isShutdown() || executorService.isTerminated() )
|
||||
throw new IllegalStateException("Executor service " + name + " is already shut down!");
|
||||
|
||||
final List<Runnable> remaining = executorService.shutdownNow();
|
||||
if ( ! remaining.isEmpty() )
|
||||
throw new IllegalStateException(remaining.size() + " remaining tasks found in an executor " + name + ", unexpected behavior!");
|
||||
}
|
||||
|
||||
/**
|
||||
* Print to logger.info timing information from timer, with name label
|
||||
*
|
||||
* @param label the name of the timer to display. Should be human readable
|
||||
* @param timer the timer whose elapsed time we will display
|
||||
*/
|
||||
@Requires({"label != null", "timer != null"})
|
||||
private void printTimerInfo(final String label, final SimpleTimer timer) {
|
||||
final double total = inputTimer.getElapsedTime() + mapTimer.getElapsedTime()
|
||||
+ reduceTimer.getElapsedTime() + outsideSchedulerTimer.getElapsedTime();
|
||||
final double myTimeInSec = timer.getElapsedTime();
|
||||
final double myTimePercent = myTimeInSec / total * 100;
|
||||
logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(myTimeInSec), myTimePercent));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if this nanoScheduler is shutdown, or false if its still open for business
|
||||
*/
|
||||
public boolean isShutdown() {
|
||||
return shutdown;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return are we displaying verbose debugging information about the scheduling?
|
||||
*/
|
||||
public boolean isDebug() {
|
||||
return debug;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to display a String.formatted message if we are doing verbose debugging
|
||||
*
|
||||
* @param format the format argument suitable for String.format
|
||||
* @param args the arguments for String.format
|
||||
*/
|
||||
@Requires("format != null")
|
||||
private void debugPrint(final String format, Object ... args) {
|
||||
if ( isDebug() )
|
||||
logger.info("Thread " + Thread.currentThread().getId() + ":" + String.format(format, args));
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn on/off verbose debugging
|
||||
*
|
||||
* @param debug true if we want verbose debugging
|
||||
*/
|
||||
public void setDebug(boolean debug) {
|
||||
this.debug = debug;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the progress callback function to progressFunction
|
||||
*
|
||||
* The progress callback is invoked after each buffer size elements have been processed by map/reduce
|
||||
*
|
||||
* @param progressFunction a progress function to call, or null if you don't want any progress callback
|
||||
*/
|
||||
public void setProgressFunction(final NSProgressFunction<InputType> progressFunction) {
|
||||
this.progressFunction = progressFunction;
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a map/reduce job with this nanoScheduler
|
||||
*
|
||||
* Data comes from inputReader. Will be read until hasNext() == false.
|
||||
* map is called on each element provided by inputReader. No order of operations is guarenteed
|
||||
* reduce is called in order of the input data provided by inputReader on the result of map() applied
|
||||
* to each element.
|
||||
*
|
||||
* Note that the caller thread is put to work with this function call. The call doesn't return
|
||||
* until all elements have been processes.
|
||||
*
|
||||
* It is safe to call this function repeatedly on a single nanoScheduler, at least until the
|
||||
* shutdown method is called.
|
||||
*
|
||||
* Note that this function goes through a single threaded fast path if the number of threads
|
||||
* is 1.
|
||||
*
|
||||
* @param inputReader an iterator providing us with the input data to nanoSchedule map/reduce over
|
||||
* @param map the map function from input type -> map type, will be applied in parallel to each input
|
||||
* @param reduce the reduce function from map type + reduce type -> reduce type to be applied in order to map results
|
||||
* @return the last reduce value
|
||||
*/
|
||||
public ReduceType execute(final Iterator<InputType> inputReader,
|
||||
final NSMapFunction<InputType, MapType> map,
|
||||
final ReduceType initialValue,
|
||||
final NSReduceFunction<MapType, ReduceType> reduce) {
|
||||
if ( isShutdown() ) throw new IllegalStateException("execute called on already shutdown NanoScheduler");
|
||||
if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
|
||||
if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
|
||||
if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
|
||||
|
||||
outsideSchedulerTimer.stop();
|
||||
|
||||
ReduceType result;
|
||||
if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
|
||||
result = executeSingleThreaded(inputReader, map, initialValue, reduce);
|
||||
} else {
|
||||
result = executeMultiThreaded(inputReader, map, initialValue, reduce);
|
||||
}
|
||||
|
||||
outsideSchedulerTimer.restart();
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple efficient reference implementation for single threaded execution.
|
||||
*
|
||||
* @return the reduce result of this map/reduce job
|
||||
*/
|
||||
@Requires({"inputReader != null", "map != null", "reduce != null"})
|
||||
private ReduceType executeSingleThreaded(final Iterator<InputType> inputReader,
|
||||
final NSMapFunction<InputType, MapType> map,
|
||||
final ReduceType initialValue,
|
||||
final NSReduceFunction<MapType, ReduceType> reduce) {
|
||||
ReduceType sum = initialValue;
|
||||
int i = 0;
|
||||
|
||||
// start timer to ensure that both hasNext and next are caught by the timer
|
||||
if ( TIME_CALLS ) inputTimer.restart();
|
||||
while ( inputReader.hasNext() ) {
|
||||
final InputType input = inputReader.next();
|
||||
if ( TIME_CALLS ) inputTimer.stop();
|
||||
|
||||
// map
|
||||
if ( TIME_CALLS ) mapTimer.restart();
|
||||
final long preMapTime = LOG_MAP_TIMES ? 0 : mapTimer.currentTimeNano();
|
||||
final MapType mapValue = map.apply(input);
|
||||
if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (mapTimer.currentTimeNano() - preMapTime));
|
||||
if ( TIME_CALLS ) mapTimer.stop();
|
||||
|
||||
if ( i++ % inputBufferSize == 0 && progressFunction != null )
|
||||
progressFunction.progress(input);
|
||||
|
||||
// reduce
|
||||
if ( TIME_CALLS ) reduceTimer.restart();
|
||||
sum = reduce.apply(mapValue, sum);
|
||||
if ( TIME_CALLS ) reduceTimer.stop();
|
||||
|
||||
if ( TIME_CALLS ) inputTimer.restart();
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Efficient parallel version of Map/Reduce
|
||||
*
|
||||
* @return the reduce result of this map/reduce job
|
||||
*/
|
||||
@Requires({"inputReader != null", "map != null", "reduce != null"})
|
||||
private ReduceType executeMultiThreaded(final Iterator<InputType> inputReader,
|
||||
final NSMapFunction<InputType, MapType> map,
|
||||
final ReduceType initialValue,
|
||||
final NSReduceFunction<MapType, ReduceType> reduce) {
|
||||
debugPrint("Executing nanoScheduler");
|
||||
|
||||
// a blocking queue that limits the number of input datum to the requested buffer size
|
||||
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue
|
||||
= new LinkedBlockingDeque<InputProducer<InputType>.InputValue>(inputBufferSize);
|
||||
|
||||
// a priority queue that stores up to mapBufferSize elements
|
||||
// produced by completed map jobs.
|
||||
final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue =
|
||||
new LinkedBlockingDeque<Future<MapResult<MapType>>>(mapBufferSize);
|
||||
|
||||
// Start running the input reader thread
|
||||
inputExecutor.submit(new InputProducer<InputType>(inputReader, inputTimer, inputQueue));
|
||||
|
||||
// Start running the reducer thread
|
||||
final ReducerThread<MapType, ReduceType> reducer
|
||||
= new ReducerThread<MapType, ReduceType>(reduce, reduceTimer, initialValue, mapResultQueue);
|
||||
final Future<ReduceType> reduceResult = reduceExecutor.submit(reducer);
|
||||
|
||||
try {
|
||||
int numJobs = 0;
|
||||
|
||||
while ( true ) {
|
||||
// block on input
|
||||
final InputProducer<InputType>.InputValue inputEnqueueWrapped = inputQueue.take();
|
||||
|
||||
if ( ! inputEnqueueWrapped.isLast() ) {
|
||||
// get the object itself
|
||||
final InputType input = inputEnqueueWrapped.getValue();
|
||||
|
||||
// the next map call has jobID + 1
|
||||
numJobs++;
|
||||
|
||||
// send job for map via the completion service
|
||||
final CallableMap doMap = new CallableMap(map, numJobs, input);
|
||||
final Future<MapResult<MapType>> mapJob = mapExecutor.submit(doMap);
|
||||
mapResultQueue.put(mapJob);
|
||||
|
||||
debugPrint(" Done with cycle of map/reduce");
|
||||
|
||||
if ( numJobs % inputBufferSize == 0 && progressFunction != null )
|
||||
progressFunction.progress(input);
|
||||
} else {
|
||||
mapResultQueue.put(new FutureValue<MapResult<MapType>>(new MapResult<MapType>()));
|
||||
return reduceResult.get(); // wait for our result of reduce
|
||||
}
|
||||
}
|
||||
} catch (InterruptedException ex) {
|
||||
throw new ReviewedStingException("got execution exception", ex);
|
||||
} catch (ExecutionException ex) {
|
||||
throw new ReviewedStingException("got execution exception", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple callable version of the map function for use with the executor pool
|
||||
*/
|
||||
private class CallableMap implements Callable<MapResult<MapType>> {
|
||||
final int id;
|
||||
final InputType input;
|
||||
final NSMapFunction<InputType, MapType> map;
|
||||
|
||||
@Requires({"map != null"})
|
||||
private CallableMap(final NSMapFunction<InputType, MapType> map,
|
||||
final int id,
|
||||
final InputType input) {
|
||||
this.id = id;
|
||||
this.input = input;
|
||||
this.map = map;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MapResult<MapType> call() {
|
||||
if ( TIME_CALLS ) mapTimer.restart();
|
||||
if ( debug ) debugPrint("\t\tmap " + input);
|
||||
final MapType result = map.apply(input);
|
||||
if ( TIME_CALLS ) mapTimer.stop();
|
||||
return new MapResult<MapType>(result, id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
/**
|
||||
* Thread that runs the reduce of the map/reduce.
|
||||
*
|
||||
* This thread reads from mapResultsQueue until the poison EOF object arrives. At each
|
||||
* stage is calls reduce(value, sum). The blocking mapResultQueue ensures that the
|
||||
* queue waits until the mapResultQueue has a value to take. Then, it gets and waits
|
||||
* until the map result Future has a value.
|
||||
*/
|
||||
class ReducerThread<MapType, ReduceType> implements Callable<ReduceType> {
|
||||
final NSReduceFunction<MapType, ReduceType> reduce;
|
||||
final SimpleTimer reduceTimer;
|
||||
final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue;
|
||||
|
||||
ReduceType sum;
|
||||
int lastJobID = -1;
|
||||
|
||||
public ReducerThread(final NSReduceFunction<MapType, ReduceType> reduce,
|
||||
final SimpleTimer reduceTimer,
|
||||
final ReduceType sum,
|
||||
final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue) {
|
||||
if ( reduce == null ) throw new IllegalArgumentException("Reduce function cannot be null");
|
||||
if ( mapResultQueue == null ) throw new IllegalArgumentException("mapResultQueue cannot be null");
|
||||
|
||||
this.reduce = reduce;
|
||||
this.reduceTimer = reduceTimer;
|
||||
this.sum = sum;
|
||||
this.mapResultQueue = mapResultQueue;
|
||||
}
|
||||
|
||||
public ReduceType call() {
|
||||
try {
|
||||
while ( true ) {
|
||||
final MapResult<MapType> result = mapResultQueue.take().get();
|
||||
if ( result.isLast() ) {
|
||||
// we are done, just return sum
|
||||
return sum;
|
||||
}
|
||||
else if ( result.getJobID() < lastJobID ) {
|
||||
// make sure the map results are coming in order
|
||||
throw new IllegalStateException("BUG: last jobID " + lastJobID + " > current jobID " + result.getJobID());
|
||||
} else {
|
||||
lastJobID = result.getJobID();
|
||||
// apply reduce, keeping track of sum
|
||||
if ( reduceTimer != null ) reduceTimer.restart();
|
||||
sum = reduce.apply(result.getValue(), sum);
|
||||
if ( reduceTimer != null ) reduceTimer.stop();
|
||||
}
|
||||
}
|
||||
} catch (ExecutionException ex) {
|
||||
throw new ReviewedStingException("got execution exception", ex);
|
||||
} catch (InterruptedException ex) {
|
||||
throw new ReviewedStingException("got execution exception", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;

import org.broadinstitute.sting.gatk.iterators.ReadTransformer;

import java.lang.annotation.*;

/**
 * User: hanna
 * Date: May 14, 2009
 * Time: 1:51:22 PM
 * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
 * Software and documentation are copyright 2005 by the Broad Institute.
 * All rights are reserved.
 *
 * Users acknowledge that this software is supplied without any warranty or support.
 * The Broad Institute is not responsible for its use, misuse, or
 * functionality.
 */

/**
 * Walker-level annotation declaring when on-the-fly base quality score recalibration
 * (BQSR) should be applied to reads for that walker; it is looked up via
 * WalkerManager.getWalkerAnnotation and its ApplicationTime returned to the engine.
 *
 * NOTE(review): the previous description here ("Allows the walker to indicate what
 * type of data it wants to consume") appears copy-pasted from another annotation --
 * confirm intended wording.
 */

@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface BQSRMode {
    // defaults to recalibrating reads on input, before the walker sees them
    public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;

import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

/**
 * A ReadTransformer that applies BQSR on the fly to reads
 *
 * User: rpoplin
 * Date: 2/13/12
 */
public class BQSRReadTransformer extends ReadTransformer {
    // true only when the engine reports it has base recalibration data
    private boolean enabled;
    // the engine-owned recalibration engine used by apply(); presumably null when
    // the engine has no recalibration data -- TODO confirm
    private BaseRecalibration bqsr;

    /**
     * Initialize this transformer from the engine: enable it iff the engine carries
     * base recalibration data, cache the engine's BaseRecalibration, and read the
     * walker's BQSRMode annotation to report when recalibration should be applied.
     */
    @Override
    public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
        this.enabled = engine.hasBaseRecalibration();
        this.bqsr = engine.getBaseRecalibration();
        final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class);
        return mode.ApplicationTime();
    }

    /** @return true if the engine has recalibration data, so reads should be transformed */
    @Override
    public boolean enabled() {
        return enabled;
    }

    /**
     * Recalibrate the read in place via the engine's BaseRecalibration and return the
     * same read instance.
     * (The previous comment here described construction; it belonged on initializeSub.)
     */
    @Override
    public GATKSAMRecord apply(GATKSAMRecord read) {
        bqsr.recalibrateRead(read);
        return read;
    }
}
|
||||
|
|
@ -1,86 +0,0 @@
|
|||
/*
 * Copyright (c) 2012, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.utils.sam;

import net.sf.picard.sam.MergingSamRecordIterator;
import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

import java.util.*;

/**
 * Simple wrapper class that multiplexes multiple ArtificialSingleSampleReadStreams into a single stream of reads
 *
 * @author David Roazen
 */
public class ArtificialMultiSampleReadStream implements Iterable<SAMRecord> {

    // the per-sample streams supplied at construction; read generation is deferred to initialize()
    private Collection<ArtificialSingleSampleReadStream> perSampleArtificialReadStreams;
    // coordinate-sorted merged view over all per-sample streams; rebuilt by initialize()
    private MergingSamRecordIterator mergingIterator;

    /**
     * Create a multiplexed stream over the given per-sample artificial read streams.
     *
     * @param perSampleArtificialReadStreams one artificial stream per sample; must be non-null and non-empty
     */
    public ArtificialMultiSampleReadStream( Collection<ArtificialSingleSampleReadStream> perSampleArtificialReadStreams ) {
        if ( perSampleArtificialReadStreams == null || perSampleArtificialReadStreams.isEmpty() ) {
            throw new ReviewedStingException("Can't create an ArtificialMultiSampleReadStream out of 0 ArtificialSingleSampleReadStreams");
        }

        this.perSampleArtificialReadStreams = perSampleArtificialReadStreams;
    }

    /**
     * @return a coordinate-sorted iterator over the merged reads of all samples
     */
    public Iterator<SAMRecord> iterator() {
        // lazy initialization to prevent reads from being created until they're needed
        // NOTE(review): initialize() rebuilds the merged iterator on EVERY call, so each
        // call to iterator() regenerates the reads and returns a fresh stream -- confirm
        // this re-generation (rather than init-once) is intended
        initialize();

        return mergingIterator;
    }

    /**
     * @return the merged reads wrapped as a StingSAMIterator
     */
    public StingSAMIterator getStingSAMIterator() {
        // lazy initialization to prevent reads from being created until they're needed
        initialize();

        return StingSAMIteratorAdapter.adapt(mergingIterator);
    }

    // Generate each sample's reads, wrap them in in-memory SAM readers, and build a
    // coordinate-sorted merging iterator over the combined headers.
    private void initialize() {
        Collection<SAMFileReader> perSampleSAMReaders = new ArrayList<SAMFileReader>(perSampleArtificialReadStreams.size());
        Collection<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(perSampleArtificialReadStreams.size());

        for ( ArtificialSingleSampleReadStream readStream : perSampleArtificialReadStreams ) {
            Collection<SAMRecord> thisStreamReads = readStream.makeReads();

            SAMFileReader reader = new ArtificialSAMFileReader(readStream.getHeader(),
                                                              thisStreamReads.toArray(new SAMRecord[thisStreamReads.size()]));
            perSampleSAMReaders.add(reader);
            headers.add(reader.getFileHeader());
        }

        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, true);
        mergingIterator = new MergingSamRecordIterator(headerMerger, perSampleSAMReaders, true);
    }
}
|
||||
|
|
@ -1,212 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* An artificial stream of reads from a single read group/sample with configurable characteristics
|
||||
* such as:
|
||||
*
|
||||
* -the number of contigs that the reads should be distributed across
|
||||
* -number of "stacks" of reads sharing the same alignment start position per contig
|
||||
* -the min/max number of reads in each stack (exact values chosen randomly from this range)
|
||||
* -the min/max distance between stack start positions (exact values chosen randomly from this range)
|
||||
* -the min/max length of each read (exact values chosen randomly from this range)
|
||||
* -the number of unmapped reads
|
||||
*
|
||||
* The cigar string for all reads will be *M, where * is the length of the read.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class ArtificialSingleSampleReadStream implements Iterable<SAMRecord> {
|
||||
private SAMFileHeader header;
|
||||
private String readGroupID;
|
||||
private int numContigs;
|
||||
private int numStacksPerContig;
|
||||
private int minReadsPerStack;
|
||||
private int maxReadsPerStack;
|
||||
private int minDistanceBetweenStacks;
|
||||
private int maxDistanceBetweenStacks;
|
||||
private int minReadLength;
|
||||
private int maxReadLength;
|
||||
private int numUnmappedReads;
|
||||
|
||||
private static final String READ_GROUP_TAG = "RG";
|
||||
|
||||
public ArtificialSingleSampleReadStream( SAMFileHeader header,
|
||||
String readGroupID,
|
||||
int numContigs,
|
||||
int numStacksPerContig,
|
||||
int minReadsPerStack,
|
||||
int maxReadsPerStack,
|
||||
int minDistanceBetweenStacks,
|
||||
int maxDistanceBetweenStacks,
|
||||
int minReadLength,
|
||||
int maxReadLength,
|
||||
int numUnmappedReads ) {
|
||||
this.header = header;
|
||||
this.readGroupID = readGroupID;
|
||||
this.numContigs = numContigs;
|
||||
this.numStacksPerContig = numStacksPerContig;
|
||||
this.minReadsPerStack = minReadsPerStack;
|
||||
this.maxReadsPerStack = maxReadsPerStack;
|
||||
this.minDistanceBetweenStacks = minDistanceBetweenStacks;
|
||||
this.maxDistanceBetweenStacks = maxDistanceBetweenStacks;
|
||||
this.minReadLength = minReadLength;
|
||||
this.maxReadLength = maxReadLength;
|
||||
this.numUnmappedReads = numUnmappedReads;
|
||||
|
||||
validateStreamParameters();
|
||||
}
|
||||
|
||||
private void validateStreamParameters() {
|
||||
if ( header == null || readGroupID == null ) {
|
||||
throw new ReviewedStingException("null SAMFileHeader or read group ID") ;
|
||||
}
|
||||
|
||||
if ( header.getReadGroup(readGroupID) == null ) {
|
||||
throw new ReviewedStingException(String.format("Read group %s not found in SAMFileHeader", readGroupID));
|
||||
}
|
||||
|
||||
if ( numContigs < 0 || numStacksPerContig < 0 || minReadsPerStack < 0 || maxReadsPerStack < 0 ||
|
||||
minDistanceBetweenStacks < 0 || maxDistanceBetweenStacks < 0 || minReadLength < 0 || maxReadLength < 0 ||
|
||||
numUnmappedReads < 0 ) {
|
||||
throw new ReviewedStingException("Read stream parameters must be >= 0");
|
||||
}
|
||||
|
||||
if ( (numContigs == 0 && numStacksPerContig != 0) || (numContigs != 0 && numStacksPerContig == 0) ) {
|
||||
throw new ReviewedStingException("numContigs and numStacksPerContig must either both be > 0, or both be 0");
|
||||
}
|
||||
|
||||
if ( minReadsPerStack > maxReadsPerStack ) {
|
||||
throw new ReviewedStingException("minReadsPerStack > maxReadsPerStack");
|
||||
}
|
||||
|
||||
if ( minDistanceBetweenStacks > maxDistanceBetweenStacks ) {
|
||||
throw new ReviewedStingException("minDistanceBetweenStacks > maxDistanceBetweenStacks");
|
||||
}
|
||||
|
||||
if ( minReadLength > maxReadLength ) {
|
||||
throw new ReviewedStingException("minReadLength > maxReadLength");
|
||||
}
|
||||
}
|
||||
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return makeReads().iterator();
|
||||
}
|
||||
|
||||
public StingSAMIterator getStingSAMIterator() {
|
||||
return StingSAMIteratorAdapter.adapt(iterator());
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> makeReads() {
|
||||
Collection<SAMRecord> reads = new ArrayList<SAMRecord>(numContigs * numStacksPerContig * maxReadsPerStack);
|
||||
|
||||
for ( int contig = 0; contig < numContigs; contig++ ) {
|
||||
int alignmentStart = 1;
|
||||
|
||||
for ( int stack = 0; stack < numStacksPerContig; stack++ ) {
|
||||
reads.addAll(makeReadStack(contig, alignmentStart, MathUtils.randomIntegerInRange(minReadsPerStack, maxReadsPerStack)));
|
||||
alignmentStart += MathUtils.randomIntegerInRange(minDistanceBetweenStacks, maxDistanceBetweenStacks);
|
||||
}
|
||||
}
|
||||
|
||||
if ( numUnmappedReads > 0 ) {
|
||||
reads.addAll(makeReadStack(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START, numUnmappedReads));
|
||||
}
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
private Collection<SAMRecord> makeReadStack( int contig, int alignmentStart, int stackSize ) {
|
||||
Collection<SAMRecord> readStack = new ArrayList<SAMRecord>(stackSize);
|
||||
|
||||
for ( int i = 0; i < stackSize; i++ ) {
|
||||
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header,
|
||||
"foo",
|
||||
contig,
|
||||
alignmentStart,
|
||||
MathUtils.randomIntegerInRange(minReadLength, maxReadLength));
|
||||
read.setAttribute(READ_GROUP_TAG, readGroupID);
|
||||
readStack.add(read);
|
||||
}
|
||||
|
||||
return readStack;
|
||||
}
|
||||
|
||||
public SAMFileHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public String getReadGroupID() {
|
||||
return readGroupID;
|
||||
}
|
||||
|
||||
public int getNumContigs() {
|
||||
return numContigs;
|
||||
}
|
||||
|
||||
public int getNumStacksPerContig() {
|
||||
return numStacksPerContig;
|
||||
}
|
||||
|
||||
public int getMinReadsPerStack() {
|
||||
return minReadsPerStack;
|
||||
}
|
||||
|
||||
public int getMaxReadsPerStack() {
|
||||
return maxReadsPerStack;
|
||||
}
|
||||
|
||||
public int getMinDistanceBetweenStacks() {
|
||||
return minDistanceBetweenStacks;
|
||||
}
|
||||
|
||||
public int getMaxDistanceBetweenStacks() {
|
||||
return maxDistanceBetweenStacks;
|
||||
}
|
||||
|
||||
public int getMinReadLength() {
|
||||
return minReadLength;
|
||||
}
|
||||
|
||||
public int getMaxReadLength() {
|
||||
return maxReadLength;
|
||||
}
|
||||
|
||||
public int getNumUnmappedReads() {
|
||||
return numUnmappedReads;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,281 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * A class for analyzing and validating the read stream produced by an ArtificialSingleSampleReadStream.
 *
 * Collects various statistics about the stream of reads it's fed, and validates the stream
 * by checking whether the collected statistics match the nominal properties of the stream.
 *
 * Subclasses are expected to override the validate() method in order to check whether an artificial
 * read stream has been *transformed* in some way (eg., by downsampling or some other process), rather
 * than merely checking whether the stream matches its original properties.
 *
 * Usage is simple:
 *
 * ArtificialSingleSampleReadStreamAnalyzer analyzer = new ArtificialSingleSampleReadStreamAnalyzer(originalStream);
 * analyzer.analyze(originalOrTransformedStream);
 * analyzer.validate(); // override this method if you want to check whether the stream has been transformed
 *                      // in a certain way relative to the original stream
 *
 * NOTE: the reads fed to analyze() are expected to arrive sorted (contig by contig, stacks of
 * identical alignment starts, unmapped reads last) — update() detects contig/stack boundaries
 * purely by comparing each read to the previous one.
 *
 * @author David Roazen
 */
public class ArtificialSingleSampleReadStreamAnalyzer {
    // The stream whose nominal parameters the observed statistics are validated against
    protected ArtificialSingleSampleReadStream originalStream;

    // The previous read seen by update(); null until the first read arrives
    protected SAMRecord lastRead;
    // Total number of reads seen (mapped and unmapped)
    protected int totalReads;
    // True while every read seen carried the expected read group ("RG" attribute)
    protected boolean allSamplesMatch;
    // Number of distinct contigs observed so far
    protected int numContigs;
    // Completed stack count for each finished contig, in order of appearance
    protected List<Integer> stacksPerContig;
    // Observed extrema; boxed Integers stay null until the first relevant observation
    protected Integer minReadsPerStack;
    protected Integer maxReadsPerStack;
    protected Integer minDistanceBetweenStacks;
    protected Integer maxDistanceBetweenStacks;
    protected Integer minReadLength;
    protected Integer maxReadLength;
    // Number of unmapped reads observed
    protected int numUnmappedReads;

    // In-progress counters for the contig/stack currently being scanned;
    // flushed into the aggregates on contig/stack boundaries and in finalizeStats()
    protected int currentContigNumStacks;
    protected int currentStackNumReads;

    /**
     * Construct a new read stream analyzer, providing an ArtificialSingleSampleReadStream that will
     * serve as the basis for comparison after the analysis is complete.
     *
     * @param originalStream the original ArtificialSingleSampleReadStream upon which the stream
     *                       that will be fed to the analyzer is based
     */
    public ArtificialSingleSampleReadStreamAnalyzer( ArtificialSingleSampleReadStream originalStream ) {
        this.originalStream = originalStream;
        reset();
    }

    /**
     * Reset all read stream statistics collected by this analyzer to prepare for a fresh run
     */
    public void reset() {
        lastRead = null;
        totalReads = 0;
        allSamplesMatch = true;
        numContigs = 0;
        stacksPerContig = new ArrayList<Integer>();
        minReadsPerStack = null;
        maxReadsPerStack = null;
        minDistanceBetweenStacks = null;
        maxDistanceBetweenStacks = null;
        minReadLength = null;
        maxReadLength = null;
        numUnmappedReads = 0;
        currentContigNumStacks = 0;
        currentStackNumReads = 0;
    }

    /**
     * Collect statistics on the stream of reads passed in
     *
     * @param stream the stream of reads to analyze
     */
    public void analyze( Iterable<SAMRecord> stream ) {
        for ( SAMRecord read : stream ) {
            update(read);
        }
        finalizeStats();
    }

    /**
     * Validate the stream by checking whether our collected statistics match the properties of the
     * original stream. Throws a ReviewedStingException if the stream is invalid.
     *
     * Override this method if you want to check whether the stream has been transformed in some
     * way relative to the original stream.
     */
    public void validate() {
        // Special case: the stream was configured to produce no reads at all
        if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && originalStream.getNumUnmappedReads() == 0 ) {
            if ( totalReads != 0 ) {
                throw new ReviewedStingException("got reads from the stream, but the stream was configured to have 0 reads");
            }
            return; // no further validation needed for the 0-reads case
        }
        else if ( totalReads == 0 ) {
            throw new ReviewedStingException("got no reads from the stream, but the stream was configured to have > 0 reads");
        }

        if ( ! allSamplesMatch ) {
            throw new ReviewedStingException("some reads had the wrong sample");
        }

        if ( numContigs != originalStream.getNumContigs() ) {
            throw new ReviewedStingException("number of contigs not correct");
        }

        if ( stacksPerContig.size() != originalStream.getNumContigs() ) {
            throw new ReviewedStingException(String.format("bug in analyzer code: calculated sizes for %d contigs even though there were only %d contigs",
                                                           stacksPerContig.size(), originalStream.getNumContigs()));
        }

        for ( int contigStackCount : stacksPerContig ) {
            if ( contigStackCount != originalStream.getNumStacksPerContig() ) {
                throw new ReviewedStingException("contig had incorrect number of stacks");
            }
        }

        // NOTE(review): the comparisons below auto-unbox the observed Integer extrema;
        // they rely on the earlier checks guaranteeing at least one stack was observed
        // whenever stacks were expected — verify there is no path to an unboxing NPE.
        if ( originalStream.getNumStacksPerContig() > 0 ) {
            if ( minReadsPerStack < originalStream.getMinReadsPerStack() ) {
                throw new ReviewedStingException("stack had fewer than the minimum number of reads");
            }
            if ( maxReadsPerStack > originalStream.getMaxReadsPerStack() ) {
                throw new ReviewedStingException("stack had more than the maximum number of reads");
            }
        }
        else if ( minReadsPerStack != null || maxReadsPerStack != null ) {
            throw new ReviewedStingException("bug in analyzer code: reads per stack was calculated even though 0 stacks per contig was specified");
        }

        if ( originalStream.getNumStacksPerContig() > 1 ) {
            if ( minDistanceBetweenStacks < originalStream.getMinDistanceBetweenStacks() ) {
                throw new ReviewedStingException("stacks were separated by less than the minimum distance");
            }
            if ( maxDistanceBetweenStacks > originalStream.getMaxDistanceBetweenStacks() ) {
                throw new ReviewedStingException("stacks were separated by more than the maximum distance");
            }
        }
        else if ( minDistanceBetweenStacks != null || maxDistanceBetweenStacks != null ) {
            throw new ReviewedStingException("bug in analyzer code: distance between stacks was calculated even though numStacksPerContig was <= 1");
        }

        if ( minReadLength < originalStream.getMinReadLength() ) {
            throw new ReviewedStingException("read was shorter than the minimum allowed length");
        }
        if ( maxReadLength > originalStream.getMaxReadLength() ) {
            throw new ReviewedStingException("read was longer than the maximum allowed length");
        }

        if ( numUnmappedReads != originalStream.getNumUnmappedReads() ) {
            throw new ReviewedStingException(String.format("wrong number of unmapped reads: requested %d but saw %d",
                                                           originalStream.getNumUnmappedReads(), numUnmappedReads));
        }

        if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) &&
             numUnmappedReads != totalReads ) {
            throw new ReviewedStingException("stream should have consisted only of unmapped reads, but saw some mapped reads");
        }
    }

    /**
     * Incorporate a single read into the running statistics. Relies on the sorted-stream
     * invariant described in the class comment to detect contig and stack boundaries.
     *
     * @param read the next read from the stream
     */
    public void update( SAMRecord read ) {
        if ( read.getReadUnmappedFlag() ) {
            numUnmappedReads++;

            // First unmapped read after some mapped reads: flush the stats for the
            // final mapped contig. processContigChange() also increments numContigs
            // for a "new" contig, which doesn't exist here, so undo that increment.
            if ( numUnmappedReads == 1 && lastRead != null ) {
                processContigChange();
                numContigs--;
            }
        }
        else if ( lastRead == null ) {
            // Very first (mapped) read: open the first contig/stack
            numContigs = 1;
            currentContigNumStacks = 1;
            currentStackNumReads = 1;
        }
        else if ( ! read.getReferenceIndex().equals(lastRead.getReferenceIndex()) ) {
            processContigChange();
        }
        else if ( read.getAlignmentStart() != lastRead.getAlignmentStart() ) {
            processStackChangeWithinContig(read);
        }
        else {
            // Same contig, same alignment start: the current stack grows
            currentStackNumReads++;
        }

        // Read length and sample are tracked for every read, mapped or not
        updateReadLength(read.getReadLength());
        allSamplesMatch = allSamplesMatch && readHasCorrectSample(read);
        totalReads++;

        lastRead = read;
    }


    // Close out the contig that just ended: record its stack count and the size of
    // its final stack, then start counters for the new contig/stack.
    private void processContigChange() {
        numContigs++;

        stacksPerContig.add(currentContigNumStacks);
        currentContigNumStacks = 1;

        updateReadsPerStack(currentStackNumReads);
        currentStackNumReads = 1;
    }

    // Close out the stack that just ended within the current contig and record the
    // distance between the old and new stack start positions.
    private void processStackChangeWithinContig( SAMRecord read ) {
        currentContigNumStacks++;

        updateReadsPerStack(currentStackNumReads);
        currentStackNumReads = 1;

        updateDistanceBetweenStacks(read.getAlignmentStart() - lastRead.getAlignmentStart());
    }

    // Fold one completed stack's read count into the observed min/max.
    private void updateReadsPerStack( int stackReadCount ) {
        if ( minReadsPerStack == null || stackReadCount < minReadsPerStack ) {
            minReadsPerStack = stackReadCount;
        }
        if ( maxReadsPerStack == null || stackReadCount > maxReadsPerStack ) {
            maxReadsPerStack = stackReadCount;
        }
    }

    // Fold one observed stack-to-stack distance into the observed min/max.
    private void updateDistanceBetweenStacks( int stackDistance ) {
        if ( minDistanceBetweenStacks == null || stackDistance < minDistanceBetweenStacks ) {
            minDistanceBetweenStacks = stackDistance;
        }
        if ( maxDistanceBetweenStacks == null || stackDistance > maxDistanceBetweenStacks ) {
            maxDistanceBetweenStacks = stackDistance;
        }
    }

    // Fold one read's length into the observed min/max.
    private void updateReadLength( int readLength ) {
        if ( minReadLength == null || readLength < minReadLength ) {
            minReadLength = readLength;
        }
        if ( maxReadLength == null || readLength > maxReadLength ) {
            maxReadLength = readLength;
        }
    }

    // A read belongs to the right sample iff its "RG" attribute equals the
    // original stream's read group ID.
    private boolean readHasCorrectSample( SAMRecord read ) {
        return originalStream.getReadGroupID().equals(read.getAttribute("RG"));
    }

    /**
     * Flush the in-progress counters for the final contig/stack. Must be called once after
     * the last read has been fed to update() (analyze() does this automatically). If the
     * stream ended in unmapped reads, update() already flushed the last mapped contig.
     */
    public void finalizeStats() {
        if ( lastRead != null && ! lastRead.getReadUnmappedFlag() ) {
            stacksPerContig.add(currentContigNumStacks);
            updateReadsPerStack(currentStackNumReads);
        }
    }
}
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils.threading;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.ThreadInfo;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Creates threads that automatically monitor their efficiency via the parent ThreadEfficiencyMonitor
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/14/12
|
||||
* Time: 8:47 AM
|
||||
*/
|
||||
@Invariant({
|
||||
"activeThreads.size() <= nThreadsToCreate",
|
||||
"countDownLatch.getCount() <= nThreadsToCreate",
|
||||
"nThreadsCreated <= nThreadsToCreate"
|
||||
})
|
||||
public class EfficiencyMonitoringThreadFactory extends ThreadEfficiencyMonitor implements ThreadFactory {
|
||||
final int nThreadsToCreate;
|
||||
final List<Thread> activeThreads;
|
||||
|
||||
int nThreadsCreated = 0;
|
||||
|
||||
/**
|
||||
* Counts down the number of active activeThreads whose runtime info hasn't been incorporated into
|
||||
* times. Counts down from nThreadsToCreate to 0, at which point any code waiting
|
||||
* on the final times is freed to run.
|
||||
*/
|
||||
final CountDownLatch countDownLatch;
|
||||
|
||||
/**
|
||||
* Create a new factory generating threads whose runtime and contention
|
||||
* behavior is tracked in this factory.
|
||||
*
|
||||
* @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete
|
||||
*/
|
||||
public EfficiencyMonitoringThreadFactory(final int nThreadsToCreate) {
|
||||
super();
|
||||
if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate);
|
||||
|
||||
this.nThreadsToCreate = nThreadsToCreate;
|
||||
activeThreads = new ArrayList<Thread>(nThreadsToCreate);
|
||||
countDownLatch = new CountDownLatch(nThreadsToCreate);
|
||||
}
|
||||
|
||||
/**
|
||||
* How many threads have been created by this factory so far?
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getNThreadsCreated() {
|
||||
return nThreadsCreated;
|
||||
}
|
||||
|
||||
/**
|
||||
* Only useful for testing, so that we can wait for all of the threads in the factory to complete running
|
||||
*
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
protected void waitForAllThreadsToComplete() throws InterruptedException {
|
||||
countDownLatch.await();
|
||||
}
|
||||
|
||||
@Ensures({
|
||||
"activeThreads.size() <= old(activeThreads.size())",
|
||||
"! activeThreads.contains(thread)",
|
||||
"countDownLatch.getCount() <= old(countDownLatch.getCount())"
|
||||
})
|
||||
@Override
|
||||
public synchronized void threadIsDone(final Thread thread) {
|
||||
nThreadsAnalyzed++;
|
||||
|
||||
if ( DEBUG ) logger.warn(" Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
|
||||
|
||||
super.threadIsDone(thread);
|
||||
|
||||
// remove the thread from the list of active activeThreads, if it's in there, and decrement the countdown latch
|
||||
if ( activeThreads.remove(thread) ) {
|
||||
// one less thread is live for those blocking on all activeThreads to be complete
|
||||
countDownLatch.countDown();
|
||||
if ( DEBUG ) logger.warn(" -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new thread from this factory
|
||||
*
|
||||
* @param runnable
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
@Ensures({
|
||||
"activeThreads.size() > old(activeThreads.size())",
|
||||
"activeThreads.contains(result)",
|
||||
"nThreadsCreated == old(nThreadsCreated) + 1"
|
||||
})
|
||||
public synchronized Thread newThread(final Runnable runnable) {
|
||||
if ( activeThreads.size() >= nThreadsToCreate)
|
||||
throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate);
|
||||
|
||||
nThreadsCreated++;
|
||||
final Thread myThread = new TrackingThread(runnable);
|
||||
activeThreads.add(myThread);
|
||||
return myThread;
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete
|
||||
*/
|
||||
private class TrackingThread extends Thread {
|
||||
private TrackingThread(Runnable runnable) {
|
||||
super(runnable);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
super.run();
|
||||
threadIsDone(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.threading;

import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;
||||
/**
 * Thread factory that produces threads with a given name pattern.
 *
 * The pattern is a String.format pattern with a single integer placeholder
 * (e.g. "GATK-worker-%d"), filled with a monotonically increasing id.
 *
 * User: depristo
 * Date: 9/5/12
 * Time: 9:22 PM
 *
 */
public class NamedThreadFactory implements ThreadFactory {
    // Shared across all factory instances. AtomicInteger makes concurrent
    // newThread() calls safe: the previous bare "int id" with id++ was a data
    // race, since ThreadFactory implementations are routinely called from
    // multiple threads (e.g. by executors).
    static final AtomicInteger id = new AtomicInteger(0);
    final String format;

    /**
     * @param format String.format pattern with one integer slot for the thread id
     * @throws java.util.IllegalFormatException if format is not a valid pattern
     */
    public NamedThreadFactory(String format) {
        this.format = format;
        String.format(format, id.get()); // test the name: fail fast on a bad pattern
    }

    @Override
    public Thread newThread(Runnable r) {
        // getAndIncrement() atomically reserves a unique id for this thread's name
        return new Thread(r, String.format(format, id.getAndIncrement()));
    }
}
|
||||
|
|
@ -1,207 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.threading;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.ThreadInfo;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
import java.util.EnumMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Uses an MXBean to monitor thread efficiency
|
||||
*
|
||||
* Once the monitor is created, calls to threadIsDone() can be used to add information
|
||||
* about the efficiency of the provided thread to this monitor.
|
||||
*
|
||||
* Provides simple print() for displaying efficiency information to a logger
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/22/12
|
||||
* Time: 10:48 AM
|
||||
*/
|
||||
@Invariant({"nThreadsAnalyzed >= 0"})
|
||||
public class ThreadEfficiencyMonitor {
|
||||
protected static final boolean DEBUG = false;
|
||||
protected static Logger logger = Logger.getLogger(EfficiencyMonitoringThreadFactory.class);
|
||||
final EnumMap<State, Long> times = new EnumMap<State, Long>(State.class);
|
||||
|
||||
/**
|
||||
* The number of threads we've included in our efficiency monitoring
|
||||
*/
|
||||
int nThreadsAnalyzed = 0;
|
||||
|
||||
/**
|
||||
* The bean used to get the thread info about blocked and waiting times
|
||||
*/
|
||||
final ThreadMXBean bean;
|
||||
|
||||
public ThreadEfficiencyMonitor() {
|
||||
bean = ManagementFactory.getThreadMXBean();
|
||||
|
||||
// get the bean, and start tracking
|
||||
if ( bean.isThreadContentionMonitoringSupported() )
|
||||
bean.setThreadContentionMonitoringEnabled(true);
|
||||
else
|
||||
logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency");
|
||||
//bean.setThreadCpuTimeEnabled(true);
|
||||
|
||||
if ( bean.isThreadCpuTimeSupported() )
|
||||
bean.setThreadCpuTimeEnabled(true);
|
||||
else
|
||||
logger.warn("Thread CPU monitoring not supported, we cannot track GATK multi-threaded efficiency");
|
||||
|
||||
// initialize times to 0
|
||||
for ( final State state : State.values() )
|
||||
times.put(state, 0l);
|
||||
}
|
||||
|
||||
private static long nanoToMilli(final long timeInNano) {
|
||||
return TimeUnit.NANOSECONDS.toMillis(timeInNano);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the time spent in state across all threads created by this factory
|
||||
*
|
||||
* @param state to get information about
|
||||
* @return the time in milliseconds
|
||||
*/
|
||||
@Ensures({"result >= 0"})
|
||||
public synchronized long getStateTime(final State state) {
|
||||
return times.get(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total time spent in all states across all threads created by this factory
|
||||
*
|
||||
* @return the time in milliseconds
|
||||
*/
|
||||
@Ensures({"result >= 0"})
|
||||
public synchronized long getTotalTime() {
|
||||
long total = 0;
|
||||
for ( final long time : times.values() )
|
||||
total += time;
|
||||
return total;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the fraction of time spent in state across all threads created by this factory
|
||||
*
|
||||
* @return the percentage (0.0-100.0) of time spent in state over all state times of all threads
|
||||
*/
|
||||
@Ensures({"result >= 0.0", "result <= 100.0"})
|
||||
public synchronized double getStatePercent(final State state) {
|
||||
return (100.0 * getStateTime(state)) / Math.max(getTotalTime(), 1);
|
||||
}
|
||||
|
||||
public int getnThreadsAnalyzed() {
|
||||
return nThreadsAnalyzed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String toString() {
|
||||
final StringBuilder b = new StringBuilder();
|
||||
|
||||
b.append("total ").append(getTotalTime()).append(" ");
|
||||
for ( final State state : State.values() ) {
|
||||
b.append(state).append(" ").append(getStateTime(state)).append(" ");
|
||||
}
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Print usage information about threads from this factory to logger
|
||||
* with the INFO priority
|
||||
*
|
||||
* @param logger
|
||||
*/
|
||||
public synchronized void printUsageInformation(final Logger logger) {
|
||||
printUsageInformation(logger, Priority.INFO);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print usage information about threads from this factory to logger
|
||||
* with the provided priority
|
||||
*
|
||||
* @param logger
|
||||
*/
|
||||
public synchronized void printUsageInformation(final Logger logger, final Priority priority) {
|
||||
logger.debug("Number of threads monitored: " + getnThreadsAnalyzed());
|
||||
logger.debug("Total runtime " + new AutoFormattingTime(TimeUnit.MILLISECONDS.toSeconds(getTotalTime())));
|
||||
for ( final State state : State.values() ) {
|
||||
logger.debug(String.format("\tPercent of time spent %s is %.2f", state.getUserFriendlyName(), getStatePercent(state)));
|
||||
}
|
||||
logger.log(priority, String.format("CPU efficiency : %6.2f%% of time spent %s", getStatePercent(State.USER_CPU), State.USER_CPU.getUserFriendlyName()));
|
||||
logger.log(priority, String.format("Walker inefficiency : %6.2f%% of time spent %s", getStatePercent(State.BLOCKING), State.BLOCKING.getUserFriendlyName()));
|
||||
logger.log(priority, String.format("I/O inefficiency : %6.2f%% of time spent %s", getStatePercent(State.WAITING_FOR_IO), State.WAITING_FOR_IO.getUserFriendlyName()));
|
||||
logger.log(priority, String.format("Thread inefficiency : %6.2f%% of time spent %s", getStatePercent(State.WAITING), State.WAITING.getUserFriendlyName()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the information about completed thread that ran for runtime in milliseconds
|
||||
*
|
||||
* This method updates all of the key timing and tracking information in the factory so that
|
||||
* thread can be retired. After this call the factory shouldn't have a pointer to the thread any longer
|
||||
*
|
||||
* @param thread the thread whose information we are updating
|
||||
*/
|
||||
@Ensures({
|
||||
"getTotalTime() >= old(getTotalTime())"
|
||||
})
|
||||
public synchronized void threadIsDone(final Thread thread) {
|
||||
nThreadsAnalyzed++;
|
||||
|
||||
if ( DEBUG ) logger.warn("UpdateThreadInfo called");
|
||||
|
||||
final long threadID = thread.getId();
|
||||
final ThreadInfo info = bean.getThreadInfo(thread.getId());
|
||||
final long totalTimeNano = bean.getThreadCpuTime(threadID);
|
||||
final long userTimeNano = bean.getThreadUserTime(threadID);
|
||||
final long systemTimeNano = totalTimeNano - userTimeNano;
|
||||
final long userTimeInMilliseconds = nanoToMilli(userTimeNano);
|
||||
final long systemTimeInMilliseconds = nanoToMilli(systemTimeNano);
|
||||
|
||||
if ( info != null ) {
|
||||
if ( DEBUG ) logger.warn("Updating thread with user runtime " + userTimeInMilliseconds + " and system runtime " + systemTimeInMilliseconds + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime());
|
||||
incTimes(State.BLOCKING, info.getBlockedTime());
|
||||
incTimes(State.WAITING, info.getWaitedTime());
|
||||
incTimes(State.USER_CPU, userTimeInMilliseconds);
|
||||
incTimes(State.WAITING_FOR_IO, systemTimeInMilliseconds);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that increments the times counter by by for state
|
||||
*
|
||||
* @param state
|
||||
* @param by
|
||||
*/
|
||||
@Requires({"state != null", "by >= 0"})
|
||||
@Ensures("getTotalTime() == old(getTotalTime()) + by")
|
||||
private synchronized void incTimes(final State state, final long by) {
|
||||
times.put(state, times.get(state) + by);
|
||||
}
|
||||
|
||||
/**
 * The broad categories of activity that a worker thread's time is attributed to.
 */
public enum State {
    BLOCKING("blocking on synchronized data structures"),
    WAITING("waiting on some other thread"),
    USER_CPU("doing productive CPU work"),
    WAITING_FOR_IO("waiting for I/O");

    // Human-readable description used when reporting efficiency summaries
    private final String userFriendlyName;

    private State(final String description) {
        this.userFriendlyName = description;
    }

    /**
     * @return a human-readable description of this state, suitable for log messages
     */
    public String getUserFriendlyName() {
        return userFriendlyName;
    }
}
|
||||
}
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 8/31/12
|
||||
* Time: 11:03 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class InvalidArgumentIntegrationTest extends WalkerTest {
|
||||
private static final String callsB36 = BaseTest.validationDataLocation + "lowpass.N3.chr1.raw.vcf";
|
||||
|
||||
private WalkerTest.WalkerTestSpec baseTest(String flag, String arg, Class exeption) {
|
||||
return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variant:vcf "
|
||||
+ callsB36 + " -F POS,CHROM -R "
|
||||
+ b36KGReference + " -o %s " + flag + " " + arg,
|
||||
1, exeption);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnknownReadFilter() {
|
||||
executeTest("UnknownReadFilter",baseTest("-rf","TestUnknownReadFilter", UserException.MalformedReadFilterException.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMalformedWalkerArgs() {
|
||||
executeTest("MalformedWalkerArgs",
|
||||
new WalkerTest.WalkerTestSpec("-T UnknownWalkerName -M 10 --variant:vcf "
|
||||
+ callsB36 + " -F POS,CHROM -R "
|
||||
+ b36KGReference + " -o %s ",
|
||||
1, UserException.MalformedWalkerArgumentsException.class));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,163 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class LevelingDownsamplerUnitTest extends BaseTest {

    /**
     * A single test case: numStacks uniform stacks of stackSize items each, to be leveled
     * down by a LevelingDownsampler with total target size targetSize. The expected
     * per-stack size after downsampling is precomputed in the constructor.
     */
    private static class LevelingDownsamplerUniformStacksTest extends TestDataProvider {
        // Which List implementation backs each stack (the downsampler should handle both)
        public enum DataStructure { LINKED_LIST, ARRAY_LIST }

        int targetSize;              // total number of items the downsampler levels down to
        int numStacks;               // number of stacks submitted
        int stackSize;               // items per stack (uniform across stacks)
        DataStructure dataStructure; // backing list type for each stack
        int expectedSize;            // expected per-stack size after leveling

        public LevelingDownsamplerUniformStacksTest( int targetSize, int numStacks, int stackSize, DataStructure dataStructure ) {
            super(LevelingDownsamplerUniformStacksTest.class);

            this.targetSize = targetSize;
            this.numStacks = numStacks;
            this.stackSize = stackSize;
            this.dataStructure = dataStructure;
            expectedSize = calculateExpectedDownsampledStackSize();

            setName(String.format("%s: targetSize=%d numStacks=%d stackSize=%d dataStructure=%s expectedSize=%d",
                    getClass().getSimpleName(), targetSize, numStacks, stackSize, dataStructure, expectedSize));
        }

        /**
         * Build the stacks to submit to the downsampler: numStacks lists of stackSize
         * placeholder Objects each, using the configured backing data structure.
         */
        public Collection<List<Object>> createStacks() {
            Collection<List<Object>> stacks = new ArrayList<List<Object>>();

            for ( int i = 1; i <= numStacks; i++ ) {
                List<Object> stack = dataStructure == DataStructure.LINKED_LIST ? new LinkedList<Object>() : new ArrayList<Object>();

                for ( int j = 1; j <= stackSize; j++ ) {
                    stack.add(new Object());
                }

                stacks.add(stack);
            }

            return stacks;
        }

        /**
         * Mirror of the leveling algorithm's arithmetic: removals are spread evenly across
         * stacks, a stack is never reduced below 1 item, and nothing is removed when the
         * total item count is already at or below targetSize.
         */
        private int calculateExpectedDownsampledStackSize() {
            int numItemsToRemove = numStacks * stackSize - targetSize;

            if ( numStacks == 0 ) {
                return 0;
            }
            else if ( numItemsToRemove <= 0 ) {
                return stackSize;
            }

            return Math.max(1, stackSize - (numItemsToRemove / numStacks));
        }
    }

    /** Sweep target sizes, stack counts (including 0), stack sizes, and both backing list types. */
    @DataProvider(name = "UniformStacksDataProvider")
    public Object[][] createUniformStacksTestData() {
        for ( int targetSize = 1; targetSize <= 10000; targetSize *= 10 ) {
            for ( int numStacks = 0; numStacks <= 10; numStacks++ ) {
                for ( int stackSize = 1; stackSize <= 1000; stackSize *= 10 ) {
                    for ( LevelingDownsamplerUniformStacksTest.DataStructure dataStructure : LevelingDownsamplerUniformStacksTest.DataStructure.values() ) {
                        new LevelingDownsamplerUniformStacksTest(targetSize, numStacks, stackSize, dataStructure);
                    }
                }
            }
        }

        return LevelingDownsamplerUniformStacksTest.getTests(LevelingDownsamplerUniformStacksTest.class);
    }

    /**
     * Drive a LevelingDownsampler through the full submit / signalEndOfInput / consume
     * lifecycle and verify its state transitions, stack sizes, and discard accounting.
     */
    @Test( dataProvider = "UniformStacksDataProvider" )
    public void testLevelingDownsamplerWithUniformStacks( LevelingDownsamplerUniformStacksTest test ) {
        logger.warn("Running test: " + test);

        GenomeAnalysisEngine.resetRandomGenerator();

        Downsampler<List<Object>> downsampler = new LevelingDownsampler<List<Object>, Object>(test.targetSize);

        downsampler.submit(test.createStacks());

        // Before end-of-input is signaled, submitted stacks should be pending, not finalized
        if ( test.numStacks > 0 ) {
            Assert.assertFalse(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() == null);
            Assert.assertTrue(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() != null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }

        downsampler.signalEndOfInput();

        // After end-of-input, all pending stacks should have been leveled and finalized
        if ( test.numStacks > 0 ) {
            Assert.assertTrue(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() != null);
            Assert.assertFalse(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() == null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }

        List<List<Object>> downsampledStacks = downsampler.consumeFinalizedItems();
        // Consuming the finalized items must leave the downsampler completely empty
        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);

        Assert.assertEquals(downsampledStacks.size(), test.numStacks);

        // Leveling distributes removals evenly: each stack should be within 1 of the expected size
        int totalRemainingItems = 0;
        for ( List<Object> stack : downsampledStacks ) {
            Assert.assertTrue(Math.abs(stack.size() - test.expectedSize) <= 1);
            totalRemainingItems += stack.size();
        }

        // The downsampler's discard count must agree with what actually disappeared
        int numItemsReportedDiscarded = downsampler.getNumberOfDiscardedItems();
        int numItemsActuallyDiscarded = test.numStacks * test.stackSize - totalRemainingItems;

        Assert.assertEquals(numItemsReportedDiscarded, numItemsActuallyDiscarded);

        // reset() must clear the discard statistics
        downsampler.reset();
        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);

        Assert.assertTrue(totalRemainingItems <= Math.max(test.targetSize, test.numStacks));
    }
}
|
||||
|
|
@ -1,298 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.VerifyingSamIterator;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialMultiSampleReadStream;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class PerSampleDownsamplingReadsIteratorUnitTest extends BaseTest {

    /**
     * One randomized test case: builds an artificial multi-sample read stream with the
     * configured density / stack depth / stacks-per-contig characteristics, runs it through
     * a PerSampleDownsamplingReadsIterator, and validates each sample's downsampled stream.
     */
    private static class PerSampleDownsamplingReadsIteratorTest extends TestDataProvider {

        // TODO: tests should distinguish between variance across samples and variance within a sample

        // How far apart consecutive read stacks are placed on a contig
        private enum StreamDensity {
            SPARSE (MAX_READ_LENGTH, MAX_READ_LENGTH * 2),
            DENSE (1, MIN_READ_LENGTH),
            MIXED (1, MAX_READ_LENGTH * 2),
            UNIFORM_DENSE (1, 1),
            UNIFORM_SPARSE (MAX_READ_LENGTH * 2, MAX_READ_LENGTH * 2);

            int minDistanceBetweenStacks;
            int maxDistanceBetweenStacks;

            StreamDensity( int minDistanceBetweenStacks, int maxDistanceBetweenStacks ) {
                this.minDistanceBetweenStacks = minDistanceBetweenStacks;
                this.maxDistanceBetweenStacks = maxDistanceBetweenStacks;
            }

            public String toString() {
                return String.format("StreamDensity:%d-%d", minDistanceBetweenStacks, maxDistanceBetweenStacks);
            }
        }

        // How many reads each stack contains (i.e., per-position coverage)
        private enum StreamStackDepth {
            NON_UNIFORM_LOW (1, 5),
            NON_UNIFORM_HIGH (15, 20),
            NON_UNIFORM_MIXED (1, 20),
            UNIFORM_SINGLE (1, 1),
            UNIFORM_LOW (2, 2),
            UNIFORM_HIGH (20, 20),
            UNIFORM_MEDIUM (10, 10); // should set target coverage to this value for testing

            int minReadsPerStack;
            int maxReadsPerStack;

            StreamStackDepth( int minReadsPerStack, int maxReadsPerStack ) {
                this.minReadsPerStack = minReadsPerStack;
                this.maxReadsPerStack = maxReadsPerStack;
            }

            public boolean isUniform() {
                return minReadsPerStack == maxReadsPerStack;
            }

            public String toString() {
                return String.format("StreamStackDepth:%d-%d", minReadsPerStack, maxReadsPerStack);
            }
        }

        // How many read stacks each contig contains
        private enum StreamStacksPerContig {
            UNIFORM(20, 20),
            NON_UNIFORM(1, 30);

            int minStacksPerContig;
            int maxStacksPerContig;

            StreamStacksPerContig( int minStacksPerContig, int maxStacksPerContig ) {
                this.minStacksPerContig = minStacksPerContig;
                this.maxStacksPerContig = maxStacksPerContig;
            }

            public boolean isUniform() {
                return minStacksPerContig == maxStacksPerContig;
            }

            public String toString() {
                return String.format("StreamStacksPerContig:%d-%d", minStacksPerContig, maxStacksPerContig);
            }
        }

        // Not interested in testing multiple ranges for the read lengths, as none of our current
        // downsamplers are affected by read length
        private static final int MIN_READ_LENGTH = 50;
        private static final int MAX_READ_LENGTH = 150;

        private ReadsDownsamplerFactory<SAMRecord> downsamplerFactory;  // creates one downsampler per sample
        private int targetCoverage;        // coverage target handed to each downsampler
        private int numSamples;            // number of read groups / samples in the merged stream
        private int minContigs;            // per-sample contig count is drawn from [minContigs, maxContigs]
        private int maxContigs;
        private StreamDensity streamDensity;
        private StreamStackDepth streamStackDepth;
        private StreamStacksPerContig streamStacksPerContig;
        private double unmappedReadsFraction;  // probability that a sample gets unmapped reads at all
        private int unmappedReadsCount;        // number of unmapped reads a sample gets, when it gets any
        private boolean verifySortedness;      // wrap the iterator in a sort-order verifier

        private ArtificialMultiSampleReadStream mergedReadStream;
        private Map<String, ArtificialSingleSampleReadStream> perSampleArtificialReadStreams;
        private Map<String, ArtificialSingleSampleReadStreamAnalyzer> perSampleStreamAnalyzers;
        private SAMFileHeader header;

        public PerSampleDownsamplingReadsIteratorTest( ReadsDownsamplerFactory<SAMRecord> downsamplerFactory,
                                                       int targetCoverage,
                                                       int numSamples,
                                                       int minContigs,
                                                       int maxContigs,
                                                       StreamDensity streamDensity,
                                                       StreamStackDepth streamStackDepth,
                                                       StreamStacksPerContig streamStacksPerContig,
                                                       double unmappedReadsFraction,
                                                       int unmappedReadsCount,
                                                       boolean verifySortedness ) {
            super(PerSampleDownsamplingReadsIteratorTest.class);

            this.downsamplerFactory = downsamplerFactory;
            this.targetCoverage = targetCoverage;
            this.numSamples = numSamples;
            this.minContigs = minContigs;
            this.maxContigs = maxContigs;
            this.streamDensity = streamDensity;
            this.streamStackDepth = streamStackDepth;
            this.streamStacksPerContig = streamStacksPerContig;
            this.unmappedReadsFraction = unmappedReadsFraction;
            this.unmappedReadsCount = unmappedReadsCount;
            this.verifySortedness = verifySortedness;

            header = createHeader();
            createReadStreams();

            setName(String.format("%s: targetCoverage=%d numSamples=%d minContigs=%d maxContigs=%d %s %s %s unmappedReadsFraction=%.2f unmappedReadsCount=%d verifySortedness=%b",
                    getClass().getSimpleName(), targetCoverage, numSamples, minContigs, maxContigs, streamDensity, streamStackDepth, streamStacksPerContig, unmappedReadsFraction, unmappedReadsCount, verifySortedness));
        }

        /**
         * Create a SAM header with one read group per sample, on contigs sized generously
         * enough to hold the maximum possible stack layout.
         */
        private SAMFileHeader createHeader() {
            SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(maxContigs, 1, (streamDensity.maxDistanceBetweenStacks + MAX_READ_LENGTH) * streamStacksPerContig.maxStacksPerContig + 100000);
            List<String> readGroups = new ArrayList<String>(numSamples);
            List<String> sampleNames = new ArrayList<String>(numSamples);

            for ( int i = 0; i < numSamples; i++ ) {
                readGroups.add("ReadGroup" + i);
                sampleNames.add("Sample" + i);
            }

            return ArtificialSAMUtils.createEnumeratedReadGroups(header, readGroups, sampleNames);
        }

        /**
         * Build one randomized artificial read stream (plus a matching stream analyzer) per
         * sample within the configured parameter ranges, then merge them into one stream.
         */
        private void createReadStreams() {
            perSampleArtificialReadStreams = new HashMap<String, ArtificialSingleSampleReadStream>(numSamples);
            perSampleStreamAnalyzers = new HashMap<String, ArtificialSingleSampleReadStreamAnalyzer>(numSamples);

            for (SAMReadGroupRecord readGroup : header.getReadGroups() ) {
                String readGroupID = readGroup.getReadGroupId();
                String sampleName = readGroup.getSample();

                int thisSampleNumContigs = MathUtils.randomIntegerInRange(minContigs, maxContigs);
                int thisSampleStacksPerContig = MathUtils.randomIntegerInRange(streamStacksPerContig.minStacksPerContig, streamStacksPerContig.maxStacksPerContig);

                // With probability unmappedReadsFraction this sample gets unmappedReadsCount unmapped reads
                int thisSampleNumUnmappedReads = GenomeAnalysisEngine.getRandomGenerator().nextDouble() < unmappedReadsFraction ? unmappedReadsCount : 0;

                ArtificialSingleSampleReadStream thisSampleStream = new ArtificialSingleSampleReadStream(header,
                        readGroupID,
                        thisSampleNumContigs,
                        thisSampleStacksPerContig,
                        streamStackDepth.minReadsPerStack,
                        streamStackDepth.maxReadsPerStack,
                        streamDensity.minDistanceBetweenStacks,
                        streamDensity.maxDistanceBetweenStacks,
                        MIN_READ_LENGTH,
                        MAX_READ_LENGTH,
                        thisSampleNumUnmappedReads);
                perSampleArtificialReadStreams.put(sampleName, thisSampleStream);
                perSampleStreamAnalyzers.put(sampleName, new PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer(thisSampleStream, targetCoverage));
            }

            mergedReadStream = new ArtificialMultiSampleReadStream(perSampleArtificialReadStreams.values());
        }

        /**
         * Push the merged stream through the per-sample downsampling iterator, routing each
         * emitted read to its sample's analyzer, then validate every sample's stream.
         */
        public void run() {
            StingSAMIterator downsamplingIter = new PerSampleDownsamplingReadsIterator(mergedReadStream.getStingSAMIterator(), downsamplerFactory);

            if ( verifySortedness ) {
                downsamplingIter = new VerifyingSamIterator(downsamplingIter);
            }

            while ( downsamplingIter.hasNext() ) {
                SAMRecord read = downsamplingIter.next();
                String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;

                ArtificialSingleSampleReadStreamAnalyzer analyzer = perSampleStreamAnalyzers.get(sampleName);
                if ( analyzer != null ) {
                    analyzer.update(read);
                }
                else {
                    throw new ReviewedStingException("bug: stream analyzer for sample " + sampleName + " not found");
                }
            }

            for ( Map.Entry<String, ArtificialSingleSampleReadStreamAnalyzer> analyzerEntry : perSampleStreamAnalyzers.entrySet() ) {
                ArtificialSingleSampleReadStreamAnalyzer analyzer = analyzerEntry.getValue();
                analyzer.finalizeStats();

                // Validate the downsampled read stream for each sample individually
                analyzer.validate();
            }

            // Allow memory used by this test to be reclaimed:
            mergedReadStream = null;
            perSampleArtificialReadStreams = null;
            perSampleStreamAnalyzers = null;
        }
    }

    /** Cross product of sample counts, contig counts, densities, depths, and unmapped-read settings. */
    @DataProvider(name = "PerSampleDownsamplingReadsIteratorTestDataProvider")
    public Object[][] createPerSampleDownsamplingReadsIteratorTests() {

        GenomeAnalysisEngine.resetRandomGenerator();

        // Some values don't vary across tests
        int targetCoverage = PerSampleDownsamplingReadsIteratorTest.StreamStackDepth.UNIFORM_MEDIUM.minReadsPerStack;
        ReadsDownsamplerFactory<SAMRecord> downsamplerFactory = new SimplePositionalDownsamplerFactory<SAMRecord>(targetCoverage);
        int maxContigs = 3;
        boolean verifySortedness = true;

        for ( int numSamples : Arrays.asList(1, 2, 10) ) {
            for ( int minContigs = 1; minContigs <= maxContigs; minContigs++ ) {
                for ( PerSampleDownsamplingReadsIteratorTest.StreamDensity streamDensity : PerSampleDownsamplingReadsIteratorTest.StreamDensity.values() ) {
                    for ( PerSampleDownsamplingReadsIteratorTest.StreamStackDepth streamStackDepth : PerSampleDownsamplingReadsIteratorTest.StreamStackDepth.values() ) {
                        for (PerSampleDownsamplingReadsIteratorTest.StreamStacksPerContig streamStacksPerContig : PerSampleDownsamplingReadsIteratorTest.StreamStacksPerContig.values() ) {
                            for ( double unmappedReadsFraction : Arrays.asList(0.0, 1.0, 0.5) ) {
                                for ( int unmappedReadsCount : Arrays.asList(1, 50) ) {
                                    new PerSampleDownsamplingReadsIteratorTest(downsamplerFactory,
                                            targetCoverage,
                                            numSamples,
                                            minContigs,
                                            maxContigs,
                                            streamDensity,
                                            streamStackDepth,
                                            streamStacksPerContig,
                                            unmappedReadsFraction,
                                            unmappedReadsCount,
                                            verifySortedness);
                                }
                            }
                        }
                    }
                }
            }
        }

        return PerSampleDownsamplingReadsIteratorTest.getTests(PerSampleDownsamplingReadsIteratorTest.class);
    }

    @Test(dataProvider = "PerSampleDownsamplingReadsIteratorTestDataProvider")
    public void runPerSampleDownsamplingReadsIteratorTest( PerSampleDownsamplingReadsIteratorTest test ) {
        logger.warn("Running test: " + test);

        // Reset the RNG so each test case sees a deterministic random sequence
        GenomeAnalysisEngine.resetRandomGenerator();
        test.run();
    }
}
|
||||
|
|
@ -1,126 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer;
|
||||
|
||||
/**
|
||||
* Class for analyzing an artificial read stream that has been positionally downsampled, and verifying
|
||||
* that the downsampling was done correctly without changing the stream in unexpected ways.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer extends ArtificialSingleSampleReadStreamAnalyzer {
|
||||
private int targetCoverage;
|
||||
|
||||
public PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer( ArtificialSingleSampleReadStream originalStream, int targetCoverage ) {
|
||||
super(originalStream);
|
||||
this.targetCoverage = targetCoverage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Overridden validate() method that checks for the effects of positional downsampling in addition to checking
|
||||
* for whether the original properties of the stream not affected by downsampling have been preserved
|
||||
*/
|
||||
@Override
|
||||
public void validate() {
|
||||
if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && originalStream.getNumUnmappedReads() == 0 ) {
|
||||
if ( totalReads != 0 ) {
|
||||
throw new ReviewedStingException("got reads from the stream, but the stream was configured to have 0 reads");
|
||||
}
|
||||
return; // no further validation needed for the 0-reads case
|
||||
}
|
||||
else if ( totalReads == 0 ) {
|
||||
throw new ReviewedStingException("got no reads from the stream, but the stream was configured to have > 0 reads");
|
||||
}
|
||||
|
||||
if ( ! allSamplesMatch ) {
|
||||
throw new ReviewedStingException("some reads had the wrong sample");
|
||||
}
|
||||
|
||||
if ( numContigs != originalStream.getNumContigs() ) {
|
||||
throw new ReviewedStingException("number of contigs not correct");
|
||||
}
|
||||
|
||||
if ( stacksPerContig.size() != originalStream.getNumContigs() ) {
|
||||
throw new ReviewedStingException(String.format("bug in analyzer code: calculated sizes for %d contigs even though there were only %d contigs",
|
||||
stacksPerContig.size(), originalStream.getNumContigs()));
|
||||
}
|
||||
|
||||
for ( int contigStackCount : stacksPerContig ) {
|
||||
if ( contigStackCount != originalStream.getNumStacksPerContig() ) {
|
||||
throw new ReviewedStingException("contig had incorrect number of stacks");
|
||||
}
|
||||
}
|
||||
|
||||
if ( originalStream.getNumStacksPerContig() > 0 ) {
|
||||
|
||||
// Check for the effects of positional downsampling:
|
||||
int stackMinimumAfterDownsampling = Math.min(targetCoverage, originalStream.getMinReadsPerStack());
|
||||
int stackMaximumAfterDownsampling = targetCoverage;
|
||||
|
||||
if ( minReadsPerStack < stackMinimumAfterDownsampling ) {
|
||||
throw new ReviewedStingException("stack had fewer than the minimum number of reads after downsampling");
|
||||
}
|
||||
if ( maxReadsPerStack > stackMaximumAfterDownsampling ) {
|
||||
throw new ReviewedStingException("stack had more than the maximum number of reads after downsampling");
|
||||
}
|
||||
}
|
||||
else if ( minReadsPerStack != null || maxReadsPerStack != null ) {
|
||||
throw new ReviewedStingException("bug in analyzer code: reads per stack was calculated even though 0 stacks per contig was specified");
|
||||
}
|
||||
|
||||
if ( originalStream.getNumStacksPerContig() > 1 ) {
|
||||
if ( minDistanceBetweenStacks < originalStream.getMinDistanceBetweenStacks() ) {
|
||||
throw new ReviewedStingException("stacks were separated by less than the minimum distance");
|
||||
}
|
||||
if ( maxDistanceBetweenStacks > originalStream.getMaxDistanceBetweenStacks() ) {
|
||||
throw new ReviewedStingException("stacks were separated by more than the maximum distance");
|
||||
}
|
||||
}
|
||||
else if ( minDistanceBetweenStacks != null || maxDistanceBetweenStacks != null ) {
|
||||
throw new ReviewedStingException("bug in analyzer code: distance between stacks was calculated even though numStacksPerContig was <= 1");
|
||||
}
|
||||
|
||||
if ( minReadLength < originalStream.getMinReadLength() ) {
|
||||
throw new ReviewedStingException("read was shorter than the minimum allowed length");
|
||||
}
|
||||
if ( maxReadLength > originalStream.getMaxReadLength() ) {
|
||||
throw new ReviewedStingException("read was longer than the maximum allowed length");
|
||||
}
|
||||
|
||||
if ( numUnmappedReads != originalStream.getNumUnmappedReads() ) {
|
||||
throw new ReviewedStingException(String.format("wrong number of unmapped reads: requested %d but saw %d",
|
||||
originalStream.getNumUnmappedReads(), numUnmappedReads));
|
||||
}
|
||||
|
||||
if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) &&
|
||||
numUnmappedReads != totalReads ) {
|
||||
throw new ReviewedStingException("stream should have consisted only of unmapped reads, but saw some mapped reads");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,129 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
public class ReservoirDownsamplerUnitTest extends BaseTest {

    /**
     * Test fixture pairing a reservoir capacity with a read count, pre-computing
     * how many reads should survive downsampling and how many should be discarded.
     */
    private static class ReservoirDownsamplerTest extends TestDataProvider {
        int reservoirSize;                      // capacity of the reservoir under test
        int totalReads;                         // number of identical reads submitted
        int expectedNumReadsAfterDownsampling;  // min(reservoirSize, totalReads)
        int expectedNumDiscardedItems;          // reads that cannot fit in the reservoir

        public ReservoirDownsamplerTest( int reservoirSize, int totalReads ) {
            super(ReservoirDownsamplerTest.class);

            this.reservoirSize = reservoirSize;
            this.totalReads = totalReads;

            // A reservoir keeps at most reservoirSize items; everything beyond that is discarded.
            expectedNumReadsAfterDownsampling = Math.min(reservoirSize, totalReads);
            expectedNumDiscardedItems = totalReads <= reservoirSize ? 0 : totalReads - reservoirSize;

            setName(String.format("%s: reservoirSize=%d totalReads=%d expectedNumReadsAfterDownsampling=%d expectedNumDiscardedItems=%d",
                    getClass().getSimpleName(), reservoirSize, totalReads, expectedNumReadsAfterDownsampling, expectedNumDiscardedItems));
        }

        /**
         * Builds totalReads identical artificial reads on a single artificial contig.
         */
        public Collection<SAMRecord> createReads() {
            Collection<SAMRecord> reads = new ArrayList<SAMRecord>(totalReads);

            SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
            reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(totalReads, header, "foo", 0, 1, 100));

            return reads;
        }
    }

    /**
     * Cross product of reservoir sizes and read counts (powers of 10 from 1 to 10000),
     * plus the zero-reads case for each reservoir size.
     */
    @DataProvider(name = "ReservoirDownsamplerTestDataProvider")
    public Object[][] createReservoirDownsamplerTestData() {
        for ( int reservoirSize = 1; reservoirSize <= 10000; reservoirSize *= 10 ) {
            new ReservoirDownsamplerTest(reservoirSize, 0);
            for ( int totalReads = 1; totalReads <= 10000; totalReads *= 10 ) {
                new ReservoirDownsamplerTest(reservoirSize, totalReads);
            }
        }

        return ReservoirDownsamplerTest.getTests(ReservoirDownsamplerTest.class);
    }

    /**
     * Submits the reads to a ReservoirDownsampler and verifies the finalized/pending
     * state before and after signalEndOfInput(), the surviving and discarded read
     * counts, and that reset() clears the discard counter.
     */
    @Test(dataProvider = "ReservoirDownsamplerTestDataProvider")
    public void testReservoirDownsampler( ReservoirDownsamplerTest test ) {
        logger.warn("Running test: " + test);

        // Reset the shared random generator so reservoir sampling is reproducible
        GenomeAnalysisEngine.resetRandomGenerator();

        ReadsDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(test.reservoirSize);

        downsampler.submit(test.createReads());

        // Reservoir items are considered finalized as soon as they are submitted;
        // nothing should ever be in the pending state.
        if ( test.totalReads > 0 ) {
            Assert.assertTrue(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() != null);
            Assert.assertFalse(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() == null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }

        downsampler.signalEndOfInput();

        // End-of-input must not change the finalized/pending state for this downsampler
        if ( test.totalReads > 0 ) {
            Assert.assertTrue(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() != null);
            Assert.assertFalse(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() == null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }

        // Consuming drains the downsampler completely
        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);

        Assert.assertEquals(downsampledReads.size(), test.expectedNumReadsAfterDownsampling);

        // Discard accounting must agree with the actual number of reads removed
        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), test.expectedNumDiscardedItems);
        Assert.assertEquals(test.totalReads - downsampledReads.size(), test.expectedNumDiscardedItems);

        downsampler.reset();
        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
    }
}
|
||||
|
|
@ -1,330 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class SimplePositionalDownsamplerUnitTest extends BaseTest {

    /**
     * Test fixture describing stacks of duplicate reads at successive positions
     * (optionally split across two contigs) and the stack sizes expected after
     * positional downsampling to a target coverage.
     */
    private static class SimplePositionalDownsamplerTest extends TestDataProvider {
        int targetCoverage;                // per-position coverage cap
        int numStacks;                     // number of distinct alignment-start stacks
        List<Integer> stackSizes;          // reads per stack, in positional order
        List<Integer> expectedStackSizes;  // min(stackSize, targetCoverage) per stack
        boolean multipleContigs;           // whether the stacks span two contigs
        int totalInitialReads;             // sum of stackSizes

        public SimplePositionalDownsamplerTest( int targetCoverage, List<Integer> stackSizes, boolean multipleContigs ) {
            super(SimplePositionalDownsamplerTest.class);

            this.targetCoverage = targetCoverage;
            this.numStacks = stackSizes.size();
            this.stackSizes = stackSizes;
            this.multipleContigs = multipleContigs;

            calculateExpectedDownsampledStackSizes();

            totalInitialReads = 0;
            for ( Integer stackSize : stackSizes ) {
                totalInitialReads += stackSize;
            }

            setName(String.format("%s: targetCoverage=%d numStacks=%d stackSizes=%s expectedSizes=%s multipleContigs=%b",
                    getClass().getSimpleName(), targetCoverage, numStacks, stackSizes, expectedStackSizes, multipleContigs));
        }

        /**
         * Creates the read stacks, each 10bp apart; when multipleContigs is set,
         * the second half of the stacks moves to the second contig.
         */
        public Collection<SAMRecord> createReads() {
            Collection<SAMRecord> reads = new ArrayList<SAMRecord>();
            SAMFileHeader header = multipleContigs ?
                    ArtificialSAMUtils.createArtificialSamHeader(2, 1, 1000000) :
                    ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);

            int refIndex = 0;
            int alignmentStart = 1;
            int readLength = 100;

            for ( int i = 0; i < numStacks; i++ ) {
                // Switch to the second contig halfway through when requested
                if ( multipleContigs && refIndex == 0 && i >= numStacks / 2 ) {
                    refIndex++;
                }

                reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(stackSizes.get(i), header, "foo",
                        refIndex, alignmentStart, readLength));

                alignmentStart += 10;
            }

            return reads;
        }

        /** Each stack is capped at targetCoverage reads after downsampling. */
        private void calculateExpectedDownsampledStackSizes() {
            expectedStackSizes = new ArrayList<Integer>(numStacks);

            for ( Integer stackSize : stackSizes ) {
                int expectedSize = targetCoverage >= stackSize ? stackSize : targetCoverage;
                expectedStackSizes.add(expectedSize);
            }
        }
    }

    /**
     * Cross product of target coverages (powers of 10), contig counts (1 or 2),
     * and number of stacks (0..10) with randomized stack sizes.
     */
    @DataProvider(name = "SimplePositionalDownsamplerTestDataProvider")
    public Object[][] createSimplePositionalDownsamplerTestData() {
        GenomeAnalysisEngine.resetRandomGenerator();

        for ( int targetCoverage = 1; targetCoverage <= 10000; targetCoverage *= 10 ) {
            for ( int contigs = 1; contigs <= 2; contigs++ ) {
                for ( int numStacks = 0; numStacks <= 10; numStacks++ ) {
                    List<Integer> stackSizes = new ArrayList<Integer>(numStacks);
                    for ( int stack = 1; stack <= numStacks; stack++ ) {
                        stackSizes.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(targetCoverage * 2) + 1);
                    }
                    new SimplePositionalDownsamplerTest(targetCoverage, stackSizes, contigs > 1);
                }
            }
        }

        return SimplePositionalDownsamplerTest.getTests(SimplePositionalDownsamplerTest.class);
    }

    /**
     * Submits the stacks, verifies finalized/pending state transitions around
     * signalEndOfInput(), then checks per-stack downsampled sizes, output
     * sortedness, discard accounting, and reset() behavior.
     */
    @Test( dataProvider = "SimplePositionalDownsamplerTestDataProvider" )
    public void testSimplePostionalDownsampler( SimplePositionalDownsamplerTest test ) {
        logger.warn("Running test: " + test);

        GenomeAnalysisEngine.resetRandomGenerator();

        ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(test.targetCoverage);

        downsampler.submit(test.createReads());

        // Only the last (still-open) position stack stays pending; earlier
        // positions are finalized as soon as a later position is seen.
        if ( test.numStacks > 1 ) {
            Assert.assertTrue(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() != null);
            Assert.assertTrue(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() != null);
        }
        else if ( test.numStacks == 1 ) {
            Assert.assertFalse(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() == null);
            Assert.assertTrue(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() != null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }

        downsampler.signalEndOfInput();

        // After end-of-input everything must be finalized
        if ( test.numStacks > 0 ) {
            Assert.assertTrue(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() != null);
            Assert.assertFalse(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() == null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }

        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);

        if ( test.numStacks == 0 ) {
            Assert.assertTrue(downsampledReads.isEmpty());
        }
        else {
            List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampledReads);

            Assert.assertEquals(downsampledStackSizes.size(), test.numStacks);
            Assert.assertEquals(downsampledStackSizes, test.expectedStackSizes);

            int numReadsActuallyEliminated = test.totalInitialReads - downsampledReads.size();
            int numReadsReportedEliminated = downsampler.getNumberOfDiscardedItems();
            Assert.assertEquals(numReadsActuallyEliminated, numReadsReportedEliminated);
        }

        downsampler.reset();
        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
    }

    /**
     * Walks the downsampled reads in order, returning the size of each
     * same-position stack and failing if any read is out of coordinate order.
     */
    private List<Integer> getDownsampledStackSizesAndVerifySortedness( List<SAMRecord> downsampledReads ) {
        List<Integer> stackSizes = new ArrayList<Integer>();

        if ( downsampledReads.isEmpty() ) {
            return stackSizes;
        }

        Iterator<SAMRecord> iter = downsampledReads.iterator();
        Assert.assertTrue(iter.hasNext());

        SAMRecord previousRead = iter.next();
        int currentStackSize = 1;

        while ( iter.hasNext() ) {
            SAMRecord currentRead = iter.next();

            if ( currentRead.getReferenceIndex() > previousRead.getReferenceIndex() || currentRead.getAlignmentStart() > previousRead.getAlignmentStart() ) {
                // Moved to a new position: close out the current stack
                stackSizes.add(currentStackSize);
                currentStackSize = 1;
            }
            else if ( currentRead.getReferenceIndex() < previousRead.getReferenceIndex() || currentRead.getAlignmentStart() < previousRead.getAlignmentStart() ) {
                Assert.fail(String.format("Reads are out of order: %s %s", previousRead, currentRead));
            }
            else {
                currentStackSize++;
            }

            previousRead = currentRead;
        }

        // Don't forget the final stack
        stackSizes.add(currentStackSize);
        return stackSizes;
    }

    /**
     * signalNoMoreReadsBefore() with a read at a later position must finalize
     * the pending stack without discarding anything (stack is under the cap).
     */
    @Test
    public void testSimplePositionalDownsamplerSignalNoMoreReadsBefore() {
        ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(1000);

        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);

        Collection<SAMRecord> readStack = new ArrayList<SAMRecord>();
        readStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(50, header, "foo", 0, 1, 100));
        downsampler.submit(readStack);

        // The stack is still open (pending) until a later position is announced
        Assert.assertFalse(downsampler.hasFinalizedItems());
        Assert.assertTrue(downsampler.peekFinalized() == null);
        Assert.assertTrue(downsampler.hasPendingItems());
        Assert.assertTrue(downsampler.peekPending() != null);

        SAMRecord laterRead = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 2, 100);
        downsampler.signalNoMoreReadsBefore(laterRead);

        Assert.assertTrue(downsampler.hasFinalizedItems());
        Assert.assertTrue(downsampler.peekFinalized() != null);
        Assert.assertFalse(downsampler.hasPendingItems());
        Assert.assertTrue(downsampler.peekPending() == null);

        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();

        Assert.assertEquals(downsampledReads.size(), readStack.size());
    }

    /** Unmapped reads must pass through the positional downsampler untouched. */
    @Test
    public void testBasicUnmappedReadsSupport() {
        ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(100);

        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);

        Collection<SAMRecord> readStack = new ArrayList<SAMRecord>();
        readStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
                SAMRecord.NO_ALIGNMENT_START, 100));
        for ( SAMRecord read : readStack ) {
            Assert.assertTrue(read.getReadUnmappedFlag());
        }

        downsampler.submit(readStack);
        downsampler.signalEndOfInput();

        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();

        // Unmapped reads should not get downsampled at all by the SimplePositionalDownsampler
        Assert.assertEquals(downsampledReads.size(), readStack.size());

        for ( SAMRecord read: downsampledReads ) {
            Assert.assertTrue(read.getReadUnmappedFlag());
        }
    }

    /**
     * With mapped reads followed by unmapped reads, only the mapped stack is
     * downsampled (200 -> 100); the unmapped reads all survive and come last.
     */
    @Test
    public void testMixedMappedAndUnmappedReadsSupport() {
        ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(100);

        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);

        Collection<SAMRecord> mappedReadStack = new ArrayList<SAMRecord>();
        mappedReadStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", 0, 1, 100));
        for ( SAMRecord read : mappedReadStack ) {
            Assert.assertFalse(read.getReadUnmappedFlag());
        }

        Collection<SAMRecord> unmappedReadStack = new ArrayList<SAMRecord>();
        unmappedReadStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
                SAMRecord.NO_ALIGNMENT_START, 100));
        for ( SAMRecord read : unmappedReadStack ) {
            Assert.assertTrue(read.getReadUnmappedFlag());
        }

        downsampler.submit(mappedReadStack);
        downsampler.submit(unmappedReadStack);
        downsampler.signalEndOfInput();

        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();

        // Unmapped reads should not get downsampled at all by the SimplePositionalDownsampler
        Assert.assertEquals(downsampledReads.size(), 300);
        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 100);

        // First 100 reads are the surviving mapped reads, the rest are unmapped
        int count = 1;
        for ( SAMRecord read: downsampledReads ) {
            if ( count <= 100 ) {
                Assert.assertFalse(read.getReadUnmappedFlag());
            }
            else {
                Assert.assertTrue(read.getReadUnmappedFlag());
            }

            count++;
        }
    }

    /**
     * Smoke test that the downsampler also works when parameterized with
     * GATKSAMRecord; 10 distinct positions under the cap all survive.
     */
    @Test
    public void testGATKSAMRecordSupport() {
        ReadsDownsampler<GATKSAMRecord> downsampler = new SimplePositionalDownsampler<GATKSAMRecord>(1000);

        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);

        List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
        for ( int i = 0; i < 10; i++ ) {
            reads.add(ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 10, 20 * i + 10));
        }

        downsampler.submit(reads);
        downsampler.signalEndOfInput();
        List<GATKSAMRecord> downsampledReads = downsampler.consumeFinalizedItems();

        Assert.assertEquals(downsampledReads.size(), 10);
    }
}
|
||||
|
|
@ -1,546 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* testing of the experimental version of LocusIteratorByState
|
||||
*/
|
||||
public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
|
||||
private static SAMFileHeader header;
|
||||
private LocusIteratorByStateExperimental li;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
    /** One-time setup: a single-contig artificial header and a matching GenomeLocParser. */
    @BeforeClass
    public void beforeClass() {
        header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
        genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
    }
|
||||
|
||||
    /** Wraps the given reads in a LocusIteratorByStateExperimental for tests without read groups. */
    private final LocusIteratorByStateExperimental makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
        return new LocusIteratorByStateExperimental(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByStateExperimental.sampleListForSAMWithoutReadGroups());
    }
|
||||
|
||||
    /** Default test ReadProperties with no downsampling. */
    private static ReadProperties createTestReadProperties() {
        return createTestReadProperties(null);
    }
|
||||
|
||||
    /**
     * Builds a minimal ReadProperties for tests: empty reader list, fresh header,
     * strict validation, no filters/transformers, and the given downsampling method
     * (may be null for no downsampling).
     */
    private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
        return new ReadProperties(
                Collections.<SAMReaderID>emptyList(),
                new SAMFileHeader(),
                false,
                SAMFileReader.ValidationStringency.STRICT,
                downsamplingMethod,
                new ValidationExclusion(),
                Collections.<ReadFilter>emptyList(),
                Collections.<ReadTransformer>emptyList(),
                false,
                (byte) -1
        );
    }
|
||||
|
||||
private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
return;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXandEQOperators() {
|
||||
final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
|
||||
|
||||
// create a test version of the Reads object
|
||||
ReadProperties readAttributes = createTestReadProperties();
|
||||
|
||||
SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
|
||||
r1.setReadBases(bases1);
|
||||
r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
r1.setCigarString("10M");
|
||||
|
||||
SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
|
||||
r2.setReadBases(bases2);
|
||||
r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
r2.setCigarString("3=1X5=1X");
|
||||
|
||||
SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
|
||||
r3.setReadBases(bases2);
|
||||
r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
r3.setCigarString("3=1X5M1X");
|
||||
|
||||
SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
|
||||
r4.setReadBases(bases2);
|
||||
r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
r4.setCigarString("10M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
||||
while (li.hasNext()) {
|
||||
AlignmentContext context = li.next();
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
Assert.assertEquals(pileup.depthOfCoverage(), 4);
|
||||
}
|
||||
}
|
||||
|
||||
    /**
     * Verifies that a read containing an insertion ("4M2I6M") surfaces the
     * insertion through PileupElement.isBeforeInsertion(), with the correct
     * event length and inserted bases.
     */
    @Test
    public void testIndelsInRegularPileup() {
        final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
        final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};

        // create a test version of the Reads object
        ReadProperties readAttributes = createTestReadProperties();

        SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
        before.setReadBases(bases);
        before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
        before.setCigarString("10M");

        // 12-base read: 4M, then a 2bp "CT" insertion, then 6M
        SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
        during.setReadBases(indelBases);
        during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
        during.setCigarString("4M2I6M");

        SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
        after.setReadBases(bases);
        after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
        after.setCigarString("10M");

        List<SAMRecord> reads = Arrays.asList(before, during, after);

        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads,readAttributes);

        boolean foundIndel = false;
        while (li.hasNext()) {
            AlignmentContext context = li.next();
            ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10);
            for (PileupElement p : pileup) {
                if (p.isBeforeInsertion()) {
                    foundIndel = true;
                    Assert.assertEquals(p.getEventLength(), 2, "Wrong event length");
                    Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect");
                    break;
                }
            }

        }

        Assert.assertTrue(foundIndel,"Indel in pileup not found");
    }
|
||||
|
||||
    /**
     * A read consisting solely of an insertion ("76I") should still appear in
     * the base pileup at its alignment start and be flagged as indel-containing.
     */
    @Test
    public void testWholeIndelReadInIsolation() {
        final int firstLocus = 44367789;

        // create a test version of the Reads object
        ReadProperties readAttributes = createTestReadProperties();

        SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
        indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
        indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
        indelOnlyRead.setCigarString("76I");

        List<SAMRecord> reads = Arrays.asList(indelOnlyRead);

        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads, readAttributes);

        // Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read
        // and considers it to be an indel-containing read.
        Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled");
        AlignmentContext alignmentContext = li.next();
        Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location.");
        ReadBackedPileup basePileup = alignmentContext.getBasePileup();
        Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size");
        Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect");
    }
|
||||
|
||||
    /**
     * Test to make sure that reads supporting only an indel (example cigar string: 76I) do
     * not negatively influence the ordering of the pileup.
     */
    @Test
    public void testWholeIndelRead() {
        final int firstLocus = 44367788, secondLocus = firstLocus + 1;

        // 1M anchor followed by a 75bp insertion: contributes only to firstLocus
        SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76);
        leadingRead.setReadBases(Utils.dupBytes((byte)'A',76));
        leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
        leadingRead.setCigarString("1M75I");

        // Pure-insertion read starting at secondLocus
        SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
        indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
        indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
        indelOnlyRead.setCigarString("76I");

        // 75bp insertion then a 1M anchor at secondLocus
        SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76);
        fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76));
        fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
        fullMatchAfterIndel.setCigarString("75I1M");

        List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);

        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads, createTestReadProperties());
        int currentLocus = firstLocus;
        int numAlignmentContextsFound = 0;

        while(li.hasNext()) {
            AlignmentContext alignmentContext = li.next();
            Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect");

            if(currentLocus == firstLocus) {
                // Only leadingRead covers the first locus
                List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
                Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus);
                Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus);
            }
            else if(currentLocus == secondLocus) {
                // Both secondLocus reads appear, in submission order
                List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
                Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus);
                Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus);
                Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus);
            }

            currentLocus++;
            numAlignmentContextsFound++;
        }

        Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts");
    }
|
||||
|
||||
    /**
     * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
     */
    @Test
    public void testWholeIndelReadRepresentedTest() {
        final int firstLocus = 44367788, secondLocus = firstLocus + 1;

        // Single-base pure insertion ("1I")
        SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
        read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
        read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
        read1.setCigarString("1I");

        List<SAMRecord> reads = Arrays.asList(read1);

        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads, createTestReadProperties());

        while(li.hasNext()) {
            AlignmentContext alignmentContext = li.next();
            ReadBackedPileup p = alignmentContext.getBasePileup();
            Assert.assertTrue(p.getNumberOfElements() == 1);
            // The element must be flagged as before (not after) the insertion,
            // and report the full inserted sequence
            PileupElement pe = p.iterator().next();
            Assert.assertTrue(pe.isBeforeInsertion());
            Assert.assertFalse(pe.isAfterInsertion());
            Assert.assertEquals(pe.getEventBases(), "A");
        }

        // Repeat with a 10-base pure insertion ("10I")
        SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
        read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
        read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
        read2.setCigarString("10I");

        reads = Arrays.asList(read2);

        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads, createTestReadProperties());

        while(li.hasNext()) {
            AlignmentContext alignmentContext = li.next();
            ReadBackedPileup p = alignmentContext.getBasePileup();
            Assert.assertTrue(p.getNumberOfElements() == 1);
            PileupElement pe = p.iterator().next();
            Assert.assertTrue(pe.isBeforeInsertion());
            Assert.assertFalse(pe.isAfterInsertion());
            Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA");
        }
    }
|
||||
|
||||
////////////////////////////////////////////
|
||||
// comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////
|
||||
|
||||
    // Bit flags describing the expected state of a PileupElement at a given read
    // offset; combined with bitwise OR in the LIBSTest expectations below.
    private static final int IS_BEFORE_DELETED_BASE_FLAG = 1;
    private static final int IS_BEFORE_DELETION_START_FLAG = 2;
    private static final int IS_AFTER_DELETED_BASE_FLAG = 4;
    private static final int IS_AFTER_DELETION_END_FLAG = 8;
    private static final int IS_BEFORE_INSERTION_FLAG = 16;
    private static final int IS_AFTER_INSERTION_FLAG = 32;
    private static final int IS_NEXT_TO_SOFTCLIP_FLAG = 64;
|
||||
|
||||
    /**
     * Immutable test case for the comprehensive LIBS/PileupElement tests: a CIGAR
     * string plus, per emitted pileup, the expected read offset and the expected
     * combination of the IS_* flags above.
     */
    private static class LIBSTest {

        final String cigar;            // CIGAR for the artificial read
        final int readLength;          // length of the artificial read
        final List<Integer> offsets;   // expected read offsets, one per pileup element
        final List<Integer> flags;     // expected OR-ed IS_* flags, parallel to offsets

        private LIBSTest(final String cigar, final int readLength, final List<Integer> offsets, final List<Integer> flags) {
            this.cigar = cigar;
            this.readLength = readLength;
            this.offsets = offsets;
            this.flags = flags;
        }
    }
|
||||
|
||||
    /**
     * CIGAR scenarios covering insertions, deletions, and soft clips with the
     * per-offset flag expectations; several known-broken cases are commented
     * out pending a LIBS fix.
     */
    @DataProvider(name = "LIBSTest")
    public Object[][] createLIBSTestData() {
        return new Object[][]{
                {new LIBSTest("1I", 1, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
                {new LIBSTest("10I", 10, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
                {new LIBSTest("2M2I2M", 6, Arrays.asList(0,1,4,5), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG,IS_AFTER_INSERTION_FLAG,0))},
                {new LIBSTest("2M2I", 4, Arrays.asList(0,1), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG))},
                //TODO -- uncomment these when LIBS is fixed
                //{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
                //{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
                //{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
                {new LIBSTest("1M2D2M", 3, Arrays.asList(0,1,2), Arrays.asList(IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG,0))},
                {new LIBSTest("1S1M", 2, Arrays.asList(1), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
                {new LIBSTest("1M1S", 2, Arrays.asList(0), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
                {new LIBSTest("1S1M1I", 3, Arrays.asList(1), Arrays.asList(IS_BEFORE_INSERTION_FLAG | IS_NEXT_TO_SOFTCLIP_FLAG))}
        };
    }
|
||||
|
||||
@Test(dataProvider = "LIBSTest")
|
||||
public void testLIBS(LIBSTest params) {
|
||||
final int locus = 44367788;
|
||||
|
||||
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
|
||||
read.setCigarString(params.cigar);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(Arrays.asList(read), createTestReadProperties());
|
||||
|
||||
int offset = 0;
|
||||
while ( li.hasNext() ) {
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
PileupElement pe = p.iterator().next();
|
||||
|
||||
final int flag = params.flags.get(offset);
|
||||
Assert.assertEquals(pe.isBeforeDeletedBase(), (flag & IS_BEFORE_DELETED_BASE_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isBeforeDeletionStart(), (flag & IS_BEFORE_DELETION_START_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isAfterDeletedBase(), (flag & IS_AFTER_DELETED_BASE_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isAfterDeletionEnd(), (flag & IS_AFTER_DELETION_END_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isBeforeInsertion(), (flag & IS_BEFORE_INSERTION_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isAfterInsertion(), (flag & IS_AFTER_INSERTION_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isNextToSoftClip(), (flag & IS_NEXT_TO_SOFTCLIP_FLAG) != 0);
|
||||
|
||||
Assert.assertEquals(pe.getOffset(), params.offsets.get(offset).intValue());
|
||||
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// End comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////////
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
private class PerSampleReadStateManagerTest extends TestDataProvider {
|
||||
private List<Integer> readCountsPerAlignmentStart;
|
||||
private List<SAMRecord> reads;
|
||||
private List<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>> recordStatesByAlignmentStart;
|
||||
private int removalInterval;
|
||||
|
||||
public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
|
||||
super(PerSampleReadStateManagerTest.class);
|
||||
|
||||
this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
|
||||
this.removalInterval = removalInterval;
|
||||
|
||||
reads = new ArrayList<SAMRecord>();
|
||||
recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>>();
|
||||
|
||||
setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d",
|
||||
getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
|
||||
}
|
||||
|
||||
public void run() {
|
||||
LocusIteratorByStateExperimental libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
|
||||
LocusIteratorByStateExperimental.ReadStateManager readStateManager =
|
||||
libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
|
||||
LocusIteratorByStateExperimental.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
|
||||
readStateManager.new PerSampleReadStateManager();
|
||||
|
||||
makeReads();
|
||||
|
||||
for ( ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
|
||||
perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
|
||||
}
|
||||
|
||||
// read state manager should have the right number of reads
|
||||
Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
|
||||
|
||||
Iterator<SAMRecord> originalReadsIterator = reads.iterator();
|
||||
Iterator<LocusIteratorByStateExperimental.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int recordStateCount = 0;
|
||||
int numReadStatesRemoved = 0;
|
||||
|
||||
// Do a first-pass validation of the record state iteration by making sure we get back everything we
|
||||
// put in, in the same order, doing any requested removals of read states along the way
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
|
||||
recordStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
|
||||
// If requested, remove a read state every removalInterval states
|
||||
if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
|
||||
recordStateIterator.remove();
|
||||
numReadStatesRemoved++;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertFalse(originalReadsIterator.hasNext());
|
||||
|
||||
// If we removed any read states, do a second pass through the read states to make sure the right
|
||||
// states were removed
|
||||
if ( numReadStatesRemoved > 0 ) {
|
||||
Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
|
||||
|
||||
originalReadsIterator = reads.iterator();
|
||||
recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int readCount = 0;
|
||||
int readStateCount = 0;
|
||||
|
||||
// Match record states with the reads that should remain after removal
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
|
||||
readStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
readCount++;
|
||||
|
||||
if ( readCount % removalInterval == 0 ) {
|
||||
originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
|
||||
readCount++;
|
||||
}
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in (after accounting for removals)
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
}
|
||||
|
||||
Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
|
||||
}
|
||||
|
||||
// Allow memory used by this test to be reclaimed
|
||||
readCountsPerAlignmentStart = null;
|
||||
reads = null;
|
||||
recordStatesByAlignmentStart = null;
|
||||
}
|
||||
|
||||
private void makeReads() {
|
||||
int alignmentStart = 1;
|
||||
|
||||
for ( int readsThisStack : readCountsPerAlignmentStart ) {
|
||||
ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
|
||||
ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByStateExperimental.SAMRecordState>();
|
||||
|
||||
for ( SAMRecord read : stackReads ) {
|
||||
stackRecordStates.add(new LocusIteratorByStateExperimental.SAMRecordState(read));
|
||||
}
|
||||
|
||||
reads.addAll(stackReads);
|
||||
recordStatesByAlignmentStart.add(stackRecordStates);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
|
||||
public Object[][] createPerSampleReadStateManagerTests() {
|
||||
for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
|
||||
Arrays.asList(2),
|
||||
Arrays.asList(10),
|
||||
Arrays.asList(1, 1),
|
||||
Arrays.asList(2, 2),
|
||||
Arrays.asList(10, 10),
|
||||
Arrays.asList(1, 10),
|
||||
Arrays.asList(10, 1),
|
||||
Arrays.asList(1, 1, 1),
|
||||
Arrays.asList(2, 2, 2),
|
||||
Arrays.asList(10, 10, 10),
|
||||
Arrays.asList(1, 1, 1, 1, 1, 1),
|
||||
Arrays.asList(10, 10, 10, 10, 10, 10),
|
||||
Arrays.asList(1, 2, 10, 1, 2, 10)
|
||||
) ) {
|
||||
|
||||
for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
|
||||
new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
|
||||
}
|
||||
}
|
||||
|
||||
return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
|
||||
public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
|
||||
logger.warn("Running test: " + test);
|
||||
|
||||
test.run();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,166 +0,0 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Basic tests to prove the integrity of the reservoir downsampler.
|
||||
* At the moment, always run tests on SAM records as that's the task
|
||||
* for which the downsampler was conceived.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class LegacyReservoirDownsamplerUnitTest {
|
||||
private static final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,200);
|
||||
|
||||
|
||||
@Test
|
||||
public void testEmptyIterator() {
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||
Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOneElementWithPoolSizeOne() {
|
||||
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
Collection<SAMRecord> batchedReads = downsampler.getDownsampledContents();
|
||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOneElementWithPoolSizeGreaterThanOne() {
|
||||
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
Collection<SAMRecord> batchedReads = downsampler.getDownsampledContents();
|
||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPoolFilledPartially() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 3, "Downsampler is returning the wrong number of reads");
|
||||
|
||||
Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPoolFilledExactly() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 5, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
|
||||
|
||||
Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(3), reads.get(3), "Downsampler read 4 is incorrect");
|
||||
Assert.assertSame(batchedReads.get(4), reads.get(4), "Downsampler read 5 is incorrect");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLargerPileWithZeroElementPool() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(0);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 0, "Downsampler is returning the wrong number of reads");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLargerPileWithSingleElementPool() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertTrue(reads.contains(batchedReads.get(0)),"Downsampler is returning a bad read.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFillingAcrossLoci() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
||||
|
||||
reads.clear();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76));
|
||||
|
||||
downsampler.clear();
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
||||
Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read.");
|
||||
|
||||
reads.clear();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76));
|
||||
|
||||
downsampler.clear();
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
|
||||
Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads");
|
||||
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
|
||||
Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read.");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
|
||||
/**
|
||||
* UnitTests for the InputProducer
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/24/12
|
||||
* Time: 11:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class InputProducerUnitTest extends BaseTest {
|
||||
@DataProvider(name = "InputProducerTest")
|
||||
public Object[][] createInputProducerTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int nElements : Arrays.asList(0, 1, 10, 100, 1000, 10000, 100000) ) {
|
||||
for ( final int queueSize : Arrays.asList(1, 10, 100) ) {
|
||||
tests.add(new Object[]{ nElements, queueSize });
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "InputProducerTest", timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME)
|
||||
public void testInputProducer(final int nElements, final int queueSize) throws InterruptedException {
|
||||
final List<Integer> elements = new ArrayList<Integer>(nElements);
|
||||
for ( int i = 0; i < nElements; i++ ) elements.add(i);
|
||||
|
||||
final LinkedBlockingDeque<InputProducer<Integer>.InputValue> readQueue =
|
||||
new LinkedBlockingDeque<InputProducer<Integer>.InputValue>(queueSize);
|
||||
|
||||
final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), null, readQueue);
|
||||
|
||||
final ExecutorService es = Executors.newSingleThreadExecutor();
|
||||
es.submit(ip);
|
||||
|
||||
int lastValue = -1;
|
||||
int nRead = 0;
|
||||
while ( true ) {
|
||||
final int observedQueueSize = readQueue.size();
|
||||
Assert.assertTrue(observedQueueSize <= queueSize,
|
||||
"Reader is enqueuing more elements " + observedQueueSize + " than allowed " + queueSize);
|
||||
|
||||
final InputProducer<Integer>.InputValue value = readQueue.take();
|
||||
if ( value.isLast() ) {
|
||||
Assert.assertEquals(nRead, nElements, "Number of input values " + nRead + " not all that are expected " + nElements);
|
||||
Assert.assertEquals(readQueue.size(), 0, "Last queue element found but queue contains more values!");
|
||||
break;
|
||||
} else {
|
||||
Assert.assertTrue(lastValue < value.getValue(), "Read values coming out of order!");
|
||||
final int expected = lastValue + 1;
|
||||
Assert.assertEquals((int)value.getValue(), expected, "Value observed " + value.getValue() + " not equal to the expected value " + expected);
|
||||
nRead++;
|
||||
lastValue = value.getValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,182 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import org.apache.log4j.BasicConfigurator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* UnitTests for the NanoScheduler
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/24/12
|
||||
* Time: 11:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NanoSchedulerUnitTest extends BaseTest {
|
||||
public static final int NANO_SCHEDULE_MAX_RUNTIME = 60000;
|
||||
|
||||
private static class Map2x implements NSMapFunction<Integer, Integer> {
|
||||
@Override public Integer apply(Integer input) { return input * 2; }
|
||||
}
|
||||
|
||||
private static class ReduceSum implements NSReduceFunction<Integer, Integer> {
|
||||
int prevOne = Integer.MIN_VALUE;
|
||||
|
||||
@Override public Integer apply(Integer one, Integer sum) {
|
||||
Assert.assertTrue(prevOne < one, "Reduce came in out of order. Prev " + prevOne + " cur " + one);
|
||||
return one + sum;
|
||||
}
|
||||
}
|
||||
|
||||
private static class ProgressCallback implements NSProgressFunction<Integer> {
|
||||
int callBacks = 0;
|
||||
|
||||
@Override
|
||||
public void progress(Integer lastMapInput) {
|
||||
callBacks++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static int sum2x(final int start, final int end) {
|
||||
int sum = 0;
|
||||
for ( int i = start; i < end; i++ )
|
||||
sum += 2 * i;
|
||||
return sum;
|
||||
}
|
||||
|
||||
private static class NanoSchedulerBasicTest extends TestDataProvider {
|
||||
final int bufferSize, nThreads, start, end, expectedResult;
|
||||
|
||||
public NanoSchedulerBasicTest(final int bufferSize, final int nThreads, final int start, final int end) {
|
||||
super(NanoSchedulerBasicTest.class);
|
||||
this.bufferSize = bufferSize;
|
||||
this.nThreads = nThreads;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.expectedResult = sum2x(start, end);
|
||||
setName(String.format("%s nt=%d buf=%d start=%d end=%d sum=%d",
|
||||
getClass().getSimpleName(), nThreads, bufferSize, start, end, expectedResult));
|
||||
}
|
||||
|
||||
public Iterator<Integer> makeReader() {
|
||||
final List<Integer> ints = new ArrayList<Integer>();
|
||||
for ( int i = start; i < end; i++ )
|
||||
ints.add(i);
|
||||
return ints.iterator();
|
||||
}
|
||||
|
||||
public int nExpectedCallbacks() {
|
||||
int nElements = Math.max(end - start, 0);
|
||||
return nElements / bufferSize;
|
||||
}
|
||||
|
||||
public Map2x makeMap() { return new Map2x(); }
|
||||
public Integer initReduce() { return 0; }
|
||||
public ReduceSum makeReduce() { return new ReduceSum(); }
|
||||
}
|
||||
|
||||
static NanoSchedulerBasicTest exampleTest = null;
|
||||
@DataProvider(name = "NanoSchedulerBasicTest")
|
||||
public Object[][] createNanoSchedulerBasicTest() {
|
||||
for ( final int bufferSize : Arrays.asList(1, 10, 1000, 1000000) ) {
|
||||
for ( final int nt : Arrays.asList(1, 2, 4) ) {
|
||||
for ( final int start : Arrays.asList(0) ) {
|
||||
for ( final int end : Arrays.asList(0, 1, 2, 11, 10000, 100000) ) {
|
||||
exampleTest = new NanoSchedulerBasicTest(bufferSize, nt, start, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NanoSchedulerBasicTest.getTests(NanoSchedulerBasicTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
|
||||
public void testSingleThreadedNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
|
||||
logger.warn("Running " + test);
|
||||
if ( test.nThreads == 1 )
|
||||
testNanoScheduler(test);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME, dependsOnMethods = "testSingleThreadedNanoScheduler")
|
||||
public void testMultiThreadedNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
|
||||
logger.warn("Running " + test);
|
||||
if ( test.nThreads >= 1 )
|
||||
testNanoScheduler(test);
|
||||
}
|
||||
|
||||
private void testNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
|
||||
final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
|
||||
new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
|
||||
|
||||
final ProgressCallback callback = new ProgressCallback();
|
||||
nanoScheduler.setProgressFunction(callback);
|
||||
|
||||
Assert.assertEquals(nanoScheduler.getInputBufferSize(), test.bufferSize, "inputBufferSize argument");
|
||||
Assert.assertEquals(nanoScheduler.getnThreads(), test.nThreads, "nThreads argument");
|
||||
|
||||
final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
|
||||
Assert.assertNotNull(sum);
|
||||
Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly");
|
||||
|
||||
Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected. Expected at least " + test.nExpectedCallbacks() + " but saw only " + callback.callBacks);
|
||||
nanoScheduler.shutdown();
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", dependsOnMethods = "testMultiThreadedNanoScheduler", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
|
||||
public void testNanoSchedulerInLoop(final NanoSchedulerBasicTest test) throws InterruptedException {
|
||||
if ( test.bufferSize > 1) {
|
||||
logger.warn("Running " + test);
|
||||
|
||||
final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
|
||||
new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
|
||||
|
||||
// test reusing the scheduler
|
||||
for ( int i = 0; i < 10; i++ ) {
|
||||
final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
|
||||
Assert.assertNotNull(sum);
|
||||
Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly");
|
||||
}
|
||||
|
||||
nanoScheduler.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeOut = NANO_SCHEDULE_MAX_RUNTIME)
|
||||
public void testShutdown() throws InterruptedException {
|
||||
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
|
||||
Assert.assertFalse(nanoScheduler.isShutdown(), "scheduler should be alive");
|
||||
nanoScheduler.shutdown();
|
||||
Assert.assertTrue(nanoScheduler.isShutdown(), "scheduler should be dead");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalStateException.class, timeOut = NANO_SCHEDULE_MAX_RUNTIME)
|
||||
public void testShutdownExecuteFailure() throws InterruptedException {
|
||||
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
|
||||
nanoScheduler.shutdown();
|
||||
nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
|
||||
}
|
||||
|
||||
public static void main(String [ ] args) {
|
||||
org.apache.log4j.Logger logger = org.apache.log4j.Logger.getRootLogger();
|
||||
BasicConfigurator.configure();
|
||||
logger.setLevel(org.apache.log4j.Level.DEBUG);
|
||||
|
||||
final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(1000, Integer.valueOf(args[0]), 0, Integer.valueOf(args[1]));
|
||||
final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
|
||||
new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
|
||||
nanoScheduler.setDebug(true);
|
||||
|
||||
final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
|
||||
System.out.printf("Sum = %d, expected =%d%n", sum, test.expectedResult);
|
||||
nanoScheduler.shutdown();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.nanoScheduler;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
/**
|
||||
* UnitTests for the InputProducer
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/24/12
|
||||
* Time: 11:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReducerThreadUnitTest extends BaseTest {
|
||||
@DataProvider(name = "ReducerThreadTest")
|
||||
public Object[][] createReducerThreadTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int nElements : Arrays.asList(0, 1, 10, 100, 1000, 10000, 100000) ) {
|
||||
tests.add(new Object[]{ nElements });
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "ReducerThreadTest", timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME)
|
||||
public void testReducerThreadTest(final int nElements) throws Exception {
|
||||
List<Integer> values = new ArrayList<Integer>(nElements);
|
||||
List<Integer> jobIDs = new ArrayList<Integer>(nElements);
|
||||
for ( int i = 0; i < nElements; i++ ) {
|
||||
values.add(i);
|
||||
jobIDs.add(i);
|
||||
}
|
||||
|
||||
runTests(values, jobIDs);
|
||||
}
|
||||
|
||||
@Test(enabled = true, timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME, expectedExceptions = ExecutionException.class)
|
||||
public void testReducerThreadTestByJobOrder() throws Exception {
|
||||
runTests(Arrays.asList(0, 1, 2), Arrays.asList(1, 3, 2));
|
||||
}
|
||||
|
||||
private void runTests( final List<Integer> mapValues, final List<Integer> jobIDs) throws Exception {
|
||||
final LinkedBlockingDeque<Future<MapResult<Integer>>> mapResultsQueue =
|
||||
new LinkedBlockingDeque<Future<MapResult<Integer>>>(mapValues.size()+1);
|
||||
|
||||
for ( int i = 0; i < mapValues.size(); i++ ) {
|
||||
final int value = mapValues.get(i);
|
||||
final int jobID = jobIDs.get(i);
|
||||
final MapResult<Integer> mapResult = new MapResult<Integer>(value, jobID);
|
||||
mapResultsQueue.add(new FutureValue<MapResult<Integer>>(mapResult));
|
||||
}
|
||||
mapResultsQueue.add(new FutureValue<MapResult<Integer>>(new MapResult<Integer>()));
|
||||
|
||||
final ReduceSumTest reduce = new ReduceSumTest(mapResultsQueue);
|
||||
final ReducerThread<Integer, Integer> thread
|
||||
= new ReducerThread<Integer, Integer>(reduce, null, 0, mapResultsQueue);
|
||||
|
||||
final ExecutorService es = Executors.newSingleThreadExecutor();
|
||||
final Future<Integer> value = es.submit(thread);
|
||||
value.get();
|
||||
|
||||
Assert.assertEquals(reduce.nRead, mapValues.size());
|
||||
}
|
||||
|
||||
public class ReduceSumTest implements NSReduceFunction<Integer, Integer> {
|
||||
final LinkedBlockingDeque<Future<MapResult<Integer>>> mapResultsQueue;
|
||||
int nRead = 0;
|
||||
int lastValue = -1;
|
||||
|
||||
public ReduceSumTest(LinkedBlockingDeque<Future<MapResult<Integer>>> mapResultsQueue) {
|
||||
this.mapResultsQueue = mapResultsQueue;
|
||||
}
|
||||
|
||||
@Override public Integer apply(Integer one, Integer sum) {
|
||||
Assert.assertTrue(lastValue < one, "Reduce came in out of order. Prev " + lastValue + " cur " + one);
|
||||
|
||||
Assert.assertTrue(lastValue < one, "Read values coming out of order!");
|
||||
final int expected = lastValue + 1;
|
||||
Assert.assertEquals((int)one, expected, "Value observed " + one + " not equal to the expected value " + expected);
|
||||
nRead++;
|
||||
lastValue = expected;
|
||||
|
||||
return one + sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,161 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
|
||||
public class ArtificialSingleSampleReadStreamUnitTest extends BaseTest {
|
||||
|
||||
private static class ArtificialSingleSampleReadStreamTest extends TestDataProvider {
|
||||
private ArtificialSingleSampleReadStream stream;
|
||||
private ArtificialSingleSampleReadStreamAnalyzer streamAnalyzer;
|
||||
|
||||
public ArtificialSingleSampleReadStreamTest( ArtificialSingleSampleReadStream stream ) {
|
||||
super(ArtificialSingleSampleReadStreamTest.class);
|
||||
|
||||
this.stream = stream;
|
||||
|
||||
setName(String.format("%s: numContigs=%d stacksPerContig=%d readsPerStack=%d-%d distanceBetweenStacks=%d-%d readLength=%d-%d unmappedReads=%d",
|
||||
getClass().getSimpleName(),
|
||||
stream.getNumContigs(),
|
||||
stream.getNumStacksPerContig(),
|
||||
stream.getMinReadsPerStack(),
|
||||
stream.getMaxReadsPerStack(),
|
||||
stream.getMinDistanceBetweenStacks(),
|
||||
stream.getMaxDistanceBetweenStacks(),
|
||||
stream.getMinReadLength(),
|
||||
stream.getMaxReadLength(),
|
||||
stream.getNumUnmappedReads()));
|
||||
}
|
||||
|
||||
public void run() {
|
||||
streamAnalyzer= new ArtificialSingleSampleReadStreamAnalyzer(stream);
|
||||
|
||||
streamAnalyzer.analyze(stream);
|
||||
|
||||
// Check whether the observed properties of the stream match its nominal properties
|
||||
streamAnalyzer.validate();
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "ArtificialSingleSampleReadStreamTestDataProvider")
|
||||
public Object[][] createArtificialSingleSampleReadStreamTests() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 10000);
|
||||
String readGroupID = "testReadGroup";
|
||||
SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupID);
|
||||
readGroup.setSample("testSample");
|
||||
header.addReadGroup(readGroup);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
|
||||
// brute force testing!
|
||||
for ( int numContigs = 0; numContigs <= 2; numContigs++ ) {
|
||||
for ( int stacksPerContig = 0; stacksPerContig <= 2; stacksPerContig++ ) {
|
||||
for ( int minReadsPerStack = 1; minReadsPerStack <= 2; minReadsPerStack++ ) {
|
||||
for ( int maxReadsPerStack = 1; maxReadsPerStack <= 3; maxReadsPerStack++ ) {
|
||||
for ( int minDistanceBetweenStacks = 1; minDistanceBetweenStacks <= 2; minDistanceBetweenStacks++ ) {
|
||||
for ( int maxDistanceBetweenStacks = 1; maxDistanceBetweenStacks <= 3; maxDistanceBetweenStacks++ ) {
|
||||
for ( int minReadLength = 1; minReadLength <= 2; minReadLength++ ) {
|
||||
for ( int maxReadLength = 1; maxReadLength <= 3; maxReadLength++ ) {
|
||||
for ( int numUnmappedReads = 0; numUnmappedReads <= 2; numUnmappedReads++ ) {
|
||||
// Only test sane combinations here
|
||||
if ( minReadsPerStack <= maxReadsPerStack &&
|
||||
minDistanceBetweenStacks <= maxDistanceBetweenStacks &&
|
||||
minReadLength <= maxReadLength &&
|
||||
((numContigs > 0 && stacksPerContig > 0) || (numContigs == 0 && stacksPerContig == 0)) ) {
|
||||
|
||||
new ArtificialSingleSampleReadStreamTest(new ArtificialSingleSampleReadStream(header,
|
||||
readGroupID,
|
||||
numContigs,
|
||||
stacksPerContig,
|
||||
minReadsPerStack,
|
||||
maxReadsPerStack,
|
||||
minDistanceBetweenStacks,
|
||||
maxDistanceBetweenStacks,
|
||||
minReadLength,
|
||||
maxReadLength,
|
||||
numUnmappedReads));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ArtificialSingleSampleReadStreamTest.getTests(ArtificialSingleSampleReadStreamTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ArtificialSingleSampleReadStreamTestDataProvider")
|
||||
public void testArtificialSingleSampleReadStream( ArtificialSingleSampleReadStreamTest test ) {
|
||||
logger.warn("Running test: " + test);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
test.run();
|
||||
}
|
||||
|
||||
@DataProvider(name = "ArtificialSingleSampleReadStreamInvalidArgumentsTestDataProvider")
|
||||
public Object[][] createInvalidArgumentsTests() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 10000);
|
||||
String readGroupID = "testReadGroup";
|
||||
header.addReadGroup(new SAMReadGroupRecord(readGroupID));
|
||||
|
||||
return new Object[][] {
|
||||
{"testNullHeader", null, readGroupID, 1, 1, 1, 2, 1, 2, 1, 2, 0},
|
||||
{"testNullReadGroup", header, null, 1, 1, 1, 2, 1, 2, 1, 2, 0},
|
||||
{"testInvalidReadGroup", header, "foo", 1, 1, 1, 2, 1, 2, 1, 2, 0},
|
||||
{"testInvalidNumContigs", header, readGroupID, -1, 1, 1, 2, 1, 2, 1, 2, 0},
|
||||
{"testInvalidNumStacksPerContig", header, readGroupID, 1, -1, 1, 2, 1, 2, 1, 2, 0},
|
||||
{"test0ContigsNon0StacksPerContig", header, readGroupID, 0, 1, 1, 2, 1, 2, 1, 2, 0},
|
||||
{"testNon0Contigs0StacksPerContig", header, readGroupID, 1, 0, 1, 2, 1, 2, 1, 2, 0},
|
||||
{"testInvalidMinReadsPerStack", header, readGroupID, 1, 1, -1, 2, 1, 2, 1, 2, 0},
|
||||
{"testInvalidMaxReadsPerStack", header, readGroupID, 1, 1, 1, -2, 1, 2, 1, 2, 0},
|
||||
{"testInvalidMinDistanceBetweenStacks", header, readGroupID, 1, 1, 1, 2, -1, 2, 1, 2, 0},
|
||||
{"testInvalidMaxDistanceBetweenStacks", header, readGroupID, 1, 1, 1, 2, 1, -2, 1, 2, 0},
|
||||
{"testInvalidMinReadLength", header, readGroupID, 1, 1, 1, 2, 1, 2, -1, 2, 0},
|
||||
{"testInvalidMaxReadLength", header, readGroupID, 1, 1, 1, 2, 1, 2, 1, -2, 0},
|
||||
{"testInvalidReadsPerStackRange", header, readGroupID, 1, 1, 2, 1, 1, 2, 1, 2, 0},
|
||||
{"testInvalidDistanceBetweenStacksRange", header, readGroupID, 1, 1, 1, 2, 2, 1, 1, 2, 0},
|
||||
{"testInvalidReadLengthRange", header, readGroupID, 1, 1, 1, 2, 1, 2, 2, 1, 0},
|
||||
{"testInvalidNumUnmappedReads", header, readGroupID, 1, 1, 1, 2, 1, 2, 1, 2, -1},
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ArtificialSingleSampleReadStreamInvalidArgumentsTestDataProvider",
|
||||
expectedExceptions = ReviewedStingException.class)
|
||||
public void testInvalidArguments( String testName,
|
||||
SAMFileHeader header,
|
||||
String readGroupID,
|
||||
int numContigs,
|
||||
int numStacksPerContig,
|
||||
int minReadsPerStack,
|
||||
int maxReadsPerStack,
|
||||
int minDistanceBetweenStacks,
|
||||
int maxDistanceBetweenStacks,
|
||||
int minReadLength,
|
||||
int maxReadLength,
|
||||
int numUnmappedReads ) {
|
||||
|
||||
logger.warn("Running test: " + testName);
|
||||
|
||||
ArtificialSingleSampleReadStream stream = new ArtificialSingleSampleReadStream(header,
|
||||
readGroupID,
|
||||
numContigs,
|
||||
numStacksPerContig,
|
||||
minReadsPerStack,
|
||||
maxReadsPerStack,
|
||||
minDistanceBetweenStacks,
|
||||
maxDistanceBetweenStacks,
|
||||
minReadLength,
|
||||
maxReadLength,
|
||||
numUnmappedReads);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,184 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils.threading;
|
||||
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Tests for the state monitoring thread factory.
|
||||
*/
|
||||
public class EfficiencyMonitoringThreadFactoryUnitTest extends BaseTest {
|
||||
// the duration of the tests -- 100 ms is tolerable given the number of tests we are doing
|
||||
private final static long THREAD_TARGET_DURATION_IN_MILLISECOND = 100000;
|
||||
private final static int MAX_THREADS = 4;
|
||||
final static Object GLOBAL_LOCK = new Object();
|
||||
|
||||
private class StateTest extends TestDataProvider {
|
||||
private final double TOLERANCE = 0.1; // willing to tolerate a 10% error
|
||||
|
||||
final List<EfficiencyMonitoringThreadFactory.State> statesForThreads;
|
||||
|
||||
public StateTest(final List<EfficiencyMonitoringThreadFactory.State> statesForThreads) {
|
||||
super(StateTest.class);
|
||||
this.statesForThreads = statesForThreads;
|
||||
setName("StateTest " + Utils.join(",", statesForThreads));
|
||||
}
|
||||
|
||||
public List<EfficiencyMonitoringThreadFactory.State> getStatesForThreads() {
|
||||
return statesForThreads;
|
||||
}
|
||||
|
||||
public int getNStates() { return statesForThreads.size(); }
|
||||
|
||||
public double maxStatePercent(final EfficiencyMonitoringThreadFactory.State state) { return 100*(fraction(state) + TOLERANCE); }
|
||||
public double minStatePercent(final EfficiencyMonitoringThreadFactory.State state) { return 100*(fraction(state) - TOLERANCE); }
|
||||
|
||||
private double fraction(final EfficiencyMonitoringThreadFactory.State state) {
|
||||
return Collections.frequency(statesForThreads, state) / (1.0 * statesForThreads.size());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test helper threading class that puts the thread into RUNNING, BLOCKED, or WAITING state as
|
||||
* requested for input argument
|
||||
*/
|
||||
private static class StateTestThread implements Callable<Double> {
|
||||
private final EfficiencyMonitoringThreadFactory.State stateToImplement;
|
||||
|
||||
private StateTestThread(final EfficiencyMonitoringThreadFactory.State stateToImplement) {
|
||||
this.stateToImplement = stateToImplement;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Double call() throws Exception {
|
||||
switch ( stateToImplement ) {
|
||||
case USER_CPU:
|
||||
// do some work until we get to THREAD_TARGET_DURATION_IN_MILLISECOND
|
||||
double sum = 0.0;
|
||||
final long startTime = System.currentTimeMillis();
|
||||
for ( int i = 1; System.currentTimeMillis() - startTime < (THREAD_TARGET_DURATION_IN_MILLISECOND - 1); i++ ) {
|
||||
sum += Math.log10(i);
|
||||
}
|
||||
return sum;
|
||||
case WAITING:
|
||||
Thread.currentThread().sleep(THREAD_TARGET_DURATION_IN_MILLISECOND);
|
||||
return 0.0;
|
||||
case BLOCKING:
|
||||
if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn("Blocking...");
|
||||
synchronized (GLOBAL_LOCK) {
|
||||
// the GLOBAL_LOCK must be held by the unit test itself for this to properly block
|
||||
if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking");
|
||||
}
|
||||
return 0.0;
|
||||
case WAITING_FOR_IO:
|
||||
// TODO -- implement me
|
||||
// shouldn't ever get here, throw an exception
|
||||
throw new ReviewedStingException("WAITING_FOR_IO testing currently not implemented, until we figure out how to force a system call block");
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected thread test state " + stateToImplement);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "StateTest")
|
||||
public Object[][] createStateTest() {
|
||||
for ( final int nThreads : Arrays.asList(3) ) {
|
||||
//final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.WAITING_FOR_IO);
|
||||
final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.USER_CPU, EfficiencyMonitoringThreadFactory.State.WAITING, EfficiencyMonitoringThreadFactory.State.BLOCKING);
|
||||
//final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.values());
|
||||
for (final List<EfficiencyMonitoringThreadFactory.State> states : Utils.makePermutations(allStates, nThreads, true) ) {
|
||||
//if ( Collections.frequency(states, Thread.State.BLOCKED) > 0)
|
||||
new StateTest(states);
|
||||
}
|
||||
}
|
||||
|
||||
return StateTest.getTests(StateTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "StateTest", timeOut = MAX_THREADS * THREAD_TARGET_DURATION_IN_MILLISECOND)
|
||||
public void testStateTest(final StateTest test) throws InterruptedException {
|
||||
// allows us to test blocking
|
||||
final EfficiencyMonitoringThreadFactory factory = new EfficiencyMonitoringThreadFactory(test.getNStates());
|
||||
final ExecutorService threadPool = Executors.newFixedThreadPool(test.getNStates(), factory);
|
||||
|
||||
logger.warn("Running " + test);
|
||||
synchronized (GLOBAL_LOCK) {
|
||||
//logger.warn(" Have lock");
|
||||
for ( final EfficiencyMonitoringThreadFactory.State threadToRunState : test.getStatesForThreads() )
|
||||
threadPool.submit(new StateTestThread(threadToRunState));
|
||||
|
||||
// lock has to be here for the whole running of the activeThreads but end before the sleep so the blocked activeThreads
|
||||
// can block for their allotted time
|
||||
threadPool.shutdown();
|
||||
Thread.sleep(THREAD_TARGET_DURATION_IN_MILLISECOND);
|
||||
}
|
||||
//logger.warn(" Releasing lock");
|
||||
threadPool.awaitTermination(10, TimeUnit.SECONDS);
|
||||
//logger.warn(" done awaiting termination");
|
||||
//logger.warn(" waiting for all activeThreads to complete");
|
||||
factory.waitForAllThreadsToComplete();
|
||||
//logger.warn(" done waiting for activeThreads");
|
||||
|
||||
// make sure we counted everything properly
|
||||
final long totalTime = factory.getTotalTime();
|
||||
final long minTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 0.5) * test.getNStates();
|
||||
final long maxTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 1.5) * test.getNStates();
|
||||
//logger.warn("Testing total time");
|
||||
Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime);
|
||||
Assert.assertTrue(totalTime <= maxTime, "Factory results not properly accumulated: totalTime = " + totalTime + " > maxTime = " + maxTime);
|
||||
|
||||
for (final EfficiencyMonitoringThreadFactory.State state : EfficiencyMonitoringThreadFactory.State.values() ) {
|
||||
final double min = test.minStatePercent(state);
|
||||
final double max = test.maxStatePercent(state);
|
||||
final double obs = factory.getStatePercent(state);
|
||||
// logger.warn(" Checking " + state
|
||||
// + " min " + String.format("%.2f", min)
|
||||
// + " max " + String.format("%.2f", max)
|
||||
// + " obs " + String.format("%.2f", obs)
|
||||
// + " factor = " + factory);
|
||||
Assert.assertTrue(obs >= min, "Too little time spent in state " + state + " obs " + obs + " min " + min);
|
||||
Assert.assertTrue(obs <= max, "Too much time spent in state " + state + " obs " + obs + " max " + min);
|
||||
}
|
||||
|
||||
// we actually ran the expected number of activeThreads
|
||||
Assert.assertEquals(factory.getNThreadsCreated(), test.getNStates());
|
||||
|
||||
// should be called to ensure we don't format / NPE on output
|
||||
factory.printUsageInformation(logger, Priority.WARN);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue