Separated out the DoC calculations from the XHMM pipeline, so that CalcDepthOfCoverage can be used for calculating joint coverage on a per-base accounting over multiple samples (e.g., family samples)

This commit is contained in:
Menachem Fromer 2012-09-10 15:32:41 -04:00
parent 449b89bd34
commit 0b717e2e2e
54 changed files with 0 additions and 7218 deletions

View File

@ -1,143 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collection;
import java.util.LinkedList;
import java.util.ListIterator;
/**
* Key algorithmic helper for ReadBasedReferenceOrderedData
*
* Takes a single iterator of features, and provides a single capability that returns
* the list of RODs that overlap an interval. Allows sequential getOverlapping calls
* from intervals provided that these intervals always have increasing getStart() values.
*
*/
class IntervalOverlappingRODsFromStream {
/**
* Only held for QC purposes
*/
GenomeLoc lastQuery = null;
private final String name;
private final LinkedList<GATKFeature> currentFeatures = new LinkedList<GATKFeature>();
private final PeekableIterator<RODRecordList> futureFeatures;
/**
* Create a new IntervalOverlappingRODsFromStream that reads elements from futureFeatures and
* returns RODRecordLists having name
*
* @param name
* @param futureFeatures
*/
IntervalOverlappingRODsFromStream(final String name, final PeekableIterator<RODRecordList> futureFeatures) {
if ( futureFeatures == null ) throw new IllegalArgumentException("futureFeatures cannot be null");
this.name = name;
this.futureFeatures = futureFeatures;
}
/**
* Get the list of RODs overlapping loc from this stream of RODs.
*
* Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart
*
* @param loc the interval to query
* @return a non-null RODRecordList containing the overlapping RODs, which may be empty
*/
@Ensures({"overlaps(loc, result)",
"! futureFeatures.hasNext() || futureFeatures.peek().getLocation().isPast(loc)",
"result != null"})
public RODRecordList getOverlapping(final GenomeLoc loc) {
if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));
trimCurrentFeaturesToLoc(loc);
readOverlappingFutureFeatures(loc);
return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
}
/**
* For contract assurance. Checks that all bindings in loc overlap
*
* @param loc
* @param bindings
* @return
*/
@Requires({"loc != null", "bindings != null"})
private boolean overlaps(final GenomeLoc loc, final RODRecordList bindings) {
for ( final GATKFeature feature : bindings )
if ( ! feature.getLocation().overlapsP(loc) )
return false;
return true;
}
/**
* Subset the features in all to those that overlap with loc
*
* The current features list contains everything read that cannot be thrown away yet, but not
* everything in there necessarily overlaps with loc. Subset to just those that do overlap
*
* @param loc the location that features must overlap
* @param all the list of all features
* @return a subset of all that overlaps with loc
*/
@Requires({"loc != null", "all != null"})
@Ensures("result.size() <= all.size()")
private Collection<GATKFeature> subsetToOverlapping(final GenomeLoc loc, final Collection<GATKFeature> all) {
final LinkedList<GATKFeature> overlapping = new LinkedList<GATKFeature>();
for ( final GATKFeature feature : all )
if ( feature.getLocation().overlapsP(loc) )
overlapping.add(feature);
return overlapping;
}
/**
* Update function. Remove all elements of currentFeatures that end before loc
*
* @param loc the location to use
*/
@Requires("loc != null")
@Ensures("currentFeatures.size() <= old(currentFeatures.size())")
private void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
final ListIterator<GATKFeature> it = currentFeatures.listIterator();
while ( it.hasNext() ) {
final GATKFeature feature = it.next();
if ( feature.getLocation().isBefore(loc) )
it.remove();
}
}
/**
* Update function: Read all elements from futureFeatures that overlap with loc
*
* Stops at the first element that starts before the end of loc, or the stream empties
*
* @param loc
*/
@Requires("loc != null")
@Ensures("currentFeatures.size() >= old(currentFeatures.size())")
private void readOverlappingFutureFeatures(final GenomeLoc loc) {
while ( futureFeatures.hasNext() ) {
final GenomeLoc nextLoc = futureFeatures.peek().getLocation();
if ( nextLoc.isBefore(loc) ) {
futureFeatures.next(); // next rod element is before loc, throw it away and keep looking
} else if ( nextLoc.isPast(loc) ) {
break; // next element is past loc, stop looking but don't pop it
} else if ( nextLoc.overlapsP(loc) ) {
// add overlapping elements to our current features, removing from stream
for ( final GATKFeature feature : futureFeatures.next() ) {
currentFeatures.add(feature);
}
}
}
}
}

View File

@ -1,14 +0,0 @@
package org.broadinstitute.sting.gatk.downsampling;
/**
* Type of downsampling method to invoke.
*
* @author hanna
* @version 0.1
*/
public enum DownsampleType {
    /** Perform no downsampling at all. */
    NONE,
    /** Downsample the pooled reads from all samples together. */
    ALL_READS,
    /** Downsample the reads of each sample independently. */
    BY_SAMPLE
}

View File

@ -1,153 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException;
/**
* Describes the method for downsampling reads at a given locus.
*/
public class DownsamplingMethod {
    /**
     * Type of downsampling to perform.
     */
    public final DownsampleType type;

    /**
     * Actual downsampling target, specified as an integer number of reads. Null when unused.
     */
    public final Integer toCoverage;

    /**
     * Actual downsampling target, specified as a fraction of total available reads. Null when unused.
     */
    public final Double toFraction;

    /**
     * Use the new experimental downsampling implementation?
     */
    public final boolean useExperimentalDownsampling;

    /**
     * Expresses no downsampling applied at all.
     */
    public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE, null, null, false);

    /**
     * Default type to use if no type is specified
     */
    public static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;

    /**
     * Default target coverage for locus-based traversals
     */
    public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;

    /**
     * Construct and validate a downsampling method.
     *
     * @param type downsampling type; if null, DEFAULT_DOWNSAMPLING_TYPE is used
     * @param toCoverage target number of reads, or null
     * @param toFraction target fraction of reads in [0.0, 1.0], or null
     * @param useExperimentalDownsampling whether to use the new experimental implementation
     * @throws UserException.CommandLineException if the combination of settings is invalid
     */
    public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useExperimentalDownsampling ) {
        this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;

        // BUG FIX: the original cleared the *parameters* only after the final fields had
        // already been assigned, which was a no-op -- a NONE method constructed with
        // non-null targets silently retained them. Clear the targets first.
        if ( this.type == DownsampleType.NONE ) {
            toCoverage = null;
            toFraction = null;
        }

        this.toCoverage = toCoverage;
        this.toFraction = toFraction;
        this.useExperimentalDownsampling = useExperimentalDownsampling;

        validate();
    }

    /**
     * Enforce the invariants on the downsampling settings, throwing a
     * UserException.CommandLineException on any violation.
     */
    private void validate() {
        // Can't leave toFraction and toCoverage null unless type is NONE
        if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null )
            throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling.");

        // Fraction and coverage cannot both be specified.
        if ( toFraction != null && toCoverage != null )
            throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified. Please choose only one.");

        // toCoverage must be > 0 when specified
        if ( toCoverage != null && toCoverage <= 0 ) {
            throw new UserException.CommandLineException("toCoverage must be > 0 when downsampling to coverage");
        }

        // toFraction must be >= 0.0 and <= 1.0 when specified
        if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
            throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
        }

        // Some restrictions only exist for the old downsampling implementation:
        if ( ! useExperimentalDownsampling ) {
            // By sample downsampling does not work with a fraction of reads in the old downsampling implementation
            if ( type == DownsampleType.BY_SAMPLE && toFraction != null )
                throw new UserException.CommandLineException("Cannot downsample to fraction with the BY_SAMPLE method");
        }

        // Some restrictions only exist for the new downsampling implementation:
        if ( useExperimentalDownsampling ) {
            if ( type == DownsampleType.ALL_READS && toCoverage != null ) {
                throw new UserException.CommandLineException("Cannot downsample to coverage with the ALL_READS method in the experimental downsampling implementation");
            }
        }
    }

    /**
     * @return a human-readable description of these downsampling settings
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder("Downsampling Settings: ");

        if ( type == DownsampleType.NONE ) {
            builder.append("No downsampling");
        }
        else {
            builder.append(String.format("Method: %s ", type));

            if ( toCoverage != null ) {
                builder.append(String.format("Target Coverage: %d ", toCoverage));
            }
            else {
                builder.append(String.format("Target Fraction: %.2f ", toFraction));
            }

            if ( useExperimentalDownsampling ) {
                builder.append("Using Experimental Downsampling");
            }
        }

        return builder.toString();
    }

    /**
     * Get the default downsampling method for a given walker type: locus-based traversals
     * get BY_SAMPLE downsampling to the default coverage, all others get no downsampling.
     *
     * @param walker the walker being run
     * @param useExperimentalDownsampling whether the experimental implementation is in use
     * @return a non-null DownsamplingMethod appropriate for the walker
     */
    public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useExperimentalDownsampling ) {
        if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
            return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
                                          null, useExperimentalDownsampling);
        }
        else {
            return new DownsamplingMethod(DownsampleType.NONE, null, null, useExperimentalDownsampling);
        }
    }
}

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMRecord;
/**
* Factory for creating FractionalDownsamplers on demand
*
* @author David Roazen
*/
public class FractionalDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {

    /** Fraction of reads each created downsampler will keep; shared by every instance made here. */
    private final double fraction;

    /**
     * Create a factory whose downsamplers all keep the same fraction of reads.
     *
     * @param fraction the fraction of reads to retain in each created FractionalDownsampler
     */
    public FractionalDownsamplerFactory( double fraction ) {
        this.fraction = fraction;
    }

    /**
     * @return a brand-new FractionalDownsampler configured with this factory's fraction
     */
    public ReadsDownsampler<T> newInstance() {
        return new FractionalDownsampler<T>(fraction);
    }
}

View File

@ -1,212 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.*;
/**
* Leveling Downsampler: Given a set of Lists of arbitrary items and a target size, removes items from
* the Lists in an even fashion until the total size of all Lists is <= the target size. Leveling
* does not occur until all Lists have been submitted and signalEndOfInput() is called.
*
* The Lists should be LinkedLists for maximum efficiency during item removal, however other
* kinds of Lists are also accepted (albeit at a slight performance penalty).
*
* Since this downsampler extends the Downsampler interface rather than the ReadsDownsampler interface,
* the Lists need not contain reads. However this downsampler may not be wrapped within one of the
* DownsamplingReadsIterators
*
* @param <T> the List type representing the stacks to be leveled
* @param <E> the type of the elements of each List
*
* @author David Roazen
*/
public class LevelingDownsampler<T extends List<E>, E> implements Downsampler<T> {
    /** After leveling, the total number of items across all groups will not exceed this value */
    private final int targetSize;

    /** All groups submitted so far; leveled and finalized once signalEndOfInput() is called */
    private List<T> groups;

    /** True once signalEndOfInput() has leveled the current set of groups */
    private boolean groupsAreFinalized;

    /** Running count of items removed by leveling (survives clear(); reset via reset()) */
    private int numDiscardedItems;

    /**
     * Construct a LevelingDownsampler
     *
     * @param targetSize the sum of the sizes of all individual Lists this downsampler is fed may not exceed
     *                   this value -- if it does, items are removed from Lists evenly until the total size
     *                   is <= this value
     */
    public LevelingDownsampler( int targetSize ) {
        this.targetSize = targetSize;
        clear();
        reset();
    }

    /** Add a single group to be leveled. */
    public void submit( T item ) {
        groups.add(item);
    }

    /** Add a collection of groups to be leveled. */
    public void submit( Collection<T> items ) {
        groups.addAll(items);
    }

    /** @return true if signalEndOfInput() has been called and leveled groups are available */
    public boolean hasFinalizedItems() {
        return groupsAreFinalized && ! groups.isEmpty();
    }

    /**
     * Consume all leveled groups, resetting this downsampler for further input.
     *
     * @return the leveled groups (the internal list itself, for speed), or an empty list
     *         if nothing has been finalized
     */
    public List<T> consumeFinalizedItems() {
        if ( ! hasFinalizedItems() ) {
            return new ArrayList<T>();
        }

        // pass by reference rather than make a copy, for speed
        List<T> toReturn = groups;
        clear();
        return toReturn;
    }

    /** @return true if groups have been submitted but not yet leveled */
    public boolean hasPendingItems() {
        return ! groupsAreFinalized && ! groups.isEmpty();
    }

    /** @return the first finalized group, or null if there are none */
    public T peekFinalized() {
        return hasFinalizedItems() ? groups.get(0) : null;
    }

    /** @return the first pending (not-yet-leveled) group, or null if there are none */
    public T peekPending() {
        return hasPendingItems() ? groups.get(0) : null;
    }

    /** @return the number of items discarded by leveling since the last reset() */
    public int getNumberOfDiscardedItems() {
        return numDiscardedItems;
    }

    /** Level the submitted groups and mark them finalized. */
    public void signalEndOfInput() {
        levelGroups();
        groupsAreFinalized = true;
    }

    /** Discard all submitted groups (does not reset the discarded-item counter). */
    public void clear() {
        groups = new ArrayList<T>();
        groupsAreFinalized = false;
    }

    /** Reset the discarded-item counter to zero. */
    public void reset() {
        numDiscardedItems = 0;
    }

    /**
     * Core leveling algorithm: repeatedly remove one item at a time from each group in
     * round-robin order until the total size is <= targetSize, refusing to shrink any
     * group below one item.
     */
    private void levelGroups() {
        int totalSize = 0;
        int[] groupSizes = new int[groups.size()];
        int currentGroupIndex = 0;

        for ( T group : groups ) {
            groupSizes[currentGroupIndex] = group.size();
            totalSize += groupSizes[currentGroupIndex];
            currentGroupIndex++;
        }

        if ( totalSize <= targetSize ) {
            return; // no need to eliminate any items
        }

        // We will try to remove exactly this many items, however we will refuse to allow any
        // one group to fall below size 1, and so might end up removing fewer items than this
        int numItemsToRemove = totalSize - targetSize;

        currentGroupIndex = 0;
        int numConsecutiveUnmodifiableGroups = 0;

        // Continue until we've either removed all the items we wanted to, or we can't
        // remove any more items without violating the constraint that all groups must
        // be left with at least one item
        while ( numItemsToRemove > 0 && numConsecutiveUnmodifiableGroups < groupSizes.length ) {
            if ( groupSizes[currentGroupIndex] > 1 ) {
                groupSizes[currentGroupIndex]--;
                numItemsToRemove--;
                numConsecutiveUnmodifiableGroups = 0;
            }
            else {
                numConsecutiveUnmodifiableGroups++;
            }

            currentGroupIndex = (currentGroupIndex + 1) % groupSizes.length;
        }

        // Now we actually go through and reduce each group to its new count as specified in groupSizes
        currentGroupIndex = 0;
        for ( T group : groups ) {
            downsampleOneGroup(group, groupSizes[currentGroupIndex]);
            currentGroupIndex++;
        }
    }

    /**
     * Randomly reduce a single group to numItemsToKeep items, updating numDiscardedItems.
     *
     * @param group the group to shrink in place
     * @param numItemsToKeep the number of randomly-selected items to retain
     */
    private void downsampleOneGroup( T group, int numItemsToKeep ) {
        if ( numItemsToKeep >= group.size() ) {
            return;
        }

        numDiscardedItems += group.size() - numItemsToKeep;

        BitSet itemsToKeep = new BitSet(group.size());
        for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(group.size(), numItemsToKeep) ) {
            itemsToKeep.set(selectedIndex);
        }

        int currentIndex = 0;

        // If our group is a linked list, we can remove the desired items in a single O(n) pass with an iterator
        if ( group instanceof LinkedList ) {
            // FIX: use the parameterized iterator type rather than a raw Iterator
            Iterator<E> iter = group.iterator();
            while ( iter.hasNext() ) {
                iter.next();

                if ( ! itemsToKeep.get(currentIndex) ) {
                    iter.remove();
                }
                currentIndex++;
            }
        }
        // If it's not a linked list, it's more efficient to copy the desired items into a new list and back rather
        // than suffer O(n^2) of item shifting
        else {
            List<E> keptItems = new ArrayList<E>(numItemsToKeep);

            for ( E item : group ) {
                if ( itemsToKeep.get(currentIndex) ) {
                    keptItems.add(item);
                }
                currentIndex++;
            }
            group.clear();
            group.addAll(keptItems);
        }
    }
}

View File

@ -1,202 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordComparator;
import net.sf.samtools.SAMRecordCoordinateComparator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import java.util.*;
/**
* StingSAMIterator wrapper around our generic reads downsampler interface
* that downsamples reads for each sample independently, and then re-assembles
* the reads back into a single merged stream.
*
* @author David Roazen
*/
public class PerSampleDownsamplingReadsIterator implements StingSAMIterator {

    // Source of the coordinate-sorted read stream being downsampled
    private StingSAMIterator nestedSAMIterator;

    // Creates one downsampler per sample, all sharing the same construction parameters
    private ReadsDownsamplerFactory<SAMRecord> downsamplerFactory;

    // One downsampler per sample name; a null key is used for reads without a read group
    private Map<String, ReadsDownsampler<SAMRecord>> perSampleDownsamplers;

    // Already-downsampled reads from all samples, merged back into coordinate order
    private PriorityQueue<SAMRecord> orderedDownsampledReadsCache;

    // The read next() will return, or null once the stream is exhausted
    private SAMRecord nextRead = null;

    // Defines the coordinate order used for both the cache and pending-read comparisons
    private SAMRecordComparator readComparator = new SAMRecordCoordinateComparator();

    // Earliest read (by coordinate) still pending inside any per-sample downsampler;
    // cached reads may only be released while they sort at or before this read, which
    // preserves the global sort order of the merged output
    private SAMRecord earliestPendingRead = null;

    // The downsampler currently holding earliestPendingRead
    private ReadsDownsampler<SAMRecord> earliestPendingDownsampler = null;

    // Initial size of our cache of finalized reads
    private static final int DOWNSAMPLED_READS_INITIAL_CACHE_SIZE = 4096;

    // The number of positional changes that can occur in the read stream before all downsamplers
    // should be informed of the current position (guards against samples with relatively sparse reads
    // getting stuck in a pending state):
    private static final int DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL = 3; // TODO: experiment with this value

    /**
     * @param iter wrapped iterator from which this iterator will pull reads
     * @param downsamplerFactory factory used to create new downsamplers as needed
     */
    public PerSampleDownsamplingReadsIterator( StingSAMIterator iter, ReadsDownsamplerFactory<SAMRecord> downsamplerFactory ) {
        nestedSAMIterator = iter;
        this.downsamplerFactory = downsamplerFactory;
        perSampleDownsamplers = new HashMap<String, ReadsDownsampler<SAMRecord>>();
        orderedDownsampledReadsCache = new PriorityQueue<SAMRecord>(DOWNSAMPLED_READS_INITIAL_CACHE_SIZE, readComparator);

        // Prime nextRead so hasNext()/next() work immediately after construction
        advanceToNextRead();
    }

    /** @return true if another downsampled read is available */
    public boolean hasNext() {
        return nextRead != null;
    }

    /**
     * @return the next downsampled read, in coordinate-sorted order
     * @throws NoSuchElementException if the stream is exhausted
     */
    public SAMRecord next() {
        if ( nextRead == null ) {
            throw new NoSuchElementException("next() called when there are no more items");
        }

        SAMRecord toReturn = nextRead;
        advanceToNextRead();

        return toReturn;
    }

    // Pull the next releasable read out of the cache into nextRead, refilling the
    // cache from the nested iterator if necessary; sets nextRead to null at end of stream
    private void advanceToNextRead() {
        if ( ! readyToReleaseReads() && ! fillDownsampledReadsCache() ) {
            nextRead = null;
        }
        else {
            nextRead = orderedDownsampledReadsCache.poll();
        }
    }

    // A cached read may be released only if it sorts at or before the earliest read
    // still pending in any downsampler (otherwise output order could be violated)
    private boolean readyToReleaseReads() {
        if ( orderedDownsampledReadsCache.isEmpty() ) {
            return false;
        }

        return earliestPendingRead == null ||
               readComparator.compare(orderedDownsampledReadsCache.peek(), earliestPendingRead) <= 0;
    }

    // Maintain the (earliestPendingRead, earliestPendingDownsampler) pair after
    // currentDownsampler may have changed its pending state
    private void updateEarliestPendingRead( ReadsDownsampler<SAMRecord> currentDownsampler ) {
        // If there is no recorded earliest pending read and this downsampler has pending items,
        // then this downsampler's first pending item becomes the new earliest pending read:
        if ( earliestPendingRead == null && currentDownsampler.hasPendingItems() ) {
            earliestPendingRead = currentDownsampler.peekPending();
            earliestPendingDownsampler = currentDownsampler;
        }
        // In all other cases, we only need to update the earliest pending read when the downsampler
        // associated with it experiences a change in its pending reads, since by assuming a sorted
        // read stream we're assured that each downsampler's earliest pending read will only increase
        // in genomic position over time.
        //
        // TODO: An occasional O(samples) linear search seems like a better option than keeping the downsamplers
        // TODO: sorted by earliest pending read, which would cost at least O(total_reads * (samples + log(samples))),
        // TODO: but need to verify this empirically.
        else if ( currentDownsampler == earliestPendingDownsampler &&
                  (! currentDownsampler.hasPendingItems() || readComparator.compare(currentDownsampler.peekPending(), earliestPendingRead) != 0) ) {
            earliestPendingRead = null;
            earliestPendingDownsampler = null;

            // Linear re-scan of all downsamplers for the new minimum pending read
            for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
                if ( perSampleDownsampler.hasPendingItems() &&
                     (earliestPendingRead == null || readComparator.compare(perSampleDownsampler.peekPending(), earliestPendingRead) < 0) ) {
                    earliestPendingRead = perSampleDownsampler.peekPending();
                    earliestPendingDownsampler = perSampleDownsampler;
                }
            }
        }
    }

    // Feed reads from the nested iterator into the per-sample downsamplers until a cached
    // read can be released in order (or the input runs dry). Returns readyToReleaseReads().
    private boolean fillDownsampledReadsCache() {
        SAMRecord prevRead = null;
        int numPositionalChanges = 0;

        // Continue submitting reads to the per-sample downsamplers until the read at the top of the priority queue
        // can be released without violating global sort order
        while ( nestedSAMIterator.hasNext() && ! readyToReleaseReads() ) {
            SAMRecord read = nestedSAMIterator.next();
            String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;

            ReadsDownsampler<SAMRecord> thisSampleDownsampler = perSampleDownsamplers.get(sampleName);
            if ( thisSampleDownsampler == null ) {
                thisSampleDownsampler = downsamplerFactory.newInstance();
                perSampleDownsamplers.put(sampleName, thisSampleDownsampler);
            }

            thisSampleDownsampler.submit(read);
            updateEarliestPendingRead(thisSampleDownsampler);

            if ( prevRead != null && prevRead.getAlignmentStart() != read.getAlignmentStart() ) {
                numPositionalChanges++;
            }

            // If the number of times we've changed position exceeds a certain threshold, inform all
            // downsamplers of the current position in the read stream. This is to prevent downsamplers
            // for samples with sparser reads than others from getting stuck too long in a pending state.
            // NOTE(review): numPositionalChanges is never reset once past the threshold, so after
            // the first trigger this broadcast happens on every iteration -- confirm this is intended.
            if ( numPositionalChanges > DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL ) {
                for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
                    perSampleDownsampler.signalNoMoreReadsBefore(read);
                    updateEarliestPendingRead(perSampleDownsampler);
                }
            }

            prevRead = read;
        }

        // Input exhausted: flush every downsampler; nothing can be pending any more
        if ( ! nestedSAMIterator.hasNext() ) {
            for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
                perSampleDownsampler.signalEndOfInput();
            }
            earliestPendingRead = null;
            earliestPendingDownsampler = null;
        }

        // Merge all newly-finalized reads into the coordinate-ordered cache
        for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
            if ( perSampleDownsampler.hasFinalizedItems() ) {
                orderedDownsampledReadsCache.addAll(perSampleDownsampler.consumeFinalizedItems());
            }
        }

        return readyToReleaseReads();
    }

    /** Removal is not supported by this iterator. */
    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
    }

    /** Close the wrapped iterator and release its resources. */
    public void close() {
        nestedSAMIterator.close();
    }

    /** @return this iterator itself, for use in for-each loops */
    public Iterator<SAMRecord> iterator() {
        return this;
    }
}

View File

@ -1,37 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMRecord;
/**
* A ReadsDownsamplerFactory can be used to create an arbitrary number of instances of a particular
* downsampler, all sharing the same construction parameters.
*
* @author David Roazen
*/
public interface ReadsDownsamplerFactory<T extends SAMRecord> {

    /**
     * Construct a fresh downsampler instance using this factory's construction parameters.
     *
     * @return a new, independent ReadsDownsampler
     */
    public ReadsDownsampler<T> newInstance();
}

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMRecord;
/**
* Factory for creating ReservoirDownsamplers on demand
*
* @author David Roazen
*/
public class ReservoirDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {

    /** Reservoir size shared by every downsampler this factory creates. */
    private final int targetSampleSize;

    /**
     * Create a factory whose downsamplers all use the same reservoir size.
     *
     * @param targetSampleSize the maximum number of reads retained by each created ReservoirDownsampler
     */
    public ReservoirDownsamplerFactory( int targetSampleSize ) {
        this.targetSampleSize = targetSampleSize;
    }

    /**
     * @return a brand-new ReservoirDownsampler configured with this factory's target sample size
     */
    public ReadsDownsampler<T> newInstance() {
        return new ReservoirDownsampler<T>(targetSampleSize);
    }
}

View File

@ -1,169 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMRecord;
import java.util.*;
/**
 * Simple Positional Downsampler: Downsample each stack of reads at each alignment start to a size <= a target coverage
 * using a Reservoir downsampler. Stores only O(target coverage) reads in memory at any given time.
 *
 * Requires a coordinate-sorted read stream (see requiresCoordinateSortOrder()). Unmapped reads
 * encountered at the end of the stream are passed through without downsampling.
 *
 * @author David Roazen
 */
public class SimplePositionalDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {
    // Maximum number of reads permitted to share a single alignment start position
    private int targetCoverage;

    // Reservoir accumulating a random subset (of size <= targetCoverage) of the reads
    // at the position currently being processed
    private ReservoirDownsampler<T> reservoir;

    // Contig index and alignment start of the position currently being accumulated
    private int currentContigIndex;

    private int currentAlignmentStart;

    // False until the first read establishes a current position
    private boolean positionEstablished;

    // Set once the unmapped reads at the end of the stream are reached; from then on
    // submitted reads bypass the reservoir entirely
    private boolean unmappedReadsReached;

    // Reads from fully-processed positions, ready to be handed to the consumer
    private ArrayList<T> finalizedReads;

    // Total number of reads discarded by the reservoir since the last reset()
    private int numDiscardedItems;

    /**
     * Construct a SimplePositionalDownsampler
     *
     * @param targetCoverage Maximum number of reads that may share any given alignment start position
     */
    public SimplePositionalDownsampler( int targetCoverage ) {
        this.targetCoverage = targetCoverage;
        reservoir = new ReservoirDownsampler<T>(targetCoverage);
        finalizedReads = new ArrayList<T>();
        clear();
        reset();
    }

    /**
     * Submit one read. If the read moves past the current position, the previous position's
     * reservoir is finalized first. Unmapped reads at the end of the stream are kept unconditionally.
     *
     * @param newRead the read to submit; must not precede previously submitted reads in coordinate order
     */
    public void submit( T newRead ) {
        updatePositionalState(newRead);

        if ( unmappedReadsReached ) {  // don't downsample the unmapped reads at the end of the stream
            finalizedReads.add(newRead);
        }
        else {
            // track how many reads this single submission displaced from the reservoir
            int reservoirPreviouslyDiscardedItems = reservoir.getNumberOfDiscardedItems();
            reservoir.submit(newRead);
            numDiscardedItems += reservoir.getNumberOfDiscardedItems() - reservoirPreviouslyDiscardedItems;
        }
    }

    /**
     * Submit a collection of reads, one at a time, in iteration order.
     *
     * @param newReads reads to submit
     */
    public void submit( Collection<T> newReads ) {
        for ( T read : newReads ) {
            submit(read);
        }
    }

    // True when at least one fully-processed read is available for consumption
    public boolean hasFinalizedItems() {
        return finalizedReads.size() > 0;
    }

    /**
     * Hand the finalized reads to the caller and start a fresh internal list.
     *
     * @return the reads finalized so far (ownership transfers to the caller)
     */
    public List<T> consumeFinalizedItems() {
        // pass by reference rather than make a copy, for speed
        List<T> toReturn = finalizedReads;
        finalizedReads = new ArrayList<T>();
        return toReturn;
    }

    // Pending items are the reads still held in the reservoir for the current position
    public boolean hasPendingItems() {
        return reservoir.hasFinalizedItems();
    }

    // First finalized read, or null if none are available
    public T peekFinalized() {
        return finalizedReads.isEmpty() ? null : finalizedReads.get(0);
    }

    // First read still pending in the reservoir, or null if none
    public T peekPending() {
        return reservoir.peekFinalized();
    }

    public int getNumberOfDiscardedItems() {
        return numDiscardedItems;
    }

    // End of stream: flush whatever is left in the reservoir into the finalized list
    public void signalEndOfInput() {
        finalizeReservoir();
    }

    /**
     * Discard all buffered state (reservoir contents, finalized reads, positional tracking).
     * Does NOT reset the discarded-items counter; see reset().
     */
    public void clear() {
        reservoir.clear();
        reservoir.reset();
        finalizedReads.clear();
        positionEstablished = false;
        unmappedReadsReached = false;
    }

    // Reset only the discarded-items statistic
    public void reset() {
        numDiscardedItems = 0;
    }

    // Positional downsampling is only meaningful on a coordinate-sorted stream
    public boolean requiresCoordinateSortOrder() {
        return true;
    }

    /**
     * Hint that no further reads will start before the given read: allows the current
     * position's reservoir to be finalized early.
     *
     * @param read the earliest read that may still arrive
     */
    public void signalNoMoreReadsBefore( T read ) {
        updatePositionalState(read);
    }

    /**
     * If newRead is past the current position, finalize the reservoir for that position and
     * make newRead's position current. An unmapped read marks the start of the unmapped tail.
     */
    private void updatePositionalState( T newRead ) {
        if ( readIsPastCurrentPosition(newRead) ) {
            if ( reservoir.hasFinalizedItems() ) {
                finalizeReservoir();
            }

            setCurrentPosition(newRead);

            if ( newRead.getReadUnmappedFlag() ) {
                unmappedReadsReached = true;
            }
        }
    }

    private void setCurrentPosition( T read ) {
        currentContigIndex = read.getReferenceIndex();
        currentAlignmentStart = read.getAlignmentStart();
        positionEstablished = true;
    }

    /**
     * A read is "past" the current position when no position is established yet, when it is on a
     * later contig or a later alignment start, or when it is the first unmapped read of the tail.
     */
    private boolean readIsPastCurrentPosition( T read ) {
        return ! positionEstablished ||
               read.getReferenceIndex() > currentContigIndex ||
               read.getAlignmentStart() > currentAlignmentStart ||
               (read.getReadUnmappedFlag() && ! unmappedReadsReached);
    }

    // Move the reservoir's surviving reads into the finalized list and clear its statistics
    private void finalizeReservoir() {
        finalizedReads.addAll(reservoir.consumeFinalizedItems());
        reservoir.reset();
    }
}

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMRecord;
/**
 * Factory that produces a fresh SimplePositionalDownsampler, configured with a fixed
 * target coverage, each time one is requested.
 *
 * @author David Roazen
 */
public class SimplePositionalDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {
    /** Maximum number of reads per alignment start for downsamplers built by this factory. */
    private final int targetCoverage;

    /**
     * @param targetCoverage maximum number of reads that may share any given alignment start
     *                       position in the downsamplers this factory creates
     */
    public SimplePositionalDownsamplerFactory( final int targetCoverage ) {
        this.targetCoverage = targetCoverage;
    }

    /**
     * @return a newly constructed SimplePositionalDownsampler using this factory's target coverage
     */
    public ReadsDownsampler<T> newInstance() {
        return new SimplePositionalDownsampler<T>(targetCoverage);
    }
}

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.util.Iterator;
/**
 * Iterator wrapper implementing legacy fraction-based downsampling: each read from the
 * underlying stream is independently kept with probability ~= fraction (resolved to a
 * threshold out of 10000 draws from the engine's shared random generator).
 */
public class LegacyDownsampleIterator implements StingSAMIterator {
    StingSAMIterator it;          // underlying read stream
    int cutoff;                   // acceptance threshold out of 10000
    SAMRecord next;               // pre-fetched next accepted read, or null at end of stream

    public LegacyDownsampleIterator(StingSAMIterator it, double fraction) {
        this.it = it;
        cutoff = (int)(fraction * 10000);
        // prime the pipeline so hasNext() can answer immediately
        next = getNextRecord();
    }

    public boolean hasNext() {
        return next != null;
    }

    public SAMRecord next() {
        final SAMRecord toReturn = next;
        next = getNextRecord();
        return toReturn;
    }

    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
    }

    /**
     * Pull reads from the underlying iterator until one passes the random acceptance test.
     *
     * @return the next accepted read, or null once the underlying stream is exhausted
     */
    private SAMRecord getNextRecord() {
        while ( it.hasNext() ) {
            final SAMRecord candidate = it.next();
            final boolean keep = GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoff;
            if ( keep ) {
                return candidate;
            }
        }
        return null;
    }

    public void close() {
        it.close();
    }

    public Iterator<SAMRecord> iterator() {
        return this;
    }
}

View File

@ -1,649 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
/**
 * Iterator that traverses a SAM File, accumulating information on a per-locus basis.
 *
 * For each reference position covered by at least one non-deletion/non-skip base, produces an
 * AlignmentContext containing per-sample read-backed pileups. Optionally applies per-sample
 * leveling downsampling to a target coverage as reads are added.
 */
public class LocusIteratorByStateExperimental extends LocusIterator {
    /**
     * our log, which we want to capture anything from this class
     */
    // NOTE(review): registered under LocusIteratorByState.class, not this class -- possibly a
    // deliberate shared logging category with the non-experimental iterator; confirm.
    private static Logger logger = Logger.getLogger(LocusIteratorByState.class);

    // -----------------------------------------------------------------------------------------------------------------
    //
    // member fields
    //
    // -----------------------------------------------------------------------------------------------------------------

    /**
     * Used to create new GenomeLocs.
     */
    private final GenomeLocParser genomeLocParser;

    // The samples for which per-sample pileups are built at each locus
    private final ArrayList<String> samples;

    // Manages the set of read states currently overlapping the locus being processed
    private final ReadStateManager readStates;

    /**
     * Tracks a single read's progress through its CIGAR as the iterator steps along the reference.
     */
    protected static class SAMRecordState {
        SAMRecord read;
        int readOffset = -1;     // how far are we offset from the start of the read bases?
        int genomeOffset = -1;   // how far are we offset from the alignment start on the genome?

        Cigar cigar = null;
        int cigarOffset = -1;
        CigarElement curElement = null;
        int nCigarElements = 0;

        int cigarElementCounter = -1;   // how far are we into a single cigarElement

        // The logical model for generating extended events is as follows: the "record state" implements the traversal
        // along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This
        // can be a (mis)match or a deletion (in the latter case, we still return on every individual reference base the
        // deletion spans). In the extended events mode, the record state also remembers if there was an insertion, or
        // if the deletion just started *right before* the current reference base the record state is pointing to upon the return from
        // stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended
        // events immediately preceding the current reference base).
        public SAMRecordState(SAMRecord read) {
            this.read = read;
            cigar = read.getCigar();
            nCigarElements = cigar.numCigarElements();

            //System.out.printf("Creating a SAMRecordState: %s%n", this);
        }

        public SAMRecord getRead() {
            return read;
        }

        /**
         * What is our current offset in the read's bases that aligns us with the reference genome?
         *
         * @return the 0-based offset into the read's bases
         */
        public int getReadOffset() {
            return readOffset;
        }

        /**
         * What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
         *
         * @return the 0-based offset from the read's alignment start on the reference
         */
        public int getGenomeOffset() {
            return genomeOffset;
        }

        // Absolute 1-based reference position this state currently points at
        public int getGenomePosition() {
            return read.getAlignmentStart() + getGenomeOffset();
        }

        // Current position as a single-base GenomeLoc on this read's contig
        public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
            return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
        }

        public CigarOperator getCurrentCigarOperator() {
            return curElement.getOperator();
        }

        public String toString() {
            return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
        }

        // Next CIGAR element if we are at the end of the current one; otherwise the current element
        public CigarElement peekForwardOnGenome() {
            return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement );
        }

        // Previous CIGAR element if we are at the start of the current one; otherwise the current element
        public CigarElement peekBackwardOnGenome() {
            return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 > 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement );
        }

        /**
         * Advance this state by exactly one reference base, consuming CIGAR elements as needed.
         *
         * @return the CIGAR operator covering the new reference position, or null once we have
         *         stepped past the end of the read
         */
        public CigarOperator stepForwardOnGenome() {
            // we enter this method with readOffset = index of the last processed base on the read
            // (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
            if (curElement == null || ++cigarElementCounter > curElement.getLength()) {
                cigarOffset++;
                if (cigarOffset < nCigarElements) {
                    curElement = cigar.getCigarElement(cigarOffset);
                    cigarElementCounter = 0;
                    // next line: guards against cigar elements of length 0; when new cigar element is retrieved,
                    // we reenter in order to re-check cigarElementCounter against curElement's length
                    return stepForwardOnGenome();
                } else {
                    if (curElement != null && curElement.getOperator() == CigarOperator.D)
                        throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");

                    // Reads that contain indels model the genomeOffset as the following base in the reference. Because
                    // we fall into this else block only when indels end the read, increment genomeOffset such that the
                    // current offset of this read is the next ref base after the end of the indel. This position will
                    // model a point on the reference somewhere after the end of the read.
                    genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
                    // we do step forward on the ref, and by returning null we also indicate that we are past the read end.

                    return null;
                }
            }

            boolean done = false;
            switch (curElement.getOperator()) {
                case H: // ignore hard clips
                case P: // ignore pads
                    cigarElementCounter = curElement.getLength();
                    break;
                case I: // insertion w.r.t. the reference
                case S: // soft clip
                    cigarElementCounter = curElement.getLength();
                    readOffset += curElement.getLength();
                    break;
                case D: // deletion w.r.t. the reference
                    if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
                        throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
                    // should be the same as N case
                    genomeOffset++;
                    done = true;
                    break;
                case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
                    genomeOffset++;
                    done = true;
                    break;
                case M:
                case EQ:
                case X:
                    readOffset++;
                    genomeOffset++;
                    done = true;
                    break;
                default:
                    throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
            }

            return done ? curElement.getOperator() : stepForwardOnGenome();
        }
    }

    //final boolean DEBUG = false;
    //final boolean DEBUG2 = false && DEBUG;

    // Engine-level read properties (downsampling method, whether deletions join pileups, etc.)
    private ReadProperties readInfo;

    // Lazily-computed next context; null until lazyLoadNextAlignmentContext() fills it
    private AlignmentContext nextAlignmentContext;

    // True when per-sample leveling downsampling to a target coverage is configured
    private boolean performLevelingDownsampling;

    // -----------------------------------------------------------------------------------------------------------------
    //
    // constructors and other basic operations
    //
    // -----------------------------------------------------------------------------------------------------------------

    /**
     * Build an iterator over the given read stream.
     *
     * @param samIterator     coordinate-sorted read source
     * @param readInformation engine read properties controlling downsampling and deletion handling
     * @param genomeLocParser used to create GenomeLocs for each locus
     * @param samples         the samples whose reads will be piled up (may be empty only if samIterator is empty)
     */
    public LocusIteratorByStateExperimental(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
        this.readInfo = readInformation;
        this.genomeLocParser = genomeLocParser;
        this.samples = new ArrayList<String>(samples);
        this.readStates = new ReadStateManager(samIterator);
        this.performLevelingDownsampling = readInfo.getDownsamplingMethod() != null &&
                                           readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
                                           readInfo.getDownsamplingMethod().toCoverage != null;

        // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
        // there's no read data. So we need to throw this error only when samIterator.hasNext() is true
        if (this.samples.isEmpty() && samIterator.hasNext()) {
            throw new IllegalArgumentException("samples list must not be empty");
        }
    }

    /**
     * For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
     * for the system.
     */
    public final static Collection<String> sampleListForSAMWithoutReadGroups() {
        List<String> samples = new ArrayList<String>();
        samples.add(null);
        return samples;
    }

    public Iterator<AlignmentContext> iterator() {
        return this;
    }

    public void close() {
        //this.it.close();
    }

    public boolean hasNext() {
        lazyLoadNextAlignmentContext();
        return (nextAlignmentContext != null);
        //if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
    }

    // Location of the earliest in-flight read state, or null if no reads are buffered
    private GenomeLoc getLocation() {
        return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
    }

    // -----------------------------------------------------------------------------------------------------------------
    //
    // next() routine and associated collection operations
    //
    // -----------------------------------------------------------------------------------------------------------------

    public AlignmentContext next() {
        lazyLoadNextAlignmentContext();
        if (!hasNext())
            throw new NoSuchElementException("LocusIteratorByState: out of elements.");
        AlignmentContext currentAlignmentContext = nextAlignmentContext;
        nextAlignmentContext = null;
        return currentAlignmentContext;
    }

    /**
     * Creates the next alignment context from the given state. Note that this is implemented as a lazy load method.
     * nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
     */
    private void lazyLoadNextAlignmentContext() {
        while (nextAlignmentContext == null && readStates.hasNext()) {
            readStates.collectPendingReads();

            final GenomeLoc location = getLocation();
            final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();

            // TODO: How can you determine here whether the current pileup has been downsampled?
            boolean hasBeenSampled = false;

            // build a pileup for every sample, one read state at a time
            for (final String sample : samples) {
                final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
                final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));

                int size = 0;                   // number of elements in this sample's pileup
                int nDeletions = 0;             // number of deletions in this sample's pileup
                int nMQ0Reads = 0;              // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)

                while (iterator.hasNext()) {
                    final SAMRecordState state = iterator.next();                   // state object with the read/offset information
                    final GATKSAMRecord read = (GATKSAMRecord) state.getRead();     // the actual read
                    final CigarOperator op = state.getCurrentCigarOperator();       // current cigar operator
                    final CigarElement nextElement = state.peekForwardOnGenome();   // next cigar element
                    final CigarElement lastElement = state.peekBackwardOnGenome();  // last cigar element
                    final boolean isSingleElementCigar = nextElement == lastElement;
                    final CigarOperator nextOp = nextElement.getOperator();         // next cigar operator
                    final CigarOperator lastOp = lastElement.getOperator();         // last cigar operator
                    int readOffset = state.getReadOffset();                         // the base offset on this read

                    final boolean isBeforeDeletion  = nextOp == CigarOperator.DELETION;
                    final boolean isAfterDeletion   = lastOp == CigarOperator.DELETION;
                    final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
                    final boolean isAfterInsertion  = lastOp == CigarOperator.INSERTION && !isSingleElementCigar;
                    final boolean isNextToSoftClip  = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());

                    int nextElementLength = nextElement.getLength();

                    if (op == CigarOperator.N) // N's are never added to any pileup
                        continue;

                    if (op == CigarOperator.D) {
                        // TODO -- LIBS is totally busted for deletions so that reads with Ds right before Is in their CIGAR are broken; must fix
                        if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
                            pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
                            size++;
                            nDeletions++;
                            if (read.getMappingQuality() == 0)
                                nMQ0Reads++;
                        }
                    }
                    else {
                        if (!filterBaseInRead(read, location.getStart())) {
                            String insertedBaseString = null;
                            if (nextOp == CigarOperator.I) {
                                final int insertionOffset = isSingleElementCigar ? 0 : 1;
                                // TODO -- someone please implement a better fix for the single element insertion CIGAR!
                                if (isSingleElementCigar)
                                    readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases!
                                insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength()));
                            }

                            pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
                            size++;
                            if (read.getMappingQuality() == 0)
                                nMQ0Reads++;
                        }
                    }
                }

                if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
                    fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
            }

            updateReadStates(); // critical - must be called after we get the current state offsets and location

            if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
                nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
        }
    }

    // fast testing of position
    private boolean readIsPastCurrentPosition(SAMRecord read) {
        if (readStates.isEmpty())
            return false;
        else {
            SAMRecordState state = readStates.getFirst();
            SAMRecord ourRead = state.getRead();
            return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition();
        }
    }

    /**
     * Generic place to put per-base filters appropriate to LocusIteratorByState
     *
     * @param rec the read to test
     * @param pos the reference position being piled up
     * @return true if the base at pos should be excluded from the pileup
     */
    private static boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
        return ReadUtils.isBaseInsideAdaptor(rec, pos);
    }

    // Step every in-flight read state forward one reference base, dropping exhausted reads
    private void updateReadStates() {
        for (final String sample : samples) {
            Iterator<SAMRecordState> it = readStates.iterator(sample);
            while (it.hasNext()) {
                SAMRecordState state = it.next();
                CigarOperator op = state.stepForwardOnGenome();
                if (op == null) {
                    // we discard the read only when we are past its end AND indel at the end of the read (if any) was
                    // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
                    // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
                    it.remove(); // we've stepped off the end of the object
                }
            }
        }
    }

    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
    }

    /**
     * Buffers and partitions incoming reads by sample, exposing per-sample iterators over
     * the read states overlapping the current locus.
     */
    protected class ReadStateManager {
        private final PeekableIterator<SAMRecord> iterator;
        private final SamplePartitioner samplePartitioner;
        private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
        private int totalReadStates = 0;

        public ReadStateManager(Iterator<SAMRecord> source) {
            this.iterator = new PeekableIterator<SAMRecord>(source);

            for (final String sample : samples) {
                readStatesBySample.put(sample, new PerSampleReadStateManager());
            }

            samplePartitioner = new SamplePartitioner();
        }

        /**
         * Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented
         * for this iterator; if present, total read states will be decremented.
         *
         * @param sample The sample.
         * @return Iterator over the reads associated with that sample.
         */
        public Iterator<SAMRecordState> iterator(final String sample) {
            return new Iterator<SAMRecordState>() {
                private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();

                public boolean hasNext() {
                    return wrappedIterator.hasNext();
                }

                public SAMRecordState next() {
                    return wrappedIterator.next();
                }

                public void remove() {
                    wrappedIterator.remove();
                }
            };
        }

        public boolean isEmpty() {
            return totalReadStates == 0;
        }

        /**
         * Retrieves the total number of reads in the manager across all samples.
         *
         * @return Total number of reads over all samples.
         */
        public int size() {
            return totalReadStates;
        }

        /**
         * Retrieves the total number of reads in the manager in the given sample.
         *
         * @param sample The sample.
         * @return Total number of reads in the given sample.
         */
        public int size(final String sample) {
            return readStatesBySample.get(sample).size();
        }

        // Earliest buffered read state across all samples, or null if none are buffered
        public SAMRecordState getFirst() {
            for (final String sample : samples) {
                PerSampleReadStateManager reads = readStatesBySample.get(sample);
                if (!reads.isEmpty())
                    return reads.peek();
            }
            return null;
        }

        public boolean hasNext() {
            return totalReadStates > 0 || iterator.hasNext();
        }

        // Pull in all source reads overlapping the current position and partition them by sample
        public void collectPendingReads() {
            if (!iterator.hasNext())
                return;

            if (readStates.size() == 0) {
                // no current position yet: gather the full stack of reads at the first alignment start
                int firstContigIndex = iterator.peek().getReferenceIndex();
                int firstAlignmentStart = iterator.peek().getAlignmentStart();
                while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
                    samplePartitioner.submitRead(iterator.next());
                }
            } else {
                // Fast fail in the case that the read is past the current position.
                if (readIsPastCurrentPosition(iterator.peek()))
                    return;

                while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
                    samplePartitioner.submitRead(iterator.next());
                }
            }

            for (final String sample : samples) {
                Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);

                PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
                addReadsToSample(statesBySample, newReads);
            }

            samplePartitioner.reset();
        }

        /**
         * Add reads with the given sample name to the given hanger entry.
         *
         * @param readStates The list of read states to add this collection of reads.
         * @param reads Reads to add. Selected reads will be pulled from this source.
         */
        private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
            if (reads.isEmpty())
                return;

            Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();

            for (SAMRecord read : reads) {
                SAMRecordState state = new SAMRecordState(read);
                state.stepForwardOnGenome();
                newReadStates.add(state);
            }

            readStates.addStatesAtNextAlignmentStart(newReadStates);
        }

        /**
         * Per-sample store of read states, grouped by alignment start, with optional
         * leveling downsampling applied as new groups arrive.
         */
        protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
            private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
            private int thisSampleReadStates = 0;
            private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
                    performLevelingDownsampling ?
                    new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
                    null;

            public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
                if ( states.isEmpty() ) {
                    return;
                }

                readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
                thisSampleReadStates += states.size();
                totalReadStates += states.size();

                if ( levelingDownsampler != null ) {
                    levelingDownsampler.submit(readStatesByAlignmentStart);
                    levelingDownsampler.signalEndOfInput();

                    // account for the reads the downsampler just dropped
                    thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
                    totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();

                    // use returned List directly rather than make a copy, for efficiency's sake
                    readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
                    levelingDownsampler.reset();
                }
            }

            public boolean isEmpty() {
                return readStatesByAlignmentStart.isEmpty();
            }

            // Earliest read state for this sample, or null if empty
            public SAMRecordState peek() {
                return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
            }

            public int size() {
                return thisSampleReadStates;
            }

            // Flattening iterator over all alignment-start groups; remove() keeps the counters in sync
            public Iterator<SAMRecordState> iterator() {
                return new Iterator<SAMRecordState>() {
                    private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
                    private LinkedList<SAMRecordState> currentPositionReadStates = null;
                    private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;

                    public boolean hasNext() {
                        return alignmentStartIterator.hasNext() ||
                               (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
                    }

                    public SAMRecordState next() {
                        if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
                            currentPositionReadStates = alignmentStartIterator.next();
                            currentPositionReadStatesIterator = currentPositionReadStates.iterator();
                        }

                        return currentPositionReadStatesIterator.next();
                    }

                    public void remove() {
                        currentPositionReadStatesIterator.remove();
                        thisSampleReadStates--;
                        totalReadStates--;

                        if ( currentPositionReadStates.isEmpty() ) {
                            alignmentStartIterator.remove();
                        }
                    }
                };
            }
        }
    }

    /**
     * Note: stores reads by sample ID string, not by sample object
     */
    private class SamplePartitioner {
        private Map<String, Collection<SAMRecord>> readsBySample;
        private long readsSeen = 0;

        public SamplePartitioner() {
            readsBySample = new HashMap<String, Collection<SAMRecord>>();
            for ( String sample : samples ) {
                readsBySample.put(sample, new ArrayList<SAMRecord>());
            }
        }

        // Reads with an unknown sample (no matching key) are counted but silently dropped
        public void submitRead(SAMRecord read) {
            String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
            if (readsBySample.containsKey(sampleName))
                readsBySample.get(sampleName).add(read);
            readsSeen++;
        }

        public long getNumReadsSeen() {
            return readsSeen;
        }

        public Collection<SAMRecord> getReadsForSample(String sampleName) {
            if ( ! readsBySample.containsKey(sampleName) )
                throw new NoSuchElementException("Sample name not found");

            return readsBySample.get(sampleName);
        }

        // Clear per-sample buckets in place (keys are stable) and reset the counter
        public void reset() {
            for ( Collection<SAMRecord> perSampleReads : readsBySample.values() )
                perSampleReads.clear();
            readsSeen = 0;
        }
    }
}

View File

@ -1,144 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Baseclass used to describe a read transformer like BAQ and BQSR
*
* Read transformers are plugable infrastructure that modify read state
* either on input, on output, or within walkers themselves.
*
* The function apply() is called on each read seen by the GATK (after passing
* all ReadFilters) and it can do as it sees fit (without modifying the alignment)
* to the read to change qualities, add tags, etc.
*
* Initialize is called once right before the GATK traversal begins providing
* the ReadTransformer with the ability to collect and initialize data from the
* engine.
*
* Note that all ReadTransformers within the classpath are created and initialized. If one
* shouldn't be run it should look at the command line options of the engine and override
* the enabled.
*
* @since 8/31/12
* @author depristo
*/
abstract public class ReadTransformer {
/**
* When should this read transform be applied?
*/
private ApplicationTime applicationTime;
/**
* Keep track of whether we've been initialized already, and ensure it's not called more than once.
*/
private boolean initialized = false;
protected ReadTransformer() {}
/**
* Master initialization routine. Called to setup a ReadTransform, using it's overloaded initialialSub routine.
*
* @param overrideTime if not null, we will run this ReadTransform at the time provided, regardless of the timing of this read transformer itself
* @param engine the engine, for initializing values
* @param walker the walker we intend to run
*/
@Requires({"initialized == false", "engine != null", "walker != null"})
@Ensures("initialized == true")
public final void initialize(final ApplicationTime overrideTime, final GenomeAnalysisEngine engine, final Walker walker) {
if ( engine == null ) throw new IllegalArgumentException("engine cannot be null");
if ( walker == null ) throw new IllegalArgumentException("walker cannot be null");
this.applicationTime = initializeSub(engine, walker);
if ( overrideTime != null ) this.applicationTime = overrideTime;
initialized = true;
}
/**
* Subclasses must override this to initialize themeselves
*
* @param engine the engine, for initializing values
* @param walker the walker we intend to run
* @return the point of time we'd like this read transform to be run
*/
@Requires({"engine != null", "walker != null"})
@Ensures("result != null")
protected abstract ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker);
/**
* Should this ReadTransformer be activated? Called after initialize, which allows this
* read transformer to look at its arguments and decide if it should be active. All
* ReadTransformers must override this, as by default they are not enabled.
*
* @return true if this ReadTransformer should be used on the read stream
*/
public boolean enabled() {
return false;
}
/**
* Has this transformer been initialized?
*
* @return true if it has
*/
public final boolean isInitialized() {
return initialized;
}
/**
* When should we apply this read transformer?
*
* @return true if yes
*/
public final ApplicationTime getApplicationTime() {
return applicationTime;
}
/**
 * Primary interface function for a read transform to actually do some work
 *
 * The function apply() is called on each read seen by the GATK (after passing
 * all ReadFilters) and it can do as it sees fit (without modifying the alignment)
 * to the read to change qualities, add tags, etc.
 *
 * @param read the read to transform (never null)
 * @return the transformed read (never null); may be the same object as the input
 */
@Requires("read != null")
@Ensures("result != null")
abstract public GATKSAMRecord apply(final GATKSAMRecord read);
@Override
public String toString() {
    // Transformers are identified by their concrete class name in logs and error messages
    return getClass().getSimpleName();
}
/**
 * When should a read transformer be applied relative to the GATK read stream?
 */
public static enum ApplicationTime {
    /**
     * Walker does not tolerate this read transformer at all; enabling it is an error
     */
    FORBIDDEN,
    /**
     * apply the transformation to the incoming reads, the default
     */
    ON_INPUT,
    /**
     * apply the transformation to the outgoing read stream
     */
    ON_OUTPUT,
    /**
     * the walker will deal with the calculation itself
     */
    HANDLED_IN_WALKER
}
}

View File

@ -1,28 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;

import java.lang.annotation.*;

/**
 * User: hanna
 * Date: May 14, 2009
 * Time: 1:51:22 PM
 * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
 * Software and documentation are copyright 2005 by the Broad Institute.
 * All rights are reserved.
 *
 * Users acknowledge that this software is supplied without any warranty or support.
 * The Broad Institute is not responsible for its use, misuse, or
 * functionality.
 */

/**
 * Class-level annotation letting a walker declare when read transformers
 * should be applied to the data it consumes.
 */
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface ReadTransformersMode {
    /** Desired application time for read transformers; defaults to ON_INPUT. */
    ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
}

View File

@ -1,45 +0,0 @@
package org.broadinstitute.sting.gatk.samples;

/**
 * A class for imposing a trio structure on three samples; a common paradigm
 *
 * todo -- there should probably be an interface or abstract class "Pedigree" that generalizes the notion of
 *      -- imposing structure on samples.  But given how complex pedigrees can quickly become, it's not
 *      -- clear the best way to do this.
 */
public class Trio {
    // Trio membership is fixed at construction time, so the fields are final
    private final Sample mother;
    private final Sample father;
    private final Sample child;

    /**
     * Create a trio from a mother, father, and child.
     *
     * @param mom   the mother; her ID must match the child's maternal ID
     * @param dad   the father; his ID must match the child's paternal ID
     * @param spawn the child
     * @throws IllegalArgumentException if the three samples do not form a consistent trio
     */
    public Trio(final Sample mom, final Sample dad, final Sample spawn) {
        // Use an explicit exception rather than an assert: asserts are disabled unless
        // the JVM is run with -ea, which would let malformed trios pass silently.
        if ( ! mom.getID().equals(spawn.getMaternalID()) || ! dad.getID().equals(spawn.getPaternalID()) )
            throw new IllegalArgumentException("Samples passed to trio constructor do not form a trio");
        mother = mom;
        father = dad;
        child = spawn;
    }

    /** @return the mother of this trio */
    public Sample getMother() {
        return mother;
    }

    /** @return the mother's sample ID */
    public String getMaternalID() {
        return mother.getID();
    }

    /** @return the father of this trio */
    public Sample getFather() {
        return father;
    }

    /** @return the father's sample ID */
    public String getPaternalID() {
        return father.getID();
    }

    /** @return the child of this trio */
    public Sample getChild() {
        return child;
    }

    /** @return the child's sample ID */
    public String getChildID() {
        return child.getID();
    }
}

View File

@ -1,103 +0,0 @@
package org.broadinstitute.sting.gatk.traversals;

import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;

/**
 * A simple solution to iterating over all reference positions over a series of genomic locations.
 *
 * Base class for locus traversals: sets up the locus / reference / ROD views for a shard
 * and delegates the per-locus iteration to a subclass via the abstract traverse() overload.
 */
public abstract class TraverseLociBase<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
    /**
     * our log, which we want to capture anything from this class
     */
    // NOTE(review): logger is keyed to TraversalEngine.class rather than TraverseLociBase.class,
    // so messages are attributed to the parent class -- confirm whether this is intentional
    protected static final Logger logger = Logger.getLogger(TraversalEngine.class);

    @Override
    protected final String getTraversalType() {
        return "sites";
    }

    /**
     * Simple value holder pairing the number of loci visited with the final reduce result.
     */
    protected static class TraverseResults<T> {
        final int numIterations;
        final T reduceResult;

        public TraverseResults(int numIterations, T reduceResult) {
            this.numIterations = numIterations;
            this.reduceResult = reduceResult;
        }
    }

    /**
     * Subclass hook implementing the actual per-locus map/reduce loop.
     *
     * @param walker                   the walker to run
     * @param locusView                source of per-locus alignment contexts
     * @param referenceView            source of reference bases for each locus
     * @param referenceOrderedDataView source of ROD data for each locus
     * @param sum                      the running reduce value
     * @return the number of iterations performed and the final reduce result
     */
    protected abstract TraverseResults<T> traverse( final LocusWalker<M,T> walker,
                                                    final LocusView locusView,
                                                    final LocusReferenceView referenceView,
                                                    final ReferenceOrderedView referenceOrderedDataView,
                                                    final T sum);

    @Override
    public T traverse( LocusWalker<M,T> walker,
                       LocusShardDataProvider dataProvider,
                       T sum) {
        logger.debug(String.format("TraverseLociBase.traverse: Shard is %s", dataProvider));

        final LocusView locusView = getLocusView( walker, dataProvider );

        if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
            //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
            ReferenceOrderedView referenceOrderedDataView = null;
            // For ROD-driven traversals the locus view itself doubles as the ROD view
            if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
                referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
            else
                referenceOrderedDataView = (RodLocusView)locusView;

            final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );

            final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
            sum = result.reduceResult;
            dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
            updateCumulativeMetrics(dataProvider.getShard());
        }

        // We have a final map call to execute here to clean up the skipped based from the
        // last position in the ROD to that in the interval
        if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
            // only do this if the walker isn't done!
            final RodLocusView rodLocusView = (RodLocusView)locusView;
            final long nSkipped = rodLocusView.getLastSkippedBases();
            if ( nSkipped > 0 ) {
                // Report the skipped bases as an empty pileup one position beyond the shard
                final GenomeLoc site = rodLocusView.getLocOneBeyondShard();
                final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
                final M x = walker.map(null, null, ac);
                sum = walker.reduce(x, sum);
            }
        }

        return sum;
    }

    /**
     * Gets the best view of loci for this walker given the available data.  The view will function as a 'trigger track'
     * of sorts, providing a consistent interface so that TraverseLociBase doesn't need to be reimplemented for any new datatype
     * that comes along.
     * @param walker walker to interrogate.
     * @param dataProvider Data with which to drive the locus view.
     * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
     */
    private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
        final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
        if( dataSource == DataSource.READS )
            return new CoveredLocusView(dataProvider);
        else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
            return new AllLocusView(dataProvider);
        else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
            return new RodLocusView(dataProvider);
        else
            throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
    }
}

View File

@ -1,47 +0,0 @@
package org.broadinstitute.sting.gatk.traversals;

import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.GenomeLoc;

/**
 * Single-threaded locus traversal: walks every reference position in the shard in order,
 * applying the walker's filter/map/reduce at each locus until the data or the walker is done.
 */
public class TraverseLociLinear<M,T> extends TraverseLociBase<M,T> {
    @Override
    protected TraverseResults<T> traverse(LocusWalker<M, T> walker, LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView, T sum) {
        int locusCount = 0;

        while ( locusView.hasNext() ) {
            locusCount++;
            final AlignmentContext locus = locusView.next();
            final GenomeLoc location = locus.getLocation();

            // Build the reference context for this position.  If the pileup contains
            // "extended events", the context holds the (longest) stretch of deleted
            // reference bases present in the pileup.
            final ReferenceContext refContext = referenceView.getReferenceContext(location);

            // Pull all reference-ordered data covering this location
            final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);

            boolean walkerFinished = false;
            if ( walker.filter(tracker, refContext, locus) ) {
                sum = walker.reduce(walker.map(tracker, refContext, locus), sum);
                walkerFinished = walker.isDone();
            }

            // Progress is reported even for filtered loci, matching the per-locus cadence
            printProgress(locus.getLocation());

            if ( walkerFinished )
                break;
        }

        return new TraverseResults<T>(locusCount, sum);
    }
}

View File

@ -1,205 +0,0 @@
package org.broadinstitute.sting.gatk.traversals;

import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
import org.broadinstitute.sting.utils.nanoScheduler.NSProgressFunction;
import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;

import java.util.Iterator;

/**
 * A shared-memory parallel locus traversal built on NanoScheduler.
 *
 * Per-locus inputs (pileup, reference context, RODs) are packaged into MapData objects
 * by a single-threaded iterator; map calls may then run on multiple threads while
 * reduce is applied in order to the map results.
 */
public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
    /** our log, which we want to capture anything from this class */
    private static final boolean DEBUG = false;

    // Number of MapData items buffered between the input iterator and the worker threads
    private static final int BUFFER_SIZE = 1000;

    final NanoScheduler<MapData, MapResult, T> nanoScheduler;

    /**
     * Create a traversal that runs map calls on nThreads threads.
     *
     * @param nThreads number of threads for the NanoScheduler
     */
    public TraverseLociNano(int nThreads) {
        nanoScheduler = new NanoScheduler<MapData, MapResult, T>(BUFFER_SIZE, nThreads);
        nanoScheduler.setProgressFunction(new TraverseLociProgress());
    }

    @Override
    protected TraverseResults<T> traverse(final LocusWalker<M, T> walker,
                                          final LocusView locusView,
                                          final LocusReferenceView referenceView,
                                          final ReferenceOrderedView referenceOrderedDataView,
                                          final T sum) {
        nanoScheduler.setDebug(DEBUG);
        final TraverseLociMap myMap = new TraverseLociMap(walker);
        final TraverseLociReduce myReduce = new TraverseLociReduce(walker);

        // The iterator itself counts iterations as it hands out MapData objects
        final MapDataIterator inputIterator = new MapDataIterator(locusView, referenceView, referenceOrderedDataView);
        final T result = nanoScheduler.execute(inputIterator, myMap, sum, myReduce);

        return new TraverseResults<T>(inputIterator.numIterations, result);
    }

    /**
     * Create iterator that provides inputs for all map calls into MapData, to be provided
     * to NanoScheduler for Map/Reduce
     */
    private class MapDataIterator implements Iterator<MapData> {
        final LocusView locusView;
        final LocusReferenceView referenceView;
        final ReferenceOrderedView referenceOrderedDataView;
        // Running count of loci handed out; read back by traverse() after execution
        int numIterations = 0;

        private MapDataIterator(LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView) {
            this.locusView = locusView;
            this.referenceView = referenceView;
            this.referenceOrderedDataView = referenceOrderedDataView;
        }

        @Override
        public boolean hasNext() {
            return locusView.hasNext();
        }

        @Override
        public MapData next() {
            final AlignmentContext locus = locusView.next();
            final GenomeLoc location = locus.getLocation();

            //logger.info("Pulling data from MapDataIterator at " + location);

            // create reference context. Note that if we have a pileup of "extended events", the context will
            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
            final ReferenceContext refContext = referenceView.getReferenceContext(location);

            // Iterate forward to get all reference ordered data covering this location
            final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location, refContext);

            numIterations++;
            return new MapData(locus, refContext,  tracker);
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Cannot remove elements from MapDataIterator");
        }
    }

    @Override
    public void printOnTraversalDone() {
        // Shut down the scheduler's worker threads before reporting final stats
        nanoScheduler.shutdown();
        super.printOnTraversalDone();
    }

    /**
     * The input data needed for each map call.  The read, the reference, and the RODs
     */
    private class MapData {
        final AlignmentContext alignmentContext;
        final ReferenceContext refContext;
        final RefMetaDataTracker tracker;

        private MapData(final AlignmentContext alignmentContext, ReferenceContext refContext, RefMetaDataTracker tracker) {
            this.alignmentContext = alignmentContext;
            this.refContext = refContext;
            this.tracker = tracker;
        }

        @Override
        public String toString() {
            return "MapData " + alignmentContext.getLocation();
        }
    }

    /**
     * Contains the results of a map call, indicating whether the call was good, filtered, or done
     */
    private class MapResult {
        final M value;
        final boolean reduceMe;

        /**
         * Create a MapResult with value that should be reduced
         *
         * @param value the value to reduce
         */
        private MapResult(final M value) {
            this.value = value;
            this.reduceMe = true;
        }

        /**
         * Create a MapResult that shouldn't be reduced
         */
        private MapResult() {
            this.value = null;
            this.reduceMe = false;
        }
    }

    /**
     * A static object that tells reduce that the result of map should be skipped (filtered or done)
     */
    private final MapResult SKIP_REDUCE = new MapResult();

    /**
     * MapFunction for TraverseReads meeting NanoScheduler interface requirements
     *
     * Applies walker.map to MapData, returning a MapResult object containing the result
     */
    private class TraverseLociMap implements NSMapFunction<MapData, MapResult> {
        final LocusWalker<M,T> walker;

        private TraverseLociMap(LocusWalker<M, T> walker) {
            this.walker = walker;
        }

        @Override
        public MapResult apply(final MapData data) {
            // Skip both map and reduce once the walker has declared itself done,
            // and skip reduce for loci rejected by the walker's filter
            if ( ! walker.isDone() ) {
                final boolean keepMeP = walker.filter(data.tracker, data.refContext, data.alignmentContext);
                if (keepMeP) {
                    final M x = walker.map(data.tracker, data.refContext, data.alignmentContext);
                    return new MapResult(x);
                }
            }
            return SKIP_REDUCE;
        }
    }

    /**
     * NSReduceFunction for TraverseReads meeting NanoScheduler interface requirements
     *
     * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
     */
    private class TraverseLociReduce implements NSReduceFunction<MapResult, T> {
        final LocusWalker<M,T> walker;

        private TraverseLociReduce(LocusWalker<M, T> walker) {
            this.walker = walker;
        }

        @Override
        public T apply(MapResult one, T sum) {
            if ( one.reduceMe )
                // only run reduce on values that aren't DONE or FAILED
                return walker.reduce(one.value, sum);
            else
                return sum;
        }
    }

    /**
     * Progress callback: reports the location of the most recently processed locus.
     */
    private class TraverseLociProgress implements NSProgressFunction<MapData> {
        @Override
        public void progress(MapData lastProcessedMap) {
            if (lastProcessedMap.alignmentContext != null)
                printProgress(lastProcessedMap.alignmentContext.getLocation());
        }
    }
}

View File

@ -1,234 +0,0 @@
/*
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.traversals;

import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

import java.util.LinkedList;
import java.util.List;

/**
 * A nano-scheduling version of TraverseReads.
 *
 * Implements the traversal of a walker that accepts individual reads, the reference, and
 * RODs per map call.  Directly supports shared memory parallelism via NanoScheduler
 *
 * @author depristo
 * @version 1.0
 * @date 9/2/2012
 */
public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
    /** our log, which we want to capture anything from this class */
    protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
    private static final boolean DEBUG = false;
    final NanoScheduler<MapData, MapResult, T> nanoScheduler;

    /**
     * Create a traversal that runs map calls on nThreads threads.
     *
     * @param nThreads number of threads for the NanoScheduler
     */
    public TraverseReadsNano(int nThreads) {
        final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
        nanoScheduler = new NanoScheduler<MapData, MapResult, T>(bufferSize, nThreads);
    }

    @Override
    protected String getTraversalType() {
        return "reads";
    }

    /**
     * Traverse by reads, given the data and the walker
     *
     * @param walker the walker to traverse with
     * @param dataProvider the provider of the reads data
     * @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
     * @return the reduce variable of the read walker
     */
    public T traverse(ReadWalker<M,T> walker,
                      ReadShardDataProvider dataProvider,
                      T sum) {
        logger.debug(String.format("TraverseReadsNano.traverse Covered dataset is %s", dataProvider));

        if( !dataProvider.hasReads() )
            throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");

        nanoScheduler.setDebug(DEBUG);
        final TraverseReadsMap myMap = new TraverseReadsMap(walker);
        final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);

        final List<MapData> aggregatedInputs = aggregateMapData(dataProvider);
        final T result = nanoScheduler.execute(aggregatedInputs.iterator(), myMap, sum, myReduce);

        updateCumulativeMetrics(dataProvider.getShard());

        // Guard against an empty aggregation: the original unconditional get(size() - 1)
        // would throw IndexOutOfBoundsException if the shard yielded no reads.
        if ( ! aggregatedInputs.isEmpty() ) {
            final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read;
            final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
            printProgress(locus);
        }

        return result;
    }

    /**
     * Aggregate all of the inputs for all map calls into MapData, to be provided
     * to NanoScheduler for Map/Reduce
     *
     * @param dataProvider the source of our data
     * @return a linked list of MapData objects holding the read, ref, and ROD info for every map/reduce
     *          should execute
     */
    private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
        final ReadView reads = new ReadView(dataProvider);
        final ReadReferenceView reference = new ReadReferenceView(dataProvider);
        final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);

        final List<MapData> mapData = new LinkedList<MapData>();
        for ( final SAMRecord read : reads ) {
            // unmapped reads have no reference context
            final ReferenceContext refContext = ! read.getReadUnmappedFlag()
                    ? reference.getReferenceContext(read)
                    : null;

            // if the read is mapped, create a metadata tracker
            // NOTE(review): this uses getReferenceIndex() >= 0 while the reference context
            // above uses getReadUnmappedFlag(); these can disagree for placed-but-unmapped
            // reads -- confirm this asymmetry is intentional
            final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0
                    ? rodView.getReferenceOrderedDataForRead(read)
                    : null;

            // update the number of reads we've seen
            dataProvider.getShard().getReadMetrics().incrementNumIterations();

            mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker));
        }

        return mapData;
    }

    @Override
    public void printOnTraversalDone() {
        // Shut down the scheduler's worker threads before reporting final stats
        nanoScheduler.shutdown();
        super.printOnTraversalDone();
    }

    /**
     * The input data needed for each map call.  The read, the reference, and the RODs
     */
    private class MapData {
        final GATKSAMRecord read;
        final ReferenceContext refContext;
        final RefMetaDataTracker tracker;

        private MapData(GATKSAMRecord read, ReferenceContext refContext, RefMetaDataTracker tracker) {
            this.read = read;
            this.refContext = refContext;
            this.tracker = tracker;
        }
    }

    /**
     * Contains the results of a map call, indicating whether the call was good, filtered, or done
     */
    private class MapResult {
        final M value;
        final boolean reduceMe;

        /**
         * Create a MapResult with value that should be reduced
         *
         * @param value the value to reduce
         */
        private MapResult(final M value) {
            this.value = value;
            this.reduceMe = true;
        }

        /**
         * Create a MapResult that shouldn't be reduced
         */
        private MapResult() {
            this.value = null;
            this.reduceMe = false;
        }
    }

    /**
     * A static object that tells reduce that the result of map should be skipped (filtered or done)
     */
    private final MapResult SKIP_REDUCE = new MapResult();

    /**
     * MapFunction for TraverseReads meeting NanoScheduler interface requirements
     *
     * Applies walker.map to MapData, returning a MapResult object containing the result
     */
    private class TraverseReadsMap implements NSMapFunction<MapData, MapResult> {
        final ReadWalker<M,T> walker;

        private TraverseReadsMap(ReadWalker<M, T> walker) {
            this.walker = walker;
        }

        @Override
        public MapResult apply(final MapData data) {
            // skip map entirely once the walker is done, and skip reduce for filtered reads
            if ( ! walker.isDone() ) {
                final boolean keepMeP = walker.filter(data.refContext, data.read);
                if (keepMeP)
                    return new MapResult(walker.map(data.refContext, data.read, data.tracker));
            }
            return SKIP_REDUCE;
        }
    }

    /**
     * NSReduceFunction for TraverseReads meeting NanoScheduler interface requirements
     *
     * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
     */
    private class TraverseReadsReduce implements NSReduceFunction<MapResult, T> {
        final ReadWalker<M,T> walker;

        private TraverseReadsReduce(ReadWalker<M, T> walker) {
            this.walker = walker;
        }

        @Override
        public T apply(MapResult one, T sum) {
            if ( one.reduceMe )
                // only run reduce on values that aren't DONE or FAILED
                return walker.reduce(one.value, sum);
            else
                return sum;
        }
    }
}

View File

@ -1,31 +0,0 @@
/*
 * Copyright (c) 2010.  The Broad Institute
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.walkers;

/**
 * Root parallelism interface.  Walkers that implement this
 * declare that their map function is thread-safe and so multiple
 * map calls can be run in parallel in the same JVM instance.
 *
 * This is a marker interface: it declares no methods and is checked
 * only for its presence on a walker class.
 */
public interface NanoSchedulable {
}

View File

@ -1,133 +0,0 @@
/*
 * Copyright (c) 2010 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.walkers.fasta;

import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

import java.util.Collections;
import java.util.List;

/**
 * Generates an alternative reference sequence over the specified interval.
 *
 * <p>
 * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
 * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
 * Several important notes:
 * 1) if there are multiple variants that start at a site, it chooses one of them randomly.
 * 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
 * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
 * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
 *
 * <h2>Input</h2>
 * <p>
 * The reference, requested intervals, and any number of variant rod files.
 * </p>
 *
 * <h2>Output</h2>
 * <p>
 * A fasta file representing the requested intervals.
 * </p>
 *
 * <h2>Examples</h2>
 * <pre>
 * java -Xmx2g -jar GenomeAnalysisTK.jar \
 *   -R ref.fasta \
 *   -T FastaAlternateReferenceMaker \
 *   -o output.fasta \
 *   -L input.intervals \
 *   --variant input.vcf \
 *   [--snpmask mask.vcf]
 * </pre>
 *
 */
@DocumentedGATKFeature( groupName = "Companion Utilities", extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-1,stop=50))
@Requires(value={DataSource.REFERENCE})
public class FastaAlternateReferenceMaker extends FastaReferenceMaker {

    /**
     * Variants from these input files are used by this tool to construct an alternate reference.
     */
    @Input(fullName = "variant", shortName = "V", doc="variants to model", required=false)
    public List<RodBinding<VariantContext>> variants = Collections.emptyList();

    /**
     * Snps from this file are used as a mask when constructing the alternate reference.
     */
    // NOTE(review): snpmask has no explicit default; presumably the GATK command-line engine
    // initializes unbound RodBindings so tracker.getValues(snpmask) is safe -- confirm
    @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
    public RodBinding<VariantContext> snpmask;

    // Count of reference bases still to suppress after emitting a simple deletion's anchor base;
    // carries state across consecutive map() calls
    private int deletionBasesRemaining = 0;

    /**
     * Emit the alternate-reference base(s) for this locus.
     *
     * Branch order matters: pending deletion bases are suppressed first, then called
     * variants are applied, then the snpmask, and finally the plain reference base.
     *
     * @param tracker ROD data at this locus
     * @param ref     reference context at this locus
     * @param context alignment context (used only for its location)
     * @return the location paired with the string to append to the output sequence
     */
    public Pair<GenomeLoc, String> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (deletionBasesRemaining > 0) {
            // we are inside a previously-seen deletion: emit nothing for this base
            deletionBasesRemaining--;
            return new Pair<GenomeLoc, String>(context.getLocation(), "");
        }

        String refBase = String.valueOf((char)ref.getBase());

        // Check to see if we have a called snp
        for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
            if ( vc.isFiltered() )
                continue;

            if ( vc.isSimpleDeletion()) {
                deletionBasesRemaining = vc.getReference().length() - 1;
                // delete the next n bases, not this one
                return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
            } else if ( vc.isSimpleInsertion()) {
                return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
            } else if (vc.isSNP()) {
                return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
            }
        }

        // if we don't have a called site, and we have a mask at this site, mask it
        for ( VariantContext vc : tracker.getValues(snpmask) ) {
            if ( vc.isSNP()) {
                return new Pair<GenomeLoc, String>(context.getLocation(), "N");
            }
        }

        // if we got here then we're just ref
        return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
    }
}

View File

@ -1,127 +0,0 @@
/*
 * Copyright (c) 2010 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.walkers.fasta;

import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

import java.io.PrintStream;

/**
 * Renders a new reference in FASTA format consisting of only those loci provided in the input data set.
 *
 * <p>
 * The output format can be partially controlled using the provided command-line arguments.
 * Specify intervals with the usual -L argument to output only the reference bases within your intervals.
 * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
 * separate fasta sequence (named numerically in order).
 *
 * <h2>Input</h2>
 * <p>
 * The reference and requested intervals.
 * </p>
 *
 * <h2>Output</h2>
 * <p>
 * A fasta file representing the requested intervals.
 * </p>
 *
 * <h2>Examples</h2>
 * <pre>
 * java -Xmx2g -jar GenomeAnalysisTK.jar \
 *   -R ref.fasta \
 *   -T FastaReferenceMaker \
 *   -o output.fasta \
 *   -L input.intervals
 * </pre>
 *
 */
@DocumentedGATKFeature( groupName = "Companion Utilities", extraDocs = {CommandLineGATK.class} )
public class FastaReferenceMaker extends RefWalker<Pair<GenomeLoc, String>, GenomeLoc> {

    @Output PrintStream out;

    @Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false)
    public int fastaLineWidth=60;

    /**
     * Please note that when using this argument adjacent intervals will automatically be merged.
     */
    @Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity)", required=false)
    public boolean fastaRawSeqs=false;

    protected FastaSequence fasta;

    /** Set up the FASTA writer; raw mode means no wrapping, so line width becomes unbounded. */
    public void initialize() {
        if (fastaRawSeqs) fastaLineWidth = Integer.MAX_VALUE;
        fasta = new FastaSequence(out, fastaLineWidth, fastaRawSeqs);
    }

    /** Emit this locus's reference base paired with its location. */
    public Pair<GenomeLoc, String> map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
        final String base = String.valueOf((char)ref.getBase());
        return new Pair<GenomeLoc, String>(context.getLocation(), base);
    }

    /** No interval has been seen yet. */
    public GenomeLoc reduceInit() {
        return null;
    }

    /**
     * Fold one locus into the running interval, flushing the accumulated sequence
     * whenever a non-contiguous position starts a new interval.
     *
     * @param value the locus/base pair from map, possibly null
     * @param sum   the interval accumulated so far, null before the first locus
     * @return the updated accumulated interval
     */
    public GenomeLoc reduce(Pair<GenomeLoc, String> value, GenomeLoc sum) {
        if ( value == null )
            return sum;

        final GenomeLoc where = value.first;
        final String bases = value.second;

        if ( sum == null ) {
            // very first locus: start the initial interval
            sum = where;
        } else if ( where.getStart() == sum.getStop() + 1 ) {
            // contiguous with the running interval: just extend its right edge
            sum = getToolkit().getGenomeLocParser().setStop(sum, where.getStop());
        } else {
            // gap (end of contig or new interval): emit what we have and start over
            fasta.flush();
            sum = where;
        }

        // in every case the base joins the current sequence
        fasta.append(bases);
        return sum;
    }

    /** Flush any sequence still buffered for the final interval. */
    public void onTraversalDone(GenomeLoc sum) {
        fasta.flush();
    }
}

View File

@ -1,49 +0,0 @@
package org.broadinstitute.sting.utils.baq;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
 * Read transformer that applies Heng Li's BAQ (Base Alignment Quality)
 * calculation to every read flowing through the engine.
 */
public class BAQReadTransformer extends ReadTransformer {
    private BAQ baqHMM;
    private IndexedFastaSequenceFile refReader;
    private BAQ.CalculationMode cmode;
    private BAQ.QualityMode qmode;

    @Override
    public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
        // the walker's @BAQMode annotation tells us how/when BAQ may be applied
        final BAQMode mode = WalkerManager.getWalkerAnnotation(walker, BAQMode.class);

        refReader = engine.getReferenceDataSource().getReference();
        cmode = engine.getArguments().BAQMode;
        qmode = mode.QualityMode();
        baqHMM = new BAQ(engine.getArguments().BAQGOP);

        // a transformer that never modifies qualities is useless -- creating one is a bug
        if ( qmode == BAQ.QualityMode.DONT_MODIFY )
            throw new ReviewedStingException("BUG: shouldn't create BAQ transformer with quality mode DONT_MODIFY");

        // the walker forbids BAQ'd qualities but the user requested BAQ
        if ( mode.ApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN && enabled() )
            throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + cmode + " was requested.");

        return mode.ApplicationTime();
    }

    @Override
    public boolean enabled() {
        // only active when the user asked for some BAQ calculation mode
        return cmode != BAQ.CalculationMode.OFF;
    }

    /** Run the BAQ HMM on the read (modified in place) and return it. */
    @Override
    public GATKSAMRecord apply(final GATKSAMRecord read) {
        baqHMM.baqRead(read, refReader, cmode, qmode);
        return read;
    }
}

View File

@ -1,44 +0,0 @@
package org.broadinstitute.sting.utils.baq;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
/**
 * Iterator adaptor that applies a ReadTransformer to every read in an
 * underlying stream of reads.
 */
public class ReadTransformingIterator implements StingSAMIterator {
    private final StingSAMIterator it;
    private final ReadTransformer transformer;

    /**
     * Wrap it so that each read it yields is first passed through transformer.
     *
     * @param it          the underlying read stream
     * @param transformer an initialized, non-forbidden transformer to apply
     */
    @Requires({"it != null", "transformer != null", "transformer.isInitialized()"})
    public ReadTransformingIterator(final StingSAMIterator it, final ReadTransformer transformer) {
        // guard against transformers that were never initialized or must never run
        if ( ! transformer.isInitialized() )
            throw new IllegalStateException("Creating a read transformer stream for an uninitialized read transformer: " + transformer);
        if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN )
            throw new IllegalStateException("Creating a read transformer stream for a forbidden transformer " + transformer);

        this.it = it;
        this.transformer = transformer;
    }

    public boolean hasNext() {
        return this.it.hasNext();
    }

    @Requires("hasNext()")
    @Ensures("result != null")
    public SAMRecord next() {
        final GATKSAMRecord rawRead = (GATKSAMRecord)it.next();
        return transformer.apply(rawRead);
    }

    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
    }

    public void close() {
        it.close();
    }

    public Iterator<SAMRecord> iterator() {
        return this;
    }
}

View File

@ -1,82 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import com.google.java.contract.Invariant;
/**
* Wrapper to hold data for a blocking queue, distinguishing an EOF marker from a real object
*
* The only way to tell in a consumer thread that a blocking queue has no more data ever
* coming down the pipe is to pass in a "poison" or EOF object. This class provides
* a generic capacity for that...
*
* The use case looks like this:
*
* BlockingQueue q
* producer:
* while ( x has items )
* q.put(new BlockingQueueValue(x))
* q.put(new BlockingQueueValue())
*
* Consumer:
* while ( true )
* value = q.take()
* if ( value.isLast() )
* break
* else
* do something useful with value
*
*
* User: depristo
* Date: 9/6/12
* Time: 3:08 PM
*/
@Invariant("! isLast || value == null")
class BlockingQueueValue<T> {
    // true iff this object is the EOF/poison marker
    private final boolean isLast;

    // the payload; always null on the EOF marker
    private final T value;

    /**
     * Wrap a real payload value (isLast == false).
     *
     * @param value the payload to carry through the queue
     */
    BlockingQueueValue(final T value) {
        this.isLast = false;
        this.value = value;
    }

    /**
     * Build the EOF marker object (isLast == true, no payload).
     */
    BlockingQueueValue() {
        this.isLast = true;
        this.value = null;
    }

    /**
     * Is this the EOF marker?
     *
     * @return true for the EOF marker, false for a payload-carrying value
     */
    public boolean isLast() {
        return isLast;
    }

    /**
     * Get the payload held by this BlockingQueueValue.
     *
     * @return the payload
     * @throws IllegalStateException if invoked on the EOF marker
     */
    public T getValue() {
        if ( isLast() )
            throw new IllegalStateException("Cannot get value for last object");
        return value;
    }
}

View File

@ -1,45 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
* Create a future that simply returns a given value
*
* The only standard way to create a future in java is via the ExecutorService interface.
* If you have a data structure holding futures of value T, and you want to add a
* value to it for some reason (to add a EOF marker, for instance) you can use this
* class to create a dummy Future<T> that simply returns a value.
*
* @author depristo
* @since 09/12
*/
class FutureValue<V> implements Future<V> {
final V value;
FutureValue(final V value) {
this.value = value;
}
@Override public boolean cancel(boolean mayInterruptIfRunning) {
return true;
}
@Override public boolean isCancelled() {
return false;
}
@Override public boolean isDone() {
return true;
}
@Override public V get() throws InterruptedException, ExecutionException {
return value;
}
@Override public V get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
return get();
}
}

View File

@ -1,62 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Iterator;
import java.util.concurrent.BlockingQueue;
/**
 * Producer thread that reads input values from an inputReader and puts them
 * into a BlockingQueue, terminating the stream with a single EOF marker.
 */
class InputProducer<InputType> implements Runnable {
    /**
     * The iterator we are using to get data from
     */
    final Iterator<InputType> inputReader;

    /**
     * Our timer (may be null) that we use to track our input costs
     */
    final SimpleTimer inputTimer;

    /**
     * Where we put our input values for consumption
     */
    final BlockingQueue<InputValue> outputQueue;

    /**
     * @param inputReader source of input elements; must not be null
     * @param inputTimer  timer accumulating time spent reading input, or null to disable timing
     * @param outputQueue destination queue for wrapped input values; must not be null
     */
    public InputProducer(final Iterator<InputType> inputReader,
                         final SimpleTimer inputTimer,
                         final BlockingQueue<InputValue> outputQueue) {
        if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
        if ( outputQueue == null ) throw new IllegalArgumentException("OutputQueue cannot be null");
        this.inputReader = inputReader;
        this.inputTimer = inputTimer;
        this.outputQueue = outputQueue;
    }

    public void run() {
        try {
            while ( inputReader.hasNext() ) {
                // time only the hasNext()/next() work, not the (blocking) queue put
                if ( inputTimer != null ) inputTimer.restart();
                final InputType input = inputReader.next();
                if ( inputTimer != null ) inputTimer.stop();

                outputQueue.put(new InputValue(input));
            }

            // add the EOF object so our consumer knows we are done in all inputs
            outputQueue.put(new InputValue());
        } catch (InterruptedException ex) {
            // restore the interrupt flag so owners of this thread can observe it,
            // and report the actual condition (the old message claimed an
            // "execution exception", which this is not)
            Thread.currentThread().interrupt();
            throw new ReviewedStingException("input producer thread was interrupted", ex);
        }
    }

    /**
     * Helper class that contains a read value suitable for EOF marking in a BlockingQueue
     */
    class InputValue extends BlockingQueueValue<InputType> {
        private InputValue(InputType datum) { super(datum); }
        private InputValue() { }
    }
}

View File

@ -1,36 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
/**
 * Carries the result of one map job through a BlockingQueue to the reducer,
 * tagged with the job's sequence number for in-order consumption checks.
 */
class MapResult<MapType> extends BlockingQueueValue<MapType> {
    // sequence number of the producing map job; Integer.MAX_VALUE for the EOF marker
    final int jobID;

    /**
     * Create a new MapResult holding datum produced by map job jobID.
     *
     * @param datum the value produced by the map job
     * @param jobID the id of the map job (for correctness testing); must be >= 0
     */
    MapResult(final MapType datum, final int jobID) {
        super(datum);
        if ( jobID < 0 ) throw new IllegalArgumentException("JobID must be >= 0");
        this.jobID = jobID;
    }

    /**
     * Create the EOF marker version of MapResult.
     */
    MapResult() {
        super();
        // MAX_VALUE guarantees the marker sorts after every real job id
        this.jobID = Integer.MAX_VALUE;
    }

    /**
     * @return the job ID of the map job that produced this MapResult
     */
    public int getJobID() {
        return jobID;
    }
}

View File

@ -1,19 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
/**
 * A function that maps from InputType -> ResultType
 *
 * For use with the NanoScheduler
 *
 * User: depristo
 * Date: 8/24/12
 * Time: 9:49 AM
 */
public interface NSMapFunction<InputType, ResultType> {
    /**
     * Apply this function to a single input element.
     *
     * The NanoScheduler submits map jobs to a thread pool, so apply may be
     * invoked concurrently from multiple worker threads; implementations
     * should avoid unsynchronized shared mutable state.
     *
     * @param input a single element taken from the input stream
     * @return the mapped result, later passed to the reduce function
     */
    public ResultType apply(final InputType input);
}

View File

@ -1,12 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
/**
 * Progress callback for the NanoScheduler.
 *
 * The scheduler invokes progress() periodically -- after each inputBufferSize
 * elements have been consumed -- passing the most recent input element so the
 * caller can report where in the input stream processing currently is.
 *
 * User: depristo
 * Date: 9/4/12
 * Time: 2:10 PM
 */
public interface NSProgressFunction<InputType> {
    /**
     * Called periodically by the scheduler with the last input element handed to map.
     *
     * @param lastMapInput the most recently consumed input element
     */
    public void progress(final InputType lastMapInput);
}

View File

@ -1,18 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
/**
 * A function that combines a value of MapType with an existing ReduceValue into a new ResultType
 *
 * User: depristo
 * Date: 8/24/12
 * Time: 9:49 AM
 */
public interface NSReduceFunction<MapType, ReduceType> {
    /**
     * Combine one with sum into a new ReduceType
     *
     * The NanoScheduler calls reduce serially, in the order of the original
     * input elements (a single reducer thread enforces ordering), so this
     * method need not be thread-safe with respect to the running sum.
     *
     * @param one the result of a map call on an input element
     * @param sum the cumulative reduce result over all previous map calls
     * @return the new cumulative result incorporating both one and sum
     */
    public ReduceType apply(MapType one, ReduceType sum);
}

View File

@ -1,392 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.threading.NamedThreadFactory;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.*;
/**
* Framework for very fine grained MapReduce parallelism
*
* The overall framework works like this
*
* nano <- new Nanoschedule(inputBufferSize, numberOfMapElementsToProcessTogether, nThreads)
* List[Input] outerData : outerDataLoop )
* result = nano.execute(outerData.iterator(), map, reduce)
*
* inputBufferSize determines how many elements from the input stream are read in one go by the
* nanoscheduler. The scheduler may hold up to inputBufferSize in memory at one time, as well
* as up to inputBufferSize map results as well.
*
* numberOfMapElementsToProcessTogether determines how many input elements are processed
* together each thread cycle. For example, if this value is 10, then the input data
* is grouped together in units of 10 elements each, and map called on each in term. The more
* heavy-weight the map function is, in terms of CPU costs, the more it makes sense to
* have this number be small. The lighter the CPU cost per element, though, the more this
* parameter introduces overhead due to need to context switch among threads to process
* each input element. A value of -1 lets the nanoscheduler guess at a reasonable trade-off value.
*
* nThreads is a bit obvious yes? Note though that the nanoscheduler assumes that it gets 1 thread
* from its client during the execute call, as this call blocks until all work is done. The caller
* thread is put to work by execute to help with the processing of the data. So in reality the
* nanoScheduler only spawn nThreads - 1 additional workers (if this is > 1).
*
* User: depristo
* Date: 8/24/12
* Time: 9:47 AM
*/
public class NanoScheduler<InputType, MapType, ReduceType> {
    private final static Logger logger = Logger.getLogger(NanoScheduler.class);

    // when true and nThreads == 1, execute() skips the executors entirely
    private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
    // debugging: log the nanosecond duration of every individual map call
    private final static boolean LOG_MAP_TIMES = false;
    // master switch for SimpleTimer-based accounting of input/map/reduce/outside time.
    // NOTE: the unconditional timer calls in the constructor and shutdown() rely on
    // this being compile-time true; if it were false those would NPE.
    private final static boolean TIME_CALLS = true;
    // the map-result queue may hold this many times more entries than the input queue
    private final static int MAP_BUFFER_SIZE_SCALE_FACTOR = 100;

    final int inputBufferSize;
    final int mapBufferSize;
    final int nThreads;

    // one thread reads input, one reduces, nThreads-1 run map jobs;
    // all three are null in single-threaded mode (nThreads == 1)
    final ExecutorService inputExecutor;
    final ExecutorService reduceExecutor;
    final ThreadPoolExecutor mapExecutor;

    boolean shutdown = false;
    boolean debug = false;
    private NSProgressFunction<InputType> progressFunction = null;

    final SimpleTimer outsideSchedulerTimer = TIME_CALLS ? new SimpleTimer("outside") : null;
    final SimpleTimer inputTimer = TIME_CALLS ? new SimpleTimer("input") : null;
    final SimpleTimer mapTimer = TIME_CALLS ? new SimpleTimer("map") : null;
    final SimpleTimer reduceTimer = TIME_CALLS ? new SimpleTimer("reduce") : null;

    /**
     * Create a new nanoscheduler with the desire characteristics requested by the argument
     *
     * @param inputBufferSize the number of input elements to read in each scheduling cycle.
     * @param nThreads the number of threads to use to get work done, in addition to the
     *                 thread calling execute
     */
    public NanoScheduler(final int inputBufferSize, final int nThreads) {
        if ( inputBufferSize < 1 ) throw new IllegalArgumentException("inputBufferSize must be >= 1, got " + inputBufferSize);
        if ( nThreads < 1 ) throw new IllegalArgumentException("nThreads must be >= 1, got " + nThreads);

        this.inputBufferSize = inputBufferSize;
        this.mapBufferSize = inputBufferSize * MAP_BUFFER_SIZE_SCALE_FACTOR;
        this.nThreads = nThreads;

        if ( nThreads == 1 ) {
            // single-threaded fast path: no executors are ever created
            this.mapExecutor = null;
            this.inputExecutor = this.reduceExecutor = null;
        } else {
            // nThreads - 1 map workers: the caller's thread also contributes via
            // CallerRunsPolicy when the pool rejects a submission
            this.mapExecutor = (ThreadPoolExecutor)Executors.newFixedThreadPool(nThreads-1, new NamedThreadFactory("NS-map-thread-%d"));
            this.mapExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
            this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
            this.reduceExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-reduce-thread-%d"));
        }

        // start timing the time spent outside of the nanoScheduler
        outsideSchedulerTimer.start();
    }

    /**
     * The number of parallel map threads in use with this NanoScheduler
     * @return the thread count requested at construction (>= 1)
     */
    @Ensures("result > 0")
    public int getnThreads() {
        return nThreads;
    }

    /**
     * The input buffer size used by this NanoScheduler
     * @return the input buffer size requested at construction (>= 1)
     */
    @Ensures("result > 0")
    public int getInputBufferSize() {
        return inputBufferSize;
    }

    /**
     * Tells this nanoScheduler to shutdown immediately, releasing all its resources.
     *
     * After this call, execute cannot be invoked without throwing an error
     */
    public void shutdown() {
        outsideSchedulerTimer.stop();

        if ( nThreads > 1 ) {
            shutdownExecutor("inputExecutor", inputExecutor);
            shutdownExecutor("mapExecutor", mapExecutor);
            shutdownExecutor("reduceExecutor", reduceExecutor);
        }

        shutdown = true;

        if (TIME_CALLS) {
            printTimerInfo("Input time", inputTimer);
            printTimerInfo("Map time", mapTimer);
            printTimerInfo("Reduce time", reduceTimer);
            printTimerInfo("Outside time", outsideSchedulerTimer);
        }
    }

    /**
     * Helper function to cleanly shutdown an execution service, checking that the execution
     * state is clean when it's done.
     *
     * @param name a string name for error messages for the executorService we are shutting down
     * @param executorService the executorService to shut down
     */
    @Requires({"name != null", "executorService != null"})
    @Ensures("executorService.isShutdown()")
    private void shutdownExecutor(final String name, final ExecutorService executorService) {
        if ( executorService.isShutdown() || executorService.isTerminated() )
            throw new IllegalStateException("Executor service " + name + " is already shut down!");

        // shutdownNow returns the tasks that never started; by the time shutdown()
        // is called there should be none, or something went wrong upstream
        final List<Runnable> remaining = executorService.shutdownNow();
        if ( ! remaining.isEmpty() )
            throw new IllegalStateException(remaining.size() + " remaining tasks found in an executor " + name + ", unexpected behavior!");
    }

    /**
     * Print to logger.info timing information from timer, with name label
     *
     * The percentage shown is relative to the sum of all four scheduler timers
     * (input + map + reduce + outside).
     *
     * @param label the name of the timer to display. Should be human readable
     * @param timer the timer whose elapsed time we will display
     */
    @Requires({"label != null", "timer != null"})
    private void printTimerInfo(final String label, final SimpleTimer timer) {
        final double total = inputTimer.getElapsedTime() + mapTimer.getElapsedTime()
                + reduceTimer.getElapsedTime() + outsideSchedulerTimer.getElapsedTime();
        final double myTimeInSec = timer.getElapsedTime();
        final double myTimePercent = myTimeInSec / total * 100;
        logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(myTimeInSec), myTimePercent));
    }

    /**
     * @return true if this nanoScheduler is shutdown, or false if its still open for business
     */
    public boolean isShutdown() {
        return shutdown;
    }

    /**
     * @return are we displaying verbose debugging information about the scheduling?
     */
    public boolean isDebug() {
        return debug;
    }

    /**
     * Helper function to display a String.formatted message if we are doing verbose debugging
     *
     * @param format the format argument suitable for String.format
     * @param args the arguments for String.format
     */
    @Requires("format != null")
    private void debugPrint(final String format, Object ... args) {
        if ( isDebug() )
            logger.info("Thread " + Thread.currentThread().getId() + ":" + String.format(format, args));
    }

    /**
     * Turn on/off verbose debugging
     *
     * @param debug true if we want verbose debugging
     */
    public void setDebug(boolean debug) {
        this.debug = debug;
    }

    /**
     * Set the progress callback function to progressFunction
     *
     * The progress callback is invoked after each buffer size elements have been processed by map/reduce
     *
     * @param progressFunction a progress function to call, or null if you don't want any progress callback
     */
    public void setProgressFunction(final NSProgressFunction<InputType> progressFunction) {
        this.progressFunction = progressFunction;
    }

    /**
     * Execute a map/reduce job with this nanoScheduler
     *
     * Data comes from inputReader.  Will be read until hasNext() == false.
     * map is called on each element provided by inputReader.  No order of operations is guarenteed
     * reduce is called in order of the input data provided by inputReader on the result of map() applied
     * to each element.
     *
     * Note that the caller thread is put to work with this function call.  The call doesn't return
     * until all elements have been processes.
     *
     * It is safe to call this function repeatedly on a single nanoScheduler, at least until the
     * shutdown method is called.
     *
     * Note that this function goes through a single threaded fast path if the number of threads
     * is 1.
     *
     * @param inputReader an iterator providing us with the input data to nanoSchedule map/reduce over
     * @param map the map function from input type -> map type, will be applied in parallel to each input
     * @param reduce the reduce function from map type + reduce type -> reduce type to be applied in order to map results
     * @return the last reduce value
     */
    public ReduceType execute(final Iterator<InputType> inputReader,
                              final NSMapFunction<InputType, MapType> map,
                              final ReduceType initialValue,
                              final NSReduceFunction<MapType, ReduceType> reduce) {
        if ( isShutdown() ) throw new IllegalStateException("execute called on already shutdown NanoScheduler");
        if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
        if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
        if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");

        // while inside execute, time is attributed to the scheduler's own timers
        outsideSchedulerTimer.stop();

        ReduceType result;
        if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
            result = executeSingleThreaded(inputReader, map, initialValue, reduce);
        } else {
            result = executeMultiThreaded(inputReader, map, initialValue, reduce);
        }

        outsideSchedulerTimer.restart();
        return result;
    }

    /**
     * Simple efficient reference implementation for single threaded execution.
     *
     * @return the reduce result of this map/reduce job
     */
    @Requires({"inputReader != null", "map != null", "reduce != null"})
    private ReduceType executeSingleThreaded(final Iterator<InputType> inputReader,
                                             final NSMapFunction<InputType, MapType> map,
                                             final ReduceType initialValue,
                                             final NSReduceFunction<MapType, ReduceType> reduce) {
        ReduceType sum = initialValue;
        int i = 0;

        // start timer to ensure that both hasNext and next are caught by the timer
        if ( TIME_CALLS ) inputTimer.restart();
        while ( inputReader.hasNext() ) {
            final InputType input = inputReader.next();
            if ( TIME_CALLS ) inputTimer.stop();

            // map
            if ( TIME_CALLS ) mapTimer.restart();
            final long preMapTime = LOG_MAP_TIMES ? 0 : mapTimer.currentTimeNano();
            final MapType mapValue = map.apply(input);
            if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (mapTimer.currentTimeNano() - preMapTime));
            if ( TIME_CALLS ) mapTimer.stop();

            // invoke the progress callback once per inputBufferSize elements
            if ( i++ % inputBufferSize == 0 && progressFunction != null )
                progressFunction.progress(input);

            // reduce
            if ( TIME_CALLS ) reduceTimer.restart();
            sum = reduce.apply(mapValue, sum);
            if ( TIME_CALLS ) reduceTimer.stop();

            if ( TIME_CALLS ) inputTimer.restart();
        }

        return sum;
    }

    /**
     * Efficient parallel version of Map/Reduce
     *
     * @return the reduce result of this map/reduce job
     */
    @Requires({"inputReader != null", "map != null", "reduce != null"})
    private ReduceType executeMultiThreaded(final Iterator<InputType> inputReader,
                                            final NSMapFunction<InputType, MapType> map,
                                            final ReduceType initialValue,
                                            final NSReduceFunction<MapType, ReduceType> reduce) {
        debugPrint("Executing nanoScheduler");

        // a blocking queue that limits the number of input datum to the requested buffer size
        final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue
                = new LinkedBlockingDeque<InputProducer<InputType>.InputValue>(inputBufferSize);

        // a priority queue that stores up to mapBufferSize elements
        // produced by completed map jobs.
        final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue =
                new LinkedBlockingDeque<Future<MapResult<MapType>>>(mapBufferSize);

        // Start running the input reader thread
        inputExecutor.submit(new InputProducer<InputType>(inputReader, inputTimer, inputQueue));

        // Start running the reducer thread
        final ReducerThread<MapType, ReduceType> reducer
                = new ReducerThread<MapType, ReduceType>(reduce, reduceTimer, initialValue, mapResultQueue);
        final Future<ReduceType> reduceResult = reduceExecutor.submit(reducer);

        try {
            int numJobs = 0;

            while ( true ) {
                // block on input
                final InputProducer<InputType>.InputValue inputEnqueueWrapped = inputQueue.take();

                if ( ! inputEnqueueWrapped.isLast() ) {
                    // get the object itself
                    final InputType input = inputEnqueueWrapped.getValue();

                    // the next map call has jobID + 1
                    numJobs++;

                    // send job for map via the completion service; enqueueing the
                    // Future (not the result) preserves submission order for reduce
                    final CallableMap doMap = new CallableMap(map, numJobs, input);
                    final Future<MapResult<MapType>> mapJob = mapExecutor.submit(doMap);
                    mapResultQueue.put(mapJob);

                    debugPrint("  Done with cycle of map/reduce");

                    if ( numJobs % inputBufferSize == 0 && progressFunction != null )
                        progressFunction.progress(input);
                } else {
                    // EOF seen: push the poison MapResult so the reducer terminates,
                    // then block until it hands back the final reduction
                    mapResultQueue.put(new FutureValue<MapResult<MapType>>(new MapResult<MapType>()));
                    return reduceResult.get(); // wait for our result of reduce
                }
            }
        } catch (InterruptedException ex) {
            throw new ReviewedStingException("got execution exception", ex);
        } catch (ExecutionException ex) {
            throw new ReviewedStingException("got execution exception", ex);
        }
    }

    /**
     * A simple callable version of the map function for use with the executor pool
     */
    private class CallableMap implements Callable<MapResult<MapType>> {
        // sequence number of this map job (1-based), carried into the MapResult
        final int id;
        // the input element this job will map
        final InputType input;
        // the user's map function
        final NSMapFunction<InputType, MapType> map;

        @Requires({"map != null"})
        private CallableMap(final NSMapFunction<InputType, MapType> map,
                            final int id,
                            final InputType input) {
            this.id = id;
            this.input = input;
            this.map = map;
        }

        @Override
        public MapResult<MapType> call() {
            // runs on a map worker thread (or the caller thread via CallerRunsPolicy)
            if ( TIME_CALLS ) mapTimer.restart();
            if ( debug ) debugPrint("\t\tmap " + input);
            final MapType result = map.apply(input);
            if ( TIME_CALLS ) mapTimer.stop();
            return new MapResult<MapType>(result, id);
        }
    }
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
 * Thread that runs the reduce of the map/reduce.
 *
 * This thread reads from mapResultsQueue until the poison EOF object arrives.  At each
 * stage is calls reduce(value, sum).  The blocking mapResultQueue ensures that the
 * queue waits until the mapResultQueue has a value to take.  Then, it gets and waits
 * until the map result Future has a value.
 */
class ReducerThread<MapType, ReduceType> implements Callable<ReduceType> {
    /** The user's reduce function. */
    final NSReduceFunction<MapType, ReduceType> reduce;
    /** Timer (may be null) accumulating time spent inside reduce. */
    final SimpleTimer reduceTimer;
    /** Queue of pending map-job futures, consumed in submission order. */
    final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue;
    /** Running reduction value; starts as the caller-supplied initial value. */
    ReduceType sum;
    /** ID of the last map job reduced; used to verify results arrive in order. */
    int lastJobID = -1;

    /**
     * @param reduce         reduce function to apply; must not be null
     * @param reduceTimer    timer for reduce-time accounting, or null to disable timing
     * @param sum            initial reduction value
     * @param mapResultQueue source of completed map-job futures; must not be null
     */
    public ReducerThread(final NSReduceFunction<MapType, ReduceType> reduce,
                         final SimpleTimer reduceTimer,
                         final ReduceType sum,
                         final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue) {
        if ( reduce == null ) throw new IllegalArgumentException("Reduce function cannot be null");
        if ( mapResultQueue == null ) throw new IllegalArgumentException("mapResultQueue cannot be null");

        this.reduce = reduce;
        this.reduceTimer = reduceTimer;
        this.sum = sum;
        this.mapResultQueue = mapResultQueue;
    }

    /**
     * Consume map results until the EOF marker arrives, folding each into sum.
     *
     * @return the final reduction value
     */
    public ReduceType call() {
        try {
            while ( true ) {
                // block until the next map job in submission order has completed
                final MapResult<MapType> result = mapResultQueue.take().get();

                if ( result.isLast() ) {
                    // we are done, just return sum
                    return sum;
                }
                else if ( result.getJobID() < lastJobID ) {
                    // make sure the map results are coming in order
                    throw new IllegalStateException("BUG: last jobID " + lastJobID + " > current jobID " + result.getJobID());
                } else {
                    lastJobID = result.getJobID();

                    // apply reduce, keeping track of sum
                    if ( reduceTimer != null ) reduceTimer.restart();
                    sum = reduce.apply(result.getValue(), sum);
                    if ( reduceTimer != null ) reduceTimer.stop();
                }
            }
        } catch (ExecutionException ex) {
            throw new ReviewedStingException("got execution exception", ex);
        } catch (InterruptedException ex) {
            // restore the interrupt flag for whoever owns this thread, and report
            // the real condition (the old message claimed an execution exception)
            Thread.currentThread().interrupt();
            throw new ReviewedStingException("reducer thread was interrupted", ex);
        }
    }
}

View File

@ -1,30 +0,0 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import java.lang.annotation.*;
/**
* User: hanna
* Date: May 14, 2009
* Time: 1:51:22 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
 * Allows the walker to indicate what type of data it wants to consume.
 *
 * A walker class carries this annotation to declare when on-the-fly base
 * quality score recalibration (BQSR) should be applied to its reads;
 * BQSRReadTransformer reads the annotation during its initialization and
 * returns the declared ApplicationTime to the engine.
 */
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface BQSRMode {
    // when the BQSR transformation applies; defaults to recalibrating reads as they are read in
    public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
}

View File

@ -1,40 +0,0 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
 * A ReadTransformer that applies BQSR (base quality score recalibration)
 * on the fly to reads streaming through the engine.
 *
 * User: rpoplin
 * Date: 2/13/12
 */
public class BQSRReadTransformer extends ReadTransformer {
    private boolean enabled;
    private BaseRecalibration bqsr;

    @Override
    public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
        enabled = engine.hasBaseRecalibration();
        bqsr = engine.getBaseRecalibration();

        // the walker's @BQSRMode annotation dictates when recalibration applies
        final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class);
        return mode.ApplicationTime();
    }

    @Override
    public boolean enabled() {
        // only active when the engine was configured with recalibration tables
        return enabled;
    }

    /** Recalibrate the read's base qualities in place and return the same read. */
    @Override
    public GATKSAMRecord apply(GATKSAMRecord read) {
        bqsr.recalibrateRead(read);
        return read;
    }
}

View File

@ -1,86 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import net.sf.picard.sam.MergingSamRecordIterator;
import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
/**
 * Simple wrapper class that multiplexes multiple ArtificialSingleSampleReadStreams into a single
 * coordinate-sorted stream of reads.
 *
 * @author David Roazen
 */
public class ArtificialMultiSampleReadStream implements Iterable<SAMRecord> {
    private Collection<ArtificialSingleSampleReadStream> sampleStreams;
    private MergingSamRecordIterator mergingIterator;

    /**
     * Wrap the given per-sample streams into a single merged stream.
     *
     * @param perSampleArtificialReadStreams non-null, non-empty collection of single-sample streams
     */
    public ArtificialMultiSampleReadStream( Collection<ArtificialSingleSampleReadStream> perSampleArtificialReadStreams ) {
        if ( perSampleArtificialReadStreams == null || perSampleArtificialReadStreams.isEmpty() ) {
            throw new ReviewedStingException("Can't create an ArtificialMultiSampleReadStream out of 0 ArtificialSingleSampleReadStreams");
        }

        this.sampleStreams = perSampleArtificialReadStreams;
    }

    public Iterator<SAMRecord> iterator() {
        // defer read creation until a caller actually asks for reads
        initialize();
        return mergingIterator;
    }

    public StingSAMIterator getStingSAMIterator() {
        // defer read creation until a caller actually asks for reads
        initialize();
        return StingSAMIteratorAdapter.adapt(mergingIterator);
    }

    // Materialize each sample's reads behind an artificial SAM reader, then merge the
    // readers into a single coordinate-sorted iterator. Invoked on every iterator
    // request, so each call produces a freshly-built merged iterator.
    private void initialize() {
        final int numStreams = sampleStreams.size();
        Collection<SAMFileReader> readers = new ArrayList<SAMFileReader>(numStreams);
        Collection<SAMFileHeader> headersToMerge = new ArrayList<SAMFileHeader>(numStreams);

        for ( ArtificialSingleSampleReadStream stream : sampleStreams ) {
            Collection<SAMRecord> streamReads = stream.makeReads();
            SAMRecord[] readArray = streamReads.toArray(new SAMRecord[streamReads.size()]);
            SAMFileReader artificialReader = new ArtificialSAMFileReader(stream.getHeader(), readArray);

            readers.add(artificialReader);
            headersToMerge.add(artificialReader.getFileHeader());
        }

        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headersToMerge, true);
        mergingIterator = new MergingSamRecordIterator(headerMerger, readers, true);
    }
}

View File

@ -1,212 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
/**
 * An artificial stream of reads from a single read group/sample with configurable characteristics
 * such as:
 *
 * -the number of contigs that the reads should be distributed across
 * -number of "stacks" of reads sharing the same alignment start position per contig
 * -the min/max number of reads in each stack (exact values chosen randomly from this range)
 * -the min/max distance between stack start positions (exact values chosen randomly from this range)
 * -the min/max length of each read (exact values chosen randomly from this range)
 * -the number of unmapped reads
 *
 * The cigar string for all reads will be *M, where * is the length of the read.
 *
 * @author David Roazen
 */
public class ArtificialSingleSampleReadStream implements Iterable<SAMRecord> {
// Header shared by all generated reads; must already contain readGroupID
private SAMFileHeader header;
private String readGroupID;
// Nominal stream shape: numContigs contigs, each with numStacksPerContig stacks of reads
private int numContigs;
private int numStacksPerContig;
// Per-stack and per-read ranges; exact values are chosen uniformly at random within them
private int minReadsPerStack;
private int maxReadsPerStack;
private int minDistanceBetweenStacks;
private int maxDistanceBetweenStacks;
private int minReadLength;
private int maxReadLength;
private int numUnmappedReads;
// SAM tag used to mark each generated read with its read group
private static final String READ_GROUP_TAG = "RG";
/**
 * Create a stream generator with the given nominal properties.
 * All parameters are validated immediately; invalid combinations throw ReviewedStingException.
 */
public ArtificialSingleSampleReadStream( SAMFileHeader header,
String readGroupID,
int numContigs,
int numStacksPerContig,
int minReadsPerStack,
int maxReadsPerStack,
int minDistanceBetweenStacks,
int maxDistanceBetweenStacks,
int minReadLength,
int maxReadLength,
int numUnmappedReads ) {
this.header = header;
this.readGroupID = readGroupID;
this.numContigs = numContigs;
this.numStacksPerContig = numStacksPerContig;
this.minReadsPerStack = minReadsPerStack;
this.maxReadsPerStack = maxReadsPerStack;
this.minDistanceBetweenStacks = minDistanceBetweenStacks;
this.maxDistanceBetweenStacks = maxDistanceBetweenStacks;
this.minReadLength = minReadLength;
this.maxReadLength = maxReadLength;
this.numUnmappedReads = numUnmappedReads;
validateStreamParameters();
}
/**
 * Verify that the constructor arguments describe a coherent stream:
 * non-null header/read group, read group present in the header, all counts >= 0,
 * consistent contig/stack counts, and each min <= its corresponding max.
 * Throws ReviewedStingException on any violation.
 */
private void validateStreamParameters() {
if ( header == null || readGroupID == null ) {
throw new ReviewedStingException("null SAMFileHeader or read group ID") ;
}
if ( header.getReadGroup(readGroupID) == null ) {
throw new ReviewedStingException(String.format("Read group %s not found in SAMFileHeader", readGroupID));
}
if ( numContigs < 0 || numStacksPerContig < 0 || minReadsPerStack < 0 || maxReadsPerStack < 0 ||
minDistanceBetweenStacks < 0 || maxDistanceBetweenStacks < 0 || minReadLength < 0 || maxReadLength < 0 ||
numUnmappedReads < 0 ) {
throw new ReviewedStingException("Read stream parameters must be >= 0");
}
if ( (numContigs == 0 && numStacksPerContig != 0) || (numContigs != 0 && numStacksPerContig == 0) ) {
throw new ReviewedStingException("numContigs and numStacksPerContig must either both be > 0, or both be 0");
}
if ( minReadsPerStack > maxReadsPerStack ) {
throw new ReviewedStingException("minReadsPerStack > maxReadsPerStack");
}
if ( minDistanceBetweenStacks > maxDistanceBetweenStacks ) {
throw new ReviewedStingException("minDistanceBetweenStacks > maxDistanceBetweenStacks");
}
if ( minReadLength > maxReadLength ) {
throw new ReviewedStingException("minReadLength > maxReadLength");
}
}
// NOTE: each call to iterator() invokes makeReads() and therefore generates a
// brand-new, freshly-randomized set of reads
public Iterator<SAMRecord> iterator() {
return makeReads().iterator();
}
public StingSAMIterator getStingSAMIterator() {
return StingSAMIteratorAdapter.adapt(iterator());
}
/**
 * Generate the full collection of artificial reads described by this stream's parameters.
 *
 * Mapped reads are laid out contig by contig in stacks of randomly-chosen size, with
 * randomly-chosen gaps between stack start positions; any unmapped reads are appended last.
 *
 * @return the generated reads, in stream order
 */
public Collection<SAMRecord> makeReads() {
Collection<SAMRecord> reads = new ArrayList<SAMRecord>(numContigs * numStacksPerContig * maxReadsPerStack);
for ( int contig = 0; contig < numContigs; contig++ ) {
int alignmentStart = 1;
for ( int stack = 0; stack < numStacksPerContig; stack++ ) {
reads.addAll(makeReadStack(contig, alignmentStart, MathUtils.randomIntegerInRange(minReadsPerStack, maxReadsPerStack)));
alignmentStart += MathUtils.randomIntegerInRange(minDistanceBetweenStacks, maxDistanceBetweenStacks);
}
}
if ( numUnmappedReads > 0 ) {
reads.addAll(makeReadStack(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START, numUnmappedReads));
}
return reads;
}
/**
 * Create a single stack of reads sharing the same contig and alignment start.
 *
 * @param contig contig index (or NO_ALIGNMENT_REFERENCE_INDEX for unmapped reads)
 * @param alignmentStart alignment start shared by all reads in the stack
 * @param stackSize number of reads to create
 * @return the newly-created reads, each of a randomly-chosen length and tagged with our read group
 */
private Collection<SAMRecord> makeReadStack( int contig, int alignmentStart, int stackSize ) {
Collection<SAMRecord> readStack = new ArrayList<SAMRecord>(stackSize);
for ( int i = 0; i < stackSize; i++ ) {
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header,
"foo",
contig,
alignmentStart,
MathUtils.randomIntegerInRange(minReadLength, maxReadLength));
read.setAttribute(READ_GROUP_TAG, readGroupID);
readStack.add(read);
}
return readStack;
}
// Simple accessors for the stream's nominal configuration parameters
public SAMFileHeader getHeader() {
return header;
}
public String getReadGroupID() {
return readGroupID;
}
public int getNumContigs() {
return numContigs;
}
public int getNumStacksPerContig() {
return numStacksPerContig;
}
public int getMinReadsPerStack() {
return minReadsPerStack;
}
public int getMaxReadsPerStack() {
return maxReadsPerStack;
}
public int getMinDistanceBetweenStacks() {
return minDistanceBetweenStacks;
}
public int getMaxDistanceBetweenStacks() {
return maxDistanceBetweenStacks;
}
public int getMinReadLength() {
return minReadLength;
}
public int getMaxReadLength() {
return maxReadLength;
}
public int getNumUnmappedReads() {
return numUnmappedReads;
}
}

View File

@ -1,281 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.List;
/**
 * A class for analyzing and validating the read stream produced by an ArtificialSingleSampleReadStream.
 *
 * Collects various statistics about the stream of reads it's fed, and validates the stream
 * by checking whether the collected statistics match the nominal properties of the stream.
 *
 * Subclasses are expected to override the validate() method in order to check whether an artificial
 * read stream has been *transformed* in some way (eg., by downsampling or some other process), rather
 * than merely checking whether the stream matches its original properties.
 *
 * Usage is simple:
 *
 * ArtificialSingleSampleReadStreamAnalyzer analyzer = new ArtificialSingleSampleReadStreamAnalyzer(originalStream);
 * analyzer.analyze(originalOrTransformedStream);
 * analyzer.validate(); // override this method if you want to check whether the stream has been transformed
 * // in a certain way relative to the original stream
 *
 * @author David Roazen
 */
public class ArtificialSingleSampleReadStreamAnalyzer {
// The stream whose nominal parameters we validate against
protected ArtificialSingleSampleReadStream originalStream;
// Previously-seen read; contig and stack boundaries are detected by comparing against it
protected SAMRecord lastRead;
protected int totalReads;
// True while every read seen so far carried the expected read group (proxy for sample)
protected boolean allSamplesMatch;
protected int numContigs;
// Number of stacks observed in each completed contig, in stream order
protected List<Integer> stacksPerContig;
// Observed min/max statistics; null until the first relevant observation is made
protected Integer minReadsPerStack;
protected Integer maxReadsPerStack;
protected Integer minDistanceBetweenStacks;
protected Integer maxDistanceBetweenStacks;
protected Integer minReadLength;
protected Integer maxReadLength;
protected int numUnmappedReads;
// Running counters for the contig/stack currently being processed
protected int currentContigNumStacks;
protected int currentStackNumReads;
/**
 * Construct a new read stream analyzer, providing an ArtificialSingleSampleReadStream that will
 * serve as the basis for comparison after the analysis is complete.
 *
 * @param originalStream the original ArtificialSingleSampleReadStream upon which the stream
 * that will be fed to the analyzer is based
 */
public ArtificialSingleSampleReadStreamAnalyzer( ArtificialSingleSampleReadStream originalStream ) {
this.originalStream = originalStream;
reset();
}
/**
 * Reset all read stream statistics collected by this analyzer to prepare for a fresh run
 */
public void reset() {
lastRead = null;
totalReads = 0;
allSamplesMatch = true;
numContigs = 0;
stacksPerContig = new ArrayList<Integer>();
minReadsPerStack = null;
maxReadsPerStack = null;
minDistanceBetweenStacks = null;
maxDistanceBetweenStacks = null;
minReadLength = null;
maxReadLength = null;
numUnmappedReads = 0;
currentContigNumStacks = 0;
currentStackNumReads = 0;
}
/**
 * Collect statistics on the stream of reads passed in
 *
 * @param stream the stream of reads to analyze
 */
public void analyze( Iterable<SAMRecord> stream ) {
for ( SAMRecord read : stream ) {
update(read);
}
finalizeStats();
}
/**
 * Validate the stream by checking whether our collected statistics match the properties of the
 * original stream. Throws a ReviewedStingException if the stream is invalid.
 *
 * Override this method if you want to check whether the stream has been transformed in some
 * way relative to the original stream.
 */
public void validate() {
if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && originalStream.getNumUnmappedReads() == 0 ) {
if ( totalReads != 0 ) {
throw new ReviewedStingException("got reads from the stream, but the stream was configured to have 0 reads");
}
return; // no further validation needed for the 0-reads case
}
else if ( totalReads == 0 ) {
throw new ReviewedStingException("got no reads from the stream, but the stream was configured to have > 0 reads");
}
if ( ! allSamplesMatch ) {
throw new ReviewedStingException("some reads had the wrong sample");
}
if ( numContigs != originalStream.getNumContigs() ) {
throw new ReviewedStingException("number of contigs not correct");
}
if ( stacksPerContig.size() != originalStream.getNumContigs() ) {
throw new ReviewedStingException(String.format("bug in analyzer code: calculated sizes for %d contigs even though there were only %d contigs",
stacksPerContig.size(), originalStream.getNumContigs()));
}
for ( int contigStackCount : stacksPerContig ) {
if ( contigStackCount != originalStream.getNumStacksPerContig() ) {
throw new ReviewedStingException("contig had incorrect number of stacks");
}
}
if ( originalStream.getNumStacksPerContig() > 0 ) {
if ( minReadsPerStack < originalStream.getMinReadsPerStack() ) {
throw new ReviewedStingException("stack had fewer than the minimum number of reads");
}
if ( maxReadsPerStack > originalStream.getMaxReadsPerStack() ) {
throw new ReviewedStingException("stack had more than the maximum number of reads");
}
}
else if ( minReadsPerStack != null || maxReadsPerStack != null ) {
throw new ReviewedStingException("bug in analyzer code: reads per stack was calculated even though 0 stacks per contig was specified");
}
if ( originalStream.getNumStacksPerContig() > 1 ) {
if ( minDistanceBetweenStacks < originalStream.getMinDistanceBetweenStacks() ) {
throw new ReviewedStingException("stacks were separated by less than the minimum distance");
}
if ( maxDistanceBetweenStacks > originalStream.getMaxDistanceBetweenStacks() ) {
throw new ReviewedStingException("stacks were separated by more than the maximum distance");
}
}
else if ( minDistanceBetweenStacks != null || maxDistanceBetweenStacks != null ) {
throw new ReviewedStingException("bug in analyzer code: distance between stacks was calculated even though numStacksPerContig was <= 1");
}
if ( minReadLength < originalStream.getMinReadLength() ) {
throw new ReviewedStingException("read was shorter than the minimum allowed length");
}
if ( maxReadLength > originalStream.getMaxReadLength() ) {
throw new ReviewedStingException("read was longer than the maximum allowed length");
}
if ( numUnmappedReads != originalStream.getNumUnmappedReads() ) {
throw new ReviewedStingException(String.format("wrong number of unmapped reads: requested %d but saw %d",
originalStream.getNumUnmappedReads(), numUnmappedReads));
}
if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) &&
numUnmappedReads != totalReads ) {
throw new ReviewedStingException("stream should have consisted only of unmapped reads, but saw some mapped reads");
}
}
/**
 * Incorporate a single read into the running statistics.
 *
 * Contig and stack boundaries are detected by comparing each read to the previous one,
 * so reads must be fed in stream order.
 * NOTE(review): the unmapped-read handling appears to assume unmapped reads arrive only
 * after all mapped reads -- confirm against the stream's ordering guarantees.
 *
 * @param read the next read from the stream
 */
public void update( SAMRecord read ) {
if ( read.getReadUnmappedFlag() ) {
numUnmappedReads++;
// first unmapped read after mapped reads: flush the final mapped contig's stats,
// then undo the contig-count increment since unmapped reads aren't a real contig
if ( numUnmappedReads == 1 && lastRead != null ) {
processContigChange();
numContigs--;
}
}
else if ( lastRead == null ) {
// very first mapped read: open the first contig and stack
numContigs = 1;
currentContigNumStacks = 1;
currentStackNumReads = 1;
}
else if ( ! read.getReferenceIndex().equals(lastRead.getReferenceIndex()) ) {
processContigChange();
}
else if ( read.getAlignmentStart() != lastRead.getAlignmentStart() ) {
processStackChangeWithinContig(read);
}
else {
// same contig, same start position: the current stack just grew
currentStackNumReads++;
}
updateReadLength(read.getReadLength());
allSamplesMatch = allSamplesMatch && readHasCorrectSample(read);
totalReads++;
lastRead = read;
}
// Close out the just-finished contig: record its stack count and the final stack's size,
// then open a new contig with a fresh single-read stack
private void processContigChange() {
numContigs++;
stacksPerContig.add(currentContigNumStacks);
currentContigNumStacks = 1;
updateReadsPerStack(currentStackNumReads);
currentStackNumReads = 1;
}
// Close out the previous stack and record the gap to the new stack's start position
private void processStackChangeWithinContig( SAMRecord read ) {
currentContigNumStacks++;
updateReadsPerStack(currentStackNumReads);
currentStackNumReads = 1;
updateDistanceBetweenStacks(read.getAlignmentStart() - lastRead.getAlignmentStart());
}
// Fold one completed stack's read count into the observed min/max
private void updateReadsPerStack( int stackReadCount ) {
if ( minReadsPerStack == null || stackReadCount < minReadsPerStack ) {
minReadsPerStack = stackReadCount;
}
if ( maxReadsPerStack == null || stackReadCount > maxReadsPerStack ) {
maxReadsPerStack = stackReadCount;
}
}
// Fold one observed inter-stack distance into the observed min/max
private void updateDistanceBetweenStacks( int stackDistance ) {
if ( minDistanceBetweenStacks == null || stackDistance < minDistanceBetweenStacks ) {
minDistanceBetweenStacks = stackDistance;
}
if ( maxDistanceBetweenStacks == null || stackDistance > maxDistanceBetweenStacks ) {
maxDistanceBetweenStacks = stackDistance;
}
}
// Fold one read's length into the observed min/max
private void updateReadLength( int readLength ) {
if ( minReadLength == null || readLength < minReadLength ) {
minReadLength = readLength;
}
if ( maxReadLength == null || readLength > maxReadLength ) {
maxReadLength = readLength;
}
}
// Read group ID (RG tag) stands in for the sample in these artificial single-sample streams
private boolean readHasCorrectSample( SAMRecord read ) {
return originalStream.getReadGroupID().equals(read.getAttribute("RG"));
}
/**
 * Flush the statistics for the final stack/contig once the stream is exhausted.
 * No-op if the stream was empty or ended with unmapped reads (those were already
 * flushed when the first unmapped read was seen).
 */
public void finalizeStats() {
if ( lastRead != null && ! lastRead.getReadUnmappedFlag() ) {
stacksPerContig.add(currentContigNumStacks);
updateReadsPerStack(currentStackNumReads);
}
}
}

View File

@ -1,158 +0,0 @@
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.threading;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
/**
 * Creates threads that automatically monitor their efficiency via the parent ThreadEfficiencyMonitor
 *
 * User: depristo
 * Date: 8/14/12
 * Time: 8:47 AM
 */
@Invariant({
        "activeThreads.size() <= nThreadsToCreate",
        "countDownLatch.getCount() <= nThreadsToCreate",
        "nThreadsCreated <= nThreadsToCreate"
})
public class EfficiencyMonitoringThreadFactory extends ThreadEfficiencyMonitor implements ThreadFactory {
    // Maximum number of threads this factory may ever create
    final int nThreadsToCreate;
    // Threads created by this factory that have not yet completed
    final List<Thread> activeThreads;
    int nThreadsCreated = 0;

    /**
     * Counts down the number of active activeThreads whose runtime info hasn't been incorporated into
     * times.  Counts down from nThreadsToCreate to 0, at which point any code waiting
     * on the final times is freed to run.
     */
    final CountDownLatch countDownLatch;

    /**
     * Create a new factory generating threads whose runtime and contention
     * behavior is tracked in this factory.
     *
     * @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete
     */
    public EfficiencyMonitoringThreadFactory(final int nThreadsToCreate) {
        super();
        if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate);

        this.nThreadsToCreate = nThreadsToCreate;
        activeThreads = new ArrayList<Thread>(nThreadsToCreate);
        countDownLatch = new CountDownLatch(nThreadsToCreate);
    }

    /**
     * How many threads have been created by this factory so far?
     *
     * @return the number of threads created (always >= 0)
     */
    @Ensures("result >= 0")
    public int getNThreadsCreated() {
        return nThreadsCreated;
    }

    /**
     * Only useful for testing, so that we can wait for all of the threads in the factory to complete running
     *
     * @throws InterruptedException if interrupted while waiting on the latch
     */
    protected void waitForAllThreadsToComplete() throws InterruptedException {
        countDownLatch.await();
    }

    @Ensures({
            "activeThreads.size() <= old(activeThreads.size())",
            "! activeThreads.contains(thread)",
            "countDownLatch.getCount() <= old(countDownLatch.getCount())"
    })
    @Override
    public synchronized void threadIsDone(final Thread thread) {
        // BUGFIX: do NOT increment nThreadsAnalyzed here -- super.threadIsDone() already
        // increments it, so incrementing in both places double-counted every completed thread.
        if ( DEBUG ) logger.warn(" Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
        super.threadIsDone(thread);

        // remove the thread from the list of active activeThreads, if it's in there, and decrement the countdown latch
        if ( activeThreads.remove(thread) ) {
            // one less thread is live for those blocking on all activeThreads to be complete
            countDownLatch.countDown();
            if ( DEBUG ) logger.warn(" -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
        }
    }

    /**
     * Create a new thread from this factory
     *
     * @param runnable the task the new thread will run
     * @return a new tracked Thread wrapping runnable
     */
    @Override
    @Ensures({
            "activeThreads.size() > old(activeThreads.size())",
            "activeThreads.contains(result)",
            "nThreadsCreated == old(nThreadsCreated) + 1"
    })
    public synchronized Thread newThread(final Runnable runnable) {
        if ( activeThreads.size() >= nThreadsToCreate)
            throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate);

        nThreadsCreated++;
        final Thread myThread = new TrackingThread(runnable);
        activeThreads.add(myThread);
        return myThread;
    }

    /**
     * A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete
     */
    private class TrackingThread extends Thread {
        private TrackingThread(Runnable runnable) {
            super(runnable);
        }

        @Override
        public void run() {
            super.run();
            // report our timing/contention stats to the monitor as the very last act of the thread
            threadIsDone(this);
        }
    }
}

View File

@ -1,26 +0,0 @@
package org.broadinstitute.sting.utils.threading;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Thread factory that produces threads with a given name pattern
 *
 * The pattern must contain a single integer format specifier (e.g. "worker-%d"),
 * which is filled in with a globally unique, monotonically increasing id.
 *
 * User: depristo
 * Date: 9/5/12
 * Time: 9:22 PM
 *
 */
public class NamedThreadFactory implements ThreadFactory {
    // Global counter shared by all factories so every produced thread gets a unique id.
    // BUGFIX: was an unsynchronized "static int id" incremented with id++ in newThread(),
    // which could hand out duplicate ids under concurrent use; AtomicInteger is race-free.
    private static final AtomicInteger id = new AtomicInteger(0);
    final String format;

    /**
     * @param format a String.format pattern accepting one integer argument, used to name threads
     */
    public NamedThreadFactory(String format) {
        this.format = format;
        String.format(format, id.get()); // fail fast if the format can't accept an integer id
    }

    @Override
    public Thread newThread(Runnable r) {
        return new Thread(r, String.format(format, id.getAndIncrement()));
    }
}

View File

@ -1,207 +0,0 @@
package org.broadinstitute.sting.utils.threading;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.EnumMap;
import java.util.concurrent.TimeUnit;
/**
 * Uses an MXBean to monitor thread efficiency
 *
 * Once the monitor is created, calls to threadIsDone() can be used to add information
 * about the efficiency of the provided thread to this monitor.
 *
 * Provides simple print() for displaying efficiency information to a logger
 *
 * User: depristo
 * Date: 8/22/12
 * Time: 10:48 AM
 */
@Invariant({"nThreadsAnalyzed >= 0"})
public class ThreadEfficiencyMonitor {
    protected static final boolean DEBUG = false;
    // BUGFIX: logger was categorized under EfficiencyMonitoringThreadFactory.class (copy-paste
    // leftover in this base class); log under this class so messages are attributed correctly
    protected static Logger logger = Logger.getLogger(ThreadEfficiencyMonitor.class);
    // Accumulated milliseconds spent in each State, summed over all analyzed threads
    final EnumMap<State, Long> times = new EnumMap<State, Long>(State.class);

    /**
     * The number of threads we've included in our efficiency monitoring
     */
    int nThreadsAnalyzed = 0;

    /**
     * The bean used to get the thread info about blocked and waiting times
     */
    final ThreadMXBean bean;

    public ThreadEfficiencyMonitor() {
        bean = ManagementFactory.getThreadMXBean();

        // get the bean, and start tracking
        if ( bean.isThreadContentionMonitoringSupported() )
            bean.setThreadContentionMonitoringEnabled(true);
        else
            logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency");

        if ( bean.isThreadCpuTimeSupported() )
            bean.setThreadCpuTimeEnabled(true);
        else
            logger.warn("Thread CPU monitoring not supported, we cannot track GATK multi-threaded efficiency");

        // initialize times to 0
        for ( final State state : State.values() )
            times.put(state, 0L);
    }

    private static long nanoToMilli(final long timeInNano) {
        return TimeUnit.NANOSECONDS.toMillis(timeInNano);
    }

    /**
     * Get the time spent in state across all threads created by this factory
     *
     * @param state to get information about
     * @return the time in milliseconds
     */
    @Ensures({"result >= 0"})
    public synchronized long getStateTime(final State state) {
        return times.get(state);
    }

    /**
     * Get the total time spent in all states across all threads created by this factory
     *
     * @return the time in milliseconds
     */
    @Ensures({"result >= 0"})
    public synchronized long getTotalTime() {
        long total = 0;
        for ( final long time : times.values() )
            total += time;
        return total;
    }

    /**
     * Get the fraction of time spent in state across all threads created by this factory
     *
     * @param state to get information about
     * @return the percentage (0.0-100.0) of time spent in state over all state times of all threads
     */
    @Ensures({"result >= 0.0", "result <= 100.0"})
    public synchronized double getStatePercent(final State state) {
        // Math.max guards against division by zero when no time has been recorded yet
        return (100.0 * getStateTime(state)) / Math.max(getTotalTime(), 1);
    }

    public int getnThreadsAnalyzed() {
        return nThreadsAnalyzed;
    }

    @Override
    public synchronized String toString() {
        final StringBuilder b = new StringBuilder();

        b.append("total ").append(getTotalTime()).append(" ");
        for ( final State state : State.values() ) {
            b.append(state).append(" ").append(getStateTime(state)).append(" ");
        }

        return b.toString();
    }

    /**
     * Print usage information about threads from this factory to logger
     * with the INFO priority
     *
     * @param logger the logger to write usage information to
     */
    public synchronized void printUsageInformation(final Logger logger) {
        printUsageInformation(logger, Priority.INFO);
    }

    /**
     * Print usage information about threads from this factory to logger
     * with the provided priority
     *
     * @param logger the logger to write usage information to
     * @param priority the log4j priority at which the summary lines are emitted
     */
    public synchronized void printUsageInformation(final Logger logger, final Priority priority) {
        logger.debug("Number of threads monitored: " + getnThreadsAnalyzed());
        logger.debug("Total runtime " + new AutoFormattingTime(TimeUnit.MILLISECONDS.toSeconds(getTotalTime())));
        for ( final State state : State.values() ) {
            logger.debug(String.format("\tPercent of time spent %s is %.2f", state.getUserFriendlyName(), getStatePercent(state)));
        }
        logger.log(priority, String.format("CPU      efficiency : %6.2f%% of time spent %s", getStatePercent(State.USER_CPU), State.USER_CPU.getUserFriendlyName()));
        logger.log(priority, String.format("Walker inefficiency : %6.2f%% of time spent %s", getStatePercent(State.BLOCKING), State.BLOCKING.getUserFriendlyName()));
        logger.log(priority, String.format("I/O    inefficiency : %6.2f%% of time spent %s", getStatePercent(State.WAITING_FOR_IO), State.WAITING_FOR_IO.getUserFriendlyName()));
        logger.log(priority, String.format("Thread inefficiency : %6.2f%% of time spent %s", getStatePercent(State.WAITING), State.WAITING.getUserFriendlyName()));
    }

    /**
     * Update the information about completed thread that ran for runtime in milliseconds
     *
     * This method updates all of the key timing and tracking information in the factory so that
     * thread can be retired.  After this call the factory shouldn't have a pointer to the thread any longer
     *
     * @param thread the thread whose information we are updating
     */
    @Ensures({
            "getTotalTime() >= old(getTotalTime())"
    })
    public synchronized void threadIsDone(final Thread thread) {
        nThreadsAnalyzed++;

        if ( DEBUG ) logger.warn("UpdateThreadInfo called");

        final long threadID = thread.getId();
        // use the cached threadID consistently (was thread.getId() for one call, threadID for the others)
        final ThreadInfo info = bean.getThreadInfo(threadID);
        final long totalTimeNano = bean.getThreadCpuTime(threadID);
        final long userTimeNano = bean.getThreadUserTime(threadID);
        final long systemTimeNano = totalTimeNano - userTimeNano;
        final long userTimeInMilliseconds = nanoToMilli(userTimeNano);
        final long systemTimeInMilliseconds = nanoToMilli(systemTimeNano);

        if ( info != null ) {
            if ( DEBUG ) logger.warn("Updating thread with user runtime " + userTimeInMilliseconds + " and system runtime " + systemTimeInMilliseconds + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime());
            incTimes(State.BLOCKING, info.getBlockedTime());
            incTimes(State.WAITING, info.getWaitedTime());
            incTimes(State.USER_CPU, userTimeInMilliseconds);
            // system time is attributed to I/O: time on-CPU in the kernel is dominated by I/O syscalls
            incTimes(State.WAITING_FOR_IO, systemTimeInMilliseconds);
        }
    }

    /**
     * Helper function that increments the times counter by by for state
     *
     * @param state the state whose counter to increment
     * @param by the number of milliseconds to add; must be >= 0
     */
    @Requires({"state != null", "by >= 0"})
    @Ensures("getTotalTime() == old(getTotalTime()) + by")
    private synchronized void incTimes(final State state, final long by) {
        times.put(state, times.get(state) + by);
    }

    public enum State {
        BLOCKING("blocking on synchronized data structures"),
        WAITING("waiting on some other thread"),
        USER_CPU("doing productive CPU work"),
        WAITING_FOR_IO("waiting for I/O");

        private final String userFriendlyName;

        private State(String userFriendlyName) {
            this.userFriendlyName = userFriendlyName;
        }

        public String getUserFriendlyName() {
            return userFriendlyName;
        }
    }
}

View File

@ -1,41 +0,0 @@
package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
/**
 * Integration tests verifying that invalid command-line arguments produce the
 * expected user-facing exceptions (rather than uncontrolled failures).
 *
 * User: chartl
 * Date: 8/31/12
 * Time: 11:03 AM
 */
public class InvalidArgumentIntegrationTest extends WalkerTest {
    private static final String callsB36 = BaseTest.validationDataLocation + "lowpass.N3.chr1.raw.vcf";

    /**
     * Build a VariantsToTable command line with the given extra flag appended, expecting
     * exactly one job to fail with the given exception class.
     *
     * @param flag the extra command-line flag to append
     * @param arg the value for the flag
     * @param exception the exception class the run is expected to raise
     *                  (was a raw Class and misspelled "exeption")
     */
    private WalkerTest.WalkerTestSpec baseTest(String flag, String arg, Class<?> exception) {
        return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variant:vcf "
                + callsB36 + " -F POS,CHROM -R "
                + b36KGReference + " -o %s " + flag + " " + arg,
                1, exception);
    }

    @Test
    public void testUnknownReadFilter() {
        // an unrecognized -rf argument must surface as a MalformedReadFilterException
        executeTest("UnknownReadFilter",baseTest("-rf","TestUnknownReadFilter", UserException.MalformedReadFilterException.class));
    }

    @Test
    public void testMalformedWalkerArgs() {
        // an unknown walker name must surface as a MalformedWalkerArgumentsException
        executeTest("MalformedWalkerArgs",
                new WalkerTest.WalkerTestSpec("-T UnknownWalkerName -M 10 --variant:vcf "
                        + callsB36 + " -F POS,CHROM -R "
                        + b36KGReference + " -o %s ",
                        1, UserException.MalformedWalkerArgumentsException.class));
    }
}

View File

@ -1,163 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import org.testng.Assert;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
/**
 * Unit tests for the LevelingDownsampler: submits uniformly-sized stacks of
 * placeholder objects and checks that items are removed evenly across stacks
 * until the total approaches the target size.
 */
public class LevelingDownsamplerUnitTest extends BaseTest {
    /**
     * One test configuration: numStacks stacks of stackSize items each, leveled
     * down toward targetSize total items, plus the expected per-stack size
     * after downsampling.
     */
    private static class LevelingDownsamplerUniformStacksTest extends TestDataProvider {
        /** Which List implementation backs each stack. */
        public enum DataStructure { LINKED_LIST, ARRAY_LIST }
        int targetSize;             // desired total number of items across all stacks after leveling
        int numStacks;              // how many stacks are submitted to the downsampler
        int stackSize;              // initial (uniform) number of items in each stack
        DataStructure dataStructure; // backing list type for each stack
        int expectedSize;           // expected per-stack size after downsampling
        public LevelingDownsamplerUniformStacksTest( int targetSize, int numStacks, int stackSize, DataStructure dataStructure ) {
            super(LevelingDownsamplerUniformStacksTest.class);
            this.targetSize = targetSize;
            this.numStacks = numStacks;
            this.stackSize = stackSize;
            this.dataStructure = dataStructure;
            expectedSize = calculateExpectedDownsampledStackSize();
            setName(String.format("%s: targetSize=%d numStacks=%d stackSize=%d dataStructure=%s expectedSize=%d",
                    getClass().getSimpleName(), targetSize, numStacks, stackSize, dataStructure, expectedSize));
        }
        /**
         * Builds numStacks stacks, each holding stackSize fresh placeholder Objects,
         * using the configured backing list implementation.
         */
        public Collection<List<Object>> createStacks() {
            Collection<List<Object>> stacks = new ArrayList<List<Object>>();
            for ( int i = 1; i <= numStacks; i++ ) {
                List<Object> stack = dataStructure == DataStructure.LINKED_LIST ? new LinkedList<Object>() : new ArrayList<Object>();
                for ( int j = 1; j <= stackSize; j++ ) {
                    stack.add(new Object());
                }
                stacks.add(stack);
            }
            return stacks;
        }
        /**
         * Mirrors the leveling algorithm's arithmetic: the surplus over targetSize
         * is removed evenly across stacks, but no stack is reduced below 1 item.
         */
        private int calculateExpectedDownsampledStackSize() {
            int numItemsToRemove = numStacks * stackSize - targetSize;
            if ( numStacks == 0 ) {
                return 0;
            }
            else if ( numItemsToRemove <= 0 ) {
                // already at or under the target: nothing should be removed
                return stackSize;
            }
            return Math.max(1, stackSize - (numItemsToRemove / numStacks));
        }
    }
    /**
     * Cross-product of target sizes, stack counts, stack sizes, and backing list
     * types; test instances register themselves via the TestDataProvider base class.
     */
    @DataProvider(name = "UniformStacksDataProvider")
    public Object[][] createUniformStacksTestData() {
        for ( int targetSize = 1; targetSize <= 10000; targetSize *= 10 ) {
            for ( int numStacks = 0; numStacks <= 10; numStacks++ ) {
                for ( int stackSize = 1; stackSize <= 1000; stackSize *= 10 ) {
                    for ( LevelingDownsamplerUniformStacksTest.DataStructure dataStructure : LevelingDownsamplerUniformStacksTest.DataStructure.values() ) {
                        new LevelingDownsamplerUniformStacksTest(targetSize, numStacks, stackSize, dataStructure);
                    }
                }
            }
        }
        return LevelingDownsamplerUniformStacksTest.getTests(LevelingDownsamplerUniformStacksTest.class);
    }
    /**
     * Drives one configuration through the downsampler lifecycle
     * (submit -> signalEndOfInput -> consume -> reset) and checks the
     * finalized/pending state at each step plus the resulting stack sizes
     * and discarded-item accounting.
     */
    @Test( dataProvider = "UniformStacksDataProvider" )
    public void testLevelingDownsamplerWithUniformStacks( LevelingDownsamplerUniformStacksTest test ) {
        logger.warn("Running test: " + test);
        GenomeAnalysisEngine.resetRandomGenerator();
        Downsampler<List<Object>> downsampler = new LevelingDownsampler<List<Object>, Object>(test.targetSize);
        downsampler.submit(test.createStacks());
        // Before end-of-input, submitted stacks should be pending, not finalized
        if ( test.numStacks > 0 ) {
            Assert.assertFalse(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() == null);
            Assert.assertTrue(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() != null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }
        downsampler.signalEndOfInput();
        // After end-of-input, everything should move from pending to finalized
        if ( test.numStacks > 0 ) {
            Assert.assertTrue(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() != null);
            Assert.assertFalse(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() == null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }
        List<List<Object>> downsampledStacks = downsampler.consumeFinalizedItems();
        // Consuming must leave the downsampler empty
        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        Assert.assertEquals(downsampledStacks.size(), test.numStacks);
        int totalRemainingItems = 0;
        for ( List<Object> stack : downsampledStacks ) {
            // Leveling may leave stacks off-by-one from the even split
            Assert.assertTrue(Math.abs(stack.size() - test.expectedSize) <= 1);
            totalRemainingItems += stack.size();
        }
        // Discarded-item count must match the items actually removed
        int numItemsReportedDiscarded = downsampler.getNumberOfDiscardedItems();
        int numItemsActuallyDiscarded = test.numStacks * test.stackSize - totalRemainingItems;
        Assert.assertEquals(numItemsReportedDiscarded, numItemsActuallyDiscarded);
        downsampler.reset();
        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
        // Total can exceed targetSize only because each stack keeps at least 1 item
        Assert.assertTrue(totalRemainingItems <= Math.max(test.targetSize, test.numStacks));
    }
}

View File

@ -1,298 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.VerifyingSamIterator;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ArtificialMultiSampleReadStream;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
 * Unit tests for PerSampleDownsamplingReadsIterator: builds artificial per-sample
 * read streams, merges them, downsamples the merged stream per sample, and then
 * validates each sample's downsampled stream independently.
 */
public class PerSampleDownsamplingReadsIteratorUnitTest extends BaseTest {
    /**
     * One test configuration: a set of randomly-generated per-sample read streams
     * plus the downsampler factory and target coverage used to downsample them.
     */
    private static class PerSampleDownsamplingReadsIteratorTest extends TestDataProvider {
        // TODO: tests should distinguish between variance across samples and variance within a sample
        /** Controls the spacing (in bp) between consecutive read stacks in a stream. */
        private enum StreamDensity {
            SPARSE (MAX_READ_LENGTH, MAX_READ_LENGTH * 2),
            DENSE (1, MIN_READ_LENGTH),
            MIXED (1, MAX_READ_LENGTH * 2),
            UNIFORM_DENSE (1, 1),
            UNIFORM_SPARSE (MAX_READ_LENGTH * 2, MAX_READ_LENGTH * 2);
            int minDistanceBetweenStacks;
            int maxDistanceBetweenStacks;
            StreamDensity( int minDistanceBetweenStacks, int maxDistanceBetweenStacks ) {
                this.minDistanceBetweenStacks = minDistanceBetweenStacks;
                this.maxDistanceBetweenStacks = maxDistanceBetweenStacks;
            }
            public String toString() {
                return String.format("StreamDensity:%d-%d", minDistanceBetweenStacks, maxDistanceBetweenStacks);
            }
        }
        /** Controls how many reads share each alignment start position (stack depth). */
        private enum StreamStackDepth {
            NON_UNIFORM_LOW (1, 5),
            NON_UNIFORM_HIGH (15, 20),
            NON_UNIFORM_MIXED (1, 20),
            UNIFORM_SINGLE (1, 1),
            UNIFORM_LOW (2, 2),
            UNIFORM_HIGH (20, 20),
            UNIFORM_MEDIUM (10, 10); // should set target coverage to this value for testing
            int minReadsPerStack;
            int maxReadsPerStack;
            StreamStackDepth( int minReadsPerStack, int maxReadsPerStack ) {
                this.minReadsPerStack = minReadsPerStack;
                this.maxReadsPerStack = maxReadsPerStack;
            }
            public boolean isUniform() {
                return minReadsPerStack == maxReadsPerStack;
            }
            public String toString() {
                return String.format("StreamStackDepth:%d-%d", minReadsPerStack, maxReadsPerStack);
            }
        }
        /** Controls how many read stacks appear on each contig. */
        private enum StreamStacksPerContig {
            UNIFORM(20, 20),
            NON_UNIFORM(1, 30);
            int minStacksPerContig;
            int maxStacksPerContig;
            StreamStacksPerContig( int minStacksPerContig, int maxStacksPerContig ) {
                this.minStacksPerContig = minStacksPerContig;
                this.maxStacksPerContig = maxStacksPerContig;
            }
            public boolean isUniform() {
                return minStacksPerContig == maxStacksPerContig;
            }
            public String toString() {
                return String.format("StreamStacksPerContig:%d-%d", minStacksPerContig, maxStacksPerContig);
            }
        }
        // Not interested in testing multiple ranges for the read lengths, as none of our current
        // downsamplers are affected by read length
        private static final int MIN_READ_LENGTH = 50;
        private static final int MAX_READ_LENGTH = 150;
        private ReadsDownsamplerFactory<SAMRecord> downsamplerFactory;   // creates one downsampler per sample
        private int targetCoverage;                                      // per-position coverage target for downsampling
        private int numSamples;                                          // number of distinct samples/read groups
        private int minContigs;                                          // minimum contigs per sample stream
        private int maxContigs;                                          // maximum contigs per sample stream
        private StreamDensity streamDensity;
        private StreamStackDepth streamStackDepth;
        private StreamStacksPerContig streamStacksPerContig;
        private double unmappedReadsFraction;                            // probability a sample stream gets unmapped reads
        private int unmappedReadsCount;                                  // unmapped reads per sample that gets them
        private boolean verifySortedness;                                // wrap output in a sort-order verifier?
        private ArtificialMultiSampleReadStream mergedReadStream;        // the merged multi-sample input stream
        private Map<String, ArtificialSingleSampleReadStream> perSampleArtificialReadStreams;
        private Map<String, ArtificialSingleSampleReadStreamAnalyzer> perSampleStreamAnalyzers;
        private SAMFileHeader header;
        public PerSampleDownsamplingReadsIteratorTest( ReadsDownsamplerFactory<SAMRecord> downsamplerFactory,
                                                       int targetCoverage,
                                                       int numSamples,
                                                       int minContigs,
                                                       int maxContigs,
                                                       StreamDensity streamDensity,
                                                       StreamStackDepth streamStackDepth,
                                                       StreamStacksPerContig streamStacksPerContig,
                                                       double unmappedReadsFraction,
                                                       int unmappedReadsCount,
                                                       boolean verifySortedness ) {
            super(PerSampleDownsamplingReadsIteratorTest.class);
            this.downsamplerFactory = downsamplerFactory;
            this.targetCoverage = targetCoverage;
            this.numSamples = numSamples;
            this.minContigs = minContigs;
            this.maxContigs = maxContigs;
            this.streamDensity = streamDensity;
            this.streamStackDepth = streamStackDepth;
            this.streamStacksPerContig = streamStacksPerContig;
            this.unmappedReadsFraction = unmappedReadsFraction;
            this.unmappedReadsCount = unmappedReadsCount;
            this.verifySortedness = verifySortedness;
            header = createHeader();
            createReadStreams();
            setName(String.format("%s: targetCoverage=%d numSamples=%d minContigs=%d maxContigs=%d %s %s %s unmappedReadsFraction=%.2f unmappedReadsCount=%d verifySortedness=%b",
                    getClass().getSimpleName(), targetCoverage, numSamples, minContigs, maxContigs, streamDensity, streamStackDepth, streamStacksPerContig, unmappedReadsFraction, unmappedReadsCount, verifySortedness));
        }
        /**
         * Builds a SAM header with maxContigs contigs (sized to hold the densest
         * possible stream) and one read group + sample name per sample.
         */
        private SAMFileHeader createHeader() {
            SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(maxContigs, 1, (streamDensity.maxDistanceBetweenStacks + MAX_READ_LENGTH) * streamStacksPerContig.maxStacksPerContig + 100000);
            List<String> readGroups = new ArrayList<String>(numSamples);
            List<String> sampleNames = new ArrayList<String>(numSamples);
            for ( int i = 0; i < numSamples; i++ ) {
                readGroups.add("ReadGroup" + i);
                sampleNames.add("Sample" + i);
            }
            return ArtificialSAMUtils.createEnumeratedReadGroups(header, readGroups, sampleNames);
        }
        /**
         * Creates one randomized artificial read stream per sample (with a matching
         * analyzer that knows the downsampling target), then merges them into a
         * single multi-sample stream.
         */
        private void createReadStreams() {
            perSampleArtificialReadStreams = new HashMap<String, ArtificialSingleSampleReadStream>(numSamples);
            perSampleStreamAnalyzers = new HashMap<String, ArtificialSingleSampleReadStreamAnalyzer>(numSamples);
            for (SAMReadGroupRecord readGroup : header.getReadGroups() ) {
                String readGroupID = readGroup.getReadGroupId();
                String sampleName = readGroup.getSample();
                int thisSampleNumContigs = MathUtils.randomIntegerInRange(minContigs, maxContigs);
                int thisSampleStacksPerContig = MathUtils.randomIntegerInRange(streamStacksPerContig.minStacksPerContig, streamStacksPerContig.maxStacksPerContig);
                // With probability unmappedReadsFraction this sample's stream also gets unmapped reads
                int thisSampleNumUnmappedReads = GenomeAnalysisEngine.getRandomGenerator().nextDouble() < unmappedReadsFraction ? unmappedReadsCount : 0;
                ArtificialSingleSampleReadStream thisSampleStream = new ArtificialSingleSampleReadStream(header,
                                                                                                        readGroupID,
                                                                                                        thisSampleNumContigs,
                                                                                                        thisSampleStacksPerContig,
                                                                                                        streamStackDepth.minReadsPerStack,
                                                                                                        streamStackDepth.maxReadsPerStack,
                                                                                                        streamDensity.minDistanceBetweenStacks,
                                                                                                        streamDensity.maxDistanceBetweenStacks,
                                                                                                        MIN_READ_LENGTH,
                                                                                                        MAX_READ_LENGTH,
                                                                                                        thisSampleNumUnmappedReads);
                perSampleArtificialReadStreams.put(sampleName, thisSampleStream);
                perSampleStreamAnalyzers.put(sampleName, new PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer(thisSampleStream, targetCoverage));
            }
            mergedReadStream = new ArtificialMultiSampleReadStream(perSampleArtificialReadStreams.values());
        }
        /**
         * Runs the merged stream through the per-sample downsampling iterator,
         * routes each emitted read to its sample's analyzer, and has each analyzer
         * validate its sample's downsampled stream.
         */
        public void run() {
            StingSAMIterator downsamplingIter = new PerSampleDownsamplingReadsIterator(mergedReadStream.getStingSAMIterator(), downsamplerFactory);
            if ( verifySortedness ) {
                downsamplingIter = new VerifyingSamIterator(downsamplingIter);
            }
            while ( downsamplingIter.hasNext() ) {
                SAMRecord read = downsamplingIter.next();
                String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
                ArtificialSingleSampleReadStreamAnalyzer analyzer = perSampleStreamAnalyzers.get(sampleName);
                if ( analyzer != null ) {
                    analyzer.update(read);
                }
                else {
                    throw new ReviewedStingException("bug: stream analyzer for sample " + sampleName + " not found");
                }
            }
            for ( Map.Entry<String, ArtificialSingleSampleReadStreamAnalyzer> analyzerEntry : perSampleStreamAnalyzers.entrySet() ) {
                ArtificialSingleSampleReadStreamAnalyzer analyzer = analyzerEntry.getValue();
                analyzer.finalizeStats();
                // Validate the downsampled read stream for each sample individually
                analyzer.validate();
            }
            // Allow memory used by this test to be reclaimed:
            mergedReadStream = null;
            perSampleArtificialReadStreams = null;
            perSampleStreamAnalyzers = null;
        }
    }
    /**
     * Generates the cross-product of sample counts, contig counts, stream shapes,
     * and unmapped-read settings used to drive the test method below.
     */
    @DataProvider(name = "PerSampleDownsamplingReadsIteratorTestDataProvider")
    public Object[][] createPerSampleDownsamplingReadsIteratorTests() {
        GenomeAnalysisEngine.resetRandomGenerator();
        // Some values don't vary across tests
        int targetCoverage = PerSampleDownsamplingReadsIteratorTest.StreamStackDepth.UNIFORM_MEDIUM.minReadsPerStack;
        ReadsDownsamplerFactory<SAMRecord> downsamplerFactory = new SimplePositionalDownsamplerFactory<SAMRecord>(targetCoverage);
        int maxContigs = 3;
        boolean verifySortedness = true;
        for ( int numSamples : Arrays.asList(1, 2, 10) ) {
            for ( int minContigs = 1; minContigs <= maxContigs; minContigs++ ) {
                for ( PerSampleDownsamplingReadsIteratorTest.StreamDensity streamDensity : PerSampleDownsamplingReadsIteratorTest.StreamDensity.values() ) {
                    for ( PerSampleDownsamplingReadsIteratorTest.StreamStackDepth streamStackDepth : PerSampleDownsamplingReadsIteratorTest.StreamStackDepth.values() ) {
                        for (PerSampleDownsamplingReadsIteratorTest.StreamStacksPerContig streamStacksPerContig : PerSampleDownsamplingReadsIteratorTest.StreamStacksPerContig.values() ) {
                            for ( double unmappedReadsFraction : Arrays.asList(0.0, 1.0, 0.5) ) {
                                for ( int unmappedReadsCount : Arrays.asList(1, 50) ) {
                                    new PerSampleDownsamplingReadsIteratorTest(downsamplerFactory,
                                                                               targetCoverage,
                                                                               numSamples,
                                                                               minContigs,
                                                                               maxContigs,
                                                                               streamDensity,
                                                                               streamStackDepth,
                                                                               streamStacksPerContig,
                                                                               unmappedReadsFraction,
                                                                               unmappedReadsCount,
                                                                               verifySortedness);
                                }
                            }
                        }
                    }
                }
            }
        }
        return PerSampleDownsamplingReadsIteratorTest.getTests(PerSampleDownsamplingReadsIteratorTest.class);
    }
    /**
     * Executes one configuration; the random generator is reset so each test's
     * stream contents are reproducible.
     */
    @Test(dataProvider = "PerSampleDownsamplingReadsIteratorTestDataProvider")
    public void runPerSampleDownsamplingReadsIteratorTest( PerSampleDownsamplingReadsIteratorTest test ) {
        logger.warn("Running test: " + test);
        GenomeAnalysisEngine.resetRandomGenerator();
        test.run();
    }
}

View File

@ -1,126 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer;
/**
* Class for analyzing an artificial read stream that has been positionally downsampled, and verifying
* that the downsampling was done correctly without changing the stream in unexpected ways.
*
* @author David Roazen
*/
public class PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer extends ArtificialSingleSampleReadStreamAnalyzer {
    // Per-position coverage target that the positional downsampler was configured with
    private int targetCoverage;
    /**
     * @param originalStream the artificial stream (pre-downsampling) whose
     *                       configuration the observed stream is checked against
     * @param targetCoverage the positional downsampling target used on the stream
     */
    public PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer( ArtificialSingleSampleReadStream originalStream, int targetCoverage ) {
        super(originalStream);
        this.targetCoverage = targetCoverage;
    }
    /**
     * Overridden validate() method that checks for the effects of positional downsampling in addition to checking
     * for whether the original properties of the stream not affected by downsampling have been preserved
     */
    @Override
    public void validate() {
        // Degenerate case: a stream configured to produce no reads at all
        if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && originalStream.getNumUnmappedReads() == 0 ) {
            if ( totalReads != 0 ) {
                throw new ReviewedStingException("got reads from the stream, but the stream was configured to have 0 reads");
            }
            return; // no further validation needed for the 0-reads case
        }
        else if ( totalReads == 0 ) {
            throw new ReviewedStingException("got no reads from the stream, but the stream was configured to have > 0 reads");
        }
        // Properties that downsampling must NOT have changed: sample, contig count, stacks per contig
        if ( ! allSamplesMatch ) {
            throw new ReviewedStingException("some reads had the wrong sample");
        }
        if ( numContigs != originalStream.getNumContigs() ) {
            throw new ReviewedStingException("number of contigs not correct");
        }
        if ( stacksPerContig.size() != originalStream.getNumContigs() ) {
            throw new ReviewedStingException(String.format("bug in analyzer code: calculated sizes for %d contigs even though there were only %d contigs",
                                                           stacksPerContig.size(), originalStream.getNumContigs()));
        }
        for ( int contigStackCount : stacksPerContig ) {
            if ( contigStackCount != originalStream.getNumStacksPerContig() ) {
                throw new ReviewedStingException("contig had incorrect number of stacks");
            }
        }
        if ( originalStream.getNumStacksPerContig() > 0 ) {
            // Check for the effects of positional downsampling:
            // a stack can end up no larger than targetCoverage, and no smaller than
            // the lesser of targetCoverage and its original minimum size
            int stackMinimumAfterDownsampling = Math.min(targetCoverage, originalStream.getMinReadsPerStack());
            int stackMaximumAfterDownsampling = targetCoverage;
            if ( minReadsPerStack < stackMinimumAfterDownsampling ) {
                throw new ReviewedStingException("stack had fewer than the minimum number of reads after downsampling");
            }
            if ( maxReadsPerStack > stackMaximumAfterDownsampling ) {
                throw new ReviewedStingException("stack had more than the maximum number of reads after downsampling");
            }
        }
        else if ( minReadsPerStack != null || maxReadsPerStack != null ) {
            throw new ReviewedStingException("bug in analyzer code: reads per stack was calculated even though 0 stacks per contig was specified");
        }
        // Stack spacing is only defined when there are at least 2 stacks per contig
        if ( originalStream.getNumStacksPerContig() > 1 ) {
            if ( minDistanceBetweenStacks < originalStream.getMinDistanceBetweenStacks() ) {
                throw new ReviewedStingException("stacks were separated by less than the minimum distance");
            }
            if ( maxDistanceBetweenStacks > originalStream.getMaxDistanceBetweenStacks() ) {
                throw new ReviewedStingException("stacks were separated by more than the maximum distance");
            }
        }
        else if ( minDistanceBetweenStacks != null || maxDistanceBetweenStacks != null ) {
            throw new ReviewedStingException("bug in analyzer code: distance between stacks was calculated even though numStacksPerContig was <= 1");
        }
        // Read lengths are unaffected by downsampling and must stay within the configured range
        if ( minReadLength < originalStream.getMinReadLength() ) {
            throw new ReviewedStingException("read was shorter than the minimum allowed length");
        }
        if ( maxReadLength > originalStream.getMaxReadLength() ) {
            throw new ReviewedStingException("read was longer than the maximum allowed length");
        }
        // Unmapped reads are not positionally downsampled, so their count must be unchanged
        if ( numUnmappedReads != originalStream.getNumUnmappedReads() ) {
            throw new ReviewedStingException(String.format("wrong number of unmapped reads: requested %d but saw %d",
                                                           originalStream.getNumUnmappedReads(), numUnmappedReads));
        }
        if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) &&
             numUnmappedReads != totalReads ) {
            throw new ReviewedStingException("stream should have consisted only of unmapped reads, but saw some mapped reads");
        }
    }
}

View File

@ -1,129 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
 * Unit tests for the ReservoirDownsampler: submits stacks of identical reads and
 * checks that at most reservoirSize reads survive and that discarded-item
 * accounting is correct.
 */
public class ReservoirDownsamplerUnitTest extends BaseTest {
    /**
     * One test configuration: a reservoir of a given size fed a given number of
     * reads, plus the expected surviving and discarded counts.
     */
    private static class ReservoirDownsamplerTest extends TestDataProvider {
        int reservoirSize;                        // maximum number of reads the reservoir retains
        int totalReads;                           // number of reads submitted
        int expectedNumReadsAfterDownsampling;    // min(reservoirSize, totalReads)
        int expectedNumDiscardedItems;            // reads over capacity that must be dropped
        public ReservoirDownsamplerTest( int reservoirSize, int totalReads ) {
            super(ReservoirDownsamplerTest.class);
            this.reservoirSize = reservoirSize;
            this.totalReads = totalReads;
            expectedNumReadsAfterDownsampling = Math.min(reservoirSize, totalReads);
            expectedNumDiscardedItems = totalReads <= reservoirSize ? 0 : totalReads - reservoirSize;
            setName(String.format("%s: reservoirSize=%d totalReads=%d expectedNumReadsAfterDownsampling=%d expectedNumDiscardedItems=%d",
                    getClass().getSimpleName(), reservoirSize, totalReads, expectedNumReadsAfterDownsampling, expectedNumDiscardedItems));
        }
        /**
         * Builds totalReads identical artificial reads at a single position on a
         * one-contig header (reservoir sampling ignores position anyway).
         */
        public Collection<SAMRecord> createReads() {
            Collection<SAMRecord> reads = new ArrayList<SAMRecord>(totalReads);
            SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
            reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(totalReads, header, "foo", 0, 1, 100));
            return reads;
        }
    }
    /**
     * Cross-product of reservoir sizes and read counts (including the 0-reads
     * edge case for each reservoir size).
     */
    @DataProvider(name = "ReservoirDownsamplerTestDataProvider")
    public Object[][] createReservoirDownsamplerTestData() {
        for ( int reservoirSize = 1; reservoirSize <= 10000; reservoirSize *= 10 ) {
            new ReservoirDownsamplerTest(reservoirSize, 0);
            for ( int totalReads = 1; totalReads <= 10000; totalReads *= 10 ) {
                new ReservoirDownsamplerTest(reservoirSize, totalReads);
            }
        }
        return ReservoirDownsamplerTest.getTests(ReservoirDownsamplerTest.class);
    }
    /**
     * Drives one configuration through the downsampler lifecycle
     * (submit -> signalEndOfInput -> consume -> reset), checking the
     * finalized/pending state at each step and the final read/discard counts.
     */
    @Test(dataProvider = "ReservoirDownsamplerTestDataProvider")
    public void testReservoirDownsampler( ReservoirDownsamplerTest test ) {
        logger.warn("Running test: " + test);
        GenomeAnalysisEngine.resetRandomGenerator();
        ReadsDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(test.reservoirSize);
        downsampler.submit(test.createReads());
        // Unlike positional downsamplers, the reservoir finalizes reads immediately on submit
        if ( test.totalReads > 0 ) {
            Assert.assertTrue(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() != null);
            Assert.assertFalse(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() == null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }
        downsampler.signalEndOfInput();
        // End-of-input should not change the finalized/pending state
        if ( test.totalReads > 0 ) {
            Assert.assertTrue(downsampler.hasFinalizedItems());
            Assert.assertTrue(downsampler.peekFinalized() != null);
            Assert.assertFalse(downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekPending() == null);
        }
        else {
            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        }
        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
        // Consuming must leave the downsampler empty
        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
        Assert.assertEquals(downsampledReads.size(), test.expectedNumReadsAfterDownsampling);
        // Reported discards must agree with both the expectation and the actual survivor count
        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), test.expectedNumDiscardedItems);
        Assert.assertEquals(test.totalReads - downsampledReads.size(), test.expectedNumDiscardedItems);
        downsampler.reset();
        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
    }
}

View File

@ -1,330 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.util.*;
public class SimplePositionalDownsamplerUnitTest extends BaseTest {
    /**
     * One test configuration for the SimplePositionalDownsampler: a list of stack
     * sizes (possibly split across two contigs), the coverage target, and the
     * expected per-stack sizes after downsampling.
     */
    private static class SimplePositionalDownsamplerTest extends TestDataProvider {
        int targetCoverage;                 // per-position coverage cap
        int numStacks;                      // number of distinct alignment-start stacks
        List<Integer> stackSizes;           // initial size of each stack
        List<Integer> expectedStackSizes;   // size of each stack after capping at targetCoverage
        boolean multipleContigs;            // spread the stacks across two contigs?
        int totalInitialReads;              // sum of all initial stack sizes
        public SimplePositionalDownsamplerTest( int targetCoverage, List<Integer> stackSizes, boolean multipleContigs ) {
            super(SimplePositionalDownsamplerTest.class);
            this.targetCoverage = targetCoverage;
            this.numStacks = stackSizes.size();
            this.stackSizes = stackSizes;
            this.multipleContigs = multipleContigs;
            calculateExpectedDownsampledStackSizes();
            totalInitialReads = 0;
            for ( Integer stackSize : stackSizes ) {
                totalInitialReads += stackSize;
            }
            setName(String.format("%s: targetCoverage=%d numStacks=%d stackSizes=%s expectedSizes=%s multipleContigs=%b",
                    getClass().getSimpleName(), targetCoverage, numStacks, stackSizes, expectedStackSizes, multipleContigs));
        }
        /**
         * Builds the configured stacks of identical reads, 10bp apart; when
         * multipleContigs is set, the second half of the stacks moves to contig 2.
         */
        public Collection<SAMRecord> createReads() {
            Collection<SAMRecord> reads = new ArrayList<SAMRecord>();
            SAMFileHeader header = multipleContigs ?
                    ArtificialSAMUtils.createArtificialSamHeader(2, 1, 1000000) :
                    ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
            int refIndex = 0;
            int alignmentStart = 1;
            int readLength = 100;
            for ( int i = 0; i < numStacks; i++ ) {
                // Switch to the second contig once half the stacks have been emitted
                if ( multipleContigs && refIndex == 0 && i >= numStacks / 2 ) {
                    refIndex++;
                }
                reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(stackSizes.get(i), header, "foo",
                        refIndex, alignmentStart, readLength));
                alignmentStart += 10;
            }
            return reads;
        }
        /**
         * Each stack is simply capped at targetCoverage; smaller stacks are untouched.
         */
        private void calculateExpectedDownsampledStackSizes() {
            expectedStackSizes = new ArrayList<Integer>(numStacks);
            for ( Integer stackSize : stackSizes ) {
                int expectedSize = targetCoverage >= stackSize ? stackSize : targetCoverage;
                expectedStackSizes.add(expectedSize);
            }
        }
    }
    /**
     * Generates the cross-product of coverage targets, contig counts, and stack
     * counts; stack sizes are drawn randomly in [1, 2*targetCoverage] so that
     * roughly half the stacks need downsampling. The random generator is reset
     * first so the generated cases are reproducible.
     */
    @DataProvider(name = "SimplePositionalDownsamplerTestDataProvider")
    public Object[][] createSimplePositionalDownsamplerTestData() {
        GenomeAnalysisEngine.resetRandomGenerator();
        for ( int targetCoverage = 1; targetCoverage <= 10000; targetCoverage *= 10 ) {
            for ( int contigs = 1; contigs <= 2; contigs++ ) {
                for ( int numStacks = 0; numStacks <= 10; numStacks++ ) {
                    List<Integer> stackSizes = new ArrayList<Integer>(numStacks);
                    for ( int stack = 1; stack <= numStacks; stack++ ) {
                        stackSizes.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(targetCoverage * 2) + 1);
                    }
                    new SimplePositionalDownsamplerTest(targetCoverage, stackSizes, contigs > 1);
                }
            }
        }
        return SimplePositionalDownsamplerTest.getTests(SimplePositionalDownsamplerTest.class);
    }
@Test( dataProvider = "SimplePositionalDownsamplerTestDataProvider" )
public void testSimplePostionalDownsampler( SimplePositionalDownsamplerTest test ) {
logger.warn("Running test: " + test);
GenomeAnalysisEngine.resetRandomGenerator();
ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(test.targetCoverage);
downsampler.submit(test.createReads());
if ( test.numStacks > 1 ) {
Assert.assertTrue(downsampler.hasFinalizedItems());
Assert.assertTrue(downsampler.peekFinalized() != null);
Assert.assertTrue(downsampler.hasPendingItems());
Assert.assertTrue(downsampler.peekPending() != null);
}
else if ( test.numStacks == 1 ) {
Assert.assertFalse(downsampler.hasFinalizedItems());
Assert.assertTrue(downsampler.peekFinalized() == null);
Assert.assertTrue(downsampler.hasPendingItems());
Assert.assertTrue(downsampler.peekPending() != null);
}
else {
Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
}
downsampler.signalEndOfInput();
if ( test.numStacks > 0 ) {
Assert.assertTrue(downsampler.hasFinalizedItems());
Assert.assertTrue(downsampler.peekFinalized() != null);
Assert.assertFalse(downsampler.hasPendingItems());
Assert.assertTrue(downsampler.peekPending() == null);
}
else {
Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
}
List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
if ( test.numStacks == 0 ) {
Assert.assertTrue(downsampledReads.isEmpty());
}
else {
List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampledReads);
Assert.assertEquals(downsampledStackSizes.size(), test.numStacks);
Assert.assertEquals(downsampledStackSizes, test.expectedStackSizes);
int numReadsActuallyEliminated = test.totalInitialReads - downsampledReads.size();
int numReadsReportedEliminated = downsampler.getNumberOfDiscardedItems();
Assert.assertEquals(numReadsActuallyEliminated, numReadsReportedEliminated);
}
downsampler.reset();
Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
}
private List<Integer> getDownsampledStackSizesAndVerifySortedness( List<SAMRecord> downsampledReads ) {
List<Integer> stackSizes = new ArrayList<Integer>();
if ( downsampledReads.isEmpty() ) {
return stackSizes;
}
Iterator<SAMRecord> iter = downsampledReads.iterator();
Assert.assertTrue(iter.hasNext());
SAMRecord previousRead = iter.next();
int currentStackSize = 1;
while ( iter.hasNext() ) {
SAMRecord currentRead = iter.next();
if ( currentRead.getReferenceIndex() > previousRead.getReferenceIndex() || currentRead.getAlignmentStart() > previousRead.getAlignmentStart() ) {
stackSizes.add(currentStackSize);
currentStackSize = 1;
}
else if ( currentRead.getReferenceIndex() < previousRead.getReferenceIndex() || currentRead.getAlignmentStart() < previousRead.getAlignmentStart() ) {
Assert.fail(String.format("Reads are out of order: %s %s", previousRead, currentRead));
}
else {
currentStackSize++;
}
previousRead = currentRead;
}
stackSizes.add(currentStackSize);
return stackSizes;
}
@Test
public void testSimplePositionalDownsamplerSignalNoMoreReadsBefore() {
ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(1000);
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
Collection<SAMRecord> readStack = new ArrayList<SAMRecord>();
readStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(50, header, "foo", 0, 1, 100));
downsampler.submit(readStack);
Assert.assertFalse(downsampler.hasFinalizedItems());
Assert.assertTrue(downsampler.peekFinalized() == null);
Assert.assertTrue(downsampler.hasPendingItems());
Assert.assertTrue(downsampler.peekPending() != null);
SAMRecord laterRead = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 2, 100);
downsampler.signalNoMoreReadsBefore(laterRead);
Assert.assertTrue(downsampler.hasFinalizedItems());
Assert.assertTrue(downsampler.peekFinalized() != null);
Assert.assertFalse(downsampler.hasPendingItems());
Assert.assertTrue(downsampler.peekPending() == null);
List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
Assert.assertEquals(downsampledReads.size(), readStack.size());
}
@Test
public void testBasicUnmappedReadsSupport() {
ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(100);
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
Collection<SAMRecord> readStack = new ArrayList<SAMRecord>();
readStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
SAMRecord.NO_ALIGNMENT_START, 100));
for ( SAMRecord read : readStack ) {
Assert.assertTrue(read.getReadUnmappedFlag());
}
downsampler.submit(readStack);
downsampler.signalEndOfInput();
List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
// Unmapped reads should not get downsampled at all by the SimplePositionalDownsampler
Assert.assertEquals(downsampledReads.size(), readStack.size());
for ( SAMRecord read: downsampledReads ) {
Assert.assertTrue(read.getReadUnmappedFlag());
}
}
@Test
public void testMixedMappedAndUnmappedReadsSupport() {
ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(100);
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
Collection<SAMRecord> mappedReadStack = new ArrayList<SAMRecord>();
mappedReadStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", 0, 1, 100));
for ( SAMRecord read : mappedReadStack ) {
Assert.assertFalse(read.getReadUnmappedFlag());
}
Collection<SAMRecord> unmappedReadStack = new ArrayList<SAMRecord>();
unmappedReadStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
SAMRecord.NO_ALIGNMENT_START, 100));
for ( SAMRecord read : unmappedReadStack ) {
Assert.assertTrue(read.getReadUnmappedFlag());
}
downsampler.submit(mappedReadStack);
downsampler.submit(unmappedReadStack);
downsampler.signalEndOfInput();
List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
// Unmapped reads should not get downsampled at all by the SimplePositionalDownsampler
Assert.assertEquals(downsampledReads.size(), 300);
Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 100);
int count = 1;
for ( SAMRecord read: downsampledReads ) {
if ( count <= 100 ) {
Assert.assertFalse(read.getReadUnmappedFlag());
}
else {
Assert.assertTrue(read.getReadUnmappedFlag());
}
count++;
}
}
@Test
public void testGATKSAMRecordSupport() {
ReadsDownsampler<GATKSAMRecord> downsampler = new SimplePositionalDownsampler<GATKSAMRecord>(1000);
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
for ( int i = 0; i < 10; i++ ) {
reads.add(ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 10, 20 * i + 10));
}
downsampler.submit(reads);
downsampler.signalEndOfInput();
List<GATKSAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
Assert.assertEquals(downsampledReads.size(), 10);
}
}

View File

@ -1,546 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
 * Unit tests for the experimental version of LocusIteratorByState (LIBS): pileup
 * construction over artificial reads, CIGAR edge cases (X/=, insertions, whole-indel
 * reads, soft clips), PileupElement event flags, and the internal ReadStateManager.
 */
public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
    private static SAMFileHeader header;            // single-contig artificial header shared by all tests
    private LocusIteratorByStateExperimental li;    // iterator under test, rebuilt per test via makeLTBS()
    private GenomeLocParser genomeLocParser;

    @BeforeClass
    public void beforeClass() {
        header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
        genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
    }

    // Builds a LIBS over an in-memory read list, treating all reads as a single sample
    // (no read groups).
    private final LocusIteratorByStateExperimental makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
        return new LocusIteratorByStateExperimental(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByStateExperimental.sampleListForSAMWithoutReadGroups());
    }

    // ReadProperties with no downsampling.
    private static ReadProperties createTestReadProperties() {
        return createTestReadProperties(null);
    }

    // Minimal ReadProperties for testing: no readers, strict validation, no filters or
    // transformers, and the given (possibly null) downsampling method.
    private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
        return new ReadProperties(
                Collections.<SAMReaderID>emptyList(),
                new SAMFileHeader(),
                false,
                SAMFileReader.ValidationStringency.STRICT,
                downsamplingMethod,
                new ValidationExclusion(),
                Collections.<ReadFilter>emptyList(),
                Collections.<ReadTransformer>emptyList(),
                false,
                (byte) -1
        );
    }

    /**
     * Minimal CloseableIterator adapter around a plain Iterator; close() is a no-op.
     */
    private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
        Iterator<T> iterator;

        public FakeCloseableIterator(Iterator<T> it) {
            iterator = it;
        }

        @Override
        public void close() {
            return;
        }

        @Override
        public boolean hasNext() {
            return iterator.hasNext();
        }

        @Override
        public T next() {
            return iterator.next();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Don't remove!");
        }
    }

    /**
     * Reads using the X (mismatch) and = (match) CIGAR operators must contribute to the
     * pileup exactly like reads using plain M: depth is 4 at every covered locus.
     */
    @Test
    public void testXandEQOperators() {
        final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
        final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
        // create a test version of the Reads object
        ReadProperties readAttributes = createTestReadProperties();
        SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
        r1.setReadBases(bases1);
        r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
        r1.setCigarString("10M");
        SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
        r2.setReadBases(bases2);
        // NOTE(review): 12 base qualities for a 10-base read — length mismatch; confirm intended
        r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
        r2.setCigarString("3=1X5=1X");
        SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
        r3.setReadBases(bases2);
        // NOTE(review): same 12-vs-10 quality length mismatch as r2
        r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
        r3.setCigarString("3=1X5M1X");
        SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
        r4.setReadBases(bases2);
        r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
        r4.setCigarString("10M");
        List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads,readAttributes);
        while (li.hasNext()) {
            AlignmentContext context = li.next();
            ReadBackedPileup pileup = context.getBasePileup();
            // all four reads span every locus, so depth must be 4 everywhere
            Assert.assertEquals(pileup.depthOfCoverage(), 4);
        }
    }

    /**
     * A read with an internal insertion (4M2I6M) must be reported in the pileup as a
     * before-insertion element carrying the inserted bases ("CT", length 2).
     */
    @Test
    public void testIndelsInRegularPileup() {
        final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
        final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};
        // create a test version of the Reads object
        ReadProperties readAttributes = createTestReadProperties();
        SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
        before.setReadBases(bases);
        before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
        before.setCigarString("10M");
        SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
        during.setReadBases(indelBases);
        during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
        during.setCigarString("4M2I6M");
        SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
        after.setReadBases(bases);
        after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
        after.setCigarString("10M");
        List<SAMRecord> reads = Arrays.asList(before, during, after);
        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads,readAttributes);
        boolean foundIndel = false;
        while (li.hasNext()) {
            AlignmentContext context = li.next();
            ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10);
            for (PileupElement p : pileup) {
                if (p.isBeforeInsertion()) {
                    foundIndel = true;
                    Assert.assertEquals(p.getEventLength(), 2, "Wrong event length");
                    Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect");
                    break;
                }
            }
        }
        Assert.assertTrue(foundIndel,"Indel in pileup not found");
    }

    /**
     * A read consisting of a single 76I insertion, with no other reads present, should
     * still produce one pileup at its locus containing exactly that read.
     */
    @Test
    public void testWholeIndelReadInIsolation() {
        final int firstLocus = 44367789;
        // create a test version of the Reads object
        ReadProperties readAttributes = createTestReadProperties();
        SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
        indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
        indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
        indelOnlyRead.setCigarString("76I");
        List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads, readAttributes);
        // Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read
        // and considers it to be an indel-containing read.
        Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled");
        AlignmentContext alignmentContext = li.next();
        Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location.");
        ReadBackedPileup basePileup = alignmentContext.getBasePileup();
        Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size");
        Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect");
    }

    /**
     * Test to make sure that reads supporting only an indel (example cigar string: 76I) do
     * not negatively influence the ordering of the pileup. Expects exactly two alignment
     * contexts: the leading read alone at firstLocus, then the indel-only and
     * match-after-indel reads together at secondLocus.
     */
    @Test
    public void testWholeIndelRead() {
        final int firstLocus = 44367788, secondLocus = firstLocus + 1;
        SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76);
        leadingRead.setReadBases(Utils.dupBytes((byte)'A',76));
        leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
        leadingRead.setCigarString("1M75I");
        SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
        indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
        indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
        indelOnlyRead.setCigarString("76I");
        SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76);
        fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76));
        fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
        fullMatchAfterIndel.setCigarString("75I1M");
        List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);
        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads, createTestReadProperties());
        int currentLocus = firstLocus;
        int numAlignmentContextsFound = 0;
        while(li.hasNext()) {
            AlignmentContext alignmentContext = li.next();
            Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect");
            if(currentLocus == firstLocus) {
                List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
                Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus);
                Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus);
            }
            else if(currentLocus == secondLocus) {
                List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
                Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus);
                Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus);
                Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus);
            }
            currentLocus++;
            numAlignmentContextsFound++;
        }
        Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts");
    }

    /**
     * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly:
     * each produces a single-element pileup flagged before-insertion (not after), with the
     * event bases equal to the full inserted sequence.
     */
    @Test
    public void testWholeIndelReadRepresentedTest() {
        final int firstLocus = 44367788, secondLocus = firstLocus + 1;
        SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
        read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
        read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
        read1.setCigarString("1I");
        List<SAMRecord> reads = Arrays.asList(read1);
        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads, createTestReadProperties());
        while(li.hasNext()) {
            AlignmentContext alignmentContext = li.next();
            ReadBackedPileup p = alignmentContext.getBasePileup();
            Assert.assertTrue(p.getNumberOfElements() == 1);
            PileupElement pe = p.iterator().next();
            Assert.assertTrue(pe.isBeforeInsertion());
            Assert.assertFalse(pe.isAfterInsertion());
            Assert.assertEquals(pe.getEventBases(), "A");
        }
        SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
        read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
        read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
        read2.setCigarString("10I");
        reads = Arrays.asList(read2);
        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(reads, createTestReadProperties());
        while(li.hasNext()) {
            AlignmentContext alignmentContext = li.next();
            ReadBackedPileup p = alignmentContext.getBasePileup();
            Assert.assertTrue(p.getNumberOfElements() == 1);
            PileupElement pe = p.iterator().next();
            Assert.assertTrue(pe.isBeforeInsertion());
            Assert.assertFalse(pe.isAfterInsertion());
            Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA");
        }
    }

    ////////////////////////////////////////////
    // comprehensive LIBS/PileupElement tests //
    ////////////////////////////////////////////

    // Bit flags describing which PileupElement event predicates are expected to be true
    // at a given offset of the test read.
    private static final int IS_BEFORE_DELETED_BASE_FLAG = 1;
    private static final int IS_BEFORE_DELETION_START_FLAG = 2;
    private static final int IS_AFTER_DELETED_BASE_FLAG = 4;
    private static final int IS_AFTER_DELETION_END_FLAG = 8;
    private static final int IS_BEFORE_INSERTION_FLAG = 16;
    private static final int IS_AFTER_INSERTION_FLAG = 32;
    private static final int IS_NEXT_TO_SOFTCLIP_FLAG = 64;

    /**
     * One LIBS pileup-flag test case: a CIGAR string, the read length it implies, the
     * expected read offsets of the pileup elements, and the expected flag bitmask at
     * each of those offsets (parallel lists).
     */
    private static class LIBSTest {
        final String cigar;
        final int readLength;
        final List<Integer> offsets;
        final List<Integer> flags;

        private LIBSTest(final String cigar, final int readLength, final List<Integer> offsets, final List<Integer> flags) {
            this.cigar = cigar;
            this.readLength = readLength;
            this.offsets = offsets;
            this.flags = flags;
        }
    }

    @DataProvider(name = "LIBSTest")
    public Object[][] createLIBSTestData() {
        return new Object[][]{
                {new LIBSTest("1I", 1, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
                {new LIBSTest("10I", 10, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
                {new LIBSTest("2M2I2M", 6, Arrays.asList(0,1,4,5), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG,IS_AFTER_INSERTION_FLAG,0))},
                {new LIBSTest("2M2I", 4, Arrays.asList(0,1), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG))},
                //TODO -- uncomment these when LIBS is fixed
                //{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
                //{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
                //{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
                {new LIBSTest("1M2D2M", 3, Arrays.asList(0,1,2), Arrays.asList(IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG,0))},
                {new LIBSTest("1S1M", 2, Arrays.asList(1), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
                {new LIBSTest("1M1S", 2, Arrays.asList(0), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
                {new LIBSTest("1S1M1I", 3, Arrays.asList(1), Arrays.asList(IS_BEFORE_INSERTION_FLAG | IS_NEXT_TO_SOFTCLIP_FLAG))}
        };
    }

    /**
     * Drives a single LIBSTest case: builds one read with the given CIGAR, iterates the
     * pileups, and checks every PileupElement event predicate against the expected flag
     * bitmask, plus the element's read offset.
     */
    @Test(dataProvider = "LIBSTest")
    public void testLIBS(LIBSTest params) {
        final int locus = 44367788;
        SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
        read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
        read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
        read.setCigarString(params.cigar);
        // create the iterator by state with the fake reads and fake records
        li = makeLTBS(Arrays.asList(read), createTestReadProperties());
        int offset = 0;  // index into params.offsets/params.flags, one step per pileup
        while ( li.hasNext() ) {
            AlignmentContext alignmentContext = li.next();
            ReadBackedPileup p = alignmentContext.getBasePileup();
            Assert.assertTrue(p.getNumberOfElements() == 1);
            PileupElement pe = p.iterator().next();
            final int flag = params.flags.get(offset);
            Assert.assertEquals(pe.isBeforeDeletedBase(), (flag & IS_BEFORE_DELETED_BASE_FLAG) != 0);
            Assert.assertEquals(pe.isBeforeDeletionStart(), (flag & IS_BEFORE_DELETION_START_FLAG) != 0);
            Assert.assertEquals(pe.isAfterDeletedBase(), (flag & IS_AFTER_DELETED_BASE_FLAG) != 0);
            Assert.assertEquals(pe.isAfterDeletionEnd(), (flag & IS_AFTER_DELETION_END_FLAG) != 0);
            Assert.assertEquals(pe.isBeforeInsertion(), (flag & IS_BEFORE_INSERTION_FLAG) != 0);
            Assert.assertEquals(pe.isAfterInsertion(), (flag & IS_AFTER_INSERTION_FLAG) != 0);
            Assert.assertEquals(pe.isNextToSoftClip(), (flag & IS_NEXT_TO_SOFTCLIP_FLAG) != 0);
            Assert.assertEquals(pe.getOffset(), params.offsets.get(offset).intValue());
            offset++;
        }
    }

    ////////////////////////////////////////////////
    // End comprehensive LIBS/PileupElement tests //
    ////////////////////////////////////////////////

    ///////////////////////////////////////
    // Read State Manager Tests          //
    ///////////////////////////////////////

    /**
     * One PerSampleReadStateManager test case: builds stacks of reads (one stack per
     * alignment start, sizes from readCountsPerAlignmentStart), feeds their record
     * states into a PerSampleReadStateManager, then verifies iteration order and —
     * when removalInterval > 0 — removal of every removalInterval-th state.
     */
    private class PerSampleReadStateManagerTest extends TestDataProvider {
        private List<Integer> readCountsPerAlignmentStart;
        private List<SAMRecord> reads;
        private List<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>> recordStatesByAlignmentStart;
        private int removalInterval;  // remove every Nth state during iteration; 0 = no removals

        public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
            super(PerSampleReadStateManagerTest.class);
            this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
            this.removalInterval = removalInterval;
            reads = new ArrayList<SAMRecord>();
            recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>>();
            setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d",
                    getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
        }

        public void run() {
            // PerSampleReadStateManager is a non-static inner class, so instantiate it
            // through an empty LIBS + ReadStateManager chain
            LocusIteratorByStateExperimental libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
            LocusIteratorByStateExperimental.ReadStateManager readStateManager =
                    libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
            LocusIteratorByStateExperimental.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
                    readStateManager.new PerSampleReadStateManager();
            makeReads();
            for ( ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
                perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
            }
            // read state manager should have the right number of reads
            Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
            Iterator<SAMRecord> originalReadsIterator = reads.iterator();
            Iterator<LocusIteratorByStateExperimental.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
            int recordStateCount = 0;
            int numReadStatesRemoved = 0;
            // Do a first-pass validation of the record state iteration by making sure we get back everything we
            // put in, in the same order, doing any requested removals of read states along the way
            while ( recordStateIterator.hasNext() ) {
                LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
                recordStateCount++;
                SAMRecord readFromPerSampleReadStateManager = readState.getRead();
                Assert.assertTrue(originalReadsIterator.hasNext());
                SAMRecord originalRead = originalReadsIterator.next();
                // The read we get back should be literally the same read in memory as we put in
                Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
                // If requested, remove a read state every removalInterval states
                if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
                    recordStateIterator.remove();
                    numReadStatesRemoved++;
                }
            }
            Assert.assertFalse(originalReadsIterator.hasNext());
            // If we removed any read states, do a second pass through the read states to make sure the right
            // states were removed
            if ( numReadStatesRemoved > 0 ) {
                Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
                originalReadsIterator = reads.iterator();
                recordStateIterator = perSampleReadStateManager.iterator();
                int readCount = 0;
                int readStateCount = 0;
                // Match record states with the reads that should remain after removal
                while ( recordStateIterator.hasNext() ) {
                    LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
                    readStateCount++;
                    SAMRecord readFromPerSampleReadStateManager = readState.getRead();
                    Assert.assertTrue(originalReadsIterator.hasNext());
                    SAMRecord originalRead = originalReadsIterator.next();
                    readCount++;
                    if ( readCount % removalInterval == 0 ) {
                        originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
                        readCount++;
                    }
                    // The read we get back should be literally the same read in memory as we put in (after accounting for removals)
                    Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
                }
                Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
            }
            // Allow memory used by this test to be reclaimed
            readCountsPerAlignmentStart = null;
            reads = null;
            recordStatesByAlignmentStart = null;
        }

        // Builds one stack of identical reads per entry in readCountsPerAlignmentStart
        // (random lengths in [50, 100]) and wraps each read in a SAMRecordState.
        private void makeReads() {
            int alignmentStart = 1;
            for ( int readsThisStack : readCountsPerAlignmentStart ) {
                ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
                ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByStateExperimental.SAMRecordState>();
                for ( SAMRecord read : stackReads ) {
                    stackRecordStates.add(new LocusIteratorByStateExperimental.SAMRecordState(read));
                }
                reads.addAll(stackReads);
                recordStatesByAlignmentStart.add(stackRecordStates);
            }
        }
    }

    /**
     * Cross product of assorted stack-size configurations and removal intervals
     * (0 = no removal, 2, 3).
     */
    @DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
    public Object[][] createPerSampleReadStateManagerTests() {
        for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
                Arrays.asList(2),
                Arrays.asList(10),
                Arrays.asList(1, 1),
                Arrays.asList(2, 2),
                Arrays.asList(10, 10),
                Arrays.asList(1, 10),
                Arrays.asList(10, 1),
                Arrays.asList(1, 1, 1),
                Arrays.asList(2, 2, 2),
                Arrays.asList(10, 10, 10),
                Arrays.asList(1, 1, 1, 1, 1, 1),
                Arrays.asList(10, 10, 10, 10, 10, 10),
                Arrays.asList(1, 2, 10, 1, 2, 10)
        ) ) {
            for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
                new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
            }
        }
        return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
    }

    @Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
    public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
        logger.warn("Running test: " + test);
        test.run();
    }
}

View File

@ -1,166 +0,0 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import java.util.*;
/**
* Basic tests to prove the integrity of the reservoir downsampler.
* At the moment, always run tests on SAM records as that's the task
* for which the downsampler was conceived.
*
* @author mhanna
* @version 0.1
*/
public class LegacyReservoirDownsamplerUnitTest {
private static final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,200);
@Test
public void testEmptyIterator() {
    // A freshly-constructed reservoir of capacity 1 must report itself as empty.
    final ReservoirDownsampler<SAMRecord> reservoir = new ReservoirDownsampler<SAMRecord>(1);
    Assert.assertTrue(reservoir.isEmpty(), "Downsampler is not empty but should be.");
}
@Test
public void testOneElementWithPoolSizeOne() {
    // A single read fed into a capacity-1 reservoir must come back as the sole element.
    final ReservoirDownsampler<SAMRecord> reservoir = new ReservoirDownsampler<SAMRecord>(1);
    final List<GATKSAMRecord> input = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
    reservoir.addAll(input);
    Assert.assertFalse(reservoir.isEmpty(), "Downsampler is empty but shouldn't be");
    final Collection<SAMRecord> contents = reservoir.getDownsampledContents();
    Assert.assertEquals(contents.size(), 1, "Downsampler is returning the wrong number of reads");
    // same object identity, not merely an equal read
    Assert.assertSame(contents.iterator().next(), input.get(0), "Downsampler is returning an incorrect read");
}
@Test
public void testOneElementWithPoolSizeGreaterThanOne() {
    // One read into a capacity-5 reservoir: still exactly one element back, unchanged.
    final ReservoirDownsampler<SAMRecord> reservoir = new ReservoirDownsampler<SAMRecord>(5);
    final List<GATKSAMRecord> input = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
    reservoir.addAll(input);
    Assert.assertFalse(reservoir.isEmpty(), "Downsampler is empty but shouldn't be");
    final Collection<SAMRecord> contents = reservoir.getDownsampledContents();
    Assert.assertEquals(contents.size(), 1, "Downsampler is returning the wrong number of reads");
    // same object identity, not merely an equal read
    Assert.assertSame(contents.iterator().next(), input.get(0), "Downsampler is returning an incorrect read");
}
@Test
public void testPoolFilledPartially() {
    // Three reads into a capacity-5 reservoir: all three must survive, in input order.
    final List<SAMRecord> input = new ArrayList<SAMRecord>();
    for ( int i = 1; i <= 3; i++ ) {
        input.add(ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, 1, 76));
    }
    final ReservoirDownsampler<SAMRecord> reservoir = new ReservoirDownsampler<SAMRecord>(5);
    reservoir.addAll(input);
    Assert.assertFalse(reservoir.isEmpty(), "Downsampler is empty but shouldn't be");
    final List<SAMRecord> contents = new ArrayList<SAMRecord>(reservoir.getDownsampledContents());
    Assert.assertEquals(contents.size(), 3, "Downsampler is returning the wrong number of reads");
    // identity checks, position by position
    Assert.assertSame(contents.get(0), input.get(0), "Downsampler read 1 is incorrect");
    Assert.assertSame(contents.get(1), input.get(1), "Downsampler read 2 is incorrect");
    Assert.assertSame(contents.get(2), input.get(2), "Downsampler read 3 is incorrect");
}
@Test
public void testPoolFilledExactly() {
List<SAMRecord> reads = new ArrayList<SAMRecord>();
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
Assert.assertEquals(batchedReads.size(), 5, "Downsampler is returning the wrong number of reads");
Assert.assertSame(batchedReads.iterator().next(), reads.get(0), "Downsampler is returning an incorrect read");
Assert.assertSame(batchedReads.get(0), reads.get(0), "Downsampler read 1 is incorrect");
Assert.assertSame(batchedReads.get(1), reads.get(1), "Downsampler read 2 is incorrect");
Assert.assertSame(batchedReads.get(2), reads.get(2), "Downsampler read 3 is incorrect");
Assert.assertSame(batchedReads.get(3), reads.get(3), "Downsampler read 4 is incorrect");
Assert.assertSame(batchedReads.get(4), reads.get(4), "Downsampler read 5 is incorrect");
}
@Test
public void testLargerPileWithZeroElementPool() {
List<SAMRecord> reads = new ArrayList<SAMRecord>();
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(0);
downsampler.addAll(reads);
Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be");
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
Assert.assertEquals(batchedReads.size(), 0, "Downsampler is returning the wrong number of reads");
}
@Test
public void testLargerPileWithSingleElementPool() {
List<SAMRecord> reads = new ArrayList<SAMRecord>();
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
Assert.assertTrue(reads.contains(batchedReads.get(0)),"Downsampler is returning a bad read.");
}
@Test
public void testFillingAcrossLoci() {
List<SAMRecord> reads = new ArrayList<SAMRecord>();
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
List<SAMRecord> batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
Assert.assertEquals(batchedReads.size(), 1, "Downsampler is returning the wrong number of reads");
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
reads.clear();
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,2,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,2,76));
downsampler.clear();
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads");
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read.");
reads.clear();
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,3,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,3,76));
downsampler.clear();
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
batchedReads = new ArrayList<SAMRecord>(downsampler.getDownsampledContents());
Assert.assertEquals(batchedReads.size(), 2, "Downsampler is returning the wrong number of reads");
Assert.assertEquals(batchedReads.get(0), reads.get(0), "Downsampler is returning an incorrect read.");
Assert.assertEquals(batchedReads.get(1), reads.get(1), "Downsampler is returning an incorrect read.");
}
}

View File

@ -1,71 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingDeque;
/**
 * Unit tests for the InputProducer: verifies that values produced into the
 * bounded read queue arrive in order, never exceed the queue capacity, and
 * terminate with a single "last" sentinel value.
 *
 * User: depristo
 * Date: 8/24/12
 * Time: 11:25 AM
 */
public class InputProducerUnitTest extends BaseTest {
    @DataProvider(name = "InputProducerTest")
    public Object[][] createInputProducerTest() {
        List<Object[]> tests = new ArrayList<Object[]>();

        // Cross every element count with every queue capacity.
        for ( final int nElements : Arrays.asList(0, 1, 10, 100, 1000, 10000, 100000) ) {
            for ( final int queueSize : Arrays.asList(1, 10, 100) ) {
                tests.add(new Object[]{ nElements, queueSize });
            }
        }

        return tests.toArray(new Object[][]{});
    }

    @Test(enabled = true, dataProvider = "InputProducerTest", timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME)
    public void testInputProducer(final int nElements, final int queueSize) throws InterruptedException {
        final List<Integer> elements = new ArrayList<Integer>(nElements);
        for ( int i = 0; i < nElements; i++ ) elements.add(i);

        final LinkedBlockingDeque<InputProducer<Integer>.InputValue> readQueue =
                new LinkedBlockingDeque<InputProducer<Integer>.InputValue>(queueSize);

        final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), null, readQueue);
        final ExecutorService es = Executors.newSingleThreadExecutor();
        try {
            es.submit(ip);

            int lastValue = -1;
            int nRead = 0;
            while ( true ) {
                // The producer must never enqueue beyond the queue's nominal capacity.
                final int observedQueueSize = readQueue.size();
                Assert.assertTrue(observedQueueSize <= queueSize,
                        "Reader is enqueuing more elements " + observedQueueSize + " than allowed " + queueSize);

                final InputProducer<Integer>.InputValue value = readQueue.take();
                if ( value.isLast() ) {
                    // Sentinel reached: all elements must have been consumed and the queue drained.
                    Assert.assertEquals(nRead, nElements, "Number of input values " + nRead + " not all that are expected " + nElements);
                    Assert.assertEquals(readQueue.size(), 0, "Last queue element found but queue contains more values!");
                    break;
                } else {
                    // Values must arrive strictly in order, with no gaps.
                    Assert.assertTrue(lastValue < value.getValue(), "Read values coming out of order!");
                    final int expected = lastValue + 1;
                    Assert.assertEquals((int)value.getValue(), expected, "Value observed " + value.getValue() + " not equal to the expected value " + expected);
                    nRead++;
                    lastValue = value.getValue();
                }
            }
        } finally {
            // Previously the executor was never shut down, leaking one thread per
            // data-provider invocation.
            es.shutdown();
        }
    }
}

View File

@ -1,182 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import org.apache.log4j.BasicConfigurator;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
 * Unit tests for the NanoScheduler: drives map/reduce pipelines over integer
 * ranges with varying buffer sizes and thread counts, and checks the summed
 * result against a directly computed expectation.
 *
 * User: depristo
 * Date: 8/24/12
 * Time: 11:25 AM
 */
public class NanoSchedulerUnitTest extends BaseTest {
// Upper bound (ms) on the runtime of any single test; also reused by sibling test classes.
public static final int NANO_SCHEDULE_MAX_RUNTIME = 60000;
// Map function: doubles its input.
private static class Map2x implements NSMapFunction<Integer, Integer> {
@Override public Integer apply(Integer input) { return input * 2; }
}
// Reduce function: sums inputs while asserting they arrive in strictly increasing order.
private static class ReduceSum implements NSReduceFunction<Integer, Integer> {
int prevOne = Integer.MIN_VALUE;
@Override public Integer apply(Integer one, Integer sum) {
Assert.assertTrue(prevOne < one, "Reduce came in out of order. Prev " + prevOne + " cur " + one);
return one + sum;
}
}
// Progress function that simply counts how many times it was invoked.
private static class ProgressCallback implements NSProgressFunction<Integer> {
int callBacks = 0;
@Override
public void progress(Integer lastMapInput) {
callBacks++;
}
}
// Directly computes sum of 2*i for i in [start, end) -- the expected map/reduce result.
private static int sum2x(final int start, final int end) {
int sum = 0;
for ( int i = start; i < end; i++ )
sum += 2 * i;
return sum;
}
// One test configuration: buffer size, thread count, and the [start, end) input range.
// NOTE(review): presumably the TestDataProvider superclass registers each instance on
// construction, which is why created instances below are not otherwise collected -- confirm.
private static class NanoSchedulerBasicTest extends TestDataProvider {
final int bufferSize, nThreads, start, end, expectedResult;
public NanoSchedulerBasicTest(final int bufferSize, final int nThreads, final int start, final int end) {
super(NanoSchedulerBasicTest.class);
this.bufferSize = bufferSize;
this.nThreads = nThreads;
this.start = start;
this.end = end;
this.expectedResult = sum2x(start, end);
setName(String.format("%s nt=%d buf=%d start=%d end=%d sum=%d",
getClass().getSimpleName(), nThreads, bufferSize, start, end, expectedResult));
}
// Fresh iterator over [start, end); each call builds a new list so tests can re-execute.
public Iterator<Integer> makeReader() {
final List<Integer> ints = new ArrayList<Integer>();
for ( int i = start; i < end; i++ )
ints.add(i);
return ints.iterator();
}
// Lower bound on expected progress callbacks: one per full input buffer.
public int nExpectedCallbacks() {
int nElements = Math.max(end - start, 0);
return nElements / bufferSize;
}
public Map2x makeMap() { return new Map2x(); }
public Integer initReduce() { return 0; }
public ReduceSum makeReduce() { return new ReduceSum(); }
}
// Last-constructed test; used by testShutdownExecuteFailure after the data provider has run.
static NanoSchedulerBasicTest exampleTest = null;
@DataProvider(name = "NanoSchedulerBasicTest")
public Object[][] createNanoSchedulerBasicTest() {
for ( final int bufferSize : Arrays.asList(1, 10, 1000, 1000000) ) {
for ( final int nt : Arrays.asList(1, 2, 4) ) {
for ( final int start : Arrays.asList(0) ) {
for ( final int end : Arrays.asList(0, 1, 2, 11, 10000, 100000) ) {
exampleTest = new NanoSchedulerBasicTest(bufferSize, nt, start, end);
}
}
}
}
return NanoSchedulerBasicTest.getTests(NanoSchedulerBasicTest.class);
}
// Runs only the single-threaded configurations first; multi-threaded tests depend on these.
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
public void testSingleThreadedNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
logger.warn("Running " + test);
if ( test.nThreads == 1 )
testNanoScheduler(test);
}
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME, dependsOnMethods = "testSingleThreadedNanoScheduler")
public void testMultiThreadedNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
logger.warn("Running " + test);
if ( test.nThreads >= 1 )
testNanoScheduler(test);
}
// Shared core: executes one map/reduce run and validates configuration, result, and callbacks.
private void testNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
final ProgressCallback callback = new ProgressCallback();
nanoScheduler.setProgressFunction(callback);
Assert.assertEquals(nanoScheduler.getInputBufferSize(), test.bufferSize, "inputBufferSize argument");
Assert.assertEquals(nanoScheduler.getnThreads(), test.nThreads, "nThreads argument");
final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
Assert.assertNotNull(sum);
Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly");
Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected. Expected at least " + test.nExpectedCallbacks() + " but saw only " + callback.callBacks);
nanoScheduler.shutdown();
}
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", dependsOnMethods = "testMultiThreadedNanoScheduler", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
public void testNanoSchedulerInLoop(final NanoSchedulerBasicTest test) throws InterruptedException {
if ( test.bufferSize > 1) {
logger.warn("Running " + test);
final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
// test reusing the scheduler
for ( int i = 0; i < 10; i++ ) {
final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
Assert.assertNotNull(sum);
Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly");
}
nanoScheduler.shutdown();
}
}
@Test(timeOut = NANO_SCHEDULE_MAX_RUNTIME)
public void testShutdown() throws InterruptedException {
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
Assert.assertFalse(nanoScheduler.isShutdown(), "scheduler should be alive");
nanoScheduler.shutdown();
Assert.assertTrue(nanoScheduler.isShutdown(), "scheduler should be dead");
}
// Executing after shutdown must fail fast with IllegalStateException.
@Test(expectedExceptions = IllegalStateException.class, timeOut = NANO_SCHEDULE_MAX_RUNTIME)
public void testShutdownExecuteFailure() throws InterruptedException {
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
nanoScheduler.shutdown();
nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
}
// Standalone debug entry point: args are [nThreads, nElements].
public static void main(String [ ] args) {
org.apache.log4j.Logger logger = org.apache.log4j.Logger.getRootLogger();
BasicConfigurator.configure();
logger.setLevel(org.apache.log4j.Level.DEBUG);
final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(1000, Integer.valueOf(args[0]), 0, Integer.valueOf(args[1]));
final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
nanoScheduler.setDebug(true);
final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
System.out.printf("Sum = %d, expected =%d%n", sum, test.expectedResult);
nanoScheduler.shutdown();
}
}

View File

@ -1,94 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.*;
/**
 * Unit tests for the ReducerThread: feeds pre-computed MapResult futures through
 * the reducer and verifies that every value is consumed, in job-ID order.
 * (The old javadoc incorrectly said "InputProducer".)
 *
 * User: depristo
 * Date: 8/24/12
 * Time: 11:25 AM
 */
public class ReducerThreadUnitTest extends BaseTest {
    @DataProvider(name = "ReducerThreadTest")
    public Object[][] createReducerThreadTest() {
        List<Object[]> tests = new ArrayList<Object[]>();

        for ( final int nElements : Arrays.asList(0, 1, 10, 100, 1000, 10000, 100000) ) {
            tests.add(new Object[]{ nElements });
        }

        return tests.toArray(new Object[][]{});
    }

    @Test(enabled = true, dataProvider = "ReducerThreadTest", timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME)
    public void testReducerThreadTest(final int nElements) throws Exception {
        // Happy path: values and job IDs both in ascending order 0..nElements-1.
        List<Integer> values = new ArrayList<Integer>(nElements);
        List<Integer> jobIDs = new ArrayList<Integer>(nElements);
        for ( int i = 0; i < nElements; i++ ) {
            values.add(i);
            jobIDs.add(i);
        }
        runTests(values, jobIDs);
    }

    @Test(enabled = true, timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME, expectedExceptions = ExecutionException.class)
    public void testReducerThreadTestByJobOrder() throws Exception {
        // Out-of-order job IDs (1, 3, 2) must surface as an ExecutionException.
        runTests(Arrays.asList(0, 1, 2), Arrays.asList(1, 3, 2));
    }

    /**
     * Builds a queue of already-completed MapResult futures (terminated by the
     * empty "last" MapResult sentinel), runs a ReducerThread over it, and checks
     * that every map value was read.
     */
    private void runTests( final List<Integer> mapValues, final List<Integer> jobIDs) throws Exception {
        final LinkedBlockingDeque<Future<MapResult<Integer>>> mapResultsQueue =
                new LinkedBlockingDeque<Future<MapResult<Integer>>>(mapValues.size()+1);

        for ( int i = 0; i < mapValues.size(); i++ ) {
            final int value = mapValues.get(i);
            final int jobID = jobIDs.get(i);
            final MapResult<Integer> mapResult = new MapResult<Integer>(value, jobID);
            mapResultsQueue.add(new FutureValue<MapResult<Integer>>(mapResult));
        }
        // Terminating sentinel: the no-arg MapResult marks end of input.
        mapResultsQueue.add(new FutureValue<MapResult<Integer>>(new MapResult<Integer>()));

        final ReduceSumTest reduce = new ReduceSumTest(mapResultsQueue);
        final ReducerThread<Integer, Integer> thread
                = new ReducerThread<Integer, Integer>(reduce, null, 0, mapResultsQueue);

        final ExecutorService es = Executors.newSingleThreadExecutor();
        try {
            final Future<Integer> value = es.submit(thread);
            value.get();
            Assert.assertEquals(reduce.nRead, mapValues.size());
        } finally {
            // Previously the executor was never shut down, leaking one thread per call.
            es.shutdown();
        }
    }

    /**
     * Summing reduce function that also asserts values arrive strictly in order
     * and without gaps, counting how many it has consumed.
     */
    public class ReduceSumTest implements NSReduceFunction<Integer, Integer> {
        final LinkedBlockingDeque<Future<MapResult<Integer>>> mapResultsQueue;
        int nRead = 0;
        int lastValue = -1;

        public ReduceSumTest(LinkedBlockingDeque<Future<MapResult<Integer>>> mapResultsQueue) {
            this.mapResultsQueue = mapResultsQueue;
        }

        @Override public Integer apply(Integer one, Integer sum) {
            // The original asserted lastValue < one twice with different messages;
            // a single assertion is sufficient.
            Assert.assertTrue(lastValue < one, "Reduce came in out of order. Prev " + lastValue + " cur " + one);
            final int expected = lastValue + 1;
            Assert.assertEquals((int)one, expected, "Value observed " + one + " not equal to the expected value " + expected);
            nRead++;
            lastValue = expected;
            return one + sum;
        }
    }
}

View File

@ -1,161 +0,0 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import org.broadinstitute.sting.BaseTest;
/**
 * Tests for ArtificialSingleSampleReadStream: brute-forces combinations of stream
 * parameters, analyzes the generated reads, and validates the observed properties
 * against the nominal ones; also checks that invalid constructor arguments are rejected.
 */
public class ArtificialSingleSampleReadStreamUnitTest extends BaseTest {
    /** One parameterization of the artificial stream, named by its settings. */
    private static class ArtificialSingleSampleReadStreamTest extends TestDataProvider {
        private ArtificialSingleSampleReadStream stream;
        private ArtificialSingleSampleReadStreamAnalyzer streamAnalyzer;

        public ArtificialSingleSampleReadStreamTest( ArtificialSingleSampleReadStream stream ) {
            super(ArtificialSingleSampleReadStreamTest.class);
            this.stream = stream;
            setName(String.format("%s: numContigs=%d stacksPerContig=%d readsPerStack=%d-%d distanceBetweenStacks=%d-%d readLength=%d-%d unmappedReads=%d",
                    getClass().getSimpleName(),
                    stream.getNumContigs(),
                    stream.getNumStacksPerContig(),
                    stream.getMinReadsPerStack(),
                    stream.getMaxReadsPerStack(),
                    stream.getMinDistanceBetweenStacks(),
                    stream.getMaxDistanceBetweenStacks(),
                    stream.getMinReadLength(),
                    stream.getMaxReadLength(),
                    stream.getNumUnmappedReads()));
        }

        public void run() {
            streamAnalyzer = new ArtificialSingleSampleReadStreamAnalyzer(stream);
            streamAnalyzer.analyze(stream);
            // Check whether the observed properties of the stream match its nominal properties
            streamAnalyzer.validate();
        }
    }

    @DataProvider(name = "ArtificialSingleSampleReadStreamTestDataProvider")
    public Object[][] createArtificialSingleSampleReadStreamTests() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 10000);
        String readGroupID = "testReadGroup";
        SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupID);
        readGroup.setSample("testSample");
        header.addReadGroup(readGroup);

        // Reset so stream generation is reproducible across runs.
        GenomeAnalysisEngine.resetRandomGenerator();

        // brute force testing!
        for ( int numContigs = 0; numContigs <= 2; numContigs++ ) {
            for ( int stacksPerContig = 0; stacksPerContig <= 2; stacksPerContig++ ) {
                for ( int minReadsPerStack = 1; minReadsPerStack <= 2; minReadsPerStack++ ) {
                    for ( int maxReadsPerStack = 1; maxReadsPerStack <= 3; maxReadsPerStack++ ) {
                        for ( int minDistanceBetweenStacks = 1; minDistanceBetweenStacks <= 2; minDistanceBetweenStacks++ ) {
                            for ( int maxDistanceBetweenStacks = 1; maxDistanceBetweenStacks <= 3; maxDistanceBetweenStacks++ ) {
                                for ( int minReadLength = 1; minReadLength <= 2; minReadLength++ ) {
                                    for ( int maxReadLength = 1; maxReadLength <= 3; maxReadLength++ ) {
                                        for ( int numUnmappedReads = 0; numUnmappedReads <= 2; numUnmappedReads++ ) {
                                            // Only test sane combinations here
                                            if ( minReadsPerStack <= maxReadsPerStack &&
                                                 minDistanceBetweenStacks <= maxDistanceBetweenStacks &&
                                                 minReadLength <= maxReadLength &&
                                                 ((numContigs > 0 && stacksPerContig > 0) || (numContigs == 0 && stacksPerContig == 0)) ) {
                                                // TestDataProvider registers each instance on construction.
                                                new ArtificialSingleSampleReadStreamTest(new ArtificialSingleSampleReadStream(header,
                                                                                                                              readGroupID,
                                                                                                                              numContigs,
                                                                                                                              stacksPerContig,
                                                                                                                              minReadsPerStack,
                                                                                                                              maxReadsPerStack,
                                                                                                                              minDistanceBetweenStacks,
                                                                                                                              maxDistanceBetweenStacks,
                                                                                                                              minReadLength,
                                                                                                                              maxReadLength,
                                                                                                                              numUnmappedReads));
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        return ArtificialSingleSampleReadStreamTest.getTests(ArtificialSingleSampleReadStreamTest.class);
    }

    @Test(dataProvider = "ArtificialSingleSampleReadStreamTestDataProvider")
    public void testArtificialSingleSampleReadStream( ArtificialSingleSampleReadStreamTest test ) {
        logger.warn("Running test: " + test);

        GenomeAnalysisEngine.resetRandomGenerator();
        test.run();
    }

    @DataProvider(name = "ArtificialSingleSampleReadStreamInvalidArgumentsTestDataProvider")
    public Object[][] createInvalidArgumentsTests() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 10000);
        String readGroupID = "testReadGroup";
        header.addReadGroup(new SAMReadGroupRecord(readGroupID));

        return new Object[][] {
            {"testNullHeader", null, readGroupID, 1, 1, 1, 2, 1, 2, 1, 2, 0},
            {"testNullReadGroup", header, null, 1, 1, 1, 2, 1, 2, 1, 2, 0},
            {"testInvalidReadGroup", header, "foo", 1, 1, 1, 2, 1, 2, 1, 2, 0},
            {"testInvalidNumContigs", header, readGroupID, -1, 1, 1, 2, 1, 2, 1, 2, 0},
            {"testInvalidNumStacksPerContig", header, readGroupID, 1, -1, 1, 2, 1, 2, 1, 2, 0},
            {"test0ContigsNon0StacksPerContig", header, readGroupID, 0, 1, 1, 2, 1, 2, 1, 2, 0},
            {"testNon0Contigs0StacksPerContig", header, readGroupID, 1, 0, 1, 2, 1, 2, 1, 2, 0},
            {"testInvalidMinReadsPerStack", header, readGroupID, 1, 1, -1, 2, 1, 2, 1, 2, 0},
            {"testInvalidMaxReadsPerStack", header, readGroupID, 1, 1, 1, -2, 1, 2, 1, 2, 0},
            {"testInvalidMinDistanceBetweenStacks", header, readGroupID, 1, 1, 1, 2, -1, 2, 1, 2, 0},
            {"testInvalidMaxDistanceBetweenStacks", header, readGroupID, 1, 1, 1, 2, 1, -2, 1, 2, 0},
            {"testInvalidMinReadLength", header, readGroupID, 1, 1, 1, 2, 1, 2, -1, 2, 0},
            {"testInvalidMaxReadLength", header, readGroupID, 1, 1, 1, 2, 1, 2, 1, -2, 0},
            {"testInvalidReadsPerStackRange", header, readGroupID, 1, 1, 2, 1, 1, 2, 1, 2, 0},
            {"testInvalidDistanceBetweenStacksRange", header, readGroupID, 1, 1, 1, 2, 2, 1, 1, 2, 0},
            {"testInvalidReadLengthRange", header, readGroupID, 1, 1, 1, 2, 1, 2, 2, 1, 0},
            {"testInvalidNumUnmappedReads", header, readGroupID, 1, 1, 1, 2, 1, 2, 1, 2, -1},
        };
    }

    @Test(dataProvider = "ArtificialSingleSampleReadStreamInvalidArgumentsTestDataProvider",
          expectedExceptions = ReviewedStingException.class)
    public void testInvalidArguments( String testName,
                                      SAMFileHeader header,
                                      String readGroupID,
                                      int numContigs,
                                      int numStacksPerContig,
                                      int minReadsPerStack,
                                      int maxReadsPerStack,
                                      int minDistanceBetweenStacks,
                                      int maxDistanceBetweenStacks,
                                      int minReadLength,
                                      int maxReadLength,
                                      int numUnmappedReads ) {
        logger.warn("Running test: " + testName);

        // The construction itself is expected to throw; the result was previously
        // assigned to an unused local, which has been removed.
        new ArtificialSingleSampleReadStream(header,
                                             readGroupID,
                                             numContigs,
                                             numStacksPerContig,
                                             minReadsPerStack,
                                             maxReadsPerStack,
                                             minDistanceBetweenStacks,
                                             maxDistanceBetweenStacks,
                                             minReadLength,
                                             maxReadLength,
                                             numUnmappedReads);
    }
}

View File

@ -1,184 +0,0 @@
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.threading;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Tests for the state monitoring thread factory: drives worker threads into
 * USER_CPU, WAITING, and BLOCKING states and checks that the factory's
 * per-state time accounting matches the expected fractions within tolerance.
 */
public class EfficiencyMonitoringThreadFactoryUnitTest extends BaseTest {
    // Duration (ms) each test thread spends in its target state.
    // NOTE(review): the old comment claimed "100 ms is tolerable" but the value is
    // 100000 ms (100 seconds) -- confirm which was intended.
    private final static long THREAD_TARGET_DURATION_IN_MILLISECOND = 100000;
    private final static int MAX_THREADS = 4;
    // Lock held by the test body so that BLOCKING threads genuinely block on it.
    final static Object GLOBAL_LOCK = new Object();

    /** One test case: the state each of N worker threads should occupy. */
    private class StateTest extends TestDataProvider {
        private final double TOLERANCE = 0.1; // willing to tolerate a 10% error

        final List<EfficiencyMonitoringThreadFactory.State> statesForThreads;

        public StateTest(final List<EfficiencyMonitoringThreadFactory.State> statesForThreads) {
            super(StateTest.class);
            this.statesForThreads = statesForThreads;
            setName("StateTest " + Utils.join(",", statesForThreads));
        }

        public List<EfficiencyMonitoringThreadFactory.State> getStatesForThreads() {
            return statesForThreads;
        }

        public int getNStates() { return statesForThreads.size(); }

        // Acceptable bounds (percent) for time spent in a state: expected fraction +/- tolerance.
        public double maxStatePercent(final EfficiencyMonitoringThreadFactory.State state) { return 100*(fraction(state) + TOLERANCE); }
        public double minStatePercent(final EfficiencyMonitoringThreadFactory.State state) { return 100*(fraction(state) - TOLERANCE); }

        private double fraction(final EfficiencyMonitoringThreadFactory.State state) {
            return Collections.frequency(statesForThreads, state) / (1.0 * statesForThreads.size());
        }
    }

    /**
     * Test helper threading class that puts the thread into RUNNING, BLOCKED, or WAITING state as
     * requested for input argument
     */
    private static class StateTestThread implements Callable<Double> {
        private final EfficiencyMonitoringThreadFactory.State stateToImplement;

        private StateTestThread(final EfficiencyMonitoringThreadFactory.State stateToImplement) {
            this.stateToImplement = stateToImplement;
        }

        @Override
        public Double call() throws Exception {
            switch ( stateToImplement ) {
                case USER_CPU:
                    // do some work until we get to THREAD_TARGET_DURATION_IN_MILLISECOND
                    double sum = 0.0;
                    final long startTime = System.currentTimeMillis();
                    for ( int i = 1; System.currentTimeMillis() - startTime < (THREAD_TARGET_DURATION_IN_MILLISECOND - 1); i++ ) {
                        sum += Math.log10(i);
                    }
                    return sum;
                case WAITING:
                    // Thread.sleep is static; the original called it through
                    // Thread.currentThread(), which misleadingly suggests it sleeps
                    // a particular thread instance.
                    Thread.sleep(THREAD_TARGET_DURATION_IN_MILLISECOND);
                    return 0.0;
                case BLOCKING:
                    if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn("Blocking...");
                    synchronized (GLOBAL_LOCK) {
                        // the GLOBAL_LOCK must be held by the unit test itself for this to properly block
                        if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking");
                    }
                    return 0.0;
                case WAITING_FOR_IO:
                    // TODO -- implement me
                    // shouldn't ever get here, throw an exception
                    throw new ReviewedStingException("WAITING_FOR_IO testing currently not implemented, until we figure out how to force a system call block");
                default:
                    throw new ReviewedStingException("Unexpected thread test state " + stateToImplement);
            }
        }
    }

    @DataProvider(name = "StateTest")
    public Object[][] createStateTest() {
        for ( final int nThreads : Arrays.asList(3) ) {
            // WAITING_FOR_IO is excluded: it is not yet implementable (see StateTestThread).
            final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.USER_CPU, EfficiencyMonitoringThreadFactory.State.WAITING, EfficiencyMonitoringThreadFactory.State.BLOCKING);
            for (final List<EfficiencyMonitoringThreadFactory.State> states : Utils.makePermutations(allStates, nThreads, true) ) {
                new StateTest(states);
            }
        }

        return StateTest.getTests(StateTest.class);
    }

    @Test(enabled = true, dataProvider = "StateTest", timeOut = MAX_THREADS * THREAD_TARGET_DURATION_IN_MILLISECOND)
    public void testStateTest(final StateTest test) throws InterruptedException {
        // allows us to test blocking
        final EfficiencyMonitoringThreadFactory factory = new EfficiencyMonitoringThreadFactory(test.getNStates());
        final ExecutorService threadPool = Executors.newFixedThreadPool(test.getNStates(), factory);

        logger.warn("Running " + test);
        synchronized (GLOBAL_LOCK) {
            for ( final EfficiencyMonitoringThreadFactory.State threadToRunState : test.getStatesForThreads() )
                threadPool.submit(new StateTestThread(threadToRunState));

            // lock has to be here for the whole running of the activeThreads but end before the sleep so the blocked activeThreads
            // can block for their allotted time
            threadPool.shutdown();
            Thread.sleep(THREAD_TARGET_DURATION_IN_MILLISECOND);
        }
        threadPool.awaitTermination(10, TimeUnit.SECONDS);
        factory.waitForAllThreadsToComplete();

        // make sure we counted everything properly
        final long totalTime = factory.getTotalTime();
        final long minTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 0.5) * test.getNStates();
        final long maxTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 1.5) * test.getNStates();
        Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime);
        Assert.assertTrue(totalTime <= maxTime, "Factory results not properly accumulated: totalTime = " + totalTime + " > maxTime = " + maxTime);

        for (final EfficiencyMonitoringThreadFactory.State state : EfficiencyMonitoringThreadFactory.State.values() ) {
            final double min = test.minStatePercent(state);
            final double max = test.maxStatePercent(state);
            final double obs = factory.getStatePercent(state);
            Assert.assertTrue(obs >= min, "Too little time spent in state " + state + " obs " + obs + " min " + min);
            // The failure message previously printed the min bound here instead of max.
            Assert.assertTrue(obs <= max, "Too much time spent in state " + state + " obs " + obs + " max " + max);
        }

        // we actually ran the expected number of activeThreads
        Assert.assertEquals(factory.getNThreadsCreated(), test.getNStates());

        // should be called to ensure we don't format / NPE on output
        factory.printUsageInformation(logger, Priority.WARN);
    }
}