Resolving merge conflicts.

This commit is contained in:
Ryan Poplin 2012-08-31 11:25:55 -04:00
commit ff6ebbf3fd
20 changed files with 323 additions and 95 deletions

View File

@ -262,7 +262,7 @@ public class SAMDataSource {
else {
// Choose a sensible default for the read buffer size. For the moment, we're picking 10000 reads per BAM per shard (which effectively
// will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once.
ReadShard.setReadBufferSize(Math.min(1000*samFiles.size(),250000));
ReadShard.setReadBufferSize(Math.min(10000*samFiles.size(),250000));
}
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);

View File

@ -100,22 +100,29 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* @return The best-fit microscheduler.
*/
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
if (walker instanceof TreeReducible && threadAllocation.getNumCPUThreads() > 1) {
if(walker.isReduceByInterval())
if (threadAllocation.getNumCPUThreads() > 1) {
if (walker.isReduceByInterval())
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",threadAllocation.getNumCPUThreads()));
if ( walker instanceof ReadWalker )
if ( walker instanceof ReadWalker ) {
if ( ! (walker instanceof ThreadSafeMapReduce) ) badNT(engine, walker);
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
else
} else {
// TODO -- update test for when nano scheduling only is an option
if ( ! (walker instanceof TreeReducible) ) badNT(engine, walker);
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
}
} else {
if(threadAllocation.getNumCPUThreads() > 1)
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
}
}
private static void badNT(final GenomeAnalysisEngine engine, final Walker walker) {
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
}
/**
* Create a microscheduler given the reads and reference.
*

View File

@ -25,9 +25,13 @@
package org.broadinstitute.sting.gatk.filters;
import com.google.common.base.Function;
import com.google.common.collect.Collections2;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import java.util.Collection;
import java.util.List;
/**
* Manage filters and filter options. Any requests for basic filtering classes
@ -54,4 +58,26 @@ public class FilterManager extends PluginManager<ReadFilter> {
public Collection<Class<? extends ReadFilter>> getValues() {
return this.getPlugins();
}
/**
* Rather than use the default error message, print out a list of read filters as well.
* @param pluginCategory - string, the category of the plugin (e.g. read filter)
* @param pluginName - string, what we were trying to match (but failed to)
* @return - A wall of text with the default message, followed by a listing of available read filters
*/
@Override
protected String formatErrorMessage(String pluginCategory, String pluginName) {
List<Class<? extends ReadFilter>> availableFilters = this.getPluginsImplementing(ReadFilter.class);
Collection<String> availableFilterNames = Collections2.transform(availableFilters, new Function<Class<? extends ReadFilter>,String>(){
@Override
public String apply(final Class<? extends ReadFilter> input) {
return getName(input);
}
});
return String.format("Read filter %s not found. Available read filters:%n%s.%n%n%s",pluginName,
Utils.join(String.format(", "),availableFilterNames),
"Please consult the GATK Documentation (http://www.broadinstitute.org/gatk/gatkdocs/) for more information.");
}
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -48,9 +47,9 @@ public class VerifyingSamIterator implements StingSAMIterator {
if(cur.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX || cur.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START)
throw new UserException.MalformedBAM(last,String.format("read %s has inconsistent mapping information.",cur.format()));
GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last );
GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur );
return curLoc.compareTo(lastLoc) == -1;
return (last.getReferenceIndex() > cur.getReferenceIndex()) ||
(last.getReferenceIndex().equals(cur.getReferenceIndex()) &&
last.getAlignmentStart() > cur.getAlignmentStart());
}
}

View File

@ -27,16 +27,21 @@ package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.ArrayList;
import java.util.List;
/**
* @author aaron
* @version 1.0
@ -50,12 +55,13 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
/** our log, which we want to capture anything from this class */
protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
private static final boolean DEBUG = false;
final NanoScheduler<SAMRecord, M, T> nanoScheduler;
private static final int MIN_GROUP_SIZE = 100;
final NanoScheduler<MapData, M, T> nanoScheduler;
public TraverseReadsNano(int nThreads) {
final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
final int mapGroupSize = bufferSize / 10 + 1;
nanoScheduler = new NanoScheduler<SAMRecord, M, T>(bufferSize, mapGroupSize, nThreads);
final int mapGroupSize = (int)Math.max(Math.ceil(bufferSize / 50.0 + 1), MIN_GROUP_SIZE);
nanoScheduler = new NanoScheduler<MapData, M, T>(bufferSize, mapGroupSize, nThreads);
}
@Override
@ -79,24 +85,42 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
if( !dataProvider.hasReads() )
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
if ( dataProvider.hasReferenceOrderedData() )
throw new ReviewedStingException("Parallel read walkers currently don't support access to reference ordered data");
final ReadView reads = new ReadView(dataProvider);
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
nanoScheduler.setDebug(DEBUG);
final TraverseReadsMap myMap = new TraverseReadsMap(reads, reference, rodView, walker);
final TraverseReadsMap myMap = new TraverseReadsMap(walker);
final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
T result = nanoScheduler.execute(reads.iterator().iterator(), myMap, sum, myReduce);
T result = nanoScheduler.execute(aggregateMapData(dataProvider).iterator(), myMap, sum, myReduce);
// TODO -- how do we print progress?
//printProgress(dataProvider.getShard(), ???);
return result;
}
private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
final ReadView reads = new ReadView(dataProvider);
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
final List<MapData> mapData = new ArrayList<MapData>(); // TODO -- need size of reads
for ( final SAMRecord read : reads ) {
final ReferenceContext refContext = ! read.getReadUnmappedFlag()
? reference.getReferenceContext(read)
: null;
// if the read is mapped, create a metadata tracker
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0
? rodView.getReferenceOrderedDataForRead(read)
: null;
// update the number of reads we've seen
dataProvider.getShard().getReadMetrics().incrementNumIterations();
mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker));
}
return mapData;
}
@Override
public void printOnTraversalDone() {
nanoScheduler.shutdown();
@ -116,36 +140,31 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
}
}
private class TraverseReadsMap implements MapFunction<SAMRecord, M> {
final ReadView reads;
final ReadReferenceView reference;
final ReadBasedReferenceOrderedView rodView;
private class MapData {
final GATKSAMRecord read;
final ReferenceContext refContext;
final RefMetaDataTracker tracker;
private MapData(GATKSAMRecord read, ReferenceContext refContext, RefMetaDataTracker tracker) {
this.read = read;
this.refContext = refContext;
this.tracker = tracker;
}
}
private class TraverseReadsMap implements MapFunction<MapData, M> {
final ReadWalker<M,T> walker;
private TraverseReadsMap(ReadView reads, ReadReferenceView reference, ReadBasedReferenceOrderedView rodView, ReadWalker<M, T> walker) {
this.reads = reads;
this.reference = reference;
this.rodView = rodView;
private TraverseReadsMap(ReadWalker<M, T> walker) {
this.walker = walker;
}
@Override
public M apply(final SAMRecord read) {
public M apply(final MapData data) {
if ( ! walker.isDone() ) {
// ReferenceContext -- the reference bases covered by the read
final ReferenceContext refContext = ! read.getReadUnmappedFlag() && reference != null
? reference.getReferenceContext(read)
: null;
// update the number of reads we've seen
//dataProvider.getShard().getReadMetrics().incrementNumIterations();
// if the read is mapped, create a metadata tracker
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
final boolean keepMeP = walker.filter(data.refContext, data.read);
if (keepMeP) {
return walker.map(refContext, (GATKSAMRecord) read, tracker);
return walker.map(data.refContext, data.read, data.tracker);
}
}

View File

@ -45,7 +45,7 @@ import java.text.NumberFormat;
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements TreeReducible<FlagStat.FlagStatus> {
public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements ThreadSafeMapReduce {
@Output
PrintStream out;
@ -193,11 +193,6 @@ public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatu
return sum.add(value);
}
@Override
public FlagStatus treeReduce(final FlagStatus value, final FlagStatus sum) {
return sum.add(value);
}
@Override
public void onTraversalDone(final FlagStatus result) {
out.println(result.toString());

View File

@ -93,7 +93,7 @@ import java.util.TreeSet;
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements TreeReducible<SAMFileWriter> {
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements ThreadSafeMapReduce {
@Output(doc="Write output to this BAM filename instead of STDOUT", required = true)
SAMFileWriter out;
@ -245,9 +245,4 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
output.addAlignment(read);
return output;
}
@Override
public SAMFileWriter treeReduce(SAMFileWriter lhs, SAMFileWriter rhs) {
return lhs; // nothing to do
}
}

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers;
/**
* Root parallelism interface. Walkers that implement this
* declare that their map function is thread-safe and so multiple
* map calls can be run in parallel in the same JVM instance.
*/
public interface ThreadSafeMapReduce {
}

View File

@ -13,7 +13,7 @@ package org.broadinstitute.sting.gatk.walkers;
* shards of the data can reduce with each other, and the composite result
* can be reduced with other composite results.
*/
public interface TreeReducible<ReduceType> {
public interface TreeReducible<ReduceType> extends ThreadSafeMapReduce {
/**
* A composite, 'reduce of reduces' function.
* @param lhs 'left-most' portion of data in the composite reduce.

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.ThreadSafeMapReduce;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -41,12 +41,11 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReads extends ReadWalker<Integer, Integer> implements TreeReducible<Integer> {
public class CountReads extends ReadWalker<Integer, Integer> implements ThreadSafeMapReduce {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
return 1;
}
@Override public Integer reduceInit() { return 0; }
@Override public Integer reduce(Integer value, Integer sum) { return value + sum; }
@Override public Integer treeReduce(Integer lhs, Integer rhs) { return lhs + rhs; }
}

View File

@ -39,11 +39,11 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import java.io.File;
import java.util.*;
@ -218,7 +218,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
String filterString = null;
// Annotate the new record with its VQSLOD and the worst performing annotation
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lodString); // use the String representation so that we don't lose precision on output
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
for( int i = tranches.size() - 1; i >= 0; i-- ) {

View File

@ -286,7 +286,6 @@ public class VariantDataManager {
case INDEL:
case MIXED:
case SYMBOLIC:
case STRUCTURAL_INDEL:
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
default:
return false;

View File

@ -427,7 +427,10 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
result = cmpContig;
} else {
if ( this.getStart() < that.getStart() ) result = -1;
if ( this.getStart() > that.getStart() ) result = 1;
else if ( this.getStart() > that.getStart() ) result = 1;
// these have the same start, so check the ends
else if ( this.getStop() < that.getStop() ) result = -1;
else if ( this.getStop() > that.getStop() ) result = 1;
}
}

View File

@ -277,7 +277,7 @@ public class PluginManager<PluginType> {
public PluginType createByName(String pluginName) {
Class<? extends PluginType> plugin = pluginsByName.get(pluginName);
if( plugin == null )
throw new UserException(String.format("Could not find %s with name: %s", pluginCategory,pluginName));
throw new UserException(formatErrorMessage(pluginCategory,pluginName));
try {
return plugin.newInstance();
} catch (Exception e) {
@ -330,4 +330,14 @@ public class PluginManager<PluginType> {
return pluginName;
}
/**
* Generate the error message for the plugin manager. The message is allowed to depend on the class.
* @param pluginCategory - string, the category of the plugin (e.g. read filter)
* @param pluginName - string, what we were trying to match (but failed to)
* @return error message text describing the error
*/
protected String formatErrorMessage(String pluginCategory, String pluginName ) {
return String.format("Could not find %s with name: %s", pluginCategory,pluginName);
}
}

View File

@ -43,7 +43,8 @@ import java.util.concurrent.*;
* Time: 9:47 AM
*/
public class NanoScheduler<InputType, MapType, ReduceType> {
private static Logger logger = Logger.getLogger(NanoScheduler.class);
private final static Logger logger = Logger.getLogger(NanoScheduler.class);
private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
final int bufferSize;
final int mapGroupSize;
@ -79,7 +80,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
this.mapGroupSize = mapGroupSize;
}
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads - 1);
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
}
/**
@ -172,7 +173,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
if ( getnThreads() == 1 ) {
if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
return executeSingleThreaded(inputReader, map, initialValue, reduce);
} else {
return executeMultiThreaded(inputReader, map, initialValue, reduce);

View File

@ -457,7 +457,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
SNP,
MNP, // a multi-nucleotide polymorphism
INDEL,
STRUCTURAL_INDEL,
SYMBOLIC,
MIXED,
}
@ -531,7 +530,17 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
public boolean isStructuralIndel() {
return getType() == Type.STRUCTURAL_INDEL;
if ( getType() == Type.INDEL ) {
List<Integer> sizes = getIndelLengths();
if ( sizes != null ) {
for ( Integer length : sizes ) {
if ( length > MAX_ALLELE_SIZE_FOR_NON_SV ) {
return true;
}
}
}
}
return false;
}
/**
@ -716,7 +725,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
* @return a list of indel lengths ( null if not of type indel or mixed )
*/
public List<Integer> getIndelLengths() {
if ( getType() != Type.INDEL && getType() != Type.MIXED && getType() != Type.STRUCTURAL_INDEL ) {
if ( getType() != Type.INDEL && getType() != Type.MIXED ) {
return null;
}
@ -1263,13 +1272,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
// is reserved for cases of multiple alternate alleles of different types). Therefore, if we've reached this point
// in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL.
// Because a number of structural variation callers write the whole alternate allele into the VCF where possible,
// this can result in insertion/deletion alleles of structural variant size, e.g. 151+. As of July 2012, we now
// classify these as structural events, rather than indel events, as we think differently about the mechanism,
// representation, and handling of these events. Check for this case here:
if ( ref.length() > MAX_ALLELE_SIZE_FOR_NON_SV || allele.length() > MAX_ALLELE_SIZE_FOR_NON_SV )
return Type.STRUCTURAL_INDEL;
return Type.INDEL;
// old incorrect logic:

View File

@ -121,7 +121,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
// test in the presence of a large spanning element
{
List<Feature> oneLargeSpan = new ArrayList<Feature>(handPickedFeatures);
oneLargeSpan.add(new BasicFeature(contig, 1, 100));
oneLargeSpan.add(new BasicFeature(contig, 1, 30));
createTestsForFeatures(oneLargeSpan);
}
@ -135,7 +135,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
// test in the presence of a partially spanning element at the end
{
List<Feature> partialSpanEnd = new ArrayList<Feature>(handPickedFeatures);
partialSpanEnd.add(new BasicFeature(contig, 10, 100));
partialSpanEnd.add(new BasicFeature(contig, 10, 30));
createTestsForFeatures(partialSpanEnd);
}
@ -165,7 +165,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
int featuresStart = 1; for ( final Feature f : features ) featuresStart = Math.min(featuresStart, f.getStart());
int featuresStop = 1; for ( final Feature f : features ) featuresStop = Math.max(featuresStop, f.getEnd());
for ( final int size : Arrays.asList(1, 5, 10, 100, 1000) ) {
for ( final int size : Arrays.asList(1, 5, 10, 100) ) {
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
// regularly spaced
for ( int start = featuresStart; start < featuresStop; start++) {
@ -256,11 +256,12 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
}
// all 3 way pairwise tests
for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
tests.add(new Object[]{singleTest, testStateless});
}
//for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
// tests.add(new Object[]{singleTest, testStateless});
//}
}
logger.warn("Creating " + tests.size() + " tests for ReadMetaDataTrackerTests");
return tests.toArray(new Object[][]{});
}

View File

@ -19,6 +19,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
@ -255,6 +256,90 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
}
}
////////////////////////////////////////////
// comprehensive LIBS/PileupElement tests //
////////////////////////////////////////////
private static final int IS_BEFORE_DELETED_BASE_FLAG = 1;
private static final int IS_BEFORE_DELETION_START_FLAG = 2;
private static final int IS_AFTER_DELETED_BASE_FLAG = 4;
private static final int IS_AFTER_DELETION_END_FLAG = 8;
private static final int IS_BEFORE_INSERTION_FLAG = 16;
private static final int IS_AFTER_INSERTION_FLAG = 32;
private static final int IS_NEXT_TO_SOFTCLIP_FLAG = 64;
private static class LIBSTest {
final String cigar;
final int readLength;
final List<Integer> offsets;
final List<Integer> flags;
private LIBSTest(final String cigar, final int readLength, final List<Integer> offsets, final List<Integer> flags) {
this.cigar = cigar;
this.readLength = readLength;
this.offsets = offsets;
this.flags = flags;
}
}
@DataProvider(name = "LIBSTest")
public Object[][] createLIBSTestData() {
return new Object[][]{
{new LIBSTest("1I", 1, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
{new LIBSTest("10I", 10, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
{new LIBSTest("2M2I2M", 6, Arrays.asList(0,1,4,5), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG,IS_AFTER_INSERTION_FLAG,0))},
{new LIBSTest("2M2I", 4, Arrays.asList(0,1), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG))},
//TODO -- uncomment these when LIBS is fixed
//{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
//{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
//{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
{new LIBSTest("1M2D2M", 3, Arrays.asList(0,1,2), Arrays.asList(IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG,0))},
{new LIBSTest("1S1M", 2, Arrays.asList(1), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
{new LIBSTest("1M1S", 2, Arrays.asList(0), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
{new LIBSTest("1S1M1I", 3, Arrays.asList(1), Arrays.asList(IS_BEFORE_INSERTION_FLAG | IS_NEXT_TO_SOFTCLIP_FLAG))}
};
}
@Test(dataProvider = "LIBSTest")
public void testLIBS(LIBSTest params) {
final int locus = 44367788;
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
read.setCigarString(params.cigar);
// create the iterator by state with the fake reads and fake records
li = makeLTBS(Arrays.asList(read), createTestReadProperties());
int offset = 0;
while ( li.hasNext() ) {
AlignmentContext alignmentContext = li.next();
ReadBackedPileup p = alignmentContext.getBasePileup();
Assert.assertTrue(p.getNumberOfElements() == 1);
PileupElement pe = p.iterator().next();
final int flag = params.flags.get(offset);
Assert.assertEquals(pe.isBeforeDeletedBase(), (flag & IS_BEFORE_DELETED_BASE_FLAG) != 0);
Assert.assertEquals(pe.isBeforeDeletionStart(), (flag & IS_BEFORE_DELETION_START_FLAG) != 0);
Assert.assertEquals(pe.isAfterDeletedBase(), (flag & IS_AFTER_DELETED_BASE_FLAG) != 0);
Assert.assertEquals(pe.isAfterDeletionEnd(), (flag & IS_AFTER_DELETION_END_FLAG) != 0);
Assert.assertEquals(pe.isBeforeInsertion(), (flag & IS_BEFORE_INSERTION_FLAG) != 0);
Assert.assertEquals(pe.isAfterInsertion(), (flag & IS_AFTER_INSERTION_FLAG) != 0);
Assert.assertEquals(pe.isNextToSoftClip(), (flag & IS_NEXT_TO_SOFTCLIP_FLAG) != 0);
Assert.assertEquals(pe.getOffset(), params.offsets.get(offset).intValue());
offset++;
}
}
////////////////////////////////////////////////
// End comprehensive LIBS/PileupElement tests //
////////////////////////////////////////////////
private static ReadProperties createTestReadProperties() {
return new ReadProperties(
Collections.<SAMReaderID>emptyList(),

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
import java.util.Arrays;
public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
private static class VRTest {
@ -28,7 +28,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
"f360ce3eb2b0b887301be917a9843e2b", // tranches
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
"afa297c743437551cc2bd36ddd6d6d75"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {
@ -77,7 +77,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
"a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches
"74c10fc15f9739a938b7138909fbde04", // recal file
"62fda105e14b619a1c263855cf56af1d"); // cut VCF
"c30d163871a37f2bbf8ee7f761e870b4"); // cut VCF
@DataProvider(name = "VRBCFTest")
public Object[][] createVRBCFTest() {
@ -129,13 +129,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"64f576881e21323dd4078262604717a2"); // cut VCF
"b2c6827be592c24a4692b1753edc7d23"); // cut VCF
VRTest indelFiltered = new VRTest(
validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"af22c55d91394c56a222fd40d6d54781"); // cut VCF
"5d483fe1ba2ef36ee9e6c14cbd654706"); // cut VCF
@DataProvider(name = "VRIndelTest")
public Object[][] createTestVariantRecalibratorIndel() {
@ -193,7 +193,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -o %s" +
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
Arrays.asList("ec519e1f01459813dab57aefffc019e2"));
Arrays.asList("018b3a5cc7cf0cb5468c6a0c80ccaa8b"));
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
}
}

View File

@ -16,6 +16,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -211,4 +212,59 @@ public class GenomeLocUnitTest extends BaseTest {
Assert.assertEquals(cfg.gl1.reciprocialOverlapFraction(cfg.gl2), cfg.overlapFraction);
}
}
// -------------------------------------------------------------------------------------
//
// testing comparison, hashcode, and equals
//
// -------------------------------------------------------------------------------------
@DataProvider(name = "GenomeLocComparisons")
public Object[][] createGenomeLocComparisons() {
List<Object[]> tests = new ArrayList<Object[]>();
final int start = 10;
for ( int stop = start; stop < start + 3; stop++ ) {
final GenomeLoc g1 = genomeLocParser.createGenomeLoc("chr2", start, stop);
for ( final String contig : Arrays.asList("chr1", "chr2", "chr3")) {
for ( int start2 = start - 1; start2 <= stop + 1; start2++ ) {
for ( int stop2 = start2; stop2 < stop + 2; stop2++ ) {
final GenomeLoc g2 = genomeLocParser.createGenomeLoc(contig, start2, stop2);
ComparisonResult cmp = ComparisonResult.EQUALS;
if ( contig.equals("chr3") ) cmp = ComparisonResult.LESS_THAN;
else if ( contig.equals("chr1") ) cmp = ComparisonResult.GREATER_THAN;
else if ( start < start2 ) cmp = ComparisonResult.LESS_THAN;
else if ( start > start2 ) cmp = ComparisonResult.GREATER_THAN;
else if ( stop < stop2 ) cmp = ComparisonResult.LESS_THAN;
else if ( stop > stop2 ) cmp = ComparisonResult.GREATER_THAN;
tests.add(new Object[]{g1, g2, cmp});
}
}
}
}
return tests.toArray(new Object[][]{});
}
private enum ComparisonResult {
LESS_THAN(-1),
EQUALS(0),
GREATER_THAN(1);
final int cmp;
private ComparisonResult(int cmp) {
this.cmp = cmp;
}
}
@Test(dataProvider = "GenomeLocComparisons")
public void testGenomeLocComparisons(GenomeLoc g1, GenomeLoc g2, ComparisonResult expected) {
Assert.assertEquals(g1.compareTo(g2), expected.cmp, "Comparing genome locs failed");
Assert.assertEquals(g1.equals(g2), expected == ComparisonResult.EQUALS);
if ( expected == ComparisonResult.EQUALS )
Assert.assertEquals(g1.hashCode(), g2.hashCode(), "Equal genome locs don't have the same hash code");
}
}