Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable

2012-11-06 10:21:17 -05:00 · 2012-11-06 10:21:17 -05:00 · f78d8fa157
parent ec17bb1a75 bff7803f3a
commit f78d8fa157
34 changed files with 532 additions and 669 deletions
--- a/build.xml
+++ b/build.xml
@ -891,7 +891,7 @@


    <!-- Build a package consisting of all supporting files. Don't call this target directly. Call one of the specific packaging targets below -->
-    <target name="package" depends="dist,stage,require.executable" description="bundle up an executable for distribution">
+    <target name="package" depends="require.clean,dist,stage,require.executable" description="bundle up an executable for distribution">
      <mkdir dir="${package.output.dir}" />
      <xslt destdir="${package.output.dir}" style="${package.xml.dir}/CreatePackager.xsl" useImplicitFileset="false">
          <flattenmapper/>
@ -1011,6 +1011,24 @@
        <delete dir="${pipelinetest.dir}"/>
    </target>

+    <!-- Depend on this target if your target requires a clean working directory but you don't want to depend on clean directly -->
+    <target name="require.clean">
+        <condition property="not.clean">
+            <or>
+                <available file="${build.dir}" />
+                <available file="${lib.dir}" />
+                <available file="${contract.dump.dir}" />
+                <available file="${staging.dir}" />
+                <available file="${dist.dir}" />
+                <available file="${pipelinetest.dir}" />
+                <available file="${javadoc.dir}" />
+                <available file="${scaladoc.dir}" />
+                <available file="${gatkdocs.dir}" />
+            </or>
+        </condition>
+        <fail message="Selected build target requires a clean working directory. Run ant clean and then try again." if="not.clean" />
+    </target>
+

    <!-- ******************************************************************************** -->
    <!-- gsalib                                                                           -->
@ -1174,14 +1192,16 @@
            <echo message="" />
            <echo message="Sting: Running @{testtype} test cases!"/>

+	    <!-- no test is allowed to run for more than 10 hours -->
            <taskdef resource="testngtasks" classpath="${testng.jar}"/>
            <testng outputDir="@{outputdir}"
                    classpathref="${testng.classpath}"
                    haltOnFailure="false" failureProperty="test.failure"
                    verbose="2"
+		            timeout="36000000"
                    workingDir="${basedir}"
                    useDefaultListeners="false"
-                    listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
+                    listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
                <jvmarg value="-Xmx${test.maxmemory}" />
                <jvmarg value="-ea" />
                <jvmarg value="-Djava.awt.headless=true" />
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@ -26,7 +26,6 @@
 package org.broadinstitute.sting.gatk.walkers.haplotypecaller;

 import com.google.java.contract.Ensures;
-import net.sf.picard.reference.IndexedFastaSequenceFile;
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -41,7 +40,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
-import org.broadinstitute.sting.gatk.walkers.genotyper.*;
+import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
+import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
@ -212,7 +214,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
    private VariantAnnotatorEngine annotationEngine;

    // fasta reference reader to supplement the edges of the reference sequence
-    private IndexedFastaSequenceFile referenceReader;
+    private CachingIndexedFastaSequenceFile referenceReader;

    // reference base padding size
    private static final int REFERENCE_PADDING = 900;
@ -324,15 +326,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
                }
            }
            if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
-                return new ActivityProfileResult(1.0);
+                return new ActivityProfileResult(ref.getLocus(), 1.0);
            }
        }

        if( USE_ALLELES_TRIGGER ) {
-            return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
+            return new ActivityProfileResult( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
        }

-        if( context == null ) { return new ActivityProfileResult(0.0); }
+        if( context == null ) { return new ActivityProfileResult(ref.getLocus(), 0.0); }

        final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
        noCall.add(Allele.NO_CALL);
@ -369,7 +371,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
        final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
        final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );

-        return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
+        return new ActivityProfileResult( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
    }

    //---------------------------------------------------------------------------------------------------------------
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerSingleSampleSymbolic() {
-        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a");
+        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "d86fae2d1b504b422b7b0cfbbdecc2c4");
    }

    private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -69,8 +69,8 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void HCTestProblematicReadsModifiedInActiveRegions() {
-        final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939"));
+        final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f6326adfdf5bc147626b30a89ce06d56"));
        executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
    }

--- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.contexts;

 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
-import net.sf.samtools.util.StringUtil;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@ -39,10 +38,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
 * @author hanna
 * @version 0.1
 */
-
 public class ReferenceContext {
-    final public static boolean UPPERCASE_REFERENCE = true;
-
    /**
     * Facilitates creation of new GenomeLocs.
     */
@ -59,7 +55,8 @@ public class ReferenceContext {
    final private GenomeLoc window;

    /**
-     * The bases in the window around the current locus.  If null, then bases haven't been fetched yet
+     * The bases in the window around the current locus.  If null, then bases haven't been fetched yet.
+     * Bases are always upper cased
     */
    private byte[] basesCache = null;

@ -81,7 +78,7 @@ public class ReferenceContext {
         *
         * @return
         */
-        @Ensures("result != null")
+        @Ensures({"result != null"})
        public byte[] getBases();
    }

@ -146,7 +143,9 @@ public class ReferenceContext {
    private void fetchBasesFromProvider() {
        if ( basesCache == null ) {
            basesCache = basesProvider.getBases();
-            if (UPPERCASE_REFERENCE) StringUtil.toUpperCase(basesCache);
+
+            // must be an assertion that only runs when the bases are fetch to run in a reasonable amount of time
+            assert BaseUtils.isUpperCase(basesCache);
        }
    }

@ -194,6 +193,7 @@ public class ReferenceContext {
    /**
     * All the bases in the window from the current base forward to the end of the window.
     */
+    @Ensures({"result != null", "result.length > 0"})
    public byte[] getForwardBases() {
        final byte[] bases = getBases();
        final int mid = locus.getStart() - window.getStart();
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
 import org.broadinstitute.sting.gatk.walkers.DataSource;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
@ -46,99 +45,127 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                       T sum) {
        logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));

-        final LocusView locusView = getLocusView( walker, dataProvider );
-        final GenomeLocSortedSet initialIntervals = engine.getIntervals();
+        final LocusView locusView = new AllLocusView(dataProvider);

        final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
        final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
        final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();

-        if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
-            int minStart = Integer.MAX_VALUE;
-            ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
+        int minStart = Integer.MAX_VALUE;
+        final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
+        ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );

-            ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
+        ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);

-            // We keep processing while the next reference location is within the interval
-            GenomeLoc prevLoc = null;
-            while( locusView.hasNext() ) {
-                final AlignmentContext locus = locusView.next();
-                GenomeLoc location = locus.getLocation();
+        // We keep processing while the next reference location is within the interval
+        GenomeLoc prevLoc = null;
+        while( locusView.hasNext() ) {
+            final AlignmentContext locus = locusView.next();
+            final GenomeLoc location = locus.getLocation();

-                if(prevLoc != null) {
-                    // fill in the active / inactive labels from the stop of the previous location to the start of this location
-                    // TODO refactor to separate function
-                    for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
-                        final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
-                        if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
-                            profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
-                        }
-                    }
+            // Grab all the previously unseen reads from this pileup and add them to the massive read list
+            // Note that this must occur before we leave because we are outside the intervals because
+            // reads may occur outside our intervals but overlap them in the future
+            // TODO -- this whole HashSet logic should be changed to a linked list of reads with
+            // TODO -- subsequent pass over them to find the ones overlapping the active regions
+            for( final PileupElement p : locus.getBasePileup() ) {
+                final GATKSAMRecord read = p.getRead();
+                if( !myReads.contains(read) ) {
+                    myReads.add(read);
                }

-                dataProvider.getShard().getReadMetrics().incrementNumIterations();
-
-                // create reference context. Note that if we have a pileup of "extended events", the context will
-                // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
-                final ReferenceContext refContext = referenceView.getReferenceContext(location);
-
-                // Iterate forward to get all reference ordered data covering this location
-                final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
-
-                // Call the walkers isActive function for this locus and add them to the list to be integrated later
-                if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
-                    profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
-                }
-
-                // Grab all the previously unseen reads from this pileup and add them to the massive read list
-                for( final PileupElement p : locus.getBasePileup() ) {
-                    final GATKSAMRecord read = p.getRead();
-                    if( !myReads.contains(read) ) {
-                        myReads.add(read);
-                    }
-
-                    // If this is the last pileup for this shard calculate the minimum alignment start so that we know
-                    // which active regions in the work queue are now safe to process
-                    minStart = Math.min(minStart, read.getAlignmentStart());
-                }
-
-                prevLoc = location;
-
-                printProgress(locus.getLocation());
+                // If this is the last pileup for this shard calculate the minimum alignment start so that we know
+                // which active regions in the work queue are now safe to process
+                minStart = Math.min(minStart, read.getAlignmentStart());
            }

-            updateCumulativeMetrics(dataProvider.getShard());
+            // skip this location -- it's not part of our engine intervals
+            // TODO -- this is dangerously slow with current overlaps implementation : GSA-649 / GenomeLocSortedSet.overlaps is crazy slow
+            if ( outsideEngineIntervals(location) )
+                continue;

-            // Take the individual isActive calls and integrate them into contiguous active regions and
-            // add these blocks of work to the work queue
-            // band-pass filter the list of isActive probabilities and turn into active regions
-            final ActivityProfile bandPassFiltered = profile.bandPassFilter();
-            final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
-
-            // add active regions to queue of regions to process
-            // first check if can merge active regions over shard boundaries
-            if( !activeRegions.isEmpty() ) {
-                if( !workQueue.isEmpty() ) {
-                    final ActiveRegion last = workQueue.getLast();
-                    final ActiveRegion first = activeRegions.get(0);
-                    if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
-                        workQueue.removeLast();
-                        activeRegions.remove(first);
-                        workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
-                    }
-                }
-                workQueue.addAll( activeRegions );
+            if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
+                // we've move across some interval boundary, restart profile
+                profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
            }

-            logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
+            dataProvider.getShard().getReadMetrics().incrementNumIterations();

-            // now go and process all of the active regions
-            sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
+            // create reference context. Note that if we have a pileup of "extended events", the context will
+            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+            final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+            // Iterate forward to get all reference ordered data covering this location
+            final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
+
+            // Call the walkers isActive function for this locus and add them to the list to be integrated later
+            profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
+
+            prevLoc = location;
+
+            printProgress(locus.getLocation());
        }

+        updateCumulativeMetrics(dataProvider.getShard());
+
+        if ( ! profile.isEmpty() )
+            incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
+
+        // add active regions to queue of regions to process
+        // first check if can merge active regions over shard boundaries
+        if( !activeRegions.isEmpty() ) {
+            if( !workQueue.isEmpty() ) {
+                final ActiveRegion last = workQueue.getLast();
+                final ActiveRegion first = activeRegions.get(0);
+                if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
+                    workQueue.removeLast();
+                    activeRegions.remove(first);
+                    workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
+                }
+            }
+            workQueue.addAll( activeRegions );
+        }
+
+        logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
+
+        // now go and process all of the active regions
+        sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
+
        return sum;
    }

+    /**
+     * Is the loc outside of the intervals being requested for processing by the GATK?
+     * @param loc
+     * @return
+     */
+    private boolean outsideEngineIntervals(final GenomeLoc loc) {
+        return engine.getIntervals() != null && ! engine.getIntervals().overlaps(loc);
+    }
+
+    /**
+     * Take the individual isActive calls and integrate them into contiguous active regions and
+     * add these blocks of work to the work queue
+     * band-pass filter the list of isActive probabilities and turn into active regions
+     *
+     * @param profile
+     * @param activeRegions
+     * @param activeRegionExtension
+     * @param maxRegionSize
+     * @return
+     */
+    private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
+                                                     final List<ActiveRegion> activeRegions,
+                                                     final int activeRegionExtension,
+                                                     final int maxRegionSize) {
+        if ( profile.isEmpty() )
+            throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
+
+        final ActivityProfile bandPassFiltered = profile.bandPassFilter();
+        activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
+        return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
+    }
+

    // --------------------------------------------------------------------------------
    //
@ -150,7 +177,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                                          final RefMetaDataTracker tracker, final ReferenceContext refContext,
                                          final AlignmentContext locus, final GenomeLoc location) {
        if ( walker.hasPresetActiveRegions() ) {
-            return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
+            return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
        } else {
            return walker.isActive( tracker, refContext, locus );
        }
@ -250,30 +277,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
        return walker.reduce( x, sum );
    }

-    // --------------------------------------------------------------------------------
-    //
-    // engine interaction code
-    //
-    // --------------------------------------------------------------------------------
-
-    /**
-     * Gets the best view of loci for this walker given the available data.
-     * @param walker walker to interrogate.
-     * @param dataProvider Data which which to drive the locus view.
-     * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
-     */
-    private LocusView getLocusView( final Walker<M,T> walker, final LocusShardDataProvider dataProvider ) {
-        final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
-        if( dataSource == DataSource.READS )
-            return new CoveredLocusView(dataProvider);
-        else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
-            return new AllLocusView(dataProvider);
-        else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
-            return new RodLocusView(dataProvider);
-        else
-            throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
-    }
-
    /**
     * Special function called in LinearMicroScheduler to empty out the work queue.
     * Ugly for now but will be cleaned up when we push this functionality more into the engine
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
@ -57,7 +57,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
        int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());

        // note the linear probability scale
-        return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1));
+        return new ActivityProfileResult(ref.getLocus(), Math.min(depth / coverageThreshold, 1));

    }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@ -25,7 +25,7 @@

 package org.broadinstitute.sting.gatk.walkers.indels;

-import net.sf.picard.reference.IndexedFastaSequenceFile;
+import com.google.java.contract.Requires;
 import net.sf.samtools.*;
 import net.sf.samtools.util.RuntimeIOException;
 import net.sf.samtools.util.SequenceUtil;
@ -236,6 +236,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
     * then extensions (".bam" or ".sam") will be stripped from the input file names and the provided string value will be pasted on instead; 2) if the
     * value ends with a '.map' (e.g. input_output.map), then the two-column tab-separated file with the specified name must exist and list unique output
     * file name (2nd column) for each input file name (1st column).
+     *
+     * Note that some GATK arguments do NOT work in conjunction with nWayOut (e.g. --disable_bam_indexing).
     */
    @Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file")
    protected String N_WAY_OUT = null;
@ -274,7 +276,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
    protected String OUT_SNPS = null;

    // fasta reference reader to supplement the edges of the reference sequence
-    private IndexedFastaSequenceFile referenceReader;
+    private CachingIndexedFastaSequenceFile referenceReader;

    // the intervals input by the user
    private Iterator<GenomeLoc> intervals = null;
@ -1601,7 +1603,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {

        public List<GATKSAMRecord> getReads() { return reads; }

-        public byte[] getReference(IndexedFastaSequenceFile referenceReader) {
+        @Requires("referenceReader.isUppercasingBases()")
+        public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) {
            // set up the reference if we haven't done so yet
            if ( reference == null ) {
                // first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read)
@ -1609,7 +1612,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
                int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
                loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight);
                reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
-                StringUtil.toUpperCase(reference);
            }

            return reference;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java
@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
-
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
-import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
-import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-
-/**
- * @author rpoplin
- * @since Apr 6, 2010
- */
-
-//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
-@Deprecated
-public class VariantQualityScore {
-    // TODO - this should really be a stratification
-
-//    public class VariantQualityScore extends VariantEvaluator {
-//
-//    // a mapping from quality score histogram bin to Ti/Tv ratio
-//    @DataPoint(description = "the Ti/Tv ratio broken out by variant quality")
-//    TiTvStats titvStats = null;
-//
-//    @DataPoint(description = "average variant quality for each allele count")
-//    AlleleCountStats alleleCountStats = null;
-//
-//    static class TiTvStats extends TableType {
-//        final static int NUM_BINS = 20;
-//        final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
-//        final long transitionByQuality[] = new long[NUM_BINS];
-//        final long transversionByQuality[] = new long[NUM_BINS];
-//        final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
-//
-//        public Object[] getRowKeys() {
-//            return new String[]{"sample"};
-//        }
-//
-//        public Object[] getColumnKeys() {
-//            final String columnKeys[] = new String[NUM_BINS];
-//            for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//                columnKeys[iii] = "titvBin" + iii;
-//            }
-//            return columnKeys;
-//        }
-//
-//        public String getCell(int x, int y) {
-//            return String.valueOf(titvByQuality[y]);
-//        }
-//
-//        public String toString() {
-//            StringBuffer returnString = new StringBuffer();
-//            // output the ti/tv array
-//            returnString.append("titvByQuality: ");
-//            for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//                returnString.append(titvByQuality[iii]);
-//                returnString.append(" ");
-//            }
-//            return returnString.toString();
-//        }
-//
-//        public void incrValue( final double qual, final boolean isTransition ) {
-//            final Integer qualKey = Math.round((float) qual);
-//            final long numTransition = (isTransition ? 1L : 0L);
-//            final long numTransversion = (isTransition ? 0L : 1L);
-//            if( qualByIsTransition.containsKey(qualKey) ) {
-//                Pair<Long,Long> transitionPair = qualByIsTransition.get(qualKey);
-//                transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion);
-//                qualByIsTransition.put(qualKey, transitionPair);
-//            } else {
-//                qualByIsTransition.put(qualKey, new Pair<Long,Long>(numTransition,numTransversion));
-//            }
-//        }
-//
-//        public void organizeTiTvTables() {
-//            for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//                transitionByQuality[iii] = 0L;
-//                transversionByQuality[iii] = 0L;
-//                titvByQuality[iii] = 0.0;
-//            }
-//
-//            int maxQual = 0;
-//
-//            // Calculate the maximum quality score in order to normalize and histogram
-//            for( final Integer qual : qualByIsTransition.keySet() ) {
-//                if( qual > maxQual ) {
-//                    maxQual = qual;
-//                }
-//            }
-//
-//            final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1));
-//
-//            for( final Integer qual : qualByIsTransition.keySet() ) {
-//                final int index = (int)Math.floor( ((double) qual) / binSize );
-//                if( index >= 0 ) { // BUGBUG: why is there overflow here?
-//                    Pair<Long,Long> transitionPair = qualByIsTransition.get(qual);
-//                    transitionByQuality[index] += transitionPair.getFirst();
-//                    transversionByQuality[index] += transitionPair.getSecond();
-//                }
-//            }
-//
-//            for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//                if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
-//                    titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
-//                } else {
-//                    titvByQuality[iii] = 0.0;
-//                }
-//            }
-//
-//        }
-//    }
-//
-//    class AlleleCountStats extends TableType {
-//        final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
-//        final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
-//
-//        public Object[] getRowKeys() {
-//            final int NUM_BINS = qualityListMap.keySet().size();
-//            final String rowKeys[] = new String[NUM_BINS];
-//            int iii = 0;
-//            for( final Integer key : qualityListMap.keySet() ) {
-//                rowKeys[iii] = "AC" + key;
-//                iii++;
-//            }
-//            return rowKeys;
-//
-//        }
-//
-//        public Object[] getColumnKeys() {
-//            return new String[]{"alleleCount","avgQual"};
-//        }
-//
-//        public String getCell(int x, int y) {
-//            int iii = 0;
-//            for( final Integer key : qualityListMap.keySet() ) {
-//                if(iii == x) {
-//                    if(y == 0) { return String.valueOf(key); }
-//                    else { return String.valueOf(qualityMap.get(key)); }
-//                }
-//                iii++;
-//            }
-//            return null;
-//        }
-//
-//        public String toString() {
-//            String returnString = "";
-//            // output the quality map
-//            returnString += "AlleleCountStats: ";
-//            //for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//            //    returnString += titvByQuality[iii] + " ";
-//            //}
-//            return returnString;
-//        }
-//
-//        public void incrValue( final double qual, final int alleleCount ) {
-//            ArrayList<Double> list = qualityListMap.get(alleleCount);
-//            if(list==null) { list = new ArrayList<Double>(); }
-//            list.add(qual);
-//            qualityListMap.put(alleleCount, list);
-//        }
-//
-//        public void organizeAlleleCountTables() {
-//            for( final Integer key : qualityListMap.keySet() ) {
-//                final ArrayList<Double> list = qualityListMap.get(key);
-//                double meanQual = 0.0;
-//                final double numQuals = (double)list.size();
-//                for( Double qual : list ) {
-//                    meanQual += qual / numQuals;
-//                }
-//                qualityMap.put(key, meanQual);
-//            }
-//        }
-//    }
-//
-//    //public VariantQualityScore(VariantEvalWalker parent) {
-//        //super(parent);
-//    //}
-//
-//    public String getName() {
-//        return "VariantQualityScore";
-//    }
-//
-//    public int getComparisonOrder() {
-//        return 1;   // we only need to see each eval track
-//    }
-//
-//    public String toString() {
-//        return getName();
-//    }
-//
-//    public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-//        final String interesting = null;
-//
-//        if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
-//            if( titvStats == null ) { titvStats = new TiTvStats(); }
-//            titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
-//
-//            if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
-//            int alternateAlleleCount = 0;
-//            for (final Allele a : eval.getAlternateAlleles()) {
-//                alternateAlleleCount += eval.getCalledChrCount(a);
-//            }
-//            alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
-//        }
-//
-//        return interesting; // This module doesn't capture any interesting sites, so return null
-//    }
-//
-//    public void finalizeEvaluation() {
-//        if( titvStats != null ) {
-//            titvStats.organizeTiTvTables();
-//        }
-//        if( alleleCountStats != null ) {
-//            alleleCountStats.organizeAlleleCountTables();
-//        }
-//    }
-}
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@ -24,33 +24,6 @@ public class BaseUtils {
    public final static byte[] BASES = {'A', 'C', 'G', 'T'};
    public final static byte[] EXTENDED_BASES = {'A', 'C', 'G', 'T', 'N', 'D'};

-    public enum Base {
-        A('A', 0),
-        C('C', 1),
-        G('G', 2),
-        T('T', 3);
-
-        byte b;
-        int index;
-
-        private Base(char base, int index) {
-            this.b = (byte) base;
-            this.index = index;
-        }
-
-        public byte getBase() { return b; }
-
-        public char getBaseAsChar() { return (char) b; }
-
-        public int getIndex() { return index; }
-
-        public boolean sameBase(byte o) { return b == o; }
-
-        public boolean sameBase(char o) { return b == (byte) o; }
-
-        public boolean sameBase(int i) { return index == i; }
-    }
-
    static private final int[] baseIndexMap = new int[256];
    static {
        Arrays.fill(baseIndexMap, -1);
@ -130,6 +103,17 @@ public class BaseUtils {
        return false;
    }

+    public static boolean isUpperCase(final byte[] bases) {
+        for ( byte base : bases )
+            if ( ! isUpperCase(base) )
+                return false;
+        return true;
+    }
+
+    public static boolean isUpperCase(final byte base) {
+        return base >= 'A' && base <= 'Z';
+    }
+
    /**
     * Converts a IUPAC nucleotide code to a pair of bases
     *
@ -271,59 +255,6 @@ public class BaseUtils {
        }
    }

-    /**
-     * Converts a base index to a base index representing its cross-talk partner
-     *
-     * @param baseIndex 0, 1, 2, 3
-     * @return 1, 0, 3, 2, or -1 if the index can't be understood
-     */
-    static public int crossTalkPartnerIndex(int baseIndex) {
-        switch (baseIndex) {
-            case 0:
-                return 1; // A -> C
-            case 1:
-                return 0; // C -> A
-            case 2:
-                return 3; // G -> T
-            case 3:
-                return 2; // T -> G
-            default:
-                return -1;
-        }
-    }
-
-    /**
-     * Converts a base to the base representing its cross-talk partner
-     *
-     * @param base [AaCcGgTt]
-     * @return C, A, T, G, or '.' if the base can't be understood
-     */
-    @Deprecated
-    static public char crossTalkPartnerBase(char base) {
-        return (char) baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
-    }
-
-    /**
-     * Return the complement of a base index.
-     *
-     * @param baseIndex the base index (0:A, 1:C, 2:G, 3:T)
-     * @return the complementary base index
-     */
-    static public byte complementIndex(int baseIndex) {
-        switch (baseIndex) {
-            case 0:
-                return 3; // a -> t
-            case 1:
-                return 2; // c -> g
-            case 2:
-                return 1; // g -> c
-            case 3:
-                return 0; // t -> a
-            default:
-                return -1; // wtf?
-        }
-    }
-
    /**
     * Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base).
     *
@ -350,7 +281,7 @@ public class BaseUtils {
    }

    @Deprecated
-    static public char simpleComplement(char base) {
+    static private char simpleComplement(char base) {
        return (char) simpleComplement((byte) base);
    }

@ -370,22 +301,6 @@ public class BaseUtils {
        return rcbases;
    }

-    /**
-     * Complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form)
-     *
-     * @param bases the byte array of bases
-     * @return the complement of the base byte array
-     */
-    static public byte[] simpleComplement(byte[] bases) {
-        byte[] rcbases = new byte[bases.length];
-
-        for (int i = 0; i < bases.length; i++) {
-            rcbases[i] = simpleComplement(bases[i]);
-        }
-
-        return rcbases;
-    }
-
    /**
     * Reverse complement a char array of bases
     *
@ -403,23 +318,6 @@ public class BaseUtils {
        return rcbases;
    }

-    /**
-     * Complement a char array of bases
-     *
-     * @param bases the char array of bases
-     * @return the complement of the base char array
-     */
-    @Deprecated
-    static public char[] simpleComplement(char[] bases) {
-        char[] rcbases = new char[bases.length];
-
-        for (int i = 0; i < bases.length; i++) {
-            rcbases[i] = simpleComplement(bases[i]);
-        }
-
-        return rcbases;
-    }
-
    /**
     * Reverse complement a String of bases.  Preserves ambiguous bases.
     *
@ -431,17 +329,6 @@ public class BaseUtils {
        return new String(simpleReverseComplement(bases.getBytes()));
    }

-    /**
-     * Complement a String of bases.  Preserves ambiguous bases.
-     *
-     * @param bases the String of bases
-     * @return the complement of the String
-     */
-    @Deprecated
-    static public String simpleComplement(String bases) {
-        return new String(simpleComplement(bases.getBytes()));
-    }
-
    /**
     * Returns the uppercased version of the bases
     *
@ -543,82 +430,4 @@ public class BaseUtils {

        return randomBaseIndex;
    }
-
-    /**
-     * Return a random base (A, C, G, T).
-     *
-     * @return a random base (A, C, G, T)
-     */
-    @Deprecated
-    static public byte getRandomBase() {
-        return getRandomBase('.');
-    }
-
-    /**
-     * Return a random base, excluding some base.
-     *
-     * @param excludeBase the base to exclude
-     * @return a random base, excluding the one specified (A, C, G, T)
-     */
-    @Deprecated
-    static public byte getRandomBase(char excludeBase) {
-        return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
-    }
-
-    /**
-     * Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
-     * that for all  i = 0... seq.length-1,  seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p).
-     * The sequence does <i>not</i> have to contain whole number of periods. For instance, "ACACACAC" has a period
-     * of 2 (it has a period of 4 as well), and so does
-     * "ACACA"; similarly, smallest periods of "CTCCTC", "CTCCT", and "CTCC" are all equal to 3. The "trivial" period is
-     * the length of the string itself, and it will always be returned if no smaller period can be found in the specified period range
-     * or if specified minPeriod is greater than the sequence length.
-     *
-     * @param seq
-     * @return
-     */
-    public static int sequencePeriod(byte[] seq, int minPeriod) {
-        int period = (minPeriod > seq.length ? seq.length : minPeriod);
-        // we assume that bases [0,period-1] repeat themselves and check this assumption
-        // until we find correct period
-
-        for (int pos = period; pos < seq.length; pos++) {
-
-            int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
-            // if our current hypothesis holds, base[pos] must be the same as base[offset]
-
-            if (Character.toUpperCase(seq[pos]) != Character.toUpperCase(seq[offset])) {
-
-                // period we have been trying so far does not work.
-                // two possibilities:
-                // A) offset = 0, i.e. current position pos must be start of the next repeat, but it is not;
-                //      in this case only bases from start up to the current one, inclusive, may form a repeat, if at all;
-                //       so period is at least pos+1 (remember, pos is 0-based), then on the next loop re-entrance
-                //      pos will be autoincremented and we will be checking next base
-                // B) offset != 0, i.e. the current base breaks the repeat, but maybe it starts a new one?
-                //     hence we should first check if it matches the first base of the sequence, and to do that
-                //     we set period to pos  (thus trying the hypothesis that bases from start up to the current one,
-                //     non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
-                // on the next loop re-entrance after pos is autoincremented)
-                if (offset == 0)
-                    period = pos + 1;
-                else
-                    period = pos--;
-
-            }
-        }
-        return period;
-    }
 }
-
-/* code snippet for testing sequencePeriod():
- * 
- *     	String str = "CCTTG";
-    	int p = 0;
-    	System.out.print("Periods of " + str +" are:");
-    	while ( p < str.length() ) {
-    		p = sequencePeriod(str, p+1);
-        	System.out.print(" "+p);
-    	}
-    	System.out.println(); System.exit(1);
-*/
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
@ -1,11 +1,11 @@
 package org.broadinstitute.sting.utils.activeregion;

-import net.sf.picard.reference.IndexedFastaSequenceFile;
-import net.sf.samtools.util.StringUtil;
+import com.google.java.contract.Requires;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.HasGenomeLocation;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

 import java.util.ArrayList;
@ -54,27 +54,31 @@ public class ActiveRegion implements HasGenomeLocation {

    public ArrayList<GATKSAMRecord> getReads() { return reads; }

-    public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader ) {
        return getActiveRegionReference(referenceReader, 0);
    }

-    public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) {
        return getReference( referenceReader, padding, extendedLoc );
    }

-    public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader ) {
        return getFullReference(referenceReader, 0);
    }

-    public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) {
        return getReference( referenceReader, padding, fullExtentReferenceLoc );
    }

-    private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    private byte[] getReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
        final byte[] reference =  referenceReader.getSubsequenceAt( genomeLoc.getContig(),
                Math.max(1, genomeLoc.getStart() - padding),
                Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases();
-        StringUtil.toUpperCase(reference);
        return reference;
    }

--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
@ -24,11 +24,11 @@

 package org.broadinstitute.sting.utils.activeregion;

+import com.google.java.contract.Requires;
 import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

 import java.util.ArrayList;
 import java.util.Collections;
@ -45,6 +45,7 @@ public class ActivityProfile {
    final GenomeLocParser parser;
    final boolean presetRegions;
    GenomeLoc regionStartLoc = null;
+    GenomeLoc regionStopLoc = null;
    final List<ActivityProfileResult> isActiveList;
    private static final int FILTER_SIZE = 80;
    private static final double[] GaussianKernel;
@ -71,19 +72,49 @@ public class ActivityProfile {
        this.regionStartLoc = regionStartLoc;
    }

-    public void add(final GenomeLoc loc, final ActivityProfileResult result) {
-        if ( loc.size() != 1 )
-            throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" );
-        isActiveList.add(result);
-        if( regionStartLoc == null ) {
+    @Override
+    public String toString() {
+        return "ActivityProfile{" +
+                "start=" + regionStartLoc +
+                ", stop=" + regionStopLoc +
+                '}';
+    }
+
+    /**
+     * Add the next ActivityProfileResult to this profile.
+     *
+     * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown
+     *
+     * @param result a well-formed ActivityProfileResult result to incorporate into this profile
+     */
+    @Requires("result != null")
+    public void add(final ActivityProfileResult result) {
+        final GenomeLoc loc = result.getLoc();
+
+        if ( regionStartLoc == null ) {
            regionStartLoc = loc;
+            regionStopLoc = loc;
+        } else {
+            if ( regionStopLoc.getStart() != loc.getStart() - 1 )
+                throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc );
+            regionStopLoc = loc;
        }
+
+        isActiveList.add(result);
    }

    public int size() {
        return isActiveList.size();
    }

+    public boolean isEmpty() {
+        return isActiveList.isEmpty();
+    }
+
+    public boolean hasPresetRegions() {
+        return presetRegions;
+    }
+
    /**
     * Band pass this ActivityProfile, producing a new profile that's band pass filtered
     * @return a new ActivityProfile that's the band-pass filtered version of this profile
@ -104,14 +135,21 @@ public class ActivityProfile {
            }
            iii++;
        }
-        final double[] filteredProbArray = new double[activeProbArray.length];
+
+        final double[] filteredProbArray;
        if( !presetRegions ) {
+            // if we aren't using preset regions, actually apply the band pass filter for activeProbArray into filteredProbArray
+            filteredProbArray = new double[activeProbArray.length];
            for( iii = 0; iii < activeProbArray.length; iii++ ) {
                final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
                final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
                filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
            }
+        } else {
+            // otherwise we simply use the activeProbArray directly
+            filteredProbArray = activeProbArray;
        }
+
        iii = 0;
        for( final double prob : filteredProbArray ) {
            final ActivityProfileResult result = isActiveList.get(iii++);
@ -119,6 +157,7 @@ public class ActivityProfile {
            result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE;
            result.resultValue = null;
        }
+
        return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc);
    }

@ -166,6 +205,7 @@ public class ActivityProfile {
    private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
        return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
    }
+
    private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List<ActiveRegion> returnList) {
        if( !isActive || curEnd - curStart < maxRegionSize ) {
            final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd);
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java
@ -1,12 +1,16 @@
 package org.broadinstitute.sting.utils.activeregion;

+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
 /**
 * Created with IntelliJ IDEA.
 * User: rpoplin
 * Date: 7/27/12
 */
-
 public class ActivityProfileResult {
+    private GenomeLoc loc;
    public double isActiveProb;
    public ActivityProfileResultState resultState;
    public Number resultValue;
@ -16,16 +20,52 @@ public class ActivityProfileResult {
        HIGH_QUALITY_SOFT_CLIPS
    }

-    public ActivityProfileResult( final double isActiveProb ) {
-        this.isActiveProb = isActiveProb;
-        this.resultState = ActivityProfileResultState.NONE;
-        this.resultValue = null;
+    /**
+     * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb
+     *
+     * @param loc the position of the result profile (for debugging purposes)
+     * @param isActiveProb the probability of being active (between 0 and 1)
+     */
+    @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"})
+    public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb ) {
+        this(loc, isActiveProb, ActivityProfileResultState.NONE, null);
    }

-    public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
+    /**
+     * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb that maintains some
+     * information about the result state and value (TODO RYAN -- what do these mean?)
+     *
+     * @param loc the position of the result profile (for debugging purposes)
+     * @param isActiveProb the probability of being active (between 0 and 1)
+     */
+    @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"})
+    public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
+        // make sure the location of that activity profile is 1
+        if ( loc.size() != 1 )
+            throw new IllegalArgumentException("Location for an ActivityProfileResult must have to size 1 bp but saw " + loc);
+
+        this.loc = loc;
        this.isActiveProb = isActiveProb;
        this.resultState = resultState;
        this.resultValue = resultValue;
    }

+    /**
+     * Get the genome loc associated with the ActivityProfileResult
+     * @return the location of this result
+     */
+    @Ensures("result != null")
+    public GenomeLoc getLoc() {
+        return loc;
+    }
+
+    @Override
+    public String toString() {
+        return "ActivityProfileResult{" +
+                "loc=" + loc +
+                ", isActiveProb=" + isActiveProb +
+                ", resultState=" + resultState +
+                ", resultValue=" + resultValue +
+                '}';
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@ -587,7 +587,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>

        int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
        if ( nParts != genotypeParts.length )
-            generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
+            generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records at " + chr + ":" + pos, lineNo);

        ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);

--- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
@ -29,6 +29,7 @@ import net.sf.picard.reference.FastaSequenceIndex;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
 import net.sf.picard.reference.ReferenceSequence;
 import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.util.StringUtil;
 import org.apache.log4j.Priority;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

@ -40,6 +41,8 @@ import java.util.Arrays;
 * A caching version of the IndexedFastaSequenceFile that avoids going to disk as often as the raw indexer.
 *
 * Thread-safe!  Uses a thread-local cache
+ *
+ * Automatically upper-cases the bases coming in, unless they the flag preserveCase is explicitly set
 */
 public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
    protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class);
@ -54,10 +57,15 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
    public static final long DEFAULT_CACHE_SIZE = 1000000;

    /** The cache size of this CachingIndexedFastaSequenceFile */
-    final long cacheSize;
+    private final long cacheSize;

    /** When we have a cache miss at position X, we load sequence from X - cacheMissBackup */
-    final long cacheMissBackup;
+    private final long cacheMissBackup;
+
+    /**
+     * If true, we will preserve the case of the original base in the genome, not
+     */
+    private final boolean preserveCase;

    // information about checking efficiency
    long cacheHits = 0;
@ -84,37 +92,17 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
    /**
     * Same as general constructor but allows one to override the default cacheSize
     *
-     * @param fasta
-     * @param index
-     * @param cacheSize
+     * @param fasta the file we will read our FASTA sequence from.
+     * @param index the index of the fasta file, used for efficient random access
+     * @param cacheSize the size in bp of the cache we will use for this reader
+     * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
     */
-    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) {
+    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize, final boolean preserveCase) {
        super(fasta, index);
        if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0");
        this.cacheSize = cacheSize;
        this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
-    }
-
-    /**
-     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
-     *
-     * @param fasta The file to open.
-     * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
-     * @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found.
-     */
-    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) {
-        this(fasta, index, DEFAULT_CACHE_SIZE);
-    }
-
-    /**
-     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
-     *
-     * Looks for a index file for fasta on disk
-     *
-     * @param fasta The file to open.
-     */
-    public CachingIndexedFastaSequenceFile(final File fasta) throws FileNotFoundException {
-        this(fasta, DEFAULT_CACHE_SIZE);
+        this.preserveCase = preserveCase;
    }

    /**
@ -124,12 +112,76 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
     * Uses provided cacheSize instead of the default
     *
     * @param fasta The file to open.
+     * @param cacheSize the size of the cache to use in this CachingIndexedFastaReader, must be >= 0
+     * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
     */
-    public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize ) throws FileNotFoundException {
+    public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize, final boolean preserveCase ) throws FileNotFoundException {
        super(fasta);
        if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0");
        this.cacheSize = cacheSize;
        this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
+        this.preserveCase = preserveCase;
+    }
+
+//    /**
+//     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
+//     *
+//     * @param fasta The file to open.
+//     * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
+//     * @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found.
+//     */
+//    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) {
+//        this(fasta, index, DEFAULT_CACHE_SIZE);
+//    }
+
+    /**
+     * Same as general constructor but allows one to override the default cacheSize
+     *
+     * By default, this CachingIndexedFastaReader converts all incoming bases to upper case
+     *
+     * @param fasta the file we will read our FASTA sequence from.
+     * @param index the index of the fasta file, used for efficient random access
+     * @param cacheSize the size in bp of the cache we will use for this reader
+     */
+    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) {
+        this(fasta, index, cacheSize, false);
+    }
+
+    /**
+     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
+     *
+     * Looks for a index file for fasta on disk.
+     * This CachingIndexedFastaReader will convert all FASTA bases to upper cases under the hood
+     *
+     * @param fasta The file to open.
+     */
+    public CachingIndexedFastaSequenceFile(final File fasta) throws FileNotFoundException {
+        this(fasta, false);
+    }
+
+    /**
+     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
+     *
+     * Looks for a index file for fasta on disk
+     *
+     * @param fasta The file to open.
+     * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
+     */
+    public CachingIndexedFastaSequenceFile(final File fasta, final boolean preserveCase) throws FileNotFoundException {
+        this(fasta, DEFAULT_CACHE_SIZE, preserveCase);
+    }
+
+    /**
+     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
+     *
+     * Looks for a index file for fasta on disk
+     * Uses provided cacheSize instead of the default
+     *
+     * @param fasta The file to open.
+     * @param cacheSize the size of the cache to use in this CachingIndexedFastaReader, must be >= 0
+     */
+    public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize ) throws FileNotFoundException {
+        this(fasta, cacheSize, false);
    }

    /**
@ -168,6 +220,25 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
        return cacheSize;
    }

+    /**
+     * Is this CachingIndexedFastaReader keeping the original case of bases in the fasta, or is
+     * everything being made upper case?
+     *
+     * @return true if the bases coming from this reader are in the original case in the fasta, false if they are all upper cased
+     */
+    public boolean isPreservingCase() {
+        return preserveCase;
+    }
+
+    /**
+     * Is uppercasing bases?
+     *
+     * @return true if bases coming from this CachingIndexedFastaSequenceFile are all upper cased, false if this reader are in the original case in the fasta
+     */
+    public boolean isUppercasingBases() {
+        return ! isPreservingCase();
+    }
+
    /**
     * Gets the subsequence of the contig in the range [start,stop]
     *
@ -177,8 +248,10 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
     * @param contig Contig whose subsequence to retrieve.
     * @param start inclusive, 1-based start of region.
     * @param stop inclusive, 1-based stop of region.
-     * @return The partial reference sequence associated with this range.
+     * @return The partial reference sequence associated with this range.  If preserveCase is false, then
+     *         all of the bases in the ReferenceSequence returned by this method will be upper cased.
     */
+    @Override
    public ReferenceSequence getSubsequenceAt( final String contig, final long start, final long stop ) {
        final ReferenceSequence result;
        final Cache myCache = cache.get();
@ -186,6 +259,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
        if ( (stop - start) >= cacheSize ) {
            cacheMisses++;
            result = super.getSubsequenceAt(contig, start, stop);
+            if ( ! preserveCase ) StringUtil.toUpperCase(result.getBases());
        } else {
            // todo -- potential optimization is to check if contig.name == contig, as this in generally will be true
            SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);
@ -198,7 +272,9 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
                myCache.start = Math.max(start - cacheMissBackup, 0);
                myCache.stop  = Math.min(start + cacheSize + cacheMissBackup, contigInfo.getSequenceLength());
                myCache.seq   = super.getSubsequenceAt(contig, myCache.start, myCache.stop);
-                //System.out.printf("New cache at %s %d-%d%n", contig, cacheStart, cacheStop);
+
+                // convert all of the bases in the sequence to upper case if we aren't preserving cases
+                if ( ! preserveCase ) StringUtil.toUpperCase(myCache.seq.getBases());
            } else {
                cacheHits++;
            }
@ -215,8 +291,10 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
            }
        }

+        // for debugging -- print out our efficiency if requested
        if ( PRINT_EFFICIENCY && (getCacheHits() + getCacheMisses()) % PRINT_FREQUENCY == 0 )
            printEfficiency(Priority.INFO);
+
        return result;
    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
@ -76,7 +76,6 @@ public class BCF2FieldWriterManager {
        if ( map.containsKey(field) )
            throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders");
        map.put(field, writer);
-        if ( logger.isDebugEnabled() ) logger.debug(writer);
    }

    // -----------------------------------------------------------------
--- a/public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java
+++ b/public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java
@ -0,0 +1,37 @@
+package org.broadinstitute.sting;
+
+import org.apache.log4j.Logger;
+import org.testng.IAnnotationTransformer;
+import org.testng.annotations.ITestAnnotation;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
+/**
+ * Provide default @Test values for GATK testng tests.
+ *
+ * Currently only sets the maximum runtime to 10 minutes, if it's not been specified.
+ *
+ * See http://beust.com/weblog/2006/10/18/annotation-transformers-in-java/
+ *
+ * @author depristo
+ * @since 10/31/12
+ * @version 0.1
+ */
+public class TestNGTestTransformer implements IAnnotationTransformer {
+    public static final long DEFAULT_TIMEOUT = 1000 * 60 * 10; // 10 minutes max per test
+
+    final static Logger logger = Logger.getLogger(TestNGTestTransformer.class);
+
+    public void transform(ITestAnnotation annotation,
+                          Class testClass,
+                          Constructor testConstructor,
+                          Method testMethod)
+    {
+        if ( annotation.getTimeOut() == 0 ) {
+            logger.warn("test " + testMethod.toString() + " has no specified timeout, adding default timeout " + DEFAULT_TIMEOUT / 1000 / 60 + " minutes");
+            annotation.setTimeOut(DEFAULT_TIMEOUT);
+        }
+    }
+}
+
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java
@ -7,7 +7,7 @@ import java.util.ArrayList;

 public class UnifiedGenotyperLargeScaleTest extends WalkerTest {

-    @Test
+    @Test( timeOut = 18000000 )
    public void testUnifiedGenotyperWholeGenome() {
        WalkerTestSpec spec = new WalkerTestSpec(
                "-R " + hg18Reference +
@ -22,7 +22,7 @@ public class UnifiedGenotyperLargeScaleTest extends WalkerTest {
        executeTest("testUnifiedGenotyperWholeGenome", spec);
    }

-    @Test
+    @Test( timeOut = 18000000 )
    public void testUnifiedGenotyperWholeExome() {
        WalkerTestSpec spec = new WalkerTestSpec(
                "-R " + hg18Reference +
@ -37,7 +37,7 @@ public class UnifiedGenotyperLargeScaleTest extends WalkerTest {
        executeTest("testUnifiedGenotyperWholeExome", spec);
    }

-    @Test
+    @Test( timeOut = 18000000 )
    public void testUnifiedGenotyperWGParallel() {
        WalkerTestSpec spec = new WalkerTestSpec(
                "-R " + hg18Reference +
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java
@ -0,0 +1,35 @@
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+import org.broadinstitute.sting.WalkerTest;
+import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
+import org.testng.SkipException;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+
+// ********************************************************************************** //
+// Note that this class also serves as an integration test for the VariantAnnotator!  //
+// ********************************************************************************** //
+
+public class UnifiedGenotyperLiteIntegrationTest extends WalkerTest {
+
+    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
+
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // testing contamination down-sampling gets ignored
+    //
+    // --------------------------------------------------------------------------------------------------------------
+
+    @Test
+    public void testContaminationDownsampling() {
+        if ( !GATKLiteUtils.isGATKLite() )
+            throw new SkipException("Only want to test for GATK lite");
+
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
+                Arrays.asList("9addd225a985178339a0c49dc5fdc220"));
+        executeTest("test contamination_percentage_to_filter gets ignored", spec);
+    }
+
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java
@ -6,7 +6,7 @@ import org.testng.annotations.Test;
 import java.util.ArrayList;

 public class IndelRealignerLargeScaleTest extends WalkerTest {
-    @Test
+    @Test( timeOut = 18000000 )
    public void testHighCoverage() {
        WalkerTestSpec spec = new WalkerTestSpec(

@ -21,7 +21,7 @@ public class IndelRealignerLargeScaleTest extends WalkerTest {
        executeTest("testIndelRealignerHighCoverage", spec);
    }

-    @Test
+    @Test( timeOut = 18000000 )
    public void testRealigner() {
        WalkerTestSpec spec1 = new WalkerTestSpec(

--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java
@ -6,7 +6,7 @@ import org.testng.annotations.Test;
 import java.util.ArrayList;

 public class RealignerTargetCreatorLargeScaleTest extends WalkerTest {
-    @Test
+    @Test( timeOut = 18000000 )
    public void testRealignerTargetCreator() {

        WalkerTestSpec spec1 = new WalkerTestSpec(
--- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java
@ -123,7 +123,7 @@ public class ActivityProfileUnitTest extends BaseTest {
        for ( int i = 0; i < cfg.probs.size(); i++ ) {
            double p = cfg.probs.get(i);
            GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
-            profile.add(loc, new ActivityProfileResult(p));
+            profile.add(new ActivityProfileResult(loc, p));
        }
        Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));

--- a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java
@ -30,6 +30,7 @@ import java.util.concurrent.Executors;
 public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
    private File simpleFasta = new File(publicTestDir + "/exampleFASTA.fasta");
    private static final int STEP_SIZE = 1;
+    private final static boolean DEBUG = false;

    //private static final List<Integer> QUERY_SIZES = Arrays.asList(1);
    private static final List<Integer> QUERY_SIZES = Arrays.asList(1, 10, 100);
@ -53,9 +54,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
        return cacheSizeRequested == -1 ? CachingIndexedFastaSequenceFile.DEFAULT_CACHE_SIZE : cacheSizeRequested;
    }

-    @Test(dataProvider = "fastas", enabled = true)
+    @Test(dataProvider = "fastas", enabled = true && ! DEBUG)
    public void testCachingIndexedFastaReaderSequential1(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
-        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
+        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);

        SAMSequenceRecord contig = caching.getSequenceDictionary().getSequence(0);
        logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d",
@ -64,6 +65,8 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
    }

    private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
+        Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
+
        final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);

        SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
@ -92,10 +95,10 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
    }

    // Tests grabbing sequences around a middle cached value.
-    @Test(dataProvider = "fastas", enabled = true)
+    @Test(dataProvider = "fastas", enabled = true && ! DEBUG)
    public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
        final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
-        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
+        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);

        SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);

@ -123,11 +126,6 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
    @DataProvider(name = "ParallelFastaTest")
    public Object[][] createParallelFastaTest() {
        List<Object[]> params = new ArrayList<Object[]>();
-//        for ( int nt : Arrays.asList(1, 2, 3) ) {
-//            for ( int cacheSize : CACHE_SIZES ) {
-//                params.add(new Object[]{simpleFasta, cacheSize, 10, nt});
-//            }
-//        }

        for ( File fasta : Arrays.asList(simpleFasta) ) {
            for ( int cacheSize : CACHE_SIZES ) {
@ -143,9 +141,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
    }


-    @Test(dataProvider = "ParallelFastaTest", enabled = true, timeOut = 60000)
+    @Test(dataProvider = "ParallelFastaTest", enabled = true && ! DEBUG, timeOut = 60000)
    public void testCachingIndexedFastaReaderParallel(final File fasta, final int cacheSize, final int querySize, final int nt) throws FileNotFoundException, InterruptedException {
-        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
+        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);

        logger.warn(String.format("Parallel caching index fasta reader test cacheSize %d querySize %d nt %d", caching.getCacheSize(), querySize, nt));
        for ( int iterations = 0; iterations < 1; iterations++ ) {
@ -163,4 +161,49 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
            executor.shutdownNow();
        }
    }
+
+    // make sure some bases are lower case and some are upper case
+    @Test(enabled = true)
+    public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
+        final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
+        final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true);
+        final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA));
+
+        int nMixedCase = 0;
+        for ( SAMSequenceRecord contig : original.getSequenceDictionary().getSequences() ) {
+            nMixedCase += testCases(original, casePreserving, allUpper, contig.getSequenceName(), -1, -1);
+
+            final int step = 100;
+            for ( int lastPos = step; lastPos < contig.getSequenceLength(); lastPos += step ) {
+                testCases(original, casePreserving, allUpper, contig.getSequenceName(), lastPos - step, lastPos);
+            }
+        }
+
+        Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file.  Unexpected test state");
+    }
+
+    private int testCases(final IndexedFastaSequenceFile original,
+                          final IndexedFastaSequenceFile casePreserving,
+                          final IndexedFastaSequenceFile allUpper,
+                          final String contig, final int start, final int stop ) {
+        final String orig = fetchBaseString(original, contig, start, stop);
+        final String keptCase = fetchBaseString(casePreserving, contig, start, stop);
+        final String upperCase = fetchBaseString(allUpper, contig, start, stop).toUpperCase();
+
+        final String origToUpper = orig.toUpperCase();
+        if ( ! orig.equals(origToUpper) ) {
+            Assert.assertEquals(keptCase, orig, "Case preserving operation not equal to the original case for contig " + contig);
+            Assert.assertEquals(upperCase, origToUpper, "All upper case reader not equal to the uppercase of original case for contig " + contig);
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    private String fetchBaseString(final IndexedFastaSequenceFile reader, final String contig, final int start, final int stop) {
+        if ( start == -1 )
+            return new String(reader.getSequence(contig).getBases());
+        else
+            return new String(reader.getSubsequenceAt(contig, start, stop).getBases());
+    }
 }
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala
@ -28,7 +28,7 @@ import org.testng.annotations.Test
 import org.broadinstitute.sting.BaseTest

 class DataProcessingPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testSimpleBAM {
    val projectName = "test1"
    val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
@ -45,7 +45,7 @@ class DataProcessingPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testBWAPEBAM {
    val projectName = "test2"
    val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala
@ -28,7 +28,7 @@ import org.testng.annotations.Test
 import org.broadinstitute.sting.BaseTest

 class PacbioProcessingPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testPacbioProcessingPipeline {
    val testOut = "exampleBAM.recal.bam"
    val spec = new PipelineTestSpec
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala
@ -53,7 +53,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest

 class DevNullOutputPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testDevNullOutput() {
    val spec = new PipelineTestSpec
    spec.name = "devnulloutput"
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala
@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest

 class ExampleCountLociPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testCountLoci() {
    val testOut = "count.out"
    val spec = new PipelineTestSpec
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala
@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest

 class ExampleCountReadsPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testCountReads() {
    val spec = new PipelineTestSpec
    spec.name = "countreads"
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala
@ -77,7 +77,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest

 class ExampleReadFilterPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testExampleReadFilter() {
    val spec = new PipelineTestSpec
    spec.name = "examplereadfilter"
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala
@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest

 class ExampleRetryMemoryLimitPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testRetryMemoryLimit() {
    val spec = new PipelineTestSpec
    spec.name = "RetryMemoryLimit"
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala
@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest

 class ExampleUnifiedGenotyperPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testUnifiedGenotyper() {
    val spec = new PipelineTestSpec
    spec.name = "unifiedgenotyper"
@ -51,7 +51,7 @@ class ExampleUnifiedGenotyperPipelineTest {
      Array("vcf_intervals", BaseTest.validationDataLocation + "intervalTest.1.vcf")
    ).asInstanceOf[Array[Array[Object]]]

-  @Test(dataProvider = "ugIntervals")
+  @Test(dataProvider = "ugIntervals", timeOut=36000000)
  def testUnifiedGenotyperWithIntervals(intervalsName: String, intervalsPath: String) {
    val spec = new PipelineTestSpec
    spec.name = "unifiedgenotyper_with_" + intervalsName
@ -64,7 +64,7 @@ class ExampleUnifiedGenotyperPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testUnifiedGenotyperNoGCOpt() {
    val spec = new PipelineTestSpec
    spec.name = "unifiedgenotyper_no_gc_opt"
@ -80,7 +80,7 @@ class ExampleUnifiedGenotyperPipelineTest {
  @DataProvider(name="resMemReqParams")
  def getResMemReqParam = Array(Array("mem_free"), Array("virtual_free")).asInstanceOf[Array[Array[Object]]]

-  @Test(dataProvider = "resMemReqParams")
+  @Test(dataProvider = "resMemReqParams", timeOut=36000000)
  def testUnifiedGenotyperResMemReqParam(reqParam: String) {
    val spec = new PipelineTestSpec
    spec.name = "unifiedgenotyper_" + reqParam
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala
@ -28,7 +28,7 @@ import org.testng.annotations.Test
 import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}

 class HelloWorldPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorld() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorld"
@ -37,7 +37,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithRunName() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldWithRunName"
@ -47,7 +47,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithMemoryLimit() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldMemoryLimit"
@ -57,7 +57,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithPriority() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldWithPriority"
@ -67,7 +67,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithLsfResource() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldWithLsfResource"
@ -77,7 +77,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithLsfResourceAndMemoryLimit() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldWithLsfResourceAndMemoryLimit"
@ -87,7 +87,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithLsfEnvironment() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldWithLsfEnvironment"
@ -97,7 +97,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithGridEngineResource() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldWithGridEngineResource"
@ -107,7 +107,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithGridEngineResourceAndMemoryLimit() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldWithGridEngineResourceAndMemoryLimit"
@ -117,7 +117,7 @@ class HelloWorldPipelineTest {
    PipelineTest.executeTest(spec)
  }

-  @Test
+  @Test(timeOut=36000000)
  def testHelloWorldWithGridEngineEnvironment() {
    val spec = new PipelineTestSpec
    spec.name = "HelloWorldWithGridEngineEnvironment"
--- a/settings/repository/org.broad/tribble-119.jar
+++ b/settings/repository/org.broad/tribble-119.jar
--- a/settings/repository/org.broad/tribble-119.xml
+++ b/settings/repository/org.broad/tribble-119.xml
@ -1,3 +1,3 @@
 <ivy-module version="1.0">
-    <info organisation="org.broad" module="tribble" revision="110" status="integration" />
+    <info organisation="org.broad" module="tribble" revision="119" status="integration" />
 </ivy-module>