Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Ami Levy-Moonshine 2012-12-01 00:08:25 -05:00
commit d0b8cc7773
79 changed files with 2277 additions and 595 deletions

View File

@ -185,10 +185,7 @@
<include name="**/*.class"/>
</fileset>
<patternset id="dependency.mask" includes="*.jar">
<exclude name="testng*.jar" />
<exclude name="bcel*.jar" />
</patternset>
<patternset id="dependency.mask" includes="*.jar" />
<path id="external.dependencies">
<fileset dir="${lib.dir}" erroronmissingdir="false">
@ -205,6 +202,16 @@
<pathelement location="${scala.classes}" />
</path>
<path id="build.results">
<!-- Ensure that GenomeAnalysisTK.jar comes first in the path, as it contains overrides for certain classes in our dependencies -->
<pathelement location="${dist.dir}/GenomeAnalysisTK.jar" />
<!-- After GenomeAnalysisTK.jar we include all of the other jars in the dist directory -->
<fileset dir="${dist.dir}" erroronmissingdir="false">
<patternset refid="dependency.mask" />
<exclude name="GenomeAnalysisTK.jar" />
</fileset>
</path>
<fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
<include name="**/*.java" />
</fileset>
@ -226,20 +233,20 @@
<!-- the path for resources that need to go into the GATK jar;
any additional resources should go into this set. -->
<path id="gatk.resources">
<fileset dir="${basedir}">
<include name="${java.public.source.dir}/**/templates/*" />
<include name="${java.private.source.dir}/**/templates/*" if="include.private" />
<include name="${java.protected.source.dir}/**/templates/*" if="include.protected" />
<fileset dir="${java.public.source.dir}">
<include name="**/resources/*" />
<include name="**/templates/*" />
</fileset>
<fileset dir="${java.private.source.dir}" erroronmissingdir="false">
<include name="**/resources/*" if="include.private" />
<include name="**/templates/*" if="include.private" />
</fileset>
<fileset dir="${java.protected.source.dir}" erroronmissingdir="false">
<include name="**/resources/*" if="include.protected" />
<include name="**/templates/*" if="include.protected" />
</fileset>
</path>
<path id="build.results">
<fileset dir="${dist.dir}">
<patternset refid="dependency.mask" />
</fileset>
</path>
<!-- ******************************************************************************** -->
<!-- Ivy Retrieve -->
<!-- ******************************************************************************** -->
@ -672,6 +679,24 @@
</jar>
</target>
<target name="na12878kb.jar" depends="gatk.compile,init.jar">
<!-- Builds a slim standalone jar for the NA12878 knowledge base: only the na12878kb core
     classes plus the handful of utility/exception classes they depend on, rather than the
     full GATK distribution. Requires gatk.compile so ${java.classes} is populated. -->
<jar jarfile="${dist.dir}/na12878kb.jar">
<fileset dir="${java.classes}">
<!-- minimal utility classes needed by the knowledge-base core -->
<include name="org/broadinstitute/sting/utils/GenomeLocParser*.class"/>
<include name="org/broadinstitute/sting/utils/GenomeLoc.class"/>
<include name="org/broadinstitute/sting/utils/HasGenomeLocation.class"/>
<include name="org/broadinstitute/sting/utils/BaseUtils.class"/>
<include name="org/broadinstitute/sting/utils/Utils.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**/*.class"/>
<!-- the knowledge-base implementation itself -->
<include name="org/broadinstitute/sting/gatk/walkers/na12878kb/core/**/*.class"/>
<include name="net/sf/picard/reference/FastaSequenceFile.class"/>
</fileset>
<!-- non-class resources are pulled straight from the (private) source tree,
     since they are not copied into ${java.classes} by compilation -->
<fileset dir="${java.private.source.dir}">
<include name="org/broadinstitute/sting/gatk/walkers/na12878kb/core/resources/**/*"/>
</fileset>
</jar>
</target>
<target name="gatk.jar" depends="gatk.compile, init.jar, R.script.stage" description="generate the GATK distribution">
<jar jarfile="${dist.dir}/GenomeAnalysisTK.jar">
<path refid="gatk.resources"/>
@ -1103,15 +1128,10 @@
</path>
<path id="testng.default.classpath">
<pathelement location="${java.classes}" />
<pathelement location="${scala.classes}" />
<path refid="build.results" />
<pathelement location="${java.contracts.dir}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${scala.test.classes}" />
<pathelement location="${R.tar.dir}" />
<path refid="R.script.source.path" />
<pathelement location="${key.dir}" />
<path refid="external.dependencies" />
</path>
<!-- Test targets -->
@ -1119,9 +1139,6 @@
<target name="test.init.compile">
<mkdir dir="${java.test.classes}"/>
<mkdir dir="${scala.test.classes}"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
</target>
<target name="test.java.internal.compile" depends="dist,test.init.compile">
@ -1129,10 +1146,8 @@
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src refid="java.test.source.path" />
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}"/>
<path refid="build.results" />
<pathelement location="${java.contracts.dir}"/>
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
</javac>
@ -1143,11 +1158,9 @@
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}" srcdir="${external.dir}">
<include name="*/test/**/*.java"/>
<classpath>
<path refid="external.dependencies" />
<path refid="build.results" />
<pathelement location="${java.test.classes}"/>
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts.dir}"/>
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
</javac>
@ -1160,9 +1173,8 @@
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
<src refid="scala.test.source.path" />
<classpath>
<path refid="scala.dependencies"/>
<path refid="build.results"/>
<pathelement location="${java.test.classes}"/>
<pathelement location="${testng.jar}"/>
</classpath>
</scalac>
</target>
@ -1369,14 +1381,13 @@
<!-- Fast test target that cuts major corners for speed. Requires that a full build has been done first. Java-only, single test class only -->
<!-- Usage: ant fasttest -Dsingle=TestClass -->
<target name="fasttest" depends="init.javaonly,init,test.init">
<target name="fasttest" depends="init.javaonly,init">
<condition property="not.clean">
<and>
<available file="${build.dir}" />
<available file="${lib.dir}" />
<available file="${dist.dir}" />
<available file="${java.test.classes}" />
<available file="${testng.jar}" />
</and>
</condition>
<fail message="fasttest requires a NON-CLEAN working directory (INCLUDING test classes). Do a full test build using ant test.compile first." unless="not.clean" />
@ -1394,13 +1405,27 @@
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src refid="java.test.source.path" />
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}"/>
<pathelement location="${testng.jar}"/>
<path refid="external.dependencies" />
</classpath>
<compilerarg value="-proc:none"/>
</javac>
<!-- fasttest uses the unpackaged class files in its test classpath to avoid having to rebuild the jars in dist/ -->
<path id="testng.fasttest.classpath">
<pathelement location="${java.classes}" />
<pathelement location="${scala.classes}" />
<pathelement location="${java.contracts.dir}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${scala.test.classes}" />
<pathelement location="${R.tar.dir}" />
<path refid="R.script.source.path" />
<pathelement location="${key.dir}" />
<path refid="external.dependencies" />
<path refid="java.source.path" /> <!-- Terrible hack to allow fasttest to see resource files stored in the source tree -->
</path>
<property name="testng.classpath" value="testng.fasttest.classpath" />
<run-test testtype="${single}" outputdir="${report}/${single}" runfailed="false"/>
</target>
</project>

11
ivy.xml
View File

@ -24,11 +24,8 @@
<ivy-module version="1.0">
<info organisation="org.broadinstitute" module="Sting"/>
<configurations defaultconfmapping="test->default">
<configurations>
<conf name="default" description="the core dependencies for the GATK"/>
<conf name="test" extends="default" description="external dependencies used for testing and metrics"/>
<conf name="scala" extends="default" description="the dependencies for scala"/>
<conf name="queue" extends="scala" description="the dependencies for Queue"/>
</configurations>
<dependencies defaultconf="default">
<dependency org="net.sf" name="sam" rev="latest.integration"/>
@ -83,9 +80,9 @@
<dependency org="org.scala-lang" name="scala-library" rev="2.9.2"/>
<!-- testing and evaluation dependencies -->
<dependency org="org.testng" name="testng" rev="5.14.1" conf="test"/>
<dependency org="org.uncommons" name="reportng" rev="1.1.2" conf="test"/>
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
<dependency org="org.testng" name="testng" rev="5.14.1"/>
<dependency org="org.uncommons" name="reportng" rev="1.1.2"/>
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT"/>
<!-- Contracts for Java and dependencies -->
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>

View File

@ -253,7 +253,6 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
intervalList.addAll(toolkit.getIntervals());
// todo -- rework the whole NO_PG_TAG thing
final boolean preSorted = true;
final boolean indexOnTheFly = true;
final boolean keep_records = true;

View File

@ -220,7 +220,6 @@ public class SlidingWindow {
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
}
// todo -- can be more aggressive here removing until the NEW window header start location after closing the variant regions
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
readsInWindow.pollFirst();
}
@ -607,9 +606,7 @@ public class SlidingWindow {
toRemove.add(read);
}
}
for (GATKSAMRecord read : toRemove) {
readsInWindow.remove(read);
}
removeReadsFromWindow(toRemove);
}
return allReads;
}
@ -805,9 +802,8 @@ public class SlidingWindow {
hetReads.add(finalizeRunningConsensus());
}
for (GATKSAMRecord read : toRemove) {
readsInWindow.remove(read);
}
removeReadsFromWindow(toRemove);
return hetReads;
}
@ -924,5 +920,11 @@ public class SlidingWindow {
}
}
}
/**
 * Removes every read in the supplied list from the window's active read set.
 *
 * @param readsToRemove the reads to drop from {@code readsInWindow}; may be empty
 */
private void removeReadsFromWindow (List<GATKSAMRecord> readsToRemove) {
    final int count = readsToRemove.size();
    for (int i = 0; i < count; i++) {
        readsInWindow.remove(readsToRemove.get(i));
    }
}
}

View File

@ -31,7 +31,6 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -42,163 +41,26 @@ import java.util.*;
public class GenotypingEngine {
private final boolean DEBUG;
private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
public GenotypingEngine( final boolean DEBUG ) {
this.DEBUG = DEBUG;
this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
noCall.add(Allele.NO_CALL);
}
// WARN
// This function is the streamlined approach, currently not being used by default
// WARN
// WARN: This function is currently only being used by Menachem. Slated for removal/merging with the rest of the code.
// WARN
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser ) {
// Prepare the list of haplotype indices to genotype
final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
for( final Haplotype h : haplotypes ) {
allelesToGenotype.add( Allele.create(h.getBases(), h.isReference()) );
}
final int numHaplotypes = haplotypes.size();
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, sample);
int glIndex = 0;
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
}
}
genotypes.add(new GenotypeBuilder(sample, noCall).PL(genotypeLikelihoods).make());
}
final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder().loc(activeRegionWindow).alleles(allelesToGenotype).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
if( call == null ) { return Collections.emptyList(); } // exact model says that the call confidence is below the specified confidence threshold so nothing to do here
// Prepare the list of haplotypes that need to be run through Smith-Waterman for output to VCF
final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
for( final Haplotype h : haplotypes ) {
if( call.getAllele(h.getBases()) == null ) { // exact model removed this allele from the list so no need to run SW and output to VCF
haplotypesToRemove.add(h);
}
}
haplotypes.removeAll(haplotypesToRemove);
if( OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
final List<Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>> returnVCs = new ArrayList<Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>>();
// set up the default 1-to-1 haplotype mapping object
final HashMap<Allele,ArrayList<Haplotype>> haplotypeMapping = new HashMap<Allele,ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.put(call.getAllele(h.getBases()), list);
}
returnVCs.add( new Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>(call,haplotypeMapping) );
return returnVCs;
}
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
int count = 0;
if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
for( final Haplotype h : haplotypes ) {
if( DEBUG ) {
System.out.println( h.toString() );
System.out.println( "> Cigar = " + h.getCigar() );
}
// Walk along the alignment and turn any difference from the reference into an event
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
startPosKeySet.addAll(h.getEventMap().keySet());
}
// Create the VC merge priority list
final ArrayList<String> priorityList = new ArrayList<String>();
for( int iii = 0; iii < haplotypes.size(); iii++ ) {
priorityList.add("HC" + iii);
}
// Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>();
for( final Haplotype h : haplotypes ) {
final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
final VariantContext vc = eventMap.get(loc);
if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
eventsAtThisLoc.add(vc);
}
}
// Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
// Merge the event to find a common reference representation
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
final HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
int aCount = 0;
for( final Allele a : mergedVC.getAlleles() ) {
alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
}
if( DEBUG ) {
System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
//System.out.println("Event/haplotype allele mapping = " + alleleMapper);
}
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
final GenotypesContext myGenotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
final int myNumHaplotypes = alleleMapper.size();
final double[] genotypeLikelihoods = new double[myNumHaplotypes * (myNumHaplotypes+1) / 2];
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper);
int glIndex = 0;
for( int iii = 0; iii < myNumHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
}
}
// using the allele mapping object translate the haplotype allele into the event allele
final Genotype g = new GenotypeBuilder(sample)
.alleles(findEventAllelesInSample(mergedVC.getAlleles(), call.getAlleles(), call.getGenotype(sample).getAlleles(), alleleMapper, haplotypes))
.phased(loc != startPosKeySet.first())
.PL(genotypeLikelihoods).make();
myGenotypes.add(g);
}
returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(
new VariantContextBuilder(mergedVC).log10PError(call.getLog10PError()).genotypes(myGenotypes).make(), alleleHashMap) );
}
}
return returnCalls;
}
// BUGBUG: Create a class to hold this complicated return type
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final ArrayList<VariantContext> activeAllelesToGenotype ) {
public List<Pair<VariantContext, Map<Allele, List<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final List<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final List<VariantContext> activeAllelesToGenotype ) {
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
final ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>>();
final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
@ -207,7 +69,7 @@ public class GenotypingEngine {
for( final Haplotype h : haplotypes ) {
// Walk along the alignment and turn any difference from the reference into an event
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
if( !in_GGA_mode ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
if( DEBUG ) {
System.out.println( h.toString() );
System.out.println( "> Cigar = " + h.getCigar() );
@ -217,10 +79,10 @@ public class GenotypingEngine {
}
cleanUpSymbolicUnassembledEvents( haplotypes );
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
if( !in_GGA_mode && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
}
if( !activeAllelesToGenotype.isEmpty() ) { // we are in GGA mode!
if( in_GGA_mode ) {
for( final VariantContext compVC : activeAllelesToGenotype ) {
startPosKeySet.add( compVC.getStart() );
}
@ -232,7 +94,7 @@ public class GenotypingEngine {
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
final ArrayList<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
if( activeAllelesToGenotype.isEmpty() ) {
if( !in_GGA_mode ) {
for( final Haplotype h : haplotypes ) {
final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
final VariantContext vc = eventMap.get(loc);
@ -261,7 +123,14 @@ public class GenotypingEngine {
if( eventsAtThisLoc.isEmpty() ) { continue; }
// Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
final Allele refAllele = eventsAtThisLoc.get(0).getReference();
final ArrayList<Allele> alleleOrdering = new ArrayList<Allele>(alleleMapper.size());
alleleOrdering.add(refAllele);
for( final VariantContext vc : eventsAtThisLoc ) {
alleleOrdering.add(vc.getAlternateAllele(0));
}
// Sanity check the priority list
for( final VariantContext vc : eventsAtThisLoc ) {
@ -283,11 +152,15 @@ public class GenotypingEngine {
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
if( mergedVC == null ) { continue; }
HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
int aCount = 0;
for( final Allele a : mergedVC.getAlleles() ) {
alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
// let's update the Allele keys in the mapper because they can change after merging when there are complex events
Map<Allele, List<Haplotype>> updatedAlleleMapper = new HashMap<Allele, List<Haplotype>>(alleleMapper.size());
for ( int i = 0; i < mergedVC.getNAlleles(); i++ ) {
final Allele oldAllele = alleleOrdering.get(i);
final Allele newAllele = mergedVC.getAlleles().get(i);
updatedAlleleMapper.put(newAllele, alleleMapper.get(oldAllele));
alleleOrdering.set(i, newAllele);
}
alleleMapper = updatedAlleleMapper;
if( DEBUG ) {
System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
@ -299,7 +172,7 @@ public class GenotypingEngine {
for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
final int numHaplotypes = alleleMapper.size();
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper);
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper, alleleOrdering);
int glIndex = 0;
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
@ -313,23 +186,23 @@ public class GenotypingEngine {
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
final VariantContext vcCallTrim = VariantContextUtils.reverseTrimAlleles(call);
// also, need to update the allele -> haplotype mapping
final HashMap<Allele, ArrayList<Haplotype>> alleleHashMapTrim = new HashMap<Allele, ArrayList<Haplotype>>();
final HashMap<Allele, List<Haplotype>> alleleHashMapTrim = new HashMap<Allele, List<Haplotype>>();
for( int iii = 0; iii < vcCallTrim.getAlleles().size(); iii++ ) { // BUGBUG: this is assuming that the original and trimmed alleles maintain the same ordering in the VC
alleleHashMapTrim.put(vcCallTrim.getAlleles().get(iii), alleleHashMap.get(call.getAlleles().get(iii)));
alleleHashMapTrim.put(vcCallTrim.getAlleles().get(iii), alleleMapper.get(call.getAlleles().get(iii)));
}
call = vcCallTrim;
alleleHashMap = alleleHashMapTrim;
alleleMapper = alleleHashMapTrim;
}
returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(call, alleleHashMap) );
returnCalls.add( new Pair<VariantContext, Map<Allele,List<Haplotype>>>(call, alleleMapper) );
}
}
}
return returnCalls;
}
protected static void cleanUpSymbolicUnassembledEvents( final ArrayList<Haplotype> haplotypes ) {
protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
for( final Haplotype h : haplotypes ) {
for( final VariantContext vc : h.getEventMap().values() ) {
@ -348,7 +221,7 @@ public class GenotypingEngine {
haplotypes.removeAll(haplotypesToRemove);
}
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
protected void mergeConsecutiveEventsBasedOnLD( final List<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
final int MAX_SIZE_TO_COMBINE = 15;
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
if( startPosKeySet.size() <= 1 ) { return; }
@ -395,7 +268,9 @@ public class GenotypingEngine {
final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
haplotypeList.add(h);
for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
final HashSet<String> sampleSet = new HashSet<String>(1);
sampleSet.add(sample);
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( sampleSet, haplotypeList )[0][0];
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
@ -489,37 +364,87 @@ public class GenotypingEngine {
@Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"})
@Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
protected static ArrayList<ArrayList<Haplotype>> createAlleleMapper( final int loc, final ArrayList<VariantContext> eventsAtThisLoc, final ArrayList<Haplotype> haplotypes ) {
final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
final ArrayList<Haplotype> refList = new ArrayList<Haplotype>();
protected static Map<Allele, List<Haplotype>> createAlleleMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
final Map<Allele, List<Haplotype>> alleleMapper = new HashMap<Allele, List<Haplotype>>(eventsAtThisLoc.size()+1);
final Allele refAllele = eventsAtThisLoc.get(0).getReference();
alleleMapper.put(refAllele, new ArrayList<Haplotype>());
for( final VariantContext vc : eventsAtThisLoc )
alleleMapper.put(vc.getAlternateAllele(0), new ArrayList<Haplotype>());
final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
for( final Haplotype h : haplotypes ) {
if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype
refList.add(h);
alleleMapper.get(refAllele).add(h);
} else if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
alleleMapper.get(h.getArtificialAllele()).add(h);
} else {
boolean foundInEventList = false;
boolean haplotypeIsDetermined = false;
for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
foundInEventList = true;
alleleMapper.get(vcAtThisLoc.getAlternateAllele(0)).add(h);
haplotypeIsDetermined = true;
break;
}
}
if( !foundInEventList ) { // event at this location isn't one of the genotype-able options (during GGA) so this is a reference-supporting haplotype
refList.add(h);
}
if( !haplotypeIsDetermined )
undeterminedHaplotypes.add(h);
}
}
alleleMapper.add(refList);
for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
for( final Haplotype h : haplotypes ) {
if( h.getEventMap().get(loc) != null && h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
list.add(h);
for( final Haplotype h : undeterminedHaplotypes ) {
Allele matchingAllele = null;
for( final Map.Entry<Allele, List<Haplotype>> alleleToTest : alleleMapper.entrySet() ) {
// don't test against the reference allele
if( alleleToTest.getKey().equals(refAllele) )
continue;
final Haplotype artificialHaplotype = alleleToTest.getValue().get(0);
if( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap(), true) ) {
matchingAllele = alleleToTest.getKey();
break;
}
}
alleleMapper.add(list);
if( matchingAllele == null )
matchingAllele = refAllele;
alleleMapper.get(matchingAllele).add(h);
}
return alleleMapper;
}
/**
 * Determines whether every event in {@code subset} is also present in {@code superset}.
 *
 * An event (keyed by start position) matches when the superset has an event at the same
 * position whose alternate alleles contain the subset event's first alternate allele.
 *
 * @param subset                   events that must all be represented in the superset
 * @param superset                 events to search within
 * @param resolveSupersetToSubset  if true, extend the superset's alternate alleles so they are
 *                                 expressed against the subset event's (possibly longer) reference
 *                                 before comparing
 * @return true iff every subset event is matched in the superset
 */
protected static boolean isSubSetOf(final Map<Integer, VariantContext> subset, final Map<Integer, VariantContext> superset, final boolean resolveSupersetToSubset) {
    for ( final Integer location : subset.keySet() ) {
        final VariantContext subsetEvent = subset.get(location);
        final VariantContext supersetEvent = superset.get(location);

        // nothing at this position in the superset -> cannot be a subset
        if ( supersetEvent == null ) {
            return false;
        }

        List<Allele> candidateAlleles = supersetEvent.getAlternateAlleles();
        if ( resolveSupersetToSubset ) {
            // re-express the superset's alt alleles against the subset event's reference
            candidateAlleles = resolveAlternateAlleles(subsetEvent.getReference(), supersetEvent.getReference(), candidateAlleles);
        }

        if ( !candidateAlleles.contains(subsetEvent.getAlternateAllele(0)) ) {
            return false;
        }
    }
    return true;
}
/**
 * Re-expresses a list of alleles against a longer reference allele.
 *
 * If {@code targetReference} is no longer than {@code actualReference} the input list is
 * returned unchanged; otherwise each allele is extended with the trailing reference bases
 * that {@code actualReference} is missing relative to {@code targetReference}.
 *
 * @param targetReference the reference allele the result should be expressed against
 * @param actualReference the reference allele the input alleles are currently expressed against
 * @param currentAlleles  the alleles to (possibly) extend
 * @return the input list unchanged, or a new list of extended alleles
 */
private static List<Allele> resolveAlternateAlleles(final Allele targetReference, final Allele actualReference, final List<Allele> currentAlleles) {
    final int targetLength = targetReference.length();
    final int actualLength = actualReference.length();

    // references already comparable; nothing to extend
    if ( targetLength <= actualLength ) {
        return currentAlleles;
    }

    // bases present in the target reference but absent from the actual one
    final byte[] suffix = Arrays.copyOfRange(targetReference.getBases(), actualLength, targetLength);
    final List<Allele> extended = new ArrayList<Allele>(currentAlleles.size());
    for ( final Allele allele : currentAlleles ) {
        extended.add(Allele.extend(allele, suffix));
    }
    return extended;
}
@Ensures({"result.size() == haplotypeAllelesForSample.size()"})
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final ArrayList<ArrayList<Haplotype>> alleleMapper, final ArrayList<Haplotype> haplotypes ) {
if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }

View File

@ -45,6 +45,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.*;
@ -131,14 +132,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
protected int MIN_PRUNE_FACTOR = 1;
@Advanced
@Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
@Advanced
@Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
@Advanced
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
protected int gcpHMM = 10;
@ -248,10 +241,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING );
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING );
simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
simpleUAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.exactCallsLog = null;
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
@ -273,15 +267,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
VCFConstants.GENOTYPE_QUALITY_KEY,
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_PL_KEY);
// header lines for the experimental HaplotypeCaller-specific annotations
headerInfo.add(new VCFInfoHeaderLine("NVH", 1, VCFHeaderLineType.Integer, "Number of variants found on the haplotype that contained this variant"));
headerInfo.add(new VCFInfoHeaderLine("NumHapEval", 1, VCFHeaderLineType.Integer, "Number of haplotypes that were chosen for evaluation in this active region"));
headerInfo.add(new VCFInfoHeaderLine("NumHapAssembly", 1, VCFHeaderLineType.Integer, "Number of haplotypes created during the assembly of this active region"));
headerInfo.add(new VCFInfoHeaderLine("ActiveRegionSize", 1, VCFHeaderLineType.Integer, "Number of base pairs that comprise this active region"));
headerInfo.add(new VCFInfoHeaderLine("EVENTLENGTH", 1, VCFHeaderLineType.Integer, "Max length of all the alternate alleles"));
headerInfo.add(new VCFInfoHeaderLine("TYPE", 1, VCFHeaderLineType.String, "Type of event: SNP or INDEL"));
headerInfo.add(new VCFInfoHeaderLine("extType", 1, VCFHeaderLineType.String, "Extended type of event: SNP, MNP, INDEL, or COMPLEX"));
headerInfo.add(new VCFInfoHeaderLine("QDE", 1, VCFHeaderLineType.Float, "QD value divided by the number of variants found on the haplotype that contained this variant"));
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
// where the filters are used. For example, in emitting all sites the lowQual field is used
@ -298,7 +283,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
genotypingEngine = new GenotypingEngine( DEBUG );
}
//---------------------------------------------------------------------------------------------------------------
@ -311,9 +296,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
// enable non primary reads in the active region
// enable non primary and extended reads in the active region
@Override
public boolean wantsNonPrimaryReads() { return true; }
public EnumSet<ActiveRegionReadState> desiredReadStates() {
return EnumSet.of(
ActiveRegionReadState.PRIMARY,
ActiveRegionReadState.NONPRIMARY,
ActiveRegionReadState.EXTENDED
);
}
@Override
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
@ -421,52 +412,13 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getExtendedLoc(), getToolkit().getGenomeLocParser() )
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
for( final Pair<VariantContext, Map<Allele, List<Haplotype>>> callResult :
genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) {
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
if( !GENOTYPE_FULL_ACTIVE_REGION ) {
// add some custom annotations to the calls
// Calculate the number of variants on the haplotype
int maxNumVar = 0;
for( final Allele allele : callResult.getFirst().getAlleles() ) {
if( !allele.isReference() ) {
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
final int numVar = haplotype.getEventMap().size();
if( numVar > maxNumVar ) { maxNumVar = numVar; }
}
}
}
// Calculate the event length
int maxLength = 0;
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
final int length = a.length() - annotatedCall.getReference().length();
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
}
myAttributes.put("NVH", maxNumVar);
myAttributes.put("NumHapEval", bestHaplotypes.size());
myAttributes.put("NumHapAssembly", haplotypes.size());
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
myAttributes.put("EVENTLENGTH", maxLength);
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
myAttributes.put("extType", annotatedCall.getType().toString() );
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
//}
if( annotatedCall.hasAttribute("QD") ) {
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
}
}
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
}
@ -522,6 +474,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
// protect against INTERVALS with abnormally high coverage
// BUGBUG: remove when positional downsampler is hooked up to ART/HC
if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
activeRegion.add(clippedRead);
}

View File

@ -148,45 +148,31 @@ public class LikelihoodCalculationEngine {
return Math.min(b1.length, b2.length);
}
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final String sample ) {
// set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.add(list);
}
return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
}
// This function takes just a single sample and a haplotypeMapping
@Requires({"haplotypeMapping.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final Map<Allele, List<Haplotype>> haplotypeMapping, final List<Allele> alleleOrdering ) {
final TreeSet<String> sampleSet = new TreeSet<String>();
sampleSet.add(sample);
return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping);
return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping, alleleOrdering);
}
// This function takes a set of samples to pool over and a haplotypeMapping
@Requires({"haplotypeMapping.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final Map<Allele, List<Haplotype>> haplotypeMapping, final List<Allele> alleleOrdering ) {
final int numHaplotypes = haplotypeMapping.size();
final int numHaplotypes = alleleOrdering.size();
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
for( int iii = 0; iii < numHaplotypes; iii++ ) {
Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
}
// compute the diploid haplotype likelihoods
// todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
for( final Haplotype iii_mapped : haplotypeMapping.get(alleleOrdering.get(iii)) ) {
for( final Haplotype jjj_mapped : haplotypeMapping.get(alleleOrdering.get(jjj)) ) {
double haplotypeLikelihood = 0.0;
for( final String sample : samples ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
@ -200,12 +186,48 @@ public class LikelihoodCalculationEngine {
}
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood);
}
}
}
}
}
// normalize the diploid likelihoods matrix
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
}
/**
 * Computes the diploid haplotype-pair likelihood matrix over an explicit haplotype list
 * (i.e. a 1-to-1 haplotype-to-allele mapping), pooling read likelihoods across all of the
 * supplied samples.
 *
 * Only the lower triangle (j <= i) is populated; the remaining cells stay at
 * Double.NEGATIVE_INFINITY until normalization.
 */
@Requires({"haplotypeList.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeList.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final List<Haplotype> haplotypeList ) {
    final int count = haplotypeList.size();
    final double[][] log10Likelihoods = new double[count][count];
    for( final double[] row : log10Likelihoods ) {
        Arrays.fill(row, Double.NEGATIVE_INFINITY);
    }

    // compute the diploid haplotype likelihoods
    // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
    for( int i = 0; i < count; i++ ) {
        final Haplotype hapI = haplotypeList.get(i);
        for( int j = 0; j <= i; j++ ) {
            final Haplotype hapJ = haplotypeList.get(j);
            double pairLog10Likelihood = 0.0;
            for( final String sample : samples ) {
                final double[] likelihoodsI = hapI.getReadLikelihoods(sample);
                final int[] countsI = hapI.getReadCounts(sample);
                final double[] likelihoodsJ = hapJ.getReadLikelihoods(sample);
                for( int k = 0; k < likelihoodsI.length; k++ ) {
                    // log10(10^x1/2 + 10^x2/2) = log10(10^x1 + 10^x2) - log10(2);
                    // the first term is approximated by the Jacobian log with a table lookup
                    pairLog10Likelihood += countsI[k] * ( MathUtils.approximateLog10SumLog10(likelihoodsI[k], likelihoodsJ[k]) + LOG_ONE_HALF );
                }
            }
            log10Likelihoods[i][j] = Math.max(log10Likelihoods[i][j], pairLog10Likelihood);
        }
    }

    // normalize the diploid likelihoods matrix
    return normalizeDiploidLikelihoodMatrixFromLog10( log10Likelihoods );
}
@Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"})
@ -296,14 +318,7 @@ public class LikelihoodCalculationEngine {
final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
// set up the default 1-to-1 haplotype mapping object
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.add(list);
}
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypes ); // all samples pooled together
int hap1 = 0;
int hap2 = 0;
@ -347,7 +362,7 @@ public class LikelihoodCalculationEngine {
public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser,
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList,
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call,
final Pair<VariantContext, Map<Allele,List<Haplotype>>> call,
final double downsamplingFraction,
final PrintStream downsamplingLog ) {
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();

View File

@ -278,9 +278,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength();
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype
// for GGA mode, add the desired allele into the haplotype
for( final VariantContext compVC : activeAllelesToGenotype ) {
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart());
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
if( !addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
return returnHaplotypes;
//throw new ReviewedStingException("Unable to add reference+allele haplotype during GGA-enabled assembly: " + insertedRefHaplotype);
@ -290,15 +291,24 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
// for GGA mode, add the desired allele into the haplotype if it isn't already present
if( !activeAllelesToGenotype.isEmpty() ) {
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
// This if statement used to additionally have:
// "|| !vcOnHaplotype.hasSameAllelesAs(compVC)"
// but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto
// a haplotype that already contains a 1bp insertion (so practically it is reference but
// falls into the bin for the 1bp deletion because we keep track of the artificial alleles).
if( vcOnHaplotype == null ) {
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
}
}
}
@ -369,6 +379,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
if ( haplotype.isArtificialHaplotype() )
h.setArtificialAllele(haplotype.getArtificialAllele(), haplotype.getArtificialAllelePosition());
h.leftBreakPoint = leftBreakPoint;
h.rightBreakPoint = rightBreakPoint;
if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures

View File

@ -51,16 +51,16 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "387b41dc2221a1a4a782958944662b25")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "b5e26902e76abbd59f94f65c70d18165")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "a8a9c3f83269911cb61c5fe8fb98dc4a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "f43a0473101c63ae93444c300d843e81")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "9e05e63339d4716584bfc717cab6bd0f")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "1cf9b9c9c64617dc0f3d2f203f918dbe")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "aa1949a77bc3066fee551a217c970c0d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "4fd3c9ad97e6ac58cba644a76564c9f7")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "2620f734cce20f70ce13afd880e46e5c")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "5eb3b94e767da19a4c037ee132e4b19a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab261d291b107a3da7897759c0e4fa89")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "292303f649fbb19dc05d4a0197a49eeb")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "8ced9d1094493f17fb1876b818a64541")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "abb838131e403d39820dbd66932d1ed0")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "f70d8b5358bc2f76696f14b7a807ede0")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4c0f63e06830681560a1e9f9aad9fe98")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "be2812cd3dae3c326cf35ae3f1c8ad9e")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "8f62aa0e75770204c98d8299793cc53c")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "03c29a0c1d21f72b12daf51cec111599")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "7080b2cad02ec6e67ebc766b2dccebf8")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "30e76055c16843b6e33e5b9bd8ced57c")},

View File

@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("704888987baacff8c7b273b8ab9938d0"));
Arrays.asList("d20c7a143b899f0239bf64b652ad3edb"));
executeTest("test Multiple SNP alleles", spec);
}
@ -197,7 +197,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "81fff490c0f59890f1e75dc290833434");
testOutputParameters("--output_mode EMIT_ALL_SITES", "8b26088a035e579c4afd3b46737291e4");
}
private void testOutputParameters(final String args, final String md5) {
@ -345,7 +345,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("a4761d7f25e7a62f34494801c98a0da7"));
Arrays.asList("69df7a00f800204564ca3726e1871132"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(

View File

@ -2,16 +2,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.*;
public class AFCalcResultUnitTest extends BaseTest {
private static class MyTest {
@ -79,4 +77,54 @@ public class AFCalcResultUnitTest extends BaseTest {
final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()};
Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision");
}
/**
 * Data provider for the isPolymorphic tests: emits (pNonRef, pThreshold, expected) triples built
 * from the Cartesian product of a set of perturbed probability values, skipping pairs where the
 * two probabilities are exactly equal (to avoid testing numerically-unstable ties).
 */
@DataProvider(name = "TestIsPolymorphic")
public Object[][] makeTestIsPolymorphic() {
    // build the candidate probabilities: a handful of base values, each nudged slightly up and down
    final List<Double> candidates = new ArrayList<Double>();
    for ( final double base : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) ) {
        for ( final double epsilon : Arrays.asList(-1e-5, 0.0, 1e-5) ) {
            candidates.add(base + epsilon);
        }
    }

    final List<Object[]> cases = new ArrayList<Object[]>();
    for ( final double pNonRef : candidates ) {
        for ( final double pThreshold : candidates ) {
            // let's not deal with numerical instability: skip exact ties
            if ( pNonRef == pThreshold )
                continue;
            cases.add(new Object[]{ pNonRef, pThreshold, pNonRef >= pThreshold });
        }
    }
    return cases.toArray(new Object[][]{});
}
/**
 * Builds an AFCalcResult whose (normalized, log10) posterior over {AF == 0, AF > 0} is
 * {1 - pNonRef, pNonRef}, for use by the isPolymorphic tests.
 *
 * NOTE(review): relies on the test fixture fields alleles, log10Even and C declared elsewhere
 * in this class.
 */
private AFCalcResult makePolymorphicTestData(final double pNonRef) {
    // posterior over {AF==0, AF>0}, normalized in log10 space
    final double[] log10Posteriors = MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false);
    return new AFCalcResult(
            new int[]{0},
            1,
            alleles,
            log10Posteriors,
            log10Even,
            Collections.singletonMap(C, Math.log10(pNonRef)));
}
/**
 * Verifies that AFCalcResult.isPolymorphic agrees with the expected comparison of pNonRef
 * against the log10-scaled threshold.
 *
 * Fix: the method was declared private; TestNG only invokes public @Test methods, so this
 * test was being silently skipped. Made public so it actually runs.
 */
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
public void testIsPolymorphic(final double pNonRef, final double pThreshold, final boolean shouldBePoly) {
    final AFCalcResult result = makePolymorphicTestData(pNonRef);
    final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(pThreshold));
    Assert.assertEquals(actualIsPoly, shouldBePoly,
            "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
                    + actualIsPoly + " but the expected result is " + shouldBePoly);
}
/**
 * Verifies that AFCalcResult.isPolymorphicPhredScaledQual agrees with the expected result when
 * the threshold is expressed as a phred-scaled quality instead of a probability.
 *
 * Fix: the method was declared private; TestNG only invokes public @Test methods, so this
 * test was being silently skipped. Made public so it actually runs.
 */
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
public void testIsPolymorphicQual(final double pNonRef, final double pThreshold, final boolean shouldBePoly) {
    final AFCalcResult result = makePolymorphicTestData(pNonRef);
    // convert the probability threshold into the phred-scaled quality the API expects
    final double qual = QualityUtils.phredScaleCorrectRate(pThreshold);
    final boolean actualIsPoly = result.isPolymorphicPhredScaledQual(C, qual);
    Assert.assertEquals(actualIsPoly, shouldBePoly,
            "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
                    + actualIsPoly + " but the expected result is " + shouldBePoly);
}
}

View File

@ -21,17 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "56aa4b84606b6b0b7dc78a383974d1b3");
HCTest(CEUTRIO_BAM, "", "2b39732ff8e0de5bc2ae949aaf7a6f21");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f");
HCTest(NA12878_BAM, "", "8b217638ff585effb9cc70e9a9aa544f");
}
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "39da622b309597d7a0b082c8aa1748c9");
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"541aa8291f03ba33bd1ad3d731fd5657");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -42,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd7170cbde7df04d4fbe1da7903c31c6");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -53,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "7fbc6b9e27e374f2ffe4be952d88c7c6");
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "99456fc7207c1fe9f367a0d0afae87cd");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -64,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6c1631785b3f832aecab1a99f0454762");
}
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("788176e1717bd28fc7cbc8e3efbb6100"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ec437d2d9f3ae07d155983be0155c8ed"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("96ab8253d242b851ccfc218759f79784"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("237601bbc39694c7413a332cbb656c8e"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -91,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("425f1a0fb00d7145edf1c55e54346fae"));
Arrays.asList("40bf739fb2b1743642498efe79ea6342"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
}

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@ -102,7 +101,9 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest {
haplotypes.add(haplotype);
}
}
return LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, "myTestSample");
final HashSet<String> sampleSet = new HashSet<String>(1);
sampleSet.add("myTestSample");
return LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sampleSet, haplotypes);
}
}

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter;
import org.broadinstitute.sting.utils.help.HelpUtils;
import java.io.IOException;
import java.util.*;
@ -288,7 +289,7 @@ public abstract class CommandLineProgram {
*/
private static void printDocumentationReference() {
errorPrintf("Visit our website and forum for extensive documentation and answers to %n");
errorPrintf("commonly asked questions http://www.broadinstitute.org/gatk%n");
errorPrintf("commonly asked questions " + HelpUtils.BASE_GATK_URL + "%n");
}

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*;
@ -118,17 +119,24 @@ public class CommandLineGATK extends CommandLineExecutable {
public static final String DISK_QUOTA_EXCEEDED_ERROR = "Disk quota exceeded";
private static void checkForMaskedUserErrors(final Throwable t) {
// masked out of memory error
if ( t instanceof OutOfMemoryError )
exitSystemWithUserError(new UserException.NotEnoughMemory());
// masked user error
if ( t instanceof UserException || t instanceof TribbleException )
exitSystemWithUserError(new UserException(t.getMessage()));
// no message means no masked error
final String message = t.getMessage();
if ( message == null )
return;
// we know what to do about the common "Too many open files" error
// too many open files error
if ( message.contains("Too many open files") )
exitSystemWithUserError(new UserException.TooManyOpenFiles());
// malformed BAM looks like a SAM file
if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) ||
message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
exitSystemWithSamError(t);
// can't close tribble index when writing
@ -138,12 +146,10 @@ public class CommandLineGATK extends CommandLineExecutable {
// disk is full
if ( message.contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || message.contains(DISK_QUOTA_EXCEEDED_ERROR) )
exitSystemWithUserError(new UserException.NoSpaceOnDevice());
if ( t.getCause() != null && (t.getCause().getMessage().contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || t.getCause().getMessage().contains(DISK_QUOTA_EXCEEDED_ERROR)) )
exitSystemWithUserError(new UserException.NoSpaceOnDevice());
// masked out of memory error
if ( t.getCause() != null && t.getCause() instanceof OutOfMemoryError )
exitSystemWithUserError(new UserException.NotEnoughMemory());
// masked error wrapped in another one
if ( t.getCause() != null )
checkForMaskedUserErrors(t.getCause());
}
/**
@ -155,7 +161,7 @@ public class CommandLineGATK extends CommandLineExecutable {
List<String> header = new ArrayList<String>();
header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime()));
header.add("Copyright (c) 2010 The Broad Institute");
header.add("For support and documentation go to http://www.broadinstitute.org/gatk");
header.add("For support and documentation go to " + HelpUtils.BASE_GATK_URL);
return header;
}

View File

@ -30,12 +30,10 @@ import net.sf.samtools.*;
import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.RuntimeIOException;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.downsampling.*;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.downsampling.*;
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.iterators.*;
@ -567,7 +565,7 @@ public class SAMDataSource {
*
* @return the start positions of the first chunk of reads for all BAM files
*/
public Map<SAMReaderID, GATKBAMFileSpan> getInitialReaderPositions() {
protected Map<SAMReaderID, GATKBAMFileSpan> getInitialReaderPositions() {
Map<SAMReaderID, GATKBAMFileSpan> initialPositions = new HashMap<SAMReaderID, GATKBAMFileSpan>();
SAMReaders readers = resourcePool.getAvailableReaders();
@ -585,7 +583,7 @@ public class SAMDataSource {
* @param shard The shard specifying the data limits.
* @return An iterator over the selected data.
*/
public StingSAMIterator getIterator( Shard shard ) {
protected StingSAMIterator getIterator( Shard shard ) {
return getIterator(resourcePool.getAvailableReaders(), shard, shard instanceof ReadShard);
}

View File

@ -25,11 +25,9 @@
package org.broadinstitute.sting.gatk.filters;
import com.google.common.base.Function;
import com.google.common.collect.Collections2;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.HelpUtils;
import java.util.Collection;
import java.util.List;
@ -73,7 +71,7 @@ public class FilterManager extends PluginManager<ReadFilter> {
return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
userFriendlyListofReadFilters(availableFilters),
"Please consult the GATK Documentation (http://www.broadinstitute.org/gatk/gatkdocs/) for more information.");
"Please consult the GATK Documentation (" + HelpUtils.GATK_DOCS_URL + ") for more information.");
}
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {

View File

@ -271,7 +271,18 @@ public class GATKReport {
* @return a simplified GATK report
*/
public static GATKReport newSimpleReport(final String tableName, final String... columns) {
GATKReportTable table = new GATKReportTable(tableName, "A simplified GATK table report", columns.length);
return newSimpleReportWithDescription(tableName, "A simplified GATK table report", columns);
}
/**
* @see #newSimpleReport(String, String...) but with a customized description
* @param tableName
* @param desc
* @param columns
* @return
*/
public static GATKReport newSimpleReportWithDescription(final String tableName, final String desc, final String... columns) {
GATKReportTable table = new GATKReportTable(tableName, desc, columns.length);
for (String column : columns) {
table.addColumn(column, "");

View File

@ -80,6 +80,9 @@ public enum GATKReportVersion {
* @return The version as an enum.
*/
public static GATKReportVersion fromHeader(String header) {
if ( header == null )
throw new UserException.BadInput("The GATK report has no version specified in the header");
if (header.startsWith("##:GATKReport.v0.1 "))
return GATKReportVersion.V0_1;

View File

@ -34,9 +34,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
// package access for unit testing
ActivityProfile profile;
@Override
public String getTraversalUnits() {
return "active regions";
@ -56,7 +53,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
int minStart = Integer.MAX_VALUE;
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
@ -83,7 +80,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
}
// skip this location -- it's not part of our engine intervals
// TODO -- this is dangerously slow with current overlaps implementation : GSA-649 / GenomeLocSortedSet.overlaps is crazy slow
if ( outsideEngineIntervals(location) )
continue;
@ -262,13 +258,23 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
activeRegion.add( read );
}
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
otherRegionToTest.add( read );
if( !bestRegion.equals(otherRegionToTest) ) {
// check for non-primary vs. extended
if ( otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
otherRegionToTest.add( read );
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
otherRegionToTest.add( read );
}
}
}
}
placedReads.add( read );
} else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) && walker.wantsNonPrimaryReads() ) {
// check for non-primary vs. extended
} else if( activeRegion.getLocation().overlapsP( readLoc ) ) {
if ( walker.wantsNonPrimaryReads() ) {
activeRegion.add( read );
}
} else if( walker.wantsExtendedReads() && activeRegion.getExtendedLoc().overlapsP( readLoc )) {
activeRegion.add( read );
}
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
@ -13,14 +14,14 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* Base class for all the Active Region Walkers.
@ -70,11 +71,24 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
return true; // We are keeping all the reads
}
public boolean wantsNonPrimaryReads() {
return false;
public EnumSet<ActiveRegionReadState> desiredReadStates() {
return EnumSet.of(ActiveRegionReadState.PRIMARY);
}
public final boolean wantsNonPrimaryReads() {
return desiredReadStates().contains(ActiveRegionReadState.NONPRIMARY);
}
public boolean wantsExtendedReads() {
return desiredReadStates().contains(ActiveRegionReadState.EXTENDED);
}
public boolean wantsUnmappedReads() {
return desiredReadStates().contains(ActiveRegionReadState.UNMAPPED);
}
// Determine probability of active status over the AlignmentContext
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion

View File

@ -29,7 +29,7 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
final List<Double> refQuals, final List<Double> altQuals) {
if (pileup != null && likelihoodMap == null) {
// no per-read likelihoods available:
// old UG snp-only path through the annotations
for ( final PileupElement p : pileup ) {
if ( isUsableBase(p) ) {
if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) {
@ -43,14 +43,13 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
}
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
// BUGBUG: There needs to be a comparable isUsableBase check here
if (a.isNoCall())
continue; // read is non-informative
if (a.isReference())
refQuals.add((double)el.getKey().getMappingQuality());
else if (allAlleles.contains(a))
altQuals.add((double)el.getKey().getMappingQuality());
}
}

View File

@ -49,7 +49,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
ReadBackedPileup pileup = null;
if (stratifiedContexts != null) {
if (stratifiedContexts != null) { // the old UG SNP-only path through the annotations
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
if ( context != null )
pileup = context.getBasePileup();

View File

@ -39,7 +39,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
final List<Double> refQuals, final List<Double> altQuals) {
if (alleleLikelihoodMap == null) {
// use fast SNP-based version if we don't have per-read allele likelihoods
// use old UG SNP-based version if we don't have per-read allele likelihoods
for ( final PileupElement p : pileup ) {
if ( isUsableBase(p) ) {
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);

View File

@ -82,7 +82,7 @@ import java.util.*;
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@Reference(window=@Window(start=-50,stop=50))
@By(DataSource.REFERENCE)
public class VariantAnnotator extends RodWalker<Integer, Integer> implements AnnotatorCompatible {
public class VariantAnnotator extends RodWalker<Integer, Integer> implements AnnotatorCompatible, TreeReducible<Integer> {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@ -275,14 +275,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
return true;
}
/**
* Initialize the number of loci processed to zero.
*
* @return 0
*/
public Integer reduceInit() { return 0; }
/**
* We want reads that span deletions
*
@ -323,15 +315,15 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
return 1;
}
/**
* Increment the number of loci processed.
*
* @param value result of the map.
* @param sum accumulator for the reduce.
* @return the new number of loci processed.
*/
public Integer reduce(Integer value, Integer sum) {
return sum + value;
@Override
public Integer reduceInit() { return 0; }
@Override
public Integer reduce(Integer value, Integer sum) { return value + sum; }
@Override
public Integer treeReduce(Integer lhs, Integer rhs) {
return lhs + rhs;
}
/**

View File

@ -227,7 +227,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
*/
public Long map( final ReferenceContext ref, final GATKSAMRecord originalRead, final RefMetaDataTracker metaDataTracker ) {
final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(originalRead);
final GATKSAMRecord read = ReadClipper.hardClipSoftClippedBases( ReadClipper.hardClipAdaptorSequence(originalRead) );
if( read.isEmpty() ) { return 0L; } // the whole read was inside the adaptor so skip it
RecalUtils.parsePlatformForRead(read, RAC);
@ -268,16 +268,25 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
}
protected boolean[] calculateKnownSites( final GATKSAMRecord read, final List<Feature> features ) {
final int BUFFER_SIZE = 0;
final int readLength = read.getReadBases().length;
final boolean[] knownSites = new boolean[readLength];
Arrays.fill(knownSites, false);
for( final Feature f : features ) {
int featureStartOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getStart(), ReadUtils.ClippingTail.LEFT_TAIL, true); // BUGBUG: should I use LEFT_TAIL here?
if( featureStartOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureStartOnRead = 0; }
if( featureStartOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
featureStartOnRead = 0;
}
int featureEndOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getEnd(), ReadUtils.ClippingTail.LEFT_TAIL, true);
if( featureEndOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureEndOnRead = readLength; }
Arrays.fill(knownSites, Math.max(0, featureStartOnRead - BUFFER_SIZE), Math.min(readLength, featureEndOnRead + 1 + BUFFER_SIZE), true);
if( featureEndOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
featureEndOnRead = readLength;
}
if( featureStartOnRead > readLength ) {
featureStartOnRead = featureEndOnRead = readLength;
}
Arrays.fill(knownSites, Math.max(0, featureStartOnRead), Math.min(readLength, featureEndOnRead + 1), true);
}
return knownSites;
}

View File

@ -102,13 +102,10 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false)
public boolean DO_NOT_USE_STANDARD_COVARIATES = false;
/////////////////////////////
// Debugging-only Arguments
/////////////////////////////
/**
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
*/
@Hidden
@Advanced
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
public boolean RUN_WITHOUT_DBSNP = false;
@ -139,6 +136,13 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false)
public int INDELS_CONTEXT_SIZE = 3;
/**
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
* This argument is ignored if the Cycle covariate is not used.
*/
@Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "the maximum cycle value permitted for the Cycle covariate", required = false)
public int MAXIMUM_CYCLE_VALUE = 500;
/**
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
*/
@ -176,9 +180,15 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
public String BINARY_TAG_NAME = null;
/////////////////////////////
// Debugging-only Arguments
/////////////////////////////
@Hidden
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
public String DEFAULT_PLATFORM = null;
@Hidden
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;

View File

@ -6,11 +6,10 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.*;
/**
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
@ -20,6 +19,21 @@ import java.util.Map;
*/
public class CoverageUtils {
public enum CountPileupType {
/**
* Count all reads independently (even if from the same fragment).
*/
COUNT_READS,
/**
* Count all fragments (even if the reads that compose the fragment are not consistent at that base).
*/
COUNT_FRAGMENTS,
/**
* Count all fragments (but only if the reads that compose the fragment are consistent at that base).
*/
COUNT_FRAGMENTS_REQUIRE_SAME_BASE
}
/**
* Returns the counts of bases from reads with MAPQ > minMapQ and base quality > minBaseQ in the context
* as an array of ints, indexed by the index fields of BaseUtils
@ -64,10 +78,10 @@ public class CoverageUtils {
}
public static Map<DoCOutputType.Partition,Map<String,int[]>>
getBaseCountsByPartition(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, Collection<DoCOutputType.Partition> types) {
getBaseCountsByPartition(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, CountPileupType countType, Collection<DoCOutputType.Partition> types) {
Map<DoCOutputType.Partition,Map<String,int[]>> countsByIDByType = new HashMap<DoCOutputType.Partition,Map<String,int[]>>();
Map<SAMReadGroupRecord,int[]> countsByRG = getBaseCountsByReadGroup(context,minMapQ,maxMapQ,minBaseQ,maxBaseQ);
Map<SAMReadGroupRecord,int[]> countsByRG = getBaseCountsByReadGroup(context,minMapQ,maxMapQ,minBaseQ,maxBaseQ,countType);
for (DoCOutputType.Partition t : types ) {
// iterate through the read group counts and build the type associations
for ( Map.Entry<SAMReadGroupRecord,int[]> readGroupCountEntry : countsByRG.entrySet() ) {
@ -95,31 +109,95 @@ public class CoverageUtils {
}
}
public static Map<SAMReadGroupRecord,int[]> getBaseCountsByReadGroup(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ) {
public static Map<SAMReadGroupRecord,int[]> getBaseCountsByReadGroup(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, CountPileupType countType) {
Map<SAMReadGroupRecord, int[]> countsByRG = new HashMap<SAMReadGroupRecord,int[]>();
for ( PileupElement e : context.getBasePileup() ) {
if ( e.getMappingQual() >= minMapQ && e.getMappingQual() <= maxMapQ && ( e.getQual() >= minBaseQ && e.getQual() <= maxBaseQ || e.isDeletion() ) ) {
SAMReadGroupRecord readGroup = getReadGroup(e.getRead());
if ( ! countsByRG.keySet().contains(readGroup) ) {
countsByRG.put(readGroup,new int[6]);
updateCounts(countsByRG.get(readGroup),e);
} else {
updateCounts(countsByRG.get(readGroup),e);
List<PileupElement> countPileup = new LinkedList<PileupElement>();
FragmentCollection<PileupElement> fpile;
switch (countType) {
case COUNT_READS:
for (PileupElement e : context.getBasePileup())
if (countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ))
countPileup.add(e);
break;
case COUNT_FRAGMENTS: // ignore base identities and put in FIRST base that passes filters:
fpile = context.getBasePileup().getStartSortedPileup().toFragments();
for (PileupElement e : fpile.getSingletonReads())
if (countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ))
countPileup.add(e);
for (List<PileupElement> overlappingPair : fpile.getOverlappingPairs()) {
// iterate over all elements in fragment:
for (PileupElement e : overlappingPair) {
if (countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ)) {
countPileup.add(e); // add the first passing element per fragment
break;
}
}
}
}
break;
case COUNT_FRAGMENTS_REQUIRE_SAME_BASE:
fpile = context.getBasePileup().getStartSortedPileup().toFragments();
for (PileupElement e : fpile.getSingletonReads())
if (countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ))
countPileup.add(e);
for (List<PileupElement> overlappingPair : fpile.getOverlappingPairs()) {
PileupElement firstElem = null;
PileupElement addElem = null;
// iterate over all elements in fragment:
for (PileupElement e : overlappingPair) {
if (firstElem == null)
firstElem = e;
else if (e.getBase() != firstElem.getBase()) {
addElem = null;
break;
}
// will add the first passing element per base-consistent fragment:
if (addElem == null && countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ))
addElem = e;
}
if (addElem != null)
countPileup.add(addElem);
}
break;
default:
throw new UserException("Must use valid CountPileupType");
}
for (PileupElement e : countPileup) {
SAMReadGroupRecord readGroup = getReadGroup(e.getRead());
if (!countsByRG.keySet().contains(readGroup))
countsByRG.put(readGroup, new int[6]);
updateCounts(countsByRG.get(readGroup), e);
}
return countsByRG;
}
/**
 * Decides whether a single pileup element should contribute to coverage counts.
 *
 * An element is counted when its mapping quality lies within [minMapQ, maxMapQ]
 * AND either its base quality lies within [minBaseQ, maxBaseQ] or the element
 * is a deletion (deletions carry no base quality, so they bypass the base-quality
 * window but are still subject to the mapping-quality window).
 *
 * @param e        the pileup element under consideration
 * @param minMapQ  inclusive lower bound on mapping quality
 * @param maxMapQ  inclusive upper bound on mapping quality
 * @param minBaseQ inclusive lower bound on base quality (ignored for deletions)
 * @param maxBaseQ inclusive upper bound on base quality (ignored for deletions)
 * @return true if the element passes both quality windows
 */
private static boolean countElement(PileupElement e, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ) {
    final int mappingQual = e.getMappingQual();
    final boolean mappingQualInWindow = mappingQual >= minMapQ && mappingQual <= maxMapQ;
    if ( !mappingQualInWindow )
        return false;
    // Deletions have no base quality; they pass the base-quality check unconditionally.
    final int baseQual = e.getQual();
    return ( baseQual >= minBaseQ && baseQual <= maxBaseQ ) || e.isDeletion();
}
private static void updateCounts(int[] counts, PileupElement e) {
if ( e.isDeletion() ) {
counts[BaseUtils.DELETION_INDEX]++;
counts[BaseUtils.DELETION_INDEX] += e.getRepresentativeCount();
} else if ( BaseUtils.basesAreEqual((byte) 'N', e.getBase()) ) {
counts[BaseUtils.NO_CALL_INDEX]++;
counts[BaseUtils.NO_CALL_INDEX] += e.getRepresentativeCount();
} else {
try {
counts[BaseUtils.simpleBaseToBaseIndex(e.getBase())]++;
counts[BaseUtils.simpleBaseToBaseIndex(e.getBase())] += e.getRepresentativeCount();
} catch (ArrayIndexOutOfBoundsException exc) {
throw new ReviewedStingException("Expected a simple base, but actually received"+(char)e.getBase());
}

View File

@ -129,11 +129,15 @@ public class DepthOfCoverage extends LocusWalker<Map<DoCOutputType.Partition,Map
int minMappingQuality = -1;
@Argument(fullName = "maxMappingQuality", doc = "Maximum mapping quality of reads to count towards depth. Defaults to 2^31-1 (Integer.MAX_VALUE).", required = false)
int maxMappingQuality = Integer.MAX_VALUE;
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to -1.", required = false)
byte minBaseQuality = -1;
@Argument(fullName = "maxBaseQuality", doc = "Maximum quality of bases to count towards depth. Defaults to 127 (Byte.MAX_VALUE).", required = false)
byte maxBaseQuality = Byte.MAX_VALUE;
@Argument(fullName = "countType", doc = "How should overlapping reads from the same fragment be handled?", required = false)
CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_READS;
/**
* Instead of reporting depth, report the base pileup at each locus
*/
@ -373,7 +377,7 @@ public class DepthOfCoverage extends LocusWalker<Map<DoCOutputType.Partition,Map
//System.out.printf("\t[log]\t%s",ref.getLocus());
}
return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,partitionTypes);
return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,countType,partitionTypes);
} else {
return null;
}

View File

@ -282,6 +282,7 @@ public class DiffEngine {
// now that we have a specific list of values we want to show, display them
GATKReport report = new GATKReport();
final String tableName = "differences";
// TODO for Geraldine -- link needs to be updated below
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3);
final GATKReportTable table = report.getTable(tableName);
table.addColumn("Difference");

View File

@ -138,7 +138,8 @@ public class DiffObjects extends RodWalker<Integer, Integer> {
/**
* Writes out a file of the DiffEngine format:
*
* http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine
* TODO for Geraldine -- link needs to be updated below (and also in SelectVariants and RefSeqCodec GATK docs)
* http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine
*/
@Output(doc="File to which results should be written",required=true)
protected PrintStream out;

View File

@ -47,6 +47,12 @@ import java.util.List;
* <p>
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
*
* The output format can be partially controlled using the provided command-line arguments.
* Specify intervals with the usual -L argument to output only the reference bases within your intervals.
* Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
* separate fasta sequence (named numerically in order).
*
* Several important notes:
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.

View File

@ -382,11 +382,9 @@ public class UnifiedGenotyperEngine {
if ( alternateAllele.isReference() )
continue;
// we are non-ref if the probability of being non-ref > the emit confidence.
// the emit confidence is phred-scaled, say 30 => 10^-3.
// the posterior AF > 0 is log10: -5 => 10^-5
// we are non-ref if 10^-5 < 10^-3 => -5 < -3
final boolean isNonRef = AFresult.isPolymorphic(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0);
// Compute if the site is considered polymorphic with sufficient confidence relative to our
// phred-scaled emission QUAL
final boolean isNonRef = AFresult.isPolymorphicPhredScaledQual(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
// if the most likely AC is not 0, then this is a good alternate allele to use
if ( isNonRef ) {

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -234,10 +235,20 @@ public class AFCalcResult {
*
* @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0
*/
@Requires("MathUtils.goodLog10Probability(log10minPNonRef)")
public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
}
    /**
     * Same as #isPolymorphic but takes a phred-scaled quality score as input.
     *
     * @param allele the alternate allele to test for polymorphism
     * @param minPNonRefPhredScaledQual the emission confidence threshold as a
     *        phred-scaled quality score (e.g., 30 => p = 0.999); must be >= 0
     * @return true if there's enough confidence to reject AF == 0 for allele
     * @throws IllegalArgumentException if minPNonRefPhredScaledQual is negative
     */
    public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) {
        if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 ");
        // convert the phred-scaled qual into the log10 probability threshold expected by #isPolymorphic
        final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual));
        return isPolymorphic(allele, log10Threshold);
    }
/**
* Are any of the alleles polymorphic w.r.t. #isPolymorphic?
*

View File

@ -95,7 +95,8 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
private static final boolean DEBUG = false;
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information (if -l DEBUG is also specified)", required = false)
protected boolean DEBUG = false;
/**
* The VCF file we are phasing variants from.
*
@ -949,7 +950,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
}
if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, DEBUG);
double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
if (DEBUG)
@ -971,7 +972,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
// Marginalize each haplotype to its first 2 positions:
hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
if (DEBUG && printDebug)
if (printDebug)
logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
calculateMaxHapAndPhasingQuality(hapTable, printDebug);
@ -981,7 +982,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
private void calculateMaxHapAndPhasingQuality(PhasingTable hapTable, boolean printDebug) {
hapTable.normalizeScores();
if (DEBUG && printDebug)
if (printDebug)
logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
// Determine the phase at this position:

View File

@ -13,7 +13,7 @@ import java.util.Set;
/**
* Stratifies the eval RODs by user-supplied JEXL expressions
*
* See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details
* See http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk for more details
*/
public class JexlExpression extends VariantStratifier implements StandardStratification {
// needs to know the jexl expressions

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -80,7 +81,7 @@ public class VariantDataManager {
final double theSTD = standardDeviation(theMean, iii);
logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
if( Double.isNaN(theMean) ) {
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See http://www.broadinstitute.org/gsa/wiki/index.php/VariantAnnotator");
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpUtils.GATK_FORUM_URL + "discussion/49/using-variant-annotator");
}
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);

View File

@ -151,14 +151,6 @@ import java.util.*;
* -mvq 50 \
* -o violations.vcf
*
* Creating a sample of exactly 1000 variants randomly chosen with equal probability from the variant VCF:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T SelectVariants \
* --variant input.vcf \
* -o output.vcf \
* -number 1000
*
* Creating a set with 50% of the total number of variants in the variant VCF:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \

View File

@ -51,8 +51,7 @@ import java.util.*;
* The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes).
* The tool produces a VCF that is annotated with information pertaining to plate quality control and by
* default is soft-filtered by high no-call rate or low Hardy-Weinberg probability.
* If you have .ped files, please first convert them to VCF format
* (see http://www.broadinstitute.org/gsa/wiki/index.php/Converting_ped_to_vcf).
* If you have .ped files, please first convert them to VCF format.
*
* <h2>Input</h2>
* <p>

View File

@ -210,13 +210,23 @@ public class JnaSession implements Session {
}
public static void setAttribute(Pointer jt, String name, String value) throws DrmaaException {
checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
if (getAttrNames().contains(name)) {
checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
}
else {
throw new InvalidAttributeValueException("Attribute " + name + " is not supported by this implementation of DRMAA");
}
}
public static String getAttribute(Pointer jt, String name) throws DrmaaException {
Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
return attrBuffer.getString(0);
if (getAttrNames().contains(name)) {
Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
return attrBuffer.getString(0);
}
else {
throw new InvalidAttributeValueException("Attribute " + name + " is not supported by this implementation of DRMAA");
}
}
public static void setVectorAttribute(Pointer jt, String name, Collection<String> values) throws DrmaaException {

View File

@ -315,6 +315,20 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() ));
}
/**
* Tests whether this genome loc starts at the same position as that.
*
* i.e., do this and that have the same contig and the same start position
*
* @param that genome loc to compare to
* @return true if this and that have the same contig and the same start position
*/
@Requires("that != null")
public final boolean startsAt( GenomeLoc that ) {
int comparison = this.compareContigs(that);
return comparison == 0 && this.getStart() == that.getStart();
}
/**
* Tests whether any portion of this contig is before that contig.
* @param that Other contig to test.

View File

@ -374,7 +374,7 @@ public final class GenomeLocParser {
int start = 1;
int stop = -1;
final int colonIndex = str.indexOf(":");
final int colonIndex = str.lastIndexOf(":");
if(colonIndex == -1) {
contig = str.substring(0, str.length()); // chr1
stop = Integer.MAX_VALUE;

View File

@ -43,6 +43,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
// our private storage for the GenomeLoc's
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
// cache this to make overlap checking much more efficient
private int previousOverlapSearchIndex = -1;
/** default constructor */
public GenomeLocSortedSet(GenomeLocParser parser) {
this.genomeLocParser = parser;
@ -101,7 +104,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
* Return the number of bps before loc in the sorted set
*
* @param loc the location before which we are counting bases
* @return
* @return the number of base pairs over all previous intervals
*/
public long sizeBeforeLoc(GenomeLoc loc) {
long s = 0;
@ -110,7 +113,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
if ( e.isBefore(loc) )
s += e.size();
else if ( e.isPast(loc) )
; // don't do anything
break; // we are done
else // loc is inside of s
s += loc.getStart() - e.getStart();
}
@ -131,15 +134,43 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
* Determine if the given loc overlaps any loc in the sorted set
*
* @param loc the location to test
* @return
* @return true if the location overlaps any loc
*/
public boolean overlaps(final GenomeLoc loc) {
for(final GenomeLoc e : mArray) {
if(e.overlapsP(loc)) {
return true;
}
// edge condition
if ( mArray.isEmpty() )
return false;
// use the cached version first
if ( previousOverlapSearchIndex != -1 && overlapsAtOrImmediatelyAfterCachedIndex(loc, true) )
return true;
// update the cached index
previousOverlapSearchIndex = Collections.binarySearch(mArray, loc);
// if it matches an interval exactly, we are done
if ( previousOverlapSearchIndex >= 0 )
return true;
// check whether it overlaps the interval before or after the insertion point
previousOverlapSearchIndex = Math.max(0, -1 * previousOverlapSearchIndex - 2);
return overlapsAtOrImmediatelyAfterCachedIndex(loc, false);
}
private boolean overlapsAtOrImmediatelyAfterCachedIndex(final GenomeLoc loc, final boolean updateCachedIndex) {
// check the cached entry
if ( mArray.get(previousOverlapSearchIndex).overlapsP(loc) )
return true;
// check the entry after the cached entry since we may have moved to it
boolean returnValue = false;
if ( previousOverlapSearchIndex < mArray.size() - 1 ) {
returnValue = mArray.get(previousOverlapSearchIndex + 1).overlapsP(loc);
if ( updateCachedIndex )
previousOverlapSearchIndex++;
}
return false;
return returnValue;
}
/**
@ -155,7 +186,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
mArray.add(e);
return true;
} else {
int loc = Collections.binarySearch(mArray,e);
final int loc = Collections.binarySearch(mArray,e);
if (loc >= 0) {
throw new ReviewedStingException("Genome Loc Sorted Set already contains the GenomicLoc " + e.toString());
} else {

View File

@ -49,7 +49,9 @@ public class Haplotype {
private int alignmentStartHapwrtRef;
public int leftBreakPoint = 0;
public int rightBreakPoint = 0;
private Allele artificialAllele = null;
private int artificialAllelePosition = -1;
/**
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
*
@ -71,6 +73,12 @@ public class Haplotype {
this(bases, 0);
}
protected Haplotype( final byte[] bases, final Allele artificialAllele, final int artificialAllelePosition ) {
this(bases, 0);
this.artificialAllele = artificialAllele;
this.artificialAllelePosition = artificialAllelePosition;
}
public Haplotype( final byte[] bases, final GenomeLoc loc ) {
this(bases);
this.genomeLocation = loc;
@ -171,8 +179,25 @@ public class Haplotype {
this.cigar = cigar;
}
    /**
     * Was this haplotype created by artificially inserting an allele (see insertAllele)?
     *
     * @return true if an artificial allele has been attached to this haplotype
     */
    public boolean isArtificialHaplotype() {
        return artificialAllele != null;
    }

    /** @return the allele artificially inserted into this haplotype, or null if none */
    public Allele getArtificialAllele() {
        return artificialAllele;
    }

    /** @return the genomic position of the artificial allele, or -1 if none was set */
    public int getArtificialAllelePosition() {
        return artificialAllelePosition;
    }

    /**
     * Attach an artificial allele and its genomic position to this haplotype.
     *
     * @param artificialAllele the allele that was inserted into this haplotype
     * @param artificialAllelePosition the genomic position of the inserted allele
     */
    public void setArtificialAllele(final Allele artificialAllele, final int artificialAllelePosition) {
        this.artificialAllele = artificialAllele;
        this.artificialAllelePosition = artificialAllelePosition;
    }
@Requires({"refInsertLocation >= 0"})
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
@ -182,7 +207,7 @@ public class Haplotype {
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
return new Haplotype(newHaplotypeBases);
return new Haplotype(newHaplotypeBases, altAllele, genomicInsertLocation);
}
public static class HaplotypeBaseComparator implements Comparator<Haplotype>, Serializable {

View File

@ -9,7 +9,7 @@ import net.sf.samtools.SAMUtils;
* @author Kiran Garimella
*/
public class QualityUtils {
public final static byte MAX_RECALIBRATED_Q_SCORE = 93;
public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);

View File

@ -293,6 +293,10 @@ public class Utils {
}
}
    /**
     * Joins the string representations of objects, placing separator between
     * consecutive elements.
     *
     * Varargs convenience overload that delegates to the collection-based join.
     *
     * @param separator the string placed between consecutive elements
     * @param objects the values to join; may be empty
     * @return the joined string
     */
    public static <T> String join(final String separator, final T ... objects) {
        return join(separator, Arrays.asList(objects));
    }
public static String dupString(char c, int nCopies) {
char[] chars = new char[nCopies];
Arrays.fill(chars, c);
@ -701,11 +705,13 @@ public class Utils {
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
for ( SAMProgramRecord record : oldRecords )
if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
if ( (programRecord != null && !record.getId().startsWith(programRecord.getId())) || KEEP_ALL_PG_RECORDS )
newRecords.add(record);
newRecords.add(programRecord);
header.setProgramRecords(newRecords);
if (programRecord != null) {
newRecords.add(programRecord);
header.setProgramRecords(newRecords);
}
return header;
}

View File

@ -0,0 +1,16 @@
package org.broadinstitute.sting.utils.activeregion;

/**
 * Describes how a read relates to an assigned ActiveRegion.
 *
 * A read can belong to a region as its primary home, overlap it without being
 * primary, fall only within the region's extended boundaries, or be unmapped.
 *
 * User: thibault
 * Date: 11/26/12
 */
public enum ActiveRegionReadState {
    PRIMARY, // This is the read's primary region
    NONPRIMARY, // This region overlaps the read, but it is not primary
    EXTENDED, // This region would overlap the read if it were extended
    UNMAPPED // This read is not mapped
}

View File

@ -103,11 +103,6 @@ public class ActivityProfile {
isActiveList.add(result);
}
// for unit testing
public List<ActivityProfileResult> getActiveList() {
return isActiveList;
}
public int size() {
return isActiveList.size();
}

View File

@ -406,10 +406,15 @@ public class BAQ {
// so BQi = Qi - BAQi + 64
byte[] bqTag = new byte[baq.length];
for ( int i = 0; i < bqTag.length; i++) {
int bq = (int)read.getBaseQualities()[i] + 64;
int baq_i = (int)baq[i];
int tag = bq - baq_i;
if ( tag < 0 ) throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read);
final int bq = (int)read.getBaseQualities()[i] + 64;
final int baq_i = (int)baq[i];
final int tag = bq - baq_i;
// problem with the calculation of the correction factor; this is our problem
if ( tag < 0 )
throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read);
// the original quality is too high, almost certainly due to using the wrong encoding in the BAM file
if ( tag > Byte.MAX_VALUE )
throw new UserException.MalformedBAM(read, "we encountered an extremely high quality score (" + (bq - 64) + ") with BAQ correction factor of " + baq_i + "; the BAM file appears to be using the wrong encoding for quality scores");
bqTag[i] = (byte)tag;
}
return new String(bqTag);

View File

@ -30,12 +30,17 @@ import net.sf.samtools.SAMSequenceRecord;
import org.apache.commons.io.FilenameUtils;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*;
/**
@ -317,4 +322,33 @@ public class VCFUtils {
assembly = "hg19";
return assembly;
}
/**
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
*
* @param source the file to read, must be in VCF4 format
* @return
* @throws IOException
*/
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
// read in the features
final List<VariantContext> vcs = new ArrayList<VariantContext>();
final VCFCodec codec = new VCFCodec();
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
FeatureCodecHeader header = codec.readHeader(pbs);
pbs.close();
pbs = new PositionalBufferedStream(new FileInputStream(source));
pbs.skip(header.getHeaderEnd());
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
while ( ! pbs.isDone() ) {
final VariantContext vc = codec.decode(pbs);
if ( vc != null )
vcs.add(vc);
}
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
}
}

View File

@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -267,7 +268,7 @@ public class UserException extends ReviewedStingException {
public static class ReadMissingReadGroup extends MalformedBAM {
public ReadMissingReadGroup(SAMRecord read) {
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpUtils.GATK_FORUM_URL + "discussion/59/companion-utilities-replacereadgroups to fix this problem", read.getReadName()));
}
}
@ -343,7 +344,7 @@ public class UserException extends ReviewedStingException {
super(String.format("Lexicographically sorted human genome sequence detected in %s."
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
+ "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
+ "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.GATK_FORUM_URL + "discussion/58/companion-utilities-reordersam"
+ "\n %s contigs = %s",
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
}

View File

@ -44,14 +44,13 @@ public class ForumAPIUtils {
/**
* How we post to the forum
*/
private final static String API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
final private static String ACCESS_TOKEN = "access_token=";
public static List<String> getPostedTools(String forumKey) {
Gson gson = new Gson();
List<String> output = new ArrayList<String>();
String text = httpGet(API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
String text = httpGet(HelpUtils.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
APIQuery details = gson.fromJson(text, APIQuery.class);
ForumDiscussion[] discussions = details.Discussions;
@ -159,7 +158,7 @@ public class ForumAPIUtils {
Gson gson = new Gson();
String data = gson.toJson(post.getPostData());
httpPost(data, API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
httpPost(data, HelpUtils.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
}
@ -167,8 +166,7 @@ public class ForumAPIUtils {
class APIQuery {
ForumDiscussion[] Discussions;
public APIQuery() {
}
public APIQuery() {}
}
}

View File

@ -28,7 +28,7 @@ public class GATKDocUtils {
/**
* The URL root for RELEASED GATKDOC units
*/
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gatk/gatkdocs/";
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpUtils.GATK_DOCS_URL;
/**
* The URL root for STABLE GATKDOC units
*/

View File

@ -32,6 +32,15 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
import java.lang.reflect.Field;
public class HelpUtils {
public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
try {
Class type = getClassForDoc(classDoc);

View File

@ -254,19 +254,32 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
* Returns a new ReadBackedPileup where only one read from an overlapping read
* pair is retained. If the two reads in question disagree to their basecall,
* neither read is retained. If they agree on the base, the read with the higher
* quality observation is retained
* base quality observation is retained
*
* @return the newly filtered pileup
*/
@Override
public RBP getOverlappingFragmentFilteredPileup() {
public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
return getOverlappingFragmentFilteredPileup(true, true);
}
/**
* Returns a new ReadBackedPileup where only one read from an overlapping read
* pair is retained. If discardDiscordant and the two reads in question disagree to their basecall,
* neither read is retained. Otherwise, the read with the higher
* quality (base or mapping, depending on baseQualNotMapQual) observation is retained
*
* @return the newly filtered pileup
*/
@Override
public RBP getOverlappingFragmentFilteredPileup(boolean discardDiscordant, boolean baseQualNotMapQual) {
if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for (final String sample : tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup();
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup(discardDiscordant, baseQualNotMapQual);
filteredTracker.addElements(sample, pileup.pileupElementTracker);
}
return (RBP) createNewPileup(loc, filteredTracker);
@ -284,11 +297,16 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
// if the reads disagree at this position, throw them both out. Otherwise
// keep the element with the higher quality score
if (existing.getBase() != p.getBase()) {
if (discardDiscordant && existing.getBase() != p.getBase()) {
filteredPileup.remove(readName);
} else {
if (existing.getQual() < p.getQual()) {
filteredPileup.put(readName, p);
if (baseQualNotMapQual) {
if (existing.getQual() < p.getQual())
filteredPileup.put(readName, p);
}
else {
if (existing.getMappingQual() < p.getMappingQual())
filteredPileup.put(readName, p);
}
}
}
@ -634,23 +652,19 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
int current = 0;
for (final String sample : tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for (PileupElement p : perSampleElements) {
int current = 0;
UnifiedPileupElementTracker<PE> filteredPileup = new UnifiedPileupElementTracker<PE>();
for (PE p : perSampleElements) {
if (positions.contains(current))
filteredPileup.add(p);
}
current++;
if (!filteredPileup.isEmpty()) {
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements);
filteredTracker.addElements(sample, pileup.pileupElementTracker);
}
current++;
filteredTracker.addElements(sample, filteredPileup);
}
return (RBP) createNewPileup(loc, filteredTracker);
@ -998,6 +1012,44 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
return quals2String(getQuals());
}
    /**
     * Returns a new ReadBackedPileup that is sorted by start coordinate of the reads.
     *
     * Elements are ordered by alignment start, with ties broken by read name.
     *
     * NOTE(review): a TreeSet with this comparator treats two elements with the
     * same alignment start AND same read name as duplicates and keeps only one;
     * mates of a read pair can share a name — confirm this collapsing is intended.
     *
     * @return a new pileup containing this pileup's elements in start-sorted order
     */
    @Override
    public RBP getStartSortedPileup() {
        // comparator: primary key = alignment start, secondary key = read name
        final TreeSet<PE> sortedElements = new TreeSet<PE>(new Comparator<PE>() {
            @Override
            public int compare(PE element1, PE element2) {
                final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart();
                return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
            }
        });

        // pull elements out of either tracker flavor into the sorted set
        if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
            PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
            for (final String sample : tracker.getSamples()) {
                PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
                for (PE pile : perSampleElements)
                    sortedElements.add(pile);
            }
        }
        else {
            UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
            for (PE pile : tracker)
                sortedElements.add(pile);
        }

        // materialize the sorted order into a unified tracker (per-sample grouping is not preserved)
        UnifiedPileupElementTracker<PE> sortedTracker = new UnifiedPileupElementTracker<PE>();
        for (PE pile : sortedElements)
            sortedTracker.add(pile);

        return (RBP) createNewPileup(loc, sortedTracker);
    }
@Override
public FragmentCollection<PileupElement> toFragments() {
return FragmentUtils.create(this);

View File

@ -60,6 +60,16 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
*/
public ReadBackedPileup getOverlappingFragmentFilteredPileup();
/**
* Returns a new ReadBackedPileup where only one read from an overlapping read
* pair is retained. If discardDiscordant and the two reads in question disagree to their basecall,
* neither read is retained. Otherwise, the read with the higher
* quality (base or mapping, depending on baseQualNotMapQual) observation is retained
*
* @return the newly filtered pileup
*/
public ReadBackedPileup getOverlappingFragmentFilteredPileup(boolean discardDiscordant, boolean baseQualNotMapQual);
/**
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
@ -261,6 +271,13 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
*/
public byte[] getMappingQuals();
/**
* Returns a new ReadBackedPileup that is sorted by start coordinate of the reads.
*
* @return
*/
public ReadBackedPileup getStartSortedPileup();
/**
* Converts this pileup into a FragmentCollection (see FragmentUtils for documentation)
* @return

View File

@ -49,7 +49,7 @@ import java.util.EnumSet;
public class CycleCovariate implements StandardCovariate {
private static final int MAXIMUM_CYCLE_VALUE = 1000;
private int MAXIMUM_CYCLE_VALUE;
private static final int CUSHION_FOR_INDELS = 4;
private String default_platform = null;
@ -59,6 +59,8 @@ public class CycleCovariate implements StandardCovariate {
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize(final RecalibrationArgumentCollection RAC) {
this.MAXIMUM_CYCLE_VALUE = RAC.MAXIMUM_CYCLE_VALUE;
if (RAC.DEFAULT_PLATFORM != null && !NGSPlatform.isKnown(RAC.DEFAULT_PLATFORM))
throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform.");
@ -88,6 +90,9 @@ public class CycleCovariate implements StandardCovariate {
final int MAX_CYCLE_FOR_INDELS = readLength - CUSHION_FOR_INDELS - 1;
for (int i = 0; i < readLength; i++) {
if ( cycle > MAXIMUM_CYCLE_VALUE )
throw new UserException("The maximum allowed value for the cycle is " + MAXIMUM_CYCLE_VALUE + ", but a larger cycle was detected in read " + read.getReadName() + ". Please use the --maximum_cycle_value argument to increase this value (at the expense of requiring more memory to run)");
final int substitutionKey = keyFromCycle(cycle);
final int indelKey = (i < CUSHION_FOR_INDELS || i > MAX_CYCLE_FOR_INDELS) ? -1 : substitutionKey;
values.addCovariate(substitutionKey, indelKey, indelKey, i);

View File

@ -0,0 +1,64 @@
package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.TandemRepeat;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.Arrays;
/**
* Created with IntelliJ IDEA.
* User: rpoplin
* Date: 11/3/12
*/
public class RepeatLengthCovariate implements ExperimentalCovariate {
    // repeat lengths are capped at this value; covariate keys range over [0, MAX_REPEAT_LENGTH]
    final int MAX_REPEAT_LENGTH = 20;

    // Initialize any member variables using the command-line arguments passed to the walkers
    @Override
    public void initialize(final RecalibrationArgumentCollection RAC) {}

    /**
     * For each base of the read, records the longest tandem-repeat run (unit size 1-8 bp)
     * starting at that base, capped at MAX_REPEAT_LENGTH.
     */
    @Override
    public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
        byte[] readBytes = read.getReadBases();
        for (int i = 0; i < readBytes.length; i++) {
            int maxRL = 0;
            // try every repeat-unit size from 1 to 8 bp and keep the longest repeat count
            for (int str = 1; str <= 8; str++) {
                if (i + str <= readBytes.length) {
                    maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions(
                            Arrays.copyOfRange(readBytes,i,i + str),
                            Arrays.copyOfRange(readBytes,i,readBytes.length)
                    ));
                }
            }
            // cap at the maximum so all keys stay within maximumKeyValue()
            if(maxRL > MAX_REPEAT_LENGTH) { maxRL = MAX_REPEAT_LENGTH; }
            // same key for the mismatch, insertion, and deletion covariate tables
            values.addCovariate(maxRL, maxRL, maxRL, i);
        }
    }

    // Used to get the covariate's value from input csv file during on-the-fly recalibration
    // NOTE(review): returns a Byte while keyFromValue casts non-String values to Integer —
    // a Byte passed back into keyFromValue would throw ClassCastException; confirm intended.
    @Override
    public final Object getValue(final String str) {
        return Byte.parseByte(str);
    }

    /** Formats a covariate key for report output. */
    @Override
    public String formatKey(final int key) {
        return String.format("%d", key);
    }

    /** Converts a covariate value (String or Integer) back into its integer key. */
    @Override
    public int keyFromValue(final Object value) {
        return (value instanceof String) ? Integer.parseInt((String) value) : (Integer) value;
    }

    /** @return the largest key plus one is never produced; keys are in [0, MAX_REPEAT_LENGTH] */
    @Override
    public int maximumKeyValue() {
        return MAX_REPEAT_LENGTH + 1;
    }
}

View File

@ -184,9 +184,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
protected CommonInfo commonInfo = null;
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
@Deprecated // ID is no longer stored in the attributes map
private final static String ID_KEY = "ID";
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
/** The location of this VariantContext */
@ -287,10 +284,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
// todo -- remove me when this check is no longer necessary
if ( this.commonInfo.hasAttribute(ID_KEY) )
throw new IllegalArgumentException("Trying to create a VariantContext with a ID key. Please use provided constructor argument ID");
if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
// we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles

View File

@ -982,6 +982,40 @@ public class VariantContextUtils {
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
/**
* Split variant context into its biallelic components if there are more than 2 alleles
*
* For VC has A/B/C alleles, returns A/B and A/C contexts.
* Genotypes are all no-calls now (it's not possible to fix them easily)
* Alleles are right trimmed to satisfy VCF conventions
*
* If vc is biallelic or non-variant it is just returned
*
* Chromosome counts are updated (but they are by definition 0)
*
* @param vc a potentially multi-allelic variant context
* @return a list of bi-allelic (or monomorphic) variant context
*/
/**
 * Split a multi-allelic VariantContext into one bi-allelic context per alternate
 * allele (ref + alt). Non-variant or already bi-allelic inputs are returned
 * unchanged. Genotypes are re-derived for each allele pair, chromosome counts are
 * recomputed, and alleles are right-trimmed per VCF convention.
 *
 * @param vc a potentially multi-allelic variant context
 * @return a list of bi-allelic (or monomorphic) variant contexts
 */
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
    // Guard clause: these inputs already satisfy the contract as-is
    if ( !vc.isVariant() || vc.isBiallelic() )
        return Collections.singletonList(vc);

    final List<VariantContext> perAltContexts = new LinkedList<VariantContext>();
    for ( final Allele altAllele : vc.getAlternateAlleles() ) {
        final List<Allele> refAndAlt = Arrays.asList(vc.getReference(), altAllele);
        final VariantContextBuilder perAltBuilder = new VariantContextBuilder(vc);
        perAltBuilder.alleles(refAndAlt);
        perAltBuilder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, refAndAlt, false));
        calculateChromosomeCounts(perAltBuilder, true);
        perAltContexts.add(reverseTrimAlleles(perAltBuilder.make()));
    }
    return perAltContexts;
}
/**
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
*
@ -1236,7 +1270,7 @@ public class VariantContextUtils {
* @param testString String to test
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
*/
protected static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
int numRepeats = 0;
for (int start = 0; start < testString.length; start += repeatUnit.length) {
int end = start + repeatUnit.length;

View File

@ -71,7 +71,6 @@ abstract class SortingVariantContextWriterBase implements VariantContextWriter {
this.takeOwnershipOfInner = takeOwnershipOfInner;
// has to be PriorityBlockingQueue to be thread-safe
// see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic
this.queue = new PriorityBlockingQueue<VCFRecord>(50, new VariantContextComparator());
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;

View File

@ -1,9 +1,12 @@
package org.broadinstitute.sting.gatk.traversals;
import org.testng.Assert;
import com.google.java.contract.PreconditionError;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -22,37 +25,61 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 11/13/12
* Time: 2:47 PM
*
* Test the Active Region Traversal Contract
* http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract
*/
public class TraverseActiveRegionsTest extends BaseTest {
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
private final double prob;
private EnumSet<ActiveRegionReadState> states = super.desiredReadStates();
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
public DummyActiveRegionWalker() {
this.prob = 1.0;
}
public DummyActiveRegionWalker(double constProb) {
this.prob = constProb;
}
public DummyActiveRegionWalker(EnumSet<ActiveRegionReadState> wantStates) {
this.prob = 1.0;
this.states = wantStates;
}
@Override
public EnumSet<ActiveRegionReadState> desiredReadStates() {
return states;
}
@Override
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
isActiveCalls.add(ref.getLocus());
return new ActivityProfileResult(ref.getLocus(), prob);
}
@Override
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
mappedActiveRegions.put(activeRegion.getLocation(), activeRegion);
return 0;
}
@ -70,57 +97,425 @@ public class TraverseActiveRegionsTest extends BaseTest {
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
private IndexedFastaSequenceFile reference;
private SAMSequenceDictionary dictionary;
private GenomeLocParser genomeLocParser;
private ActiveRegionWalker<Integer, Integer> walker;
private List<GenomeLoc> intervals;
private List<GATKSAMRecord> reads;
@BeforeClass
private void init() throws FileNotFoundException {
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
intervals = new ArrayList<GenomeLoc>();
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
reads = new ArrayList<GATKSAMRecord>();
reads.add(buildSAMRecord("simple", "1", 100, 200));
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
// required by LocusIteratorByState, and I prefer to list them in test case order above
ReadUtils.sortReadsByCoordinate(reads);
}
@Test
public void testAllIntervalsSeen() throws Exception {
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
intervals.add(interval);
public void testAllBasesSeen() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
LocusShardDataProvider dataProvider = createDataProvider(intervals);
List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
verifyEqualIntervals(intervals, activeIntervals);
t.traverse(walker, dataProvider, 0);
boolean allGenomeLocsSeen = true;
for (GenomeLoc loc : intervals) {
boolean thisGenomeLocSeen = false;
for (ActivityProfileResult active : t.profile.getActiveList()) {
if (loc.equals(active.getLoc())) {
thisGenomeLocSeen = true;
break;
}
}
if (!thisGenomeLocSeen) {
allGenomeLocsSeen = false;
break;
}
}
Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile");
// TODO: more tests and edge cases
}
private LocusShardDataProvider createDataProvider(List<GenomeLoc> intervals) {
walker = new DummyActiveRegionWalker();
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) {
t.traverse(walker, dataProvider, 0);
activeIntervals.addAll(walker.isActiveCalls);
}
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
Shard shard = new MockLocusShard(genomeLocParser, intervals);
WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
return activeIntervals;
}
// Contract: the activity probability returned by isActive() must lie in [0, 1].
// A walker reporting -0.1 should trip a (cofoja) precondition during traversal.
@Test (expectedExceptions = PreconditionError.class)
public void testIsActiveRangeLow () {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(-0.1);
getActiveRegions(walker, intervals).values();
}
// Contract: the activity probability returned by isActive() must lie in [0, 1].
// A walker reporting 1.1 should trip a (cofoja) precondition during traversal.
@Test (expectedExceptions = PreconditionError.class)
public void testIsActiveRangeHigh () {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(1.1);
getActiveRegions(walker, intervals).values();
}
// Contract: the emitted active regions must tile the analysis intervals exactly.
// The individual sub-contracts are asserted in verifyActiveRegionCoverage().
@Test
public void testActiveRegionCoverage() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
verifyActiveRegionCoverage(intervals, activeRegions);
// TODO: more tests and edge cases
}
/**
 * Verify the Active Region Traversal coverage contract over the given intervals:
 *  - regions do not overlap each other;
 *  - every base of every interval lies in exactly one region;
 *  - regions contain no bases outside the intervals;
 *  - every explicit interval start/stop is also some region's start/stop.
 */
private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
    // Boundary locations that must be matched by some region boundary
    List<GenomeLoc> intervalStarts = new ArrayList<GenomeLoc>();
    List<GenomeLoc> intervalStops = new ArrayList<GenomeLoc>();
    for (GenomeLoc interval : intervals) {
        intervalStarts.add(interval.getStartLocation());
        intervalStops.add(interval.getStopLocation());
    }

    // Map each single-base location to the (unique) region that claims it
    Map<GenomeLoc, ActiveRegion> baseRegionMap = new HashMap<GenomeLoc, ActiveRegion>();
    for (ActiveRegion activeRegion : activeRegions) {
        for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) {
            // Contract: Regions do not overlap
            Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region");
            baseRegionMap.put(activeLoc, activeRegion);
        }

        // List.remove(Object) is a no-op when the element is absent, so the
        // former contains()-then-remove() double scan is unnecessary.
        intervalStarts.remove(activeRegion.getLocation().getStartLocation());
        intervalStops.remove(activeRegion.getLocation().getStopLocation());
    }

    for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) {
        // Contract: Each location in the interval(s) is in exactly one region
        // Contract: The total set of regions exactly matches the analysis interval(s)
        Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region");
        baseRegionMap.remove(baseLoc);
    }

    // Contract: The total set of regions exactly matches the analysis interval(s)
    Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals");

    // Contract: All explicit interval boundaries must also be region boundaries
    Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location");
    Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location");
}
// Contract check: every read gets the Primary state in at most one region —
// the region of maximum overlap (the earlier region on a tie).
// Reads not overlapping any region (or only desired in other states) must be absent.
@Test
public void testPrimaryReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
// simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region;
// Region 1:1-999 — only the reads whose Primary region this is
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
getRead(region, "simple");
getRead(region, "overlap_equal");
getRead(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 1:1000-1999
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 1:2000-2999
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
getRead(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 20:10000-10100 — only the chromosome-20 read lands here
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
// Contract check: with NONPRIMARY also requested, a read additionally appears
// in every other region it directly overlaps (but not in extension-only regions).
@Test
public void testNonPrimaryReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
// Contract: Each read has the Non-Primary state in all other regions it overlaps
// simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region;
// Region 1:1-999 — Primary reads plus extended_and_np's Non-Primary placement
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
getRead(region, "simple");
getRead(region, "overlap_equal");
getRead(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 1:1000-1999 — boundary reads now appear in both regions they touch
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
getRead(region, "boundary_equal");
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 1:2000-2999 — extended_and_np is Extended-only here, so still absent
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
getRead(region, "boundary_equal");
getRead(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 20:10000-10100
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
// Contract check: with EXTENDED also requested, a read additionally appears in
// regions it only overlaps via the region extension (see extended_and_np in 1:2000-2999).
@Test
public void testExtendedReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED));
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
// Contract: Each read has the Non-Primary state in all other regions it overlaps
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
// simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region;
// Region 1:1-999
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
getRead(region, "simple");
getRead(region, "overlap_equal");
getRead(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 1:1000-1999
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
getRead(region, "boundary_equal");
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 1:2000-2999 — extended_and_np now present via its Extended state
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
getRead(region, "boundary_equal");
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
// Region 20:10000-10100
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
/** Assert that no read with the given name was assigned to the region. */
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
    for (SAMRecord candidate : region.getReads()) {
        // assertFalse fails with the same message the old explicit fail() produced
        Assert.assertFalse(candidate.getReadName().equals(readName),
                "Read " + readName + " found in active region " + region);
    }
}
/** Return the first read with the given name in the region, failing the test if absent. */
private SAMRecord getRead(ActiveRegion region, String readName) {
    SAMRecord found = null;
    for (SAMRecord candidate : region.getReads()) {
        if (readName.equals(candidate.getReadName())) {
            found = candidate;
            break;
        }
    }
    if (found == null)
        Assert.fail("Read " + readName + " not assigned to active region " + region);
    return found;
}
// Run the traversal over every shard covering the given intervals, then call
// endTraversal() (presumably to flush any in-flight regions to the walker — confirm)
// and return the walker's map of region location -> ActiveRegion.
private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
t.traverse(walker, dataProvider, 0);
t.endTraversal(walker, 0);
return walker.mappedActiveRegions;
}
/** Expand an interval into one single-base GenomeLoc per covered position. */
private Collection<GenomeLoc> toSingleBaseLocs(GenomeLoc interval) {
    List<GenomeLoc> singleBases = new ArrayList<GenomeLoc>();
    if (interval.size() == 1) {
        // Already a single base — reuse the interval itself
        singleBases.add(interval);
    } else {
        for (int pos = interval.getStart(); pos <= interval.getStop(); pos++) {
            singleBases.add(genomeLocParser.createGenomeLoc(interval.getContig(), pos, pos));
        }
    }
    return singleBases;
}
/** Expand several intervals into a sorted, de-duplicated collection of single-base locations. */
private Collection<GenomeLoc> toSingleBaseLocs(List<GenomeLoc> intervals) {
    // TreeSet provides both uniqueness and coordinate ordering
    Set<GenomeLoc> singleBases = new TreeSet<GenomeLoc>();
    for (GenomeLoc interval : intervals) {
        singleBases.addAll(toSingleBaseLocs(interval));
    }
    return singleBases;
}
/**
 * Assert that two interval lists cover exactly the same set of single-base
 * locations, comparing base-by-base in sorted order.
 */
private void verifyEqualIntervals(List<GenomeLoc> aIntervals, List<GenomeLoc> bIntervals) {
    Collection<GenomeLoc> aBases = toSingleBaseLocs(aIntervals);
    Collection<GenomeLoc> bBases = toSingleBaseLocs(bIntervals);

    // assertEquals reports both values on failure, unlike assertTrue(a == b, ...)
    Assert.assertEquals(aBases.size(), bBases.size(), "Interval lists have a differing number of bases");

    Iterator<GenomeLoc> aIter = aBases.iterator();
    Iterator<GenomeLoc> bIter = bBases.iterator();
    while (aIter.hasNext() && bIter.hasNext()) {
        GenomeLoc aLoc = aIter.next();
        GenomeLoc bLoc = bIter.next();
        // assertEquals uses equals() and prints both locations when they differ
        Assert.assertEquals(aLoc, bLoc, "Interval locations do not match");
    }
}
// copied from LocusViewTemplate
/**
 * Build an artificial aligned read spanning [alignmentStart, alignmentEnd] on the
 * given contig, with an all-match cigar and zero-filled bases/qualities.
 */
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
    SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(dictionary);

    GATKSAMRecord record = new GATKSAMRecord(header);
    record.setReadName(readName);
    record.setReferenceIndex(dictionary.getSequenceIndex(contig));
    record.setAlignmentStart(alignmentStart);

    int readLength = alignmentEnd - alignmentStart + 1;
    Cigar cigar = new Cigar();
    cigar.add(new CigarElement(readLength, CigarOperator.M));
    record.setCigar(cigar);

    // Dummy bases and qualities of the right length; content is irrelevant here
    record.setReadBases(new byte[readLength]);
    record.setBaseQualities(new byte[readLength]);
    return record;
}
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
//engine.setReferenceDataSource(reference);
engine.setGenomeLocParser(genomeLocParser);
t.initialize(engine);
return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>(reads));
Shard shard = new MockLocusShard(genomeLocParser, intervals);
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) {
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
}
return providers;
}
}

View File

@ -151,7 +151,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
}
@Test
public void testTabixAnnotations() {
public void testTabixAnnotationsAndParallelism() {
final String MD5 = "99938d1e197b8f10c408cac490a00a62";
for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
WalkerTestSpec spec = new WalkerTestSpec(
@ -159,6 +159,12 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
Arrays.asList(MD5));
executeTest("Testing lookup vcf tabix vs. vcf tribble", spec);
}
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -A HomopolymerRun -nt 2 --variant:vcf " + validationDataLocation + "CEU.exon.2010_03.sites.vcf -L " + validationDataLocation + "CEU.exon.2010_03.sites.vcf --no_cmdline_in_header", 1,
Arrays.asList(MD5));
executeTest("Testing lookup vcf tabix vs. vcf tribble plus parallelism", spec);
}
@Test

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -74,6 +76,23 @@ public class GenomeLocParserUnitTest extends BaseTest {
genomeLocParser.parseGenomeLoc("Bad:0-1");
}
@Test
public void testContigHasColon() {
    // Build a one-contig dictionary whose contig name itself contains colons,
    // to check that the parser splits "c:h:r1:4-5" on the final colon only
    SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
    SAMSequenceDictionary dict = new SAMSequenceDictionary();
    SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10);
    rec.setSequenceLength(10);
    dict.addSequence(rec);
    header.setSequenceDictionary(dict);

    final GenomeLocParser myGenomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
    GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5");
    // TestNG's assertEquals takes (actual, expected); the contig-index assertion
    // previously had the arguments reversed relative to the two below.
    assertEquals(loc.getContigIndex(), 0);
    assertEquals(loc.getStart(), 4);
    assertEquals(loc.getStop(), 5);
}
@Test
public void testParseGoodString() {
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
@ -81,7 +100,7 @@ public class GenomeLocParserUnitTest extends BaseTest {
assertEquals(loc.getStop(), 10);
assertEquals(loc.getStart(), 1);
}
@Test
public void testCreateGenomeLoc1() {
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import org.testng.annotations.BeforeClass;
@ -117,6 +118,41 @@ public class GenomeLocSortedSetUnitTest extends BaseTest {
assertTrue(loc.getContigIndex() == 1);
}
@Test
public void overlap() {
    // Populate the set with intervals [10,11], [20,21], [30,31], [40,41], [50,51]
    for ( int i = 1; i < 6; i++ ) {
        final int start = i * 10;
        mSortedSet.add(genomeLocParser.createGenomeLoc(contigOneName, start, start + 1));
    }

    // test matches in and around interval
    assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 9, 9)));
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 10, 10)));
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 11, 11)));
    assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 12, 12)));

    // test matches spanning intervals
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 14, 20)));
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 11, 15)));
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 30, 40)));
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 51, 53)));

    // test miss
    assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 12, 19)));

    // test exact match after miss
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 40, 41)));

    // test matches at beginning of intervals
    assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 5, 6)));
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 0, 10)));

    // test matches at end of intervals
    // (the 52-53 check was previously asserted twice verbatim; collapsed to once)
    assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 52, 53)));
    assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 51, 53)));
}
@Test
public void mergingOverlappingAbove() {
GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50);

View File

@ -159,7 +159,7 @@ public class HaplotypeUnitTest extends BaseTest {
final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make();
h.setAlignmentStartHapwrtRef(0);
h.setCigar(cigar);
final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc);
final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc, vc.getStart());
final Haplotype h1expected = new Haplotype(newHap.getBytes());
Assert.assertEquals(h1, h1expected);
}

View File

@ -28,7 +28,6 @@ public class VCFIntegrationTest extends WalkerTest {
}
@Test(enabled = true)
// See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
public void testReadingAndWritingBreakpointAlleles() {
String testVCF = privateTestDir + "breakpoint-example.vcf";

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -24,7 +25,7 @@ public class CycleCovariateUnitTest {
covariate.initialize(RAC);
}
@Test(enabled = false)
@Test(enabled = true)
public void testSimpleCycles() {
short readLength = 10;
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
@ -53,9 +54,31 @@ public class CycleCovariateUnitTest {
for (short i = 0; i < values.length; i++) {
short actual = Short.decode(covariate.formatKey(values[i][0]));
int expected = init + (increment * i);
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
Assert.assertEquals(actual, expected);
}
}
// Contract: a read whose length exceeds RAC.MAXIMUM_CYCLE_VALUE must cause
// recordValues() to throw a UserException rather than emit out-of-range keys.
@Test(enabled = true, expectedExceptions={UserException.class})
public void testMoreThanMaxCycleFails() {
int readLength = RAC.MAXIMUM_CYCLE_VALUE + 1;
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
read.setReadPairedFlag(true);
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
// NOTE(review): platform string presumably selects the cycle model — confirm
read.getReadGroup().setPlatform("illumina");
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
covariate.recordValues(read, readCovariates);
}
// Boundary companion to testMoreThanMaxCycleFails: a read of exactly
// RAC.MAXIMUM_CYCLE_VALUE bases must be accepted without throwing.
@Test(enabled = true)
public void testMaxCyclePasses() {
// Exactly the maximum encodable cycle -> must succeed.
int readLength = RAC.MAXIMUM_CYCLE_VALUE;
GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
read.setReadPairedFlag(true);
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
read.getReadGroup().setPlatform("illumina");
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
covariate.recordValues(read, readCovariates);
}
}

View File

@ -782,7 +782,7 @@ public class VariantContextTestProvider {
Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
Assert.assertEquals(actual.getID(), expected.getID(), "id");
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual);
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, C, Cref, ATC, ATCATC;
Allele Aref, T, C, G, Cref, ATC, ATCATC;
private GenomeLocParser genomeLocParser;
@BeforeSuite
@ -58,6 +58,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Cref = Allele.create("C", true);
T = Allele.create("T");
C = Allele.create("C");
G = Allele.create("G");
ATC = Allele.create("ATC");
ATCATC = Allele.create("ATCATC");
}
@ -697,10 +698,120 @@ public class VariantContextUtilsUnitTest extends BaseTest {
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
}
@Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip);
}
}
// --------------------------------------------------------------------------------
//
// test splitting into bi-allelics
//
// --------------------------------------------------------------------------------
// Data provider for the biallelic-splitting tests: each case pairs an input
// VariantContext with the ordered list of biallelic VariantContexts it should split into.
@DataProvider(name = "SplitBiallelics")
public Object[][] makeSplitBiallelics() throws CloneNotSupportedException {
List<Object[]> tests = new ArrayList<Object[]>();
// NOTE: root is mutated (alleles/stop) between cases; make() snapshots it each time.
final VariantContextBuilder root = new VariantContextBuilder("x", "20", 10, 10, Arrays.asList(Aref, C));
// biallelic -> biallelic
tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
// monos -> monos
root.alleles(Arrays.asList(Aref));
tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
// triallelic SNP -> two biallelic SNPs, in alt-allele order
root.alleles(Arrays.asList(Aref, C, T));
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(Aref, C)).make(),
root.alleles(Arrays.asList(Aref, T)).make())});
// four-allele SNP -> three biallelic SNPs
root.alleles(Arrays.asList(Aref, C, T, G));
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(Aref, C)).make(),
root.alleles(Arrays.asList(Aref, T)).make(),
root.alleles(Arrays.asList(Aref, G)).make())});
// Indel alleles for the remaining cases.  NOTE(review): these locals deliberately
// shadow the class fields C and Cref from this point on in the method.
final Allele C = Allele.create("C");
final Allele CA = Allele.create("CA");
final Allele CAA = Allele.create("CAA");
final Allele CAAAA = Allele.create("CAAAA");
final Allele CAAAAA = Allele.create("CAAAAA");
final Allele Cref = Allele.create("C", true);
final Allele CAref = Allele.create("CA", true);
final Allele CAAref = Allele.create("CAA", true);
final Allele CAAAref = Allele.create("CAAA", true);
// multi-allelic insertion -> biallelic insertions sharing the same ref
root.alleles(Arrays.asList(Cref, CA, CAA));
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(Cref, CA)).make(),
root.alleles(Arrays.asList(Cref, CAA)).make())});
// multi-allelic deletion -> biallelic deletions; each split trims the ref and stop position
root.alleles(Arrays.asList(CAAref, C, CA)).stop(12);
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(CAAref, C)).make(),
root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
root.alleles(Arrays.asList(CAAAref, C, CA, CAA)).stop(13);
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(CAAAref, C)).make(),
root.alleles(Arrays.asList(CAAref, C)).stop(12).make(),
root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
// mixed insertions and deletions against the same ref; splits are trimmed independently
root.alleles(Arrays.asList(CAAAref, CAAAAA, CAAAA, CAA, C)).stop(13);
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(Cref, CAA)).stop(10).make(),
root.alleles(Arrays.asList(Cref, CA)).stop(10).make(),
root.alleles(Arrays.asList(CAref, C)).stop(11).make(),
root.alleles(Arrays.asList(CAAAref, C)).stop(13).make())});
return tests.toArray(new Object[][]{});
}
// Checks that splitVariantContextToBiallelics produces exactly the expected
// biallelic contexts, in order, for sites without genotypes.
@Test(dataProvider = "SplitBiallelics")
public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc);
Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
for ( int i = 0; i < biallelics.size(); i++ ) {
final VariantContext actual = biallelics.get(i);
final VariantContext expected = expectedBiallelics.get(i);
// Deep field-by-field comparison (start/end/id/alleles/attributes/filters).
VariantContextTestProvider.assertEquals(actual, expected);
}
}
// Extends the no-genotypes test: attaches one genotype per allele-pair permutation
// (plus a missing genotype) and checks how genotypes survive the split.
@Test(dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes")
public void testSplitBiallelicsGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
final List<Genotype> genotypes = new ArrayList<Genotype>();
int sampleI = 0;
// One synthetic sample for every ordered diploid combination of the site's alleles.
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
}
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
for ( int i = 0; i < biallelics.size(); i++ ) {
final VariantContext actual = biallelics.get(i);
Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples
for ( final Genotype inputGenotype : genotypes ) {
final Genotype actualGenotype = actual.getGenotype(inputGenotype.getSampleName());
Assert.assertNotNull(actualGenotype);
// Monomorphic/biallelic sites keep genotypes verbatim; multi-allelic splits
// cannot preserve them, so every genotype must become a no-call.
if ( ! vc.isVariant() || vc.isBiallelic() )
Assert.assertEquals(actualGenotype, vcWithGenotypes.getGenotype(inputGenotype.getSampleName()));
else
Assert.assertTrue(actualGenotype.isNoCall());
}
}
}
}

View File

@ -36,6 +36,9 @@
<dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
<!-- All R scripts in org.broadinstitute.sting -->
<dir name="org/broadinstitute/sting" includes="**/*.R" />
<!-- Resources in org.broadinstitute.sting -->
<dir name="org/broadinstitute/sting" includes="**/resources/*" />
<dir name="org/broadinstitute/sting" includes="**/templates/*" />
<!-- The GATK public key -->
<file path="GATK_public.key" />
</dependencies>

View File

@ -0,0 +1,507 @@
package org.broadinstitute.sting.queue.qscripts.CNV
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.util.VCF_BAM_utilities
import org.broadinstitute.sting.queue.util.DoC._
import org.broadinstitute.sting.commandline.Hidden
import java.io.{PrintStream, PrintWriter}
import org.broadinstitute.sting.utils.text.XReadLines
import collection.JavaConversions._
import org.broadinstitute.sting.gatk.walkers.coverage.CoverageUtils
class xhmmCNVpipeline extends QScript {
qscript =>
@Input(doc = "bam input, as .bam or as a list of files", shortName = "I", required = true)
var bams: File = _
@Input(doc = "gatk jar file", shortName = "J", required = true)
var gatkJarFile: File = _
@Input(doc = "xhmm executable file", shortName = "xhmmExec", required = true)
var xhmmExec: File = _
@Input(doc = "Plink/Seq executable file", shortName = "pseqExec", required = true)
var pseqExec: File = _
@Argument(doc = "Plink/Seq SEQDB file (Reference genome sequence)", shortName = "SEQDB", required = true)
var pseqSeqDB: String = _
@Input(shortName = "R", doc = "ref", required = true)
var referenceFile: File = _
@Input(shortName = "L", doc = "Intervals", required = false)
var intervals: File = _
@Argument(doc = "level of parallelism for BAM DoC. By default is set to 0 [no scattering].", shortName = "scatter", required = false)
var scatterCountInput = 0
@Argument(doc = "Samples to run together for DoC. By default is set to 1 [one job per sample].", shortName = "samplesPerJob", required = false)
var samplesPerJob = 1
@Output(doc = "Base name for files to output", shortName = "o", required = true)
var outputBase: File = _
@Hidden
@Argument(doc = "How should overlapping reads from the same fragment be handled?", shortName = "countType", required = false)
var countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS
@Argument(doc = "Maximum depth (before GATK down-sampling kicks in...)", shortName = "MAX_DEPTH", required = false)
var MAX_DEPTH = 20000
@Hidden
@Argument(doc = "Number of read-depth bins", shortName = "NUM_BINS", required = false)
var NUM_BINS = 200
@Hidden
@Argument(doc = "Starting value of read-depth bins", shortName = "START_BIN", required = false)
var START_BIN = 1
@Argument(doc = "Minimum read mapping quality", shortName = "MMQ", required = false)
var minMappingQuality = 0
@Argument(doc = "Minimum base quality to be counted in depth", shortName = "MBQ", required = false)
var minBaseQuality = 0
@Argument(doc = "Memory (in GB) required for storing the whole matrix in memory", shortName = "wholeMatrixMemory", required = false)
var wholeMatrixMemory = -1
@Argument(shortName = "minTargGC", doc = "Exclude all targets with GC content less than this value", required = false)
var minTargGC : Double = 0.1
@Argument(shortName = "maxTargGC", doc = "Exclude all targets with GC content greater than this value", required = false)
var maxTargGC : Double = 0.9
@Argument(shortName = "minTargRepeats", doc = "Exclude all targets with % of repeat-masked bases less than this value", required = false)
var minTargRepeats : Double = 0.0
@Argument(shortName = "maxTargRepeats", doc = "Exclude all targets with % of repeat-masked bases greater than this value", required = false)
var maxTargRepeats : Double = 0.1
@Argument(shortName = "sampleIDsMap", doc = "File mapping BAM sample IDs to desired sample IDs", required = false)
var sampleIDsMap: String = ""
@Argument(shortName = "sampleIDsMapFromColumn", doc = "Column number of OLD sample IDs to map", required = false)
var sampleIDsMapFromColumn = 1
@Argument(shortName = "sampleIDsMapToColumn", doc = "Column number of NEW sample IDs to map", required = false)
var sampleIDsMapToColumn = 2
@Argument(shortName = "rawFilters", doc = "xhmm command-line parameters to filter targets and samples from raw data", required = false)
var targetSampleFiltersString: String = ""
@Argument(shortName = "PCAnormalize", doc = "xhmm command-line parameters to Normalize data using PCA information", required = false)
var PCAnormalizeMethodString: String = ""
@Argument(shortName = "normalizedFilters", doc = "xhmm command-line parameters to filter targets and samples from PCA-normalized data", required = false)
var targetSampleNormalizedFiltersString: String = ""
@Argument(shortName = "xhmmParams", doc = "xhmm model parameters file", required = true)
var xhmmParamsArg: File = _
@Argument(shortName = "discoverParams", doc = "xhmm command-line parameters for discovery step", required = false)
var discoverCommandLineParams: String = ""
@Argument(shortName = "genotypeParams", doc = "xhmm command-line parameters for genotyping step", required = false)
var genotypeCommandLineParams: String = ""
@Argument(shortName = "genotypeSubsegments", doc = "Should we also genotype all subsegments of the discovered CNV?", required = false)
var genotypeSubsegments: Boolean = false
@Argument(shortName = "maxTargetsInSubsegment", doc = "If genotypeSubsegments, then only consider sub-segments consisting of this number of targets or fewer", required = false)
var maxTargetsInSubsegment = 30
@Argument(shortName = "subsegmentGenotypeThreshold", doc = "If genotypeSubsegments, this is the default genotype quality threshold for the sub-segments", required = false)
var subsegmentGenotypeThreshold = 20.0
@Argument(shortName = "longJobQueue", doc = "Job queue to run the 'long-running' commands", required = false)
var longJobQueue: String = ""
val PREPARED_TARGS_SUFFIX: String = ".merged.interval_list"
val RD_OUTPUT_SUFFIX: String = ".RD.txt"
val TARGS_GC_SUFFIX = ".locus_GC.txt"
val EXTREME_GC_TARGS_SUFFIX = ".extreme_gc_targets.txt"
val TARGS_REPEAT_COMPLEXITY_SUFFIX = ".locus_complexity.txt"
val EXTREME_REPEAT_COMPLEXITY_SUFFIX = ".extreme_complexity_targets.txt"
val FILTERED_TARGS_SUFFIX: String = ".filtered_targets.txt"
val FILTERED_SAMPS_SUFFIX: String = ".filtered_samples.txt"
/**
 * Mixin for steps that must hold the entire read-depth matrix in memory.
 * Sets the job's memory limit (in GB) to the user-supplied wholeMatrixMemory,
 * or to a 24 GB default when the user left it at the -1 sentinel.
 */
trait WholeMatrixMemoryLimit extends CommandLineFunction {
// Since loading ALL of the data can take significant memory:
if (wholeMatrixMemory < 0) {
this.memoryLimit = 24
}
else {
this.memoryLimit = wholeMatrixMemory
}
}
/**
 * Mixin for long-running steps: routes the job to the user-specified
 * long-job queue when one was given (empty string means "use the default queue").
 */
trait LongRunTime extends CommandLineFunction {
if (longJobQueue != "")
this.jobQueue = longJobQueue
}
/**
 * Builds the whole XHMM CNV-calling pipeline as a Queue job graph:
 * prepare targets -> per-group DepthOfCoverage -> merge depths ->
 * optional GC / repeat-content target exclusion -> filter+center -> PCA ->
 * normalize -> z-score filter -> CNV discovery -> genotyping.
 */
def script = {
// Merge/normalize the requested intervals into the single target list used by every step.
val prepTargets = new PrepareTargets(List(qscript.intervals), outputBase.getPath + PREPARED_TARGS_SUFFIX, xhmmExec, referenceFile)
add(prepTargets)
// Common GATK settings shared by all GATK walkers in this pipeline.
trait CommandLineGATKArgs extends CommandLineGATK {
this.intervals :+= prepTargets.out
this.jarFile = qscript.gatkJarFile
this.reference_sequence = qscript.referenceFile
this.logging_level = "INFO"
}
val sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = VCF_BAM_utilities.getMapOfBAMsForSample(VCF_BAM_utilities.parseBAMsInput(bams))
val samples: List[String] = sampleToBams.keys.toList
Console.out.printf("Samples are %s%n", samples)
// Batch samples into groups of samplesPerJob; one DepthOfCoverage job per group.
val groups: List[Group] = buildDoCgroups(samples, sampleToBams, samplesPerJob, outputBase)
var docs: List[DoC] = List[DoC]()
for (group <- groups) {
Console.out.printf("Group is %s%n", group)
docs ::= new DoC(group.bams, group.DoC_output, countType, MAX_DEPTH, minMappingQuality, minBaseQuality, scatterCountInput, START_BIN, NUM_BINS, Nil) with CommandLineGATKArgs
}
addAll(docs)
// Combine the per-group DoC outputs into one sample-by-target read-depth matrix.
val mergeDepths = new MergeGATKdepths(docs.map(u => u.intervalSampleOut), outputBase.getPath + RD_OUTPUT_SUFFIX, "_mean_cvg", xhmmExec, sampleIDsMap, sampleIDsMapFromColumn, sampleIDsMapToColumn, None, false) with WholeMatrixMemoryLimit
add(mergeDepths)
// Accumulates target-exclusion lists produced by the optional GC/repeat filters below.
var excludeTargets : List[File] = List[File]()
if (minTargGC > 0 || maxTargGC < 1) {
val calcGCcontents = new GCContentByInterval with CommandLineGATKArgs
calcGCcontents.out = outputBase.getPath + TARGS_GC_SUFFIX
add(calcGCcontents)
val excludeTargetsBasedOnGC = new ExcludeTargetsBasedOnValue(calcGCcontents.out, EXTREME_GC_TARGS_SUFFIX, minTargGC, maxTargGC)
add(excludeTargetsBasedOnGC)
excludeTargets ::= excludeTargetsBasedOnGC.out
}
// Shell pipeline (via Plink/Seq + awk) that computes, per target, the fraction of
// repeat-masked reference bases; emitted as "<target>\t<fraction>" lines.
class CalculateRepeatComplexity(outFile : String) extends CommandLineFunction {
@Input(doc="")
var intervals: File = prepTargets.out
@Output(doc="")
var out : File = new File(outFile)
val regFile : String = outputBase.getPath + ".targets.reg"
val locDB : String = outputBase.getPath + ".targets.LOCDB"
val removeFiles = "rm -f " + regFile + " " + locDB
// Convert "chr:start-stop" intervals into the REG format that pseq loc-load expects.
val createRegFile = "cat " + intervals + " | awk 'BEGIN{OFS=\"\\t\"; print \"#CHR\\tBP1\\tBP2\\tID\"} {split($1,a,\":\"); chr=a[1]; if (match(chr,\"chr\")==0) {chr=\"chr\"chr} split(a[2],b,\"-\"); bp1=b[1]; bp2=bp1; if (length(b) > 1) {bp2=b[2]} print chr,bp1,bp2,NR}' > " + regFile
val createLOCDB = pseqExec + " . loc-load --locdb " + locDB + " --file " + regFile + " --group targets --out " + locDB + ".loc-load"
val calcRepeatMaskedPercent = pseqExec + " . loc-stats --locdb " + locDB + " --group targets --seqdb " + pseqSeqDB + " --out " + locDB + ".loc-stats"
// Join the repeat-masked fraction (column 10 of loc-stats) back onto the interval list.
val extractRepeatMaskedPercent = "cat " + locDB + ".loc-stats.locstats | awk '{if (NR > 1) print $_}' | sort -k1 -g | awk '{print $10}' | paste " + intervals + " - | awk '{print $1\"\\t\"$2}' > " + out
var command: String =
removeFiles +
" && " + createRegFile +
" && " + createLOCDB +
" && " + calcRepeatMaskedPercent +
" && " + extractRepeatMaskedPercent
def commandLine = command
override def description = "Calculate the percentage of each target that is repeat-masked in the reference sequence: " + command
}
if (minTargRepeats > 0 || maxTargRepeats < 1) {
val calcRepeatComplexity = new CalculateRepeatComplexity(outputBase.getPath + TARGS_REPEAT_COMPLEXITY_SUFFIX)
add(calcRepeatComplexity)
val excludeTargetsBasedOnRepeats = new ExcludeTargetsBasedOnValue(calcRepeatComplexity.out, EXTREME_REPEAT_COMPLEXITY_SUFFIX, minTargRepeats, maxTargRepeats)
add(excludeTargetsBasedOnRepeats)
excludeTargets ::= excludeTargetsBasedOnRepeats.out
}
// Core XHMM chain: filter+center raw depths, PCA-normalize, z-score, then
// re-filter the original matrix so discovery sees consistent samples/targets.
val filterCenterDepths = new FilterCenterRawMatrix(mergeDepths.mergedDoC, excludeTargets)
add(filterCenterDepths)
val pca = new PCA(filterCenterDepths.filteredCentered)
add(pca)
val normalize = new Normalize(pca)
add(normalize)
val filterZscore = new FilterAndZscoreNormalized(normalize.normalized)
add(filterZscore)
val filterOriginal = new FilterOriginalData(mergeDepths.mergedDoC, filterCenterDepths, filterZscore)
add(filterOriginal)
val discover = new DiscoverCNVs(filterZscore.filteredZscored, filterOriginal.sameFiltered)
add(discover)
val genotype = new GenotypeCNVs(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered)
add(genotype)
if (genotypeSubsegments) {
val genotypeSegs = new GenotypeCNVandSubsegments(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered)
add(genotypeSegs)
}
}
/**
 * In-process step that reads a two-column "<locus> <value>" file and writes out
 * the loci whose value falls outside [minVal, maxVal] (one locus per line).
 * Used to exclude targets with extreme GC content or repeat-masked fraction.
 *
 * @param locus_valueIn  input file of per-target values
 * @param outSuffix      suffix appended to outputBase for the exclusion list
 * @param minVal         minimum acceptable value (exclusive lower bound for exclusion)
 * @param maxVal         maximum acceptable value (exclusive upper bound for exclusion)
 */
class ExcludeTargetsBasedOnValue(locus_valueIn : File, outSuffix : String, minVal : Double, maxVal : Double) extends InProcessFunction {
  @Input(doc="")
  var locus_value : File = locus_valueIn

  @Output(doc="")
  var out : File = new File(outputBase.getPath + outSuffix)

  def run = {
    val outWriter = new PrintWriter(new PrintStream(out))
    // BUGFIX: close the writer even when an unexpected exception escapes the loop
    // (the original leaked the PrintWriter on any non-NumberFormatException failure).
    try {
      val elems = asScalaIterator(new XReadLines(locus_value))
      while (elems.hasNext) {
        val line = elems.next
        val splitLine = line.split("\\s+")
        val locus = splitLine(0)
        val locValStr = splitLine(1)
        try {
          val locVal = locValStr.toDouble
          // A target is excluded when its value lies outside the accepted range.
          if (locVal < minVal || locVal > maxVal)
            outWriter.printf("%s%n", locus)
        }
        catch {
          // Non-numeric entries (e.g., a header line) are skipped, not fatal;
          // all other exceptions propagate as before.
          case nfe: NumberFormatException => println("Ignoring non-numeric value " + locValStr + " for locus " + locus)
        }
      }
    }
    finally {
      outWriter.close
    }
  }
}
/**
 * Runs "xhmm --matrix" to drop user-filtered/excluded samples and targets from the
 * merged read-depth matrix and mean-center each remaining target.
 * Emits the filtered+centered matrix plus the lists of excluded targets and samples
 * (the latter are reused by FilterOriginalData to keep the matrices in sync).
 */
class FilterCenterRawMatrix(inputParam: File, excludeTargetsIn : List[File]) extends CommandLineFunction with WholeMatrixMemoryLimit {
  @Input(doc = "")
  val input = inputParam

  @Input(doc = "")
  val excludeTargets = excludeTargetsIn

  @Output
  val filteredCentered: File = new File(outputBase.getPath + ".filtered_centered" + RD_OUTPUT_SUFFIX)

  @Output
  val filteredTargets: File = new File(filteredCentered.getPath + FILTERED_TARGS_SUFFIX)

  @Output
  val filteredSamples: File = new File(filteredCentered.getPath + FILTERED_SAMPS_SUFFIX)

  var command: String =
    xhmmExec + " --matrix" +
    " -r " + input +
    " --centerData --centerType target" +
    " -o " + filteredCentered +
    " --outputExcludedTargets " + filteredTargets +
    " --outputExcludedSamples " + filteredSamples

  // BUGFIX: use mkString instead of reduceLeft.  excludeTargets is empty when both
  // the GC-content and repeat-masking filters are disabled, and reduceLeft throws
  // UnsupportedOperationException on an empty list; mkString yields "" instead.
  command += excludeTargets.map(u => " --excludeTargets " + u).mkString

  if (targetSampleFiltersString != "")
    command += " " + targetSampleFiltersString

  def commandLine = command
  override def description = "Filters samples and targets and then mean-centers the targets: " + command
}
/**
 * Runs "xhmm --PCA" on the filtered, mean-centered read-depth matrix.
 * Produces the three PCA component files (PC, PC_SD, PC_LOADINGS) that the
 * Normalize step consumes via PCAbase.
 */
class PCA(inputParam: File) extends CommandLineFunction with WholeMatrixMemoryLimit {
  @Input(doc = "")
  val input = inputParam

  // Common prefix for all three PCA output files.
  val PCAbase: String = outputBase.getPath + ".RD_PCA"

  @Output
  val outPC: File = new File(PCAbase + ".PC.txt")

  @Output
  val outPC_SD: File = new File(PCAbase + ".PC_SD.txt")

  @Output
  val outPC_LOADINGS: File = new File(PCAbase + ".PC_LOADINGS.txt")

  var command: String =
    List(xhmmExec.toString, "--PCA", "-r", input.toString, "--PCAfiles", PCAbase).mkString(" ")

  def commandLine = command
  override def description = "Runs PCA on mean-centered data: " + command
}
/**
 * Runs "xhmm --normalize": removes the principal components computed by the PCA
 * step from the centered read-depth matrix, writing the normalized matrix.
 * Declares all three PCA files as inputs so Queue orders this job after PCA.
 */
class Normalize(pca: PCA) extends CommandLineFunction {
  @Input(doc = "")
  val input = pca.input

  @Input(doc = "")
  val inPC = pca.outPC

  @Input(doc = "")
  val inPC_SD = pca.outPC_SD

  @Input(doc = "")
  val inPC_LOADINGS = pca.outPC_LOADINGS

  @Output
  val normalized: File = new File(outputBase.getPath + ".PCA_normalized.txt")

  var command: String =
    List(xhmmExec.toString, "--normalize",
         "-r", input.toString,
         "--PCAfiles", pca.PCAbase,
         "--normalizeOutput", normalized.toString).mkString(" ")
  // Optional user-supplied normalization parameters are appended verbatim.
  if (PCAnormalizeMethodString != "")
    command += " " + PCAnormalizeMethodString

  def commandLine = command
  override def description = "Normalizes mean-centered data using PCA information: " + command
}
/**
 * Runs "xhmm --matrix" on the PCA-normalized matrix: applies the post-normalization
 * sample/target filters and z-score centers each sample.  Also records which
 * targets/samples were dropped, for reuse by FilterOriginalData.
 */
class FilterAndZscoreNormalized(inputParam: File) extends CommandLineFunction with WholeMatrixMemoryLimit {
  @Input(doc = "")
  val input = inputParam

  @Output
  val filteredZscored: File = new File(outputBase.getPath + ".PCA_normalized.filtered.sample_zscores" + RD_OUTPUT_SUFFIX)

  @Output
  val filteredTargets: File = new File(filteredZscored.getPath + FILTERED_TARGS_SUFFIX)

  @Output
  val filteredSamples: File = new File(filteredZscored.getPath + FILTERED_SAMPS_SUFFIX)

  var command: String =
    List(xhmmExec.toString, "--matrix",
         "-r", input.toString,
         "--centerData", "--centerType", "sample", "--zScoreData",
         "-o", filteredZscored.toString,
         "--outputExcludedTargets", filteredTargets.toString,
         "--outputExcludedSamples", filteredSamples.toString).mkString(" ")
  // Optional user-supplied filter parameters are appended verbatim.
  if (targetSampleNormalizedFiltersString != "")
    command += " " + targetSampleNormalizedFiltersString

  def commandLine = command
  override def description = "Filters and z-score centers (by sample) the PCA-normalized data: " + command
}
/**
 * Runs "xhmm --matrix" to subset the ORIGINAL (unnormalized) read-depth matrix to
 * exactly the targets and samples that survived both filtering passes, so that
 * discovery/genotyping can compare normalized and raw depths over the same cells.
 */
class FilterOriginalData(inputParam: File, filt1: FilterCenterRawMatrix, filt2: FilterAndZscoreNormalized) extends CommandLineFunction with WholeMatrixMemoryLimit {
  @Input(doc = "")
  val input = inputParam

  // Exclusion lists from both the raw-matrix and the normalized-matrix filter steps.
  @Input(doc = "")
  val targFilters: List[File] = List(filt1.filteredTargets, filt2.filteredTargets).map(u => new File(u))

  @Input(doc = "")
  val sampFilters: List[File] = List(filt1.filteredSamples, filt2.filteredSamples).map(u => new File(u))

  @Output
  val sameFiltered: File = new File(outputBase.getPath + ".same_filtered" + RD_OUTPUT_SUFFIX)

  var command: String = {
    val excludeTargArgs = targFilters.map(f => " --excludeTargets " + f).mkString
    val excludeSampArgs = sampFilters.map(f => " --excludeSamples " + f).mkString
    xhmmExec + " --matrix" +
      " -r " + input +
      excludeTargArgs +
      excludeSampArgs +
      " -o " + sameFiltered
  }

  def commandLine = command
  override def description = "Filters original read-depth data to be the same as filtered, normalized data: " + command
}
/**
 * Runs "xhmm --discover" over the z-scored matrix (with the same-filtered raw
 * matrix for auxiliary statistics).  Emits the discovered CNV calls (.xcnv),
 * auxiliary calls, and per-state posterior probability matrices (DIP/DEL/DUP).
 */
class DiscoverCNVs(inputParam: File, origRDParam: File) extends CommandLineFunction with LongRunTime {
  @Input(doc = "")
  val input = inputParam

  @Input(doc = "")
  val xhmmParams = xhmmParamsArg

  @Input(doc = "")
  val origRD = origRDParam

  @Output
  val xcnv: File = new File(outputBase.getPath + ".xcnv")

  @Output
  val aux_xcnv: File = new File(outputBase.getPath + ".aux_xcnv")

  // Prefix under which xhmm writes the three posterior matrices below.
  val posteriorsBase = outputBase.getPath

  @Output
  val dipPosteriors: File = new File(posteriorsBase + ".posteriors.DIP.txt")

  @Output
  val delPosteriors: File = new File(posteriorsBase + ".posteriors.DEL.txt")

  @Output
  val dupPosteriors: File = new File(posteriorsBase + ".posteriors.DUP.txt")

  var command: String =
    List(xhmmExec.toString, "--discover",
         "-p", xhmmParams.toString,
         "-r", input.toString,
         "-R", origRD.toString,
         "-c", xcnv.toString,
         "-a", aux_xcnv.toString,
         "-s", posteriorsBase).mkString(" ") +
      " " + discoverCommandLineParams

  def commandLine = command
  override def description = "Discovers CNVs in normalized data: " + command
}
/**
 * Shared base for the "xhmm --genotype" steps.  Builds the common part of the
 * genotyping command line; concrete subclasses APPEND their output-specific flags
 * to the mutable `command` field during their own initialization, so `command`
 * must remain a var and this initializer must run first.
 */
abstract class BaseGenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File) extends CommandLineFunction with LongRunTime {
@Input(doc = "")
val input = inputParam
@Input(doc = "")
val xhmmParams = xhmmParamsArg
@Input(doc = "")
val origRD = origRDParam
@Input(doc = "")
val inXcnv = xcnv
// Common flags: params file, z-scored matrix, CNVs to genotype, reference, raw matrix.
var command: String =
xhmmExec + " --genotype" +
" -p " + xhmmParams +
" -r " + input +
" -g " + inXcnv +
" -F " + referenceFile +
" -R " + origRD +
" " + genotypeCommandLineParams
}
/**
 * Genotypes the discovered CNVs across all samples, writing a VCF.
 * Appends the "-v" output flag to the command assembled by BaseGenotypeCNVs.
 */
class GenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam) {
@Output
val vcf: File = new File(outputBase.getPath + ".vcf")
command +=
" -v " + vcf
def commandLine = command
override def description = "Genotypes discovered CNVs in all samples: " + command
}
/**
 * Like GenotypeCNVs, but additionally genotypes all sub-segments (up to
 * maxTargetsInSubsegment targets) of each discovered CNV, writing a separate VCF.
 * Appends its flags to the command assembled by BaseGenotypeCNVs.
 */
class GenotypeCNVandSubsegments(inputParam: File, xcnv: File, origRDParam: File) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam) {
@Output
val vcf: File = new File(outputBase.getPath + ".subsegments.vcf")
command +=
" -v " + vcf +
" --subsegments" +
" --maxTargetsInSubsegment " + maxTargetsInSubsegment +
" --genotypeQualThresholdWhenNoExact " + subsegmentGenotypeThreshold
def commandLine = command
override def description = "Genotypes discovered CNVs (and their sub-segments, of up to " + maxTargetsInSubsegment + " targets) in all samples: " + command
}
}

View File

@ -72,6 +72,10 @@ class QSettings {
@Argument(fullName="resident_memory_request_parameter", shortName="resMemReqParam", doc="Parameter for resident memory requests. By default not requested.", required=false)
var residentRequestParameter: String = _
@Argument(fullName="job_walltime", shortName="wallTime", doc="Setting the required DRMAA walltime or LSF run limit.", required=false)
@ClassType(classOf[Long])
var jobWalltime: Option[Long] = None
/** The name of the parallel environment (required for SGE, for example) */
@Argument(fullName="job_parallel_env", shortName="jobParaEnv", doc="An SGE style parallel environment to use for jobs requesting more than 1 core. Equivalent to submitting jobs with -pe ARG nt for jobs with nt > 1", required=false)
var parallelEnvironmentName: String = "smp_pe" // Broad default

View File

@ -65,6 +65,9 @@ class DrmaaJobRunner(val session: Session, val function: CommandLineFunction) ex
drmaaJob.setJoinFiles(true)
}
if(!function.wallTime.isEmpty)
drmaaJob.setHardWallclockTimeLimit(function.wallTime.get)
drmaaJob.setNativeSpecification(functionNativeSpec)
// Instead of running the function.commandLine, run "sh <jobScript>"

View File

@ -151,8 +151,11 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR
throw new QException("setOption_() returned -1 while setting esub");
}
// LSF specific: get the max runtime for the jobQueue and pass it for this job
request.rLimits(LibLsf.LSF_RLIMIT_RUN) = Lsf706JobRunner.getRlimitRun(function.jobQueue)
if(!function.wallTime.isEmpty)
request.rLimits(LibLsf.LSF_RLIMIT_RUN) = function.wallTime.get.toInt
else
// LSF specific: get the max runtime for the jobQueue and pass it for this job
request.rLimits(LibLsf.LSF_RLIMIT_RUN) = Lsf706JobRunner.getRlimitRun(function.jobQueue)
// Run the command as sh <jobScript>
request.command = "sh " + jobScript

View File

@ -33,6 +33,9 @@ import org.broadinstitute.sting.commandline.Argument
trait CommandLineFunction extends QFunction with Logging {
def commandLine: String
/** Setting the wall time request for DRMAA / run limit for LSF */
var wallTime: Option[Long] = None
/** Upper memory limit */
@Argument(doc="Memory limit", required=false)
var memoryLimit: Option[Double] = None
@ -67,6 +70,9 @@ trait CommandLineFunction extends QFunction with Logging {
super.copySettingsTo(function)
function match {
case commandLineFunction: CommandLineFunction =>
if(commandLineFunction.wallTime.isEmpty)
commandLineFunction.wallTime = this.wallTime
if (commandLineFunction.memoryLimit.isEmpty)
commandLineFunction.memoryLimit = this.memoryLimit
@ -110,6 +116,10 @@ trait CommandLineFunction extends QFunction with Logging {
* Sets all field values.
*/
override def freezeFieldValues() {
if(wallTime.isEmpty)
wallTime = qSettings.jobWalltime
if (jobQueue == null)
jobQueue = qSettings.jobQueue

View File

@ -0,0 +1,131 @@
package org.broadinstitute.sting.queue.util
import java.io.File
import org.broadinstitute.sting.queue.extensions.gatk.{IntervalScatterFunction, CommandLineGATK}
import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction
import org.broadinstitute.sting.gatk.downsampling.DownsampleType
import org.broadinstitute.sting.commandline.{Input, Gather, Output}
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.gatk.walkers.coverage.CoverageUtils
package object DoC {
/**
 * Scatter-gatherable GATK DepthOfCoverage job over a group of BAMs.
 * Produces the per-interval/per-sample mean-coverage table (intervalSampleOut)
 * consumed downstream (e.g., by xhmm --mergeGATKdepths).
 */
class DoC(val bams: List[File], val DoC_output: File, val countType: CoverageUtils.CountPileupType, val MAX_DEPTH: Int, val minMappingQuality: Int, val minBaseQuality: Int, val scatterCountInput: Int, val START_BIN: Int, val NUM_BINS: Int, val minCoverageCalcs: Seq[Int]) extends CommandLineGATK with ScatterGatherableFunction {
val DOC_OUTPUT_SUFFIX: String = ".sample_interval_summary"
// So that the output files of this DoC run get deleted once they're used further downstream:
this.isIntermediate = true
this.analysis_type = "DepthOfCoverage"
this.input_file = bams
// Cap pileup depth per sample so extreme-coverage sites don't dominate runtime.
this.downsample_to_coverage = Some(MAX_DEPTH)
this.downsampling_type = DownsampleType.BY_SAMPLE
this.scatterCount = scatterCountInput
this.scatterClass = classOf[IntervalScatterFunction]
// HACK for DoC to work properly within Queue:
// declare the walker's derived output file explicitly so Queue can gather the scattered pieces.
@Output
@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])
var intervalSampleOut: File = new File(DoC_output.getPath + DOC_OUTPUT_SUFFIX)
override def commandLine = super.commandLine +
" --omitDepthOutputAtEachBase" +
" --omitLocusTable" +
" --minMappingQuality " + minMappingQuality +
" --minBaseQuality " + minBaseQuality +
optional("--countType", countType, spaceSeparated=true, escape=true, format="%s") +
" --start " + START_BIN + " --stop " + MAX_DEPTH + " --nBins " + NUM_BINS +
(if (!minCoverageCalcs.isEmpty) minCoverageCalcs.map(cov => " --summaryCoverageThreshold " + cov).reduceLeft(_ + "" + _) else "") +
" --includeRefNSites" +
" -o " + DoC_output
override def shortDescription = "DoC: " + DoC_output
}
/**
 * DoC variant that keeps the per-base depth output: simply removes the
 * "--omitDepthOutputAtEachBase" flag from the parent's command line and
 * declares the output prefix so Queue gathers the scattered base-level files.
 */
class DoCwithDepthOutputAtEachBase(bams: List[File], DoC_output: File, countType: CoverageUtils.CountPileupType, MAX_DEPTH: Int, minMappingQuality: Int, minBaseQuality: Int, scatterCountInput: Int, START_BIN: Int, NUM_BINS: Int, minCoverageCalcs: Seq[Int]) extends DoC(bams, DoC_output, countType: CoverageUtils.CountPileupType, MAX_DEPTH: Int, minMappingQuality, minBaseQuality, scatterCountInput, START_BIN, NUM_BINS, minCoverageCalcs) {
// HACK for DoC to work properly within Queue:
@Output
@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])
var outPrefix = DoC_output
override def commandLine = super.commandLine.replaceAll(" --omitDepthOutputAtEachBase", "")
}
/**
 * Partitions the sample list into batches of at most samplesPerJob samples and
 * wraps each batch (with its BAMs) in a Group named "group1", "group2", ...
 * Returns the groups in reverse batch order, matching the original
 * prepend-as-you-go construction.
 */
def buildDoCgroups(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]], samplesPerJob: Int, outputBase: File): List[Group] = {
  val groupsInBatchOrder =
    samples.grouped(samplesPerJob).zipWithIndex.map { case (subsamples, index) =>
      new Group("group" + (index + 1), outputBase, subsamples, VCF_BAM_utilities.findBAMsForSamples(subsamples, sampleToBams))
    }.toList
  // The original implementation prepended each group, so callers receive the
  // last batch first; preserve that ordering.
  groupsInBatchOrder.reverse
}
// A group bundles the samples and the BAM files that a single DoC job should use.
class Group(val name: String, val outputBase: File, val samples: List[String], val bams: List[File]) {
  // File name only (no directory component) of outputBase, prefixed by the group name.
  val groupOutputName = String.format("%s.%s", name, outputBase.getName)
  // Deliberately a RELATIVE path: when jobs are scattered in the DoC class,
  // they must not scatter into output directories that don't exist.
  // (Previously: new File(outputBase.getParentFile(), groupOutputName).)
  def DoC_output = new File(groupOutputName)
  override def toString(): String = "[Group " + name + " [" + DoC_output + "] with samples " + samples + " against bams " + bams + "]"
}
/**
 * Runs "xhmm --mergeGATKdepths" to combine per-sample DoC outputs (over the same
 * loci) into a single read-depth matrix.
 *
 * Optional behavior, driven by the constructor arguments:
 *  - sampleIDsMap (non-empty string): remap sample IDs via --sampleIDmap, using
 *    the given from/to column indices;
 *  - rdPrecisionArg: pass --rdPrecision when a precision is supplied;
 *  - outputTargetsBySamples: add --outputTargetsBySamples.
 */
class MergeGATKdepths(DoCsToCombine: List[File], outFile: String, columnSuffix: String, xhmmExec: File, sampleIDsMap: String, sampleIDsMapFromColumn: Int, sampleIDsMapToColumn: Int, rdPrecisionArg: Option[Int], outputTargetsBySamples: Boolean) extends CommandLineFunction {
  @Input(doc = "")
  var inputDoCfiles: List[File] = DoCsToCombine
  @Output
  val mergedDoC: File = new File(outFile)
  // The full xhmm command is assembled once, at construction time.
  // mkString (rather than reduceLeft(_ + "" + _)) does not throw when
  // DoCsToCombine is empty — reduceLeft on an empty list raised
  // UnsupportedOperationException during construction.
  var command: String =
    xhmmExec + " --mergeGATKdepths" +
    inputDoCfiles.map(input => " --GATKdepths " + input).mkString +
    " --columnSuffix " + columnSuffix +
    " -o " + mergedDoC
  if (sampleIDsMap != "")
    command += " --sampleIDmap " + sampleIDsMap + " --fromID " + sampleIDsMapFromColumn + " --toID " + sampleIDsMapToColumn
  // foreach replaces the Option match that existed only for its side effect.
  rdPrecisionArg.foreach(rdPrecision => command += " --rdPrecision " + rdPrecision)
  if (outputTargetsBySamples)
    command += " --outputTargetsBySamples"
  def commandLine = command
  override def description = "Combines DoC outputs for multiple samples (at same loci): " + command
}
/**
 * Runs "xhmm --prepareTargets": sorts all target intervals, merges overlapping
 * ones, and writes the resulting merged interval list to outIntervals.
 */
class PrepareTargets(intervalsIn: List[File], outIntervals: String, val xhmmExec: File, val referenceFile: File) extends CommandLineFunction {
  @Input(doc = "List of files containing targeted intervals to be prepared and merged")
  var inIntervals: List[File] = intervalsIn
  @Output(doc = "The merged intervals file to write to")
  var out: File = new File(outIntervals)
  // Assembled once at construction time. mkString (rather than
  // reduceLeft(_ + "" + _)) does not throw when intervalsIn is empty —
  // reduceLeft on an empty list raised UnsupportedOperationException.
  var command: String =
    xhmmExec + " --prepareTargets" +
    " -F " + referenceFile +
    inIntervals.map(intervFile => " --targets " + intervFile).mkString +
    " --mergedTargets " + out
  def commandLine = command
  override def description = "Sort all target intervals, merge overlapping ones, and print the resulting interval list: " + command
}
}

View File

@ -26,36 +26,31 @@ object VCF_BAM_utilities {
case _ => throw new RuntimeException("Unexpected BAM input type: " + bamsIn + "; only permitted extensions are .bam and .list")
}
def getMapOfBAMsForSample(bams: List[File]): scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = bams match {
case Nil => return scala.collection.mutable.Map.empty[String, scala.collection.mutable.Set[File]]
case x :: y =>
val m: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = getMapOfBAMsForSample(y)
val bamSamples: List[String] = getSamplesInBAM(x)
def getMapOfBAMsForSample(bams: List[File]): scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = {
var m = scala.collection.mutable.Map.empty[String, scala.collection.mutable.Set[File]]
for (bam <- bams) {
val bamSamples: List[String] = getSamplesInBAM(bam)
for (s <- bamSamples) {
if (!m.contains(s))
m += s -> scala.collection.mutable.Set.empty[File]
m(s) = m(s) + x
m(s) += bam
}
}
return m
}
def findBAMsForSamples(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]]): List[File] = {
var s = scala.collection.mutable.Set.empty[File]
def findBAMsForSamplesHelper(samples: List[String]): scala.collection.mutable.Set[File] = samples match {
case Nil => scala.collection.mutable.Set.empty[File]
case x :: y =>
var bamsForSampleX: scala.collection.mutable.Set[File] = scala.collection.mutable.Set.empty[File]
if (sampleToBams.contains(x))
bamsForSampleX = sampleToBams(x)
return bamsForSampleX ++ findBAMsForSamplesHelper(y)
for (sample <- samples) {
if (sampleToBams.contains(sample))
s ++= sampleToBams(sample)
}
val l: List[File] = Nil
return l ++ findBAMsForSamplesHelper(samples)
return l ++ s
}
}

View File

@ -126,4 +126,15 @@ class HelloWorldPipelineTest {
spec.jobRunners = Seq("GridEngine")
PipelineTest.executeTest(spec)
}
// disabled because our DRMAA implementation doesn't support wallTime
// (enabled=false keeps the test compiled but excluded from TestNG runs;
// timeOut is in milliseconds — 36,000,000 ms = 10 hours).
@Test(enabled=false, timeOut=36000000)
def testHelloWorldWithWalltime() {
// Configure and run the HelloWorld QScript with an explicit -wallTime limit
// across all available job runners.
val spec = new PipelineTestSpec
spec.name = "HelloWorldWithWalltime"
spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" +
" -wallTime 100"
spec.jobRunners = PipelineTest.allJobRunners
PipelineTest.executeTest(spec)
}
}