Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable into testing

This commit is contained in:
Ami Levy Moonshine 2012-08-09 16:48:22 -04:00
commit 68fb04b8f7
383 changed files with 15161 additions and 9338 deletions

View File

@ -646,7 +646,7 @@
<jar jarfile="${dist.dir}/vcf.jar">
<fileset dir="${java.classes}">
<include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/>
<!-- <include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/> -->
<include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/>
<include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
@ -679,20 +679,6 @@
<fileset dir="${java.classes}" includes="**/alignment/**/*.class" />
</jar>
<jar jarfile="${dist.dir}/AnalyzeCovariates.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/analyzecovariates/**/*.class" />
<include name="**/gatk/walkers/recalibration/*.class" />
</fileset>
<fileset dir="${R.script.staging.dir}">
<include name="**/analyzecovariates/**/*.R"/>
<include name="**/gatk/walkers/recalibration/**/*.R"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
</manifest>
</jar>
<subant target="dist" genericantfile="build.xml">
<property name="build.dir" value="${external.build.dir}" />
<property name="dist.dir" value="${external.dist.dir}" />
@ -750,12 +736,6 @@
<attribute name="Class-Path" value="${jar.classpath}"/>
</manifest>
</jar>
<jar jarfile="${dist.dir}/AnalyzeCovariates.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
</target>
<target name="queue.manifests" depends="queue.jar, init.manifests" if="include.scala">
@ -1249,11 +1229,7 @@
</target>
<target name="committests">
<antcall target="unittest" inheritAll="false"/>
<antcall target="integrationtest" inheritAll="false"/>
<antcall target="pipelinetest" inheritAll="false"/>
</target>
<target name="committests" depends="unittest,integrationtest,pipelinetest" />
<!-- Order of the dependencies is significant in the *.release.tests targets -->
<target name="gatkfull.binary.release.tests" depends="init.usecontracts,package.gatk.full,init.testgatkjar,unittest,integrationtest" />

134
ivy.xml
View File

@ -23,86 +23,90 @@
-->
<ivy-module version="1.0">
<info organisation="org.broadinstitute" module="Sting"/>
<configurations defaultconfmapping="test->default">
<conf name="default" description="the core dependencies for the GATK"/>
<conf name="test" extends="default" description="external dependencies used for testing and metrics" />
<conf name="scala" extends="default" description="the dependencies for scala"/>
<conf name="queue" extends="scala" description="the dependencies for Queue"/>
</configurations>
<dependencies defaultconf="default">
<dependency org="net.sf" name="sam" rev="latest.integration"/>
<dependency org="net.sf" name="picard" rev="latest.integration"/>
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/>
<info organisation="org.broadinstitute" module="Sting"/>
<configurations defaultconfmapping="test->default">
<conf name="default" description="the core dependencies for the GATK"/>
<conf name="test" extends="default" description="external dependencies used for testing and metrics"/>
<conf name="scala" extends="default" description="the dependencies for scala"/>
<conf name="queue" extends="scala" description="the dependencies for Queue"/>
</configurations>
<dependencies defaultconf="default">
<dependency org="net.sf" name="sam" rev="latest.integration"/>
<dependency org="net.sf" name="picard" rev="latest.integration"/>
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/>
<!-- Tribble -->
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
<!-- Tribble -->
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
<dependency org="log4j" name="log4j" rev="1.2.15"/>
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
<dependency org="colt" name="colt" rev="1.2.0"/>
<!-- <dependency org="jboss" name="javassist" rev="3.7.ga"/> -->
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
<dependency org="log4j" name="log4j" rev="1.2.15"/>
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
<dependency org="colt" name="colt" rev="1.2.0"/>
<!-- <dependency org="jboss" name="javassist" rev="3.7.ga"/> -->
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
<!-- Dependencies for reflections mvn repository -->
<dependency org="org.reflections" name="reflections" rev="0.9.5-RC2"/>
<!-- Dependencies for reflections mvn repository -->
<dependency org="org.reflections" name="reflections" rev="0.9.5-RC2"/>
<!-- Matrix package from math.nist.gov -->
<dependency org="gov.nist" name="Jama" rev="1.0.2"/>
<!-- Matrix package from math.nist.gov -->
<dependency org="gov.nist" name="Jama" rev="1.0.2"/>
<!-- Dependencies for the graph aligner -->
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
<!-- Dependencies for the graph aligner -->
<dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>
<!-- Dependencies for the html walker documentation -->
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
<!-- Commons Dependencies -->
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
<dependency org="org.apache.commons" name="commons-jexl" rev="2.1.1"/>
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
<dependency org="commons-io" name="commons-io" rev="2.1"/>
<dependency org="org.apache.commons" name="commons-math" rev="2.2" />
<!-- Dependencies for the html walker documentation -->
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
<!-- Lucene core utilities -->
<!-- <dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/> -->
<!-- Commons Dependencies -->
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
<dependency org="org.apache.commons" name="commons-jexl" rev="2.1.1"/>
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
<dependency org="commons-io" name="commons-io" rev="2.1"/>
<dependency org="org.apache.commons" name="commons-math" rev="2.2"/>
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
<!-- Lucene core utilities -->
<!-- <dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/> -->
<!-- Dependencies for amazon.com S3 support -->
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.1"/>
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
<!-- Dependencies for GridEngine -->
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
<!-- Dependencies for amazon.com S3 support -->
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.1"/>
<!-- Scala dependencies -->
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1"/>
<!-- Dependencies for GridEngine -->
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
<!-- testing and evaluation dependencies -->
<dependency org="org.testng" name="testng" rev="5.14.1" conf="test" />
<dependency org="org.uncommons" name="reportng" rev="1.1.2" conf="test" />
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test" />
<!-- Scala dependencies -->
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1"/>
<!-- Contracts for Java and dependencies -->
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-20110609" />
<dependency org="asm" name="asm-all" rev="3.3.1" />
<!-- testing and evaluation dependencies -->
<dependency org="org.testng" name="testng" rev="5.14.1" conf="test"/>
<dependency org="org.uncommons" name="reportng" rev="1.1.2" conf="test"/>
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
<!-- POI, for reading pipeline files -->
<dependency org="org.apache.poi" name="poi" rev="3.8-beta3" />
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3" />
<!-- Contracts for Java and dependencies -->
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
<dependency org="asm" name="asm-all" rev="3.3.1"/>
<!-- snpEff annotator for pipelines -->
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.5" />
<!-- POI, for reading pipeline files -->
<dependency org="org.apache.poi" name="poi" rev="3.8-beta3"/>
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3"/>
<!-- MongoDB for the GXDB project -->
<dependency org="org.mongodb" name="mongo-java-driver" rev="2.7.3"/>
<!-- snpEff annotator for pipelines -->
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.5"/>
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
<exclude org="javax.servlet" />
<exclude org="javax.jms" />
<exclude org="com.sun.*" />
</dependencies>
<!-- MongoDB for the GXDB project -->
<dependency org="org.mongodb" name="mongo-java-driver" rev="2.7.3"/>
<!-- GSON and HTTP for talking to the REST API on Vanilla Forums -->
<dependency org="com.google.code.gson" name="gson" rev="2.2.2"/>
<dependency org="org.apache.httpcomponents" name="httpclient" rev="4.1.1"/>
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
<exclude org="javax.servlet"/>
<exclude org="javax.jms"/>
<exclude org="com.sun.*"/>
</dependencies>
</ivy-module>

Binary file not shown.

View File

@ -1,7 +1,4 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
package org.broadinstitute.sting.gatk;
/*
* Copyright (c) 2009 The Broad Institute
@ -28,34 +25,10 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Oct 30, 2009
*
* The Read Group covariate.
*/
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
public class ReadGroupCovariate implements RequiredCovariate {
public class DummyProtectedClass implements ProtectedPackageSource {
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize(final RecalibrationArgumentCollection RAC) {
}
@Override
public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
final String readGroupId = read.getReadGroup().getReadGroupId();
for (int i = 0; i < read.getReadLength(); i++) {
comparable[i] = readGroupId;
}
}
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue(final String str) {
return str;
}
// THIS CLASS IS USED JUST SO THAT WE CAN TEST WHETHER WE ARE USING THE LITE OR FULL VERSION OF THE GATK
// **** DO NOT REMOVE! ****
}

View File

@ -0,0 +1,103 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
/**
 * Base-quality recalibration engine that accumulates per-event-type observation/error
 * counts into the shared {@link RecalibrationTables}, one pileup element at a time.
 *
 * Extends StandardRecalibrationEngine; marked with {@link ProtectedPackageSource}.
 * The update method is synchronized, so a single instance may be shared by
 * multiple callers, but the scratch arrays below make it stateful — do not call
 * updateDataForPileupElement() without that lock.
 */
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
// optimizations: don't reallocate an array each time
// Scratch buffers indexed by EventType.index; reused across calls (see synchronized below).
private byte[] tempQualArray;
private boolean[] tempErrorArray;
// Sets up the engine and sizes the scratch buffers to the number of event types
// (one slot per EventType: substitution, insertion, deletion).
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
super.initialize(covariates, recalibrationTables);
tempQualArray = new byte[EventType.values().length];
tempErrorArray = new boolean[EventType.values().length];
}
/**
 * Loop through the list of requested covariates and pick out the value from the read, offset, and reference
 * Using the list of covariate values as a key, pick out the RecalDatum and increment,
 * adding one to the number of observations and potentially one to the number of mismatches for all three
 * categories (mismatches, insertions and deletions).
 *
 * @param pileupElement The pileup element to update
 * @param refBase The reference base at this locus
 */
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
// Gather the quality and error status for each of the three event types at this offset.
tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual();
tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual();
// For negative-strand reads the before/after sense is flipped — presumably because
// indel events are defined in read orientation; NOTE(review): confirm against PileupElement docs.
tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion();
tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual();
tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase();
for (final EventType eventType : EventType.values()) {
// keys[0] indexes the read-group table, keys[1] the quality-score table,
// keys[2..] the additional requested covariates (see table lookups below).
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.index;
final byte qual = tempQualArray[eventIndex];
final boolean isError = tempErrorArray[eventIndex];
// Read-group table: merge a freshly built datum into the existing bucket (or create it).
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
// Quality-score table: keyed by (read group, reported quality).
// NOTE(review): this path uses increment() while the RG path uses combine() — confirm intentional.
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(isError);
// Optional covariate tables (context, cycle, ...): start at index 2 since
// indices 0 and 1 are handled by the dedicated tables above.
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue; // negative key means this covariate does not apply at this offset
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(isError);
}
}
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.HashMap;
import java.util.Map;
@ -39,8 +41,9 @@ import java.util.Map;
* @since 10/30/11
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public class CompareBAMWalker extends LocusWalker<Map<CompareBAMWalker.TestName, Boolean>, CompareBAMWalker.TestResults> {
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;
@Argument(required = false, shortName = "teq", fullName = "test_equal_bases", doc = "Test if the bases marked as '=' are indeed ref bases.") public boolean TEST_EQUAL_BASES = false;
@Argument(required = false, shortName = "tbc", fullName = "test_base_counts", doc = "Test if the base counts tag in consensus reads are accurate.") public boolean TEST_BASE_COUNTS = false;

View File

@ -53,7 +53,7 @@ public class MultiSampleCompressor implements Compressor {
final double minAltProportionToTriggerVariant,
final double minIndelProportionToTriggerVariant,
final int minBaseQual,
final ReduceReadsWalker.DownsampleStrategy downsampleStrategy) {
final ReduceReads.DownsampleStrategy downsampleStrategy) {
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
compressorsPerSample.put(name,
new SingleSampleCompressor(name, contextSize, downsampleCoverage,

View File

@ -25,13 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.*;
@ -46,6 +44,7 @@ import org.broadinstitute.sting.utils.GenomeLocComparator;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -81,9 +80,10 @@ import java.util.*;
* </pre>
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.INTERVAL)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
@Output
protected StingSAMFileWriter out;
@ -180,7 +180,7 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
* A value of 0 turns downsampling off.
*/
@Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false)
protected int downsampleCoverage = 0;
protected int downsampleCoverage = 250;
@Hidden
@Argument(fullName = "", shortName = "dl", doc = "", required = false)
@ -251,7 +251,7 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
LinkedList<GATKSAMRecord> mappedReads;
totalReads++;
if (!debugRead.isEmpty() && read.getReadName().contains(debugRead))
System.out.println("Found debug read!");
System.out.println("Found debug read!");
if (debugLevel == 1)
System.out.printf("\nOriginal: %s %s %d %d\n", read, read.getCigar(), read.getAlignmentStart(), read.getAlignmentEnd());
@ -260,7 +260,14 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
// attribute hash so we can determine later if we need to write down the alignment shift to the reduced BAM file
read.setTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, read.getAlignmentStart());
read.setTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, read.getAlignmentEnd());
// Check if the read goes beyond the boundaries of the chromosome, and hard clip those boundaries.
int chromosomeLength = ref.getGenomeLocParser().getContigInfo(read.getReferenceName()).getSequenceLength();
if (read.getSoftStart() < 0)
read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart() - 1);
if (read.getSoftEnd() > chromosomeLength)
read = ReadClipper.hardClipByReadCoordinates(read, chromosomeLength - read.getSoftStart() + 1, read.getReadLength() - 1);
if (!DONT_SIMPLIFY_READS)
read.simplify(); // Clear all unnecessary attributes
if (!DONT_CLIP_ADAPTOR_SEQUENCES)
@ -532,81 +539,12 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
if (debugLevel == 1)
System.out.println("BAM: " + read.getCigar() + " " + read.getAlignmentStart() + " " + read.getAlignmentEnd());
// if (!DONT_USE_SOFTCLIPPED_BASES)
// reSoftClipBases(read);
if (!DONT_COMPRESS_READ_NAMES)
compressReadName(read);
out.addAlignment(read);
}
private void reSoftClipBases(GATKSAMRecord read) {
Integer left = (Integer) read.getTemporaryAttribute("SL");
Integer right = (Integer) read.getTemporaryAttribute("SR");
if (left != null || right != null) {
Cigar newCigar = new Cigar();
for (CigarElement element : read.getCigar().getCigarElements()) {
newCigar.add(new CigarElement(element.getLength(), element.getOperator()));
}
if (left != null) {
newCigar = updateFirstSoftClipCigarElement(left, newCigar);
read.setAlignmentStart(read.getAlignmentStart() + left);
}
if (right != null) {
Cigar invertedCigar = invertCigar(newCigar);
newCigar = invertCigar(updateFirstSoftClipCigarElement(right, invertedCigar));
}
read.setCigar(newCigar);
}
}
/**
* Facility routine to revert the first element of a Cigar string (skipping hard clips) into a soft-clip.
* To be used on both ends if provided a flipped Cigar
*
* @param softClipSize the length of the soft clipped element to add
* @param originalCigar the original Cigar string
* @return a new Cigar object with the soft clips added
*/
private Cigar updateFirstSoftClipCigarElement (int softClipSize, Cigar originalCigar) {
Cigar result = new Cigar();
CigarElement leftElement = new CigarElement(softClipSize, CigarOperator.S);
boolean updated = false;
for (CigarElement element : originalCigar.getCigarElements()) {
if (!updated && element.getOperator() == CigarOperator.M) {
result.add(leftElement);
int newLength = element.getLength() - softClipSize;
if (newLength > 0)
result.add(new CigarElement(newLength, CigarOperator.M));
updated = true;
}
else
result.add(element);
}
return result;
}
/**
* Given a cigar string, returns the inverted cigar string.
*
* @param cigar the original cigar
* @return the inverted cigar
*/
private Cigar invertCigar(Cigar cigar) {
Stack<CigarElement> stack = new Stack<CigarElement>();
for (CigarElement e : cigar.getCigarElements())
stack.push(e);
Cigar inverted = new Cigar();
while (!stack.empty()) {
inverted.add(stack.pop());
}
return inverted;
}
/**
* Quality control procedure that checks if the consensus reads contains too many
* mismatches with the reference. This should never happen and is a good trigger for
@ -663,7 +601,7 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
* @return Returns true if the read is the original read that went through map().
*/
private boolean isOriginalRead(LinkedList<GATKSAMRecord> list, GATKSAMRecord read) {
return isWholeGenome() || (list.getFirst().equals(read) && ReadUtils.getReadAndIntervalOverlapType(read, intervalList.first()) == ReadUtils.ReadAndIntervalOverlap.OVERLAP_CONTAINED);
return isWholeGenome() || list.getFirst().equals(read);
}
/**

View File

@ -26,7 +26,7 @@ public class SingleSampleCompressor implements Compressor {
protected double minIndelProportionToTriggerVariant;
protected int minBaseQual;
protected ReduceReadsWalker.DownsampleStrategy downsampleStrategy;
protected ReduceReads.DownsampleStrategy downsampleStrategy;
public SingleSampleCompressor(final String sampleName,
final int contextSize,
@ -35,7 +35,7 @@ public class SingleSampleCompressor implements Compressor {
final double minAltProportionToTriggerVariant,
final double minIndelProportionToTriggerVariant,
final int minBaseQual,
final ReduceReadsWalker.DownsampleStrategy downsampleStrategy) {
final ReduceReads.DownsampleStrategy downsampleStrategy) {
this.sampleName = sampleName;
this.contextSize = contextSize;
this.downsampleCoverage = downsampleCoverage;

View File

@ -53,7 +53,7 @@ public class SlidingWindow {
protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value
protected int MIN_MAPPING_QUALITY;
protected ReduceReadsWalker.DownsampleStrategy downsampleStrategy;
protected ReduceReads.DownsampleStrategy downsampleStrategy;
private boolean hasIndelQualities;
/**
@ -82,7 +82,7 @@ public class SlidingWindow {
}
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader header, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReadsWalker.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities) {
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader header, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities) {
this.stopLocation = -1;
this.contextSize = contextSize;
this.downsampleCoverage = downsampleCoverage;
@ -499,7 +499,7 @@ public class SlidingWindow {
result.addAll(addToSyntheticReads(0, start));
result.addAll(finalizeAndAdd(ConsensusType.BOTH));
for (GATKSAMRecord read : result) {
for (GATKSAMRecord read : allReads) {
readsInWindow.remove(read); // todo -- not optimal, but needs to be done so the next region doesn't try to remove the same reads from the header counts.
}
@ -536,6 +536,10 @@ public class SlidingWindow {
* @return a list of reads selected by the downsampler to cover the window to at least the desired coverage
*/
protected List<GATKSAMRecord> downsampleVariantRegion(final List<GATKSAMRecord> allReads) {
int nReads = allReads.size();
if (nReads == 0)
return allReads;
double fraction = 100 / allReads.size();
if (fraction >= 1)
return allReads;
@ -545,6 +549,7 @@ public class SlidingWindow {
return downsampler.consumeDownsampledItems();
}
/**
* Properly closes a Sliding Window, finalizing all consensus and variant
* regions that still exist regardless of being able to fulfill the
@ -627,7 +632,7 @@ public class SlidingWindow {
int locationIndex = startLocation < 0 ? 0 : readStart - startLocation;
if (removeRead && locationIndex < 0)
throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation);
throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " start " + read.getUnclippedStart() + "," + read.getUnclippedEnd() + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation);
if (!removeRead) { // we only need to create new header elements if we are adding the read, not when we're removing it
if (locationIndex < 0) { // Do we need to add extra elements before the start of the header? -- this may happen if the previous read was clipped and this alignment starts before the beginning of the window

View File

@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
@ -102,7 +102,7 @@ public class SyntheticRead {
* @param base the base to add
* @param count number of reads with this base
*/
@Requires("count < Byte.MAX_VALUE")
@Requires("count <= Byte.MAX_VALUE")
public void add(BaseIndex base, byte count, byte qual, byte insQual, byte delQual, double mappingQuality) {
counts.add(count);
bases.add(base);

View File

@ -0,0 +1,295 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
/**
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 7/21/11
* Time: 2:21 PM
*
* This is a site based implementation of an Error Model. The error model is a probability
* distribution for the site given the phred scaled quality.
*/
public class ErrorModel {
// Inclusive quality-score bounds over which the model is defined.
private byte maxQualityScore;
private byte minQualityScore;
// Phred-scaled prior for site quality, copied from UAC; not read elsewhere in this class's visible code.
private byte phredScaledPrior;
// log10 of UAC.minPower; only referenced by the commented-out hasPowerForMaxAC at the bottom.
private double log10minPower;
// Number of reference-sample reads used to build the model (0 when no usable data).
private int refDepth;
// True once the model was built from actual reference-sample observations.
private boolean hasData = false;
// Per-quality distribution: index q holds log10 Pr(observed data | site quality = q).
private ProbabilityVector probabilityVector;
// Whether to store the vector in compressed (significant-range-only) form.
private static final boolean compressRange = false;
// log10(e); converts the -lambda term of the Poisson log-density into log10 space.
private static final double log10MinusE = Math.log10(Math.exp(1.0));
private static final boolean DEBUG = false;
/**
* Calculates the probability of the data (reference sample reads) given the phred scaled site quality score.
*
* @param UAC Argument Collection
* @param refSamplePileup Reference sample pileup
* @param refSampleVC VC with True alleles in reference sample pileup
* @param refContext Reference context at this locus; used for indel haplotype construction and base matching
*/
public ErrorModel (final UnifiedArgumentCollection UAC,
final ReadBackedPileup refSamplePileup,
VariantContext refSampleVC, final ReferenceContext refContext) {
this.maxQualityScore = UAC.maxQualityScore;
this.minQualityScore = UAC.minQualityScore;
this.phredScaledPrior = UAC.phredScaledPrior;
log10minPower = Math.log10(UAC.minPower);
PairHMMIndelErrorModel pairModel = null;
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
double[][] perReadLikelihoods = null;
// model[q] will hold log10 Pr(data | site quality = q) for q in [minQualityScore, maxQualityScore].
double[] model = new double[maxQualityScore+1];
Arrays.fill(model,Double.NEGATIVE_INFINITY);
boolean hasCalledAlleles = false;
if (refSampleVC != null) {
// Any called allele in the reference-sample VC means we have usable truth data.
for (Allele allele : refSampleVC.getAlleles()) {
if (allele.isCalled()) {
hasCalledAlleles = true;
break;
}
}
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
if (refSampleVC.isIndel()) {
// Indel sites need the pair-HMM to decide which haplotype each read supports.
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
}
}
// Flat (maximum-uncertainty) log10 probability spread over the whole quality range.
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
// maximum uncertainty if there's no ref data at site
model[q] = p;
}
this.refDepth = 0;
}
else {
hasData = true;
int matches = 0;
int coverage = 0;
Allele refAllele = refSampleVC.getReference();
if (refSampleVC.isIndel()) {
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
if (!haplotypeMap.isEmpty())
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
}
int idx = 0;
// Count how many pileup reads are consistent with any of the called alleles at this site.
for (PileupElement refPileupElement : refSamplePileup) {
if (DEBUG)
System.out.println(refPileupElement.toString());
boolean isMatch = false;
for (Allele allele : refSampleVC.getAlleles()) {
boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase());
if (DEBUG) System.out.println(m);
isMatch |= m;
}
if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
if (!isInformativeElement(perAlleleLikelihoods))
matches++;
else
matches += (isMatch?1:0);
} else {
matches += (isMatch?1:0);
}
coverage++;
}
int mismatches = coverage - matches;
//System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches);
// Fill the model with the Poisson log-likelihood of the observed mismatch count at each q.
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
if (coverage==0)
model[q] = p;
else
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
}
this.refDepth = coverage;
}
// compress probability vector
this.probabilityVector = new ProbabilityVector(model, compressRange);
}
// A read is informative only when its per-allele likelihoods differ by more than a small
// threshold; near-identical likelihoods mean it cannot discriminate among the alleles.
@Requires("likelihoods.length>0")
private boolean isInformativeElement(double[] likelihoods) {
// if likelihoods are the same, they're not informative
final double thresh = 0.1;
int maxIdx = MathUtils.maxElementIndex(likelihoods);
int minIdx = MathUtils.minElementIndex(likelihoods);
if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh)
return false;
else
return true;
}
/**
* Simple constructor that just takes a given log-probability vector as error model.
* Only intended for unit testing, not general usage.
* @param pvector Given vector of log-probabilities
*
*/
public ErrorModel(double[] pvector) {
this.maxQualityScore = (byte)(pvector.length-1);
this.minQualityScore = 0;
this.probabilityVector = new ProbabilityVector(pvector, compressRange);
this.hasData = true;
}
// Decides whether a single pileup element supports the given allele. For the reference allele,
// any base mismatch or adjacent indel counts as a mismatch; for alternate alleles, the base
// (SNP/MNP case) or the indel length/bases must match.
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) {
if (DEBUG)
System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
pileupElement.getBase(), pileupElement.isBeforeDeletionStart(),
pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString());
//pileupElement.
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
if (allele.isReference()) {
// for a ref allele, any base mismatch or new indel is a mismatch.
if(allele.getBases().length>0)
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
// could clean up/simplify this when unpadding is removed
return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
else
// either null allele to compare, or ref/alt lengths are different (indel by definition).
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
}
// for non-ref alleles to compare:
if (refAllele.getBases().length == allele.getBases().length)
// alleles have the same length (eg snp or mnp)
return pileupElement.getBase() == allele.getBases()[0];
// for non-ref alleles,
// eventLength < 0 means the allele is a deletion relative to ref; > 0 means an insertion.
byte[] alleleBases = allele.getBases();
int eventLength = alleleBases.length - refAllele.getBases().length;
if (eventLength < 0 && pileupElement.isBeforeDeletionStart() && pileupElement.getEventLength() == -eventLength)
return true;
if (eventLength > 0 && pileupElement.isBeforeInsertion() &&
Arrays.equals(pileupElement.getEventBases().getBytes(),alleleBases))
return true;
return false;
}
/**
* What's the log-likelihood that a site's quality is equal to q? If we see N observations and n mismatches,
* and assuming each match is independent of each other and that the match probability is just dependent of
* the site quality, so p = 10.^-q/10.
* Since we'll normally have relatively high Q sites and deep coverage in reference samples (ie p small, N high),
* to avoid underflows we'll use the Poisson approximation with lambda = N*p.
* Hence, the log-likelihood of q i.e. Pr(Nmismatches = n | SiteQ = q) ~ Poisson(n | lambda = p*N) with p as above.
* @param q Desired q to get likelihood from
* @param coverage Total coverage
* @param mismatches Number of mismatches
* @return Likelihood of observations as a function of q
*/
@Requires({
"q >= minQualityScore",
"q <= maxQualityScore",
"coverage >= 0",
"mismatches >= 0",
"mismatches <= coverage"
})
private double log10PoissonProbabilitySiteGivenQual(byte q, int coverage, int mismatches) {
// same as log10ProbabilitySiteGivenQual but with Poisson approximation to avoid numerical underflows
double lambda = MathUtils.phredScaleToProbability(q) * (double )coverage;
// log10(e^-lambda*lambda^k/k!) = -lambda + k*log10(lambda) - log10factorial(k)
return Math.log10(lambda)*mismatches - lambda*log10MinusE- MathUtils.log10Factorial(mismatches);
}
// NOTE(review): this contract looks odd -- it presumably intends qual to lie within
// [minQualityScore, maxQualityScore]; confirm against ProbabilityVector's index handling.
@Requires({"qual-minQualityScore <= maxQualityScore"})
public double getSiteLogErrorProbabilityGivenQual (int qual) {
return probabilityVector.getLogProbabilityForIndex(qual);
}
public byte getMaxQualityScore() {
return maxQualityScore;
}
public byte getMinQualityScore() {
return minQualityScore;
}
// Smallest quality index with significant probability mass, from a compressed copy of the vector.
public int getMinSignificantQualityScore() {
return new ProbabilityVector(probabilityVector,true).getMinVal();
}
// Largest quality index with significant probability mass, from a compressed copy of the vector.
public int getMaxSignificantQualityScore() {
return new ProbabilityVector(probabilityVector,true).getMaxVal();
}
public int getReferenceDepth() {
return refDepth;
}
public boolean hasData() {
return hasData;
}
public ProbabilityVector getErrorModelVector() {
return probabilityVector;
}
// Renders the stored probability vector as "(v1,v2,...)" with 4 decimal places per entry.
public String toString() {
String result = "(";
boolean skipComma = true;
for (double v : probabilityVector.getProbabilityVector()) {
if (skipComma) {
skipComma = false;
}
else {
result += ",";
}
result += String.format("%.4f", v);
}
return result + ")";
}
// Sums the reference-sample depth across all per-lane error models.
public static int getTotalReferenceDepth(HashMap<String, ErrorModel> perLaneErrorModels) {
int n=0;
for (ErrorModel e : perLaneErrorModels.values()) {
n += e.getReferenceDepth();
}
return n;
}
/*
@Requires({"maxAlleleCount >= 0"})
//todo -- memoize this function
public boolean hasPowerForMaxAC (int maxAlleleCount) {
int siteQ = (int) Math.ceil(MathUtils.probabilityToPhredScale((double) 1/maxAlleleCount));
double log10CumSum = getCumulativeSum(siteQ);
return log10CumSum < log10minPower;
} */
}

View File

@ -0,0 +1,706 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
final protected UnifiedArgumentCollection UAC;
private final int ploidy;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false;
protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
    super(UAC, N, logger, verboseWriter);
    // Keep a handle on the argument collection and cache the per-pool ploidy.
    this.UAC = UAC;
    this.ploidy = UAC.samplePloidy;
}
public List<Allele> getLog10PNonRef(final VariantContext vc,
                                    final double[] log10AlleleFrequencyPriors,
                                    final AlleleFrequencyCalculationResult result) {
    // Start from the full genotype context and allele list of the variant.
    GenotypesContext genotypes = vc.getGenotypes();
    List<Allele> allelesToGenotype = vc.getAlleles();

    // Cap the number of alternate alleles we genotype; keep only the most likely ones
    // and subset the genotype likelihoods accordingly.
    final int numAltAlleles = vc.getAlternateAlleles().size();
    if ( numAltAlleles > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) {
        logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
        allelesToGenotype = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
        allelesToGenotype.add(vc.getReference());
        allelesToGenotype.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE, ploidy));
        genotypes = subsetAlleles(vc, allelesToGenotype, false, ploidy);
    }

    // Fold all per-pool likelihoods into the combined AF distribution.
    combineSinglePools(genotypes, allelesToGenotype.size(), ploidy, log10AlleleFrequencyPriors, result);
    return allelesToGenotype;
}
/**
* Simple wrapper class to hold values of combined pool likelihoods.
* For fast hashing and fast retrieval, there's a hash map that shadows main list.
*
*/
static class CombinedPoolLikelihoods {
    // Ordered collection of AC conformations; by construction the AC=0 set is always first.
    private final LinkedList<ExactACset> conformations;
    // Hash lookup shadowing the list, for O(1) retrieval of a conformation by its count vector.
    private final HashMap<ExactACcounts, ExactACset> byCounts;
    // Highest log10 likelihood seen so far among the added conformations.
    private double bestLog10L;

    public CombinedPoolLikelihoods() {
        conformations = new LinkedList<ExactACset>();
        byCounts = new HashMap<ExactACcounts, ExactACset>();
        bestLog10L = Double.NEGATIVE_INFINITY;
    }

    // Record a conformation in both the list and the shadow map, tracking the running maximum.
    public void add(ExactACset set) {
        conformations.add(set);
        byCounts.put(set.ACcounts, set);
        final double log10L = set.log10Likelihoods[0];
        if (log10L > bestLog10L)
            bestLog10L = log10L;
    }

    public boolean hasConformation(int[] ac) {
        return byCounts.containsKey(new ExactACcounts(ac));
    }

    public double getLikelihoodOfConformation(int[] ac) {
        return byCounts.get(new ExactACcounts(ac)).log10Likelihoods[0];
    }

    public double getGLOfACZero() {
        return conformations.get(0).log10Likelihoods[0]; // AC 0 is always at beginning of list
    }

    public int getLength() {
        return conformations.size();
    }
}
/**
*
* Chooses N most likely alleles in a set of pools (samples) based on GL sum over alt alleles
* @param vc Input variant context
* @param numAllelesToChoose Number of alleles to choose
* @param ploidy Ploidy per pool
* @return list of numAllelesToChoose most likely alleles
*/
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose, int ploidy) {
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
// One accumulator per original alternate allele, indexed in the VC's alt-allele order.
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ )
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes());
for ( final double[] likelihoods : GLs ) {
// Best genotype for this pool; its allele-count vector tells us which alt alleles it carries.
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) {
if (acCount[k] > 0)
likelihoodSums[k-1].sum += likelihoods[PLindexOfBestGL];
}
}
// sort them by probability mass and choose the best ones
// NOTE(review): relies on LikelihoodSum's compareTo putting the most supported alleles
// at the front of the array -- confirm its ordering before changing this.
Collections.sort(Arrays.asList(likelihoodSums));
final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( int i = 0; i < numAllelesToChoose; i++ )
bestAlleles.add(likelihoodSums[i].allele);
// Re-emit the chosen alleles in the VC's original alt-allele order.
final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( bestAlleles.contains(allele) )
orderedBestAlleles.add(allele);
}
return orderedBestAlleles;
}
/**
* Simple non-optimized version that combines GLs from several pools and produces global AF distribution.
* @param GLs Inputs genotypes context with per-pool GLs
* @param numAlleles Number of alternate alleles
* @param ploidyPerPool Number of samples per pool
* @param log10AlleleFrequencyPriors Frequency priors
* @param result object to fill with output values
*/
protected static void combineSinglePools(final GenotypesContext GLs,
final int numAlleles,
final int ploidyPerPool,
final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
int combinedPloidy = 0;
// Combine each pool incrementally - likelihoods will be renormalized at each step
CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods();
// first element: zero ploidy, e.g. trivial degenerate distribution
final int[] zeroCounts = new int[numAlleles];
final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts));
set.log10Likelihoods[0] = 0.0;
combinedPoolLikelihoods.add(set);
// NOTE(review): the loop starts at p=1, which presumably skips a placeholder element that
// getGLs() puts at index 0 -- verify against getGLs() before changing this bound.
for (int p=1; p<genotypeLikelihoods.size(); p++) {
result.reset();
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p), combinedPloidy, ploidyPerPool,
numAlleles, log10AlleleFrequencyPriors, result);
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
}
}
/**
* Combines an already-combined pool likelihood with one additional pool's GL vector,
* exploring AC conformations breadth-first from AC=0 and pruning branches whose
* likelihood falls too far below the running maximum.
*
* @param originalPool likelihoods of the pools combined so far
* @param newGL GL vector of the pool being folded in
* @param originalPloidy total ploidy of the pools combined so far
* @param newGLPloidy ploidy of the new pool
* @param numAlleles number of alleles (including ref)
* @param log10AlleleFrequencyPriors allele frequency priors
* @param result AF calculation result object updated as conformations are evaluated
* @return combined pool likelihoods including the new pool
*/
public static CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, double[] newGL, int originalPloidy, int newGLPloidy, int numAlleles,
final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) {
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
// mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>();
final CombinedPoolLikelihoods newPool = new CombinedPoolLikelihoods();
// add AC=0 to the queue
final int[] zeroCounts = new int[numAlleles];
final int newPloidy = originalPloidy + newGLPloidy;
zeroCounts[0] = newPloidy;
ExactACset zeroSet = new ExactACset(1, new ExactACcounts(zeroCounts));
ACqueue.add(zeroSet);
indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY;
while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods
final ExactACset ACset = ACqueue.remove();
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset);
maxLog10L = Math.max(maxLog10L, log10LofKs);
// clean up memory
indexesToACset.remove(ACset.ACcounts);
if ( VERBOSE )
System.out.printf(" *** removing used set=%s%n", ACset.ACcounts);
}
return newPool;
}
// todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
/**
*
* @param set ExactACset holding conformation to be computed
* @param newPool New pool likelihood holder
* @param originalPool Original likelihood holder
* @param newGL New pool GL vector to combine
* @param log10AlleleFrequencyPriors Prior object
* @param originalPloidy Total ploidy of original combined pool
* @param newGLPloidy Ploidy of GL vector
* @param result AFResult object
* @param maxLog10L max likelihood observed so far
* @param ACqueue Queue of conformations to compute
* @param indexesToACset AC indices of objects in queue
* @return max log likelihood
*/
private static double calculateACConformationAndUpdateQueue(final ExactACset set,
final CombinedPoolLikelihoods newPool,
final CombinedPoolLikelihoods originalPool,
final double[] newGL,
final double[] log10AlleleFrequencyPriors,
final int originalPloidy,
final int newGLPloidy,
final AlleleFrequencyCalculationResult result,
final double maxLog10L,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
// compute likeihood in "set" of new set based on original likelihoods
final int numAlleles = set.ACcounts.counts.length;
final int newPloidy = set.getACsum();
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, result);
// add to new pool
// -Infinity conformations carry no probability mass, so keep them out of the pool.
if (!Double.isInfinite(log10LofK))
newPool.add(set);
// Prune: conformations this far below the running maximum cannot contribute meaningfully,
// so don't enqueue any higher-frequency successors of this one.
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( VERBOSE )
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
return log10LofK;
}
// iterate over higher frequencies if possible
// by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.
// so, if first element is zero, it automatically means we have no wiggle since we're in a corner of the conformation space
final int ACwiggle = set.ACcounts.counts[0];
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
return log10LofK;
// add conformations for other cases
// For each alt allele, enqueue the conformation with one more copy of that allele
// (and correspondingly one fewer reference copy).
for ( int allele = 1; allele < numAlleles; allele++ ) {
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
ACcountsClone[allele]++;
// is this a valid conformation?
int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0];
ACcountsClone[0] = newPloidy - altSum;
if (ACcountsClone[0] < 0)
continue;
GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
}
return log10LofK;
}
/**
* Naive combiner of two multiallelic pools - number of alt alleles must be the same.
* Math is generalization of biallelic combiner.
*
* For vector K representing an allele count conformation,
* Pr(D | AC = K) = Sum_G Pr(D|AC1 = G) Pr (D|AC2=K-G) * F(G,K)
* where F(G,K) = choose(m1,[g0 g1 ...])*choose(m2,[...]) / choose(m1+m2,[k1 k2 ...])
* @param originalPool First log-likelihood pool GL vector
* @param yy Second pool GL vector
* @param ploidy1 Ploidy of first pool (# of chromosomes in it)
* @param ploidy2 Ploidy of second pool
* @param numAlleles Number of alleles
* @param log10AlleleFrequencyPriors Array of biallelic priors
* @param result Af calculation result object
*/
// NOTE(review): the entire body below is commented out, so this method is currently a no-op.
// It is kept as a reference implementation of the naive (exhaustive, non-early-stopping)
// multiallelic pool combination described in the Javadoc above.
public static void combineMultiallelicPoolNaively(CombinedPoolLikelihoods originalPool, double[] yy, int ploidy1, int ploidy2, int numAlleles,
final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) {
/*
final int dim1 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy1);
final int dim2 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy2);
if (dim1 != originalPool.getLength() || dim2 != yy.length)
throw new ReviewedStingException("BUG: Inconsistent vector length");
if (ploidy2 == 0)
return;
final int newPloidy = ploidy1 + ploidy2;
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
final double[] x = originalPool.getLikelihoodsAsVector(true);
while(firstIterator.hasNext()) {
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
firstIterator.next();
}
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final double[] y = yy.clone();
while(secondIterator.hasNext()) {
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
secondIterator.next();
}
// initialize output to -log10(choose(m1+m2,[k1 k2...])
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
while(outputIterator.hasNext()) {
final ExactACset set = new ExactACset(1, new ExactACcounts(outputIterator.getCurrentAltVector()));
double likelihood = computeLofK(set, x,y, log10AlleleFrequencyPriors, numAlleles, ploidy1, ploidy2, result);
originalPool.add(likelihood, set, outputIterator.getLinearIndex());
outputIterator.next();
}
*/
}
/**
* Compute likelihood of a particular AC conformation and update AFresult object
* @param set Set of AC counts to compute
* @param firstGLs Original pool likelihoods before combining
* @param secondGL New GL vector with additional pool
* @param log10AlleleFrequencyPriors Allele frequency priors
* @param numAlleles Number of alleles (including ref)
* @param ploidy1 Ploidy of original pool (combined)
* @param ploidy2 Ploidy of new pool
* @param result AFResult object
* @return log-likehood of requested conformation
*/
private static double computeLofK(final ExactACset set,
final CombinedPoolLikelihoods firstGLs,
final double[] secondGL,
final double[] log10AlleleFrequencyPriors,
final int numAlleles, final int ploidy1, final int ploidy2,
final AlleleFrequencyCalculationResult result) {
final int newPloidy = ploidy1 + ploidy2;
// sanity check
// getACsum() includes the reference count, so it must equal the total combined ploidy.
int totalAltK = set.getACsum();
if (newPloidy != totalAltK)
throw new ReviewedStingException("BUG: inconsistent sizes of set.getACsum and passed ploidy values");
totalAltK -= set.ACcounts.counts[0];
// totalAltK has sum of alt alleles of conformation now
// special case for k = 0 over all k
if ( totalAltK == 0 ) { // all-ref case
final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
set.log10Likelihoods[0] = log10Lof0;
result.setLog10LikelihoodOfAFzero(log10Lof0);
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
} else {
// initialize result with denominator
// ExactACset holds by convention the conformation of all alleles, and the sum of all allele count is just the ploidy.
// To compute n!/k1!k2!k3!... we need to compute first n!/(k2!k3!...) and then further divide by k1! where k1=ploidy-sum_k_i
int[] currentCount = set.ACcounts.getCounts();
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
// for current conformation, get all possible ways to break vector K into two components G1 and G2
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
// log-space convolution: accumulate Pr(D1|AC1=G1)*Pr(D2|AC2=G2)*F(G1,G2) over all splits K = G1+G2.
while (innerIterator.hasNext()) {
// check if breaking current conformation into g1 and g2 is feasible.
final int[] acCount2 = innerIterator.getCurrentVector();
final int[] acCount1 = MathUtils.vectorDiff(currentCount, acCount2);
final int idx2 = innerIterator.getLinearIndex();
// see if conformation is valid and if original pool had this conformation
// for conformation to be valid, all elements of g2 have to be <= elements of current AC set
if (isValidConformation(acCount1,ploidy1) && firstGLs.hasConformation(acCount1)) {
final double gl2 = secondGL[idx2];
if (!Double.isInfinite(gl2)) {
final double firstGL = firstGLs.getLikelihoodOfConformation(acCount1);
final double num1 = MathUtils.log10MultinomialCoefficient(ploidy1, acCount1);
final double num2 = MathUtils.log10MultinomialCoefficient(ploidy2, acCount2);
final double sum = firstGL + gl2 + num1 + num2;
set.log10Likelihoods[0] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[0], sum);
}
}
innerIterator.next();
}
set.log10Likelihoods[0] += denom;
}
double log10LofK = set.log10Likelihoods[0];
// update the MLE if necessary
// MLE uses the unweighted likelihood; the alt-only count vector drops the ref count at index 0.
final int altCounts[] = Arrays.copyOfRange(set.ACcounts.counts,1, set.ACcounts.counts.length);
result.updateMLEifNeeded(log10LofK, altCounts);
// apply the priors over each alternate allele
for (final int ACcount : altCounts ) {
if ( ACcount > 0 )
log10LofK += log10AlleleFrequencyPriors[ACcount];
}
// MAP uses the prior-weighted likelihood computed just above.
result.updateMAPifNeeded(log10LofK, altCounts);
return log10LofK;
}
/**
* Small helper routine - is a particular AC conformationv vector valid? ie are all elements non-negative and sum to ploidy?
* @param set AC conformation vector
* @param ploidy Ploidy of set
* @return Valid conformation
*/
// A conformation vector is valid iff every allele count is non-negative
// and the counts sum exactly to the given ploidy.
private static boolean isValidConformation(final int[] set, final int ploidy) {
    int total = 0;
    for (int idx = 0; idx < set.length; idx++) {
        final int count = set[idx];
        if (count < 0)
            return false;
        total += count;
    }
    return total == ploidy;
}
/**
* Combines naively two biallelic pools (of arbitrary size).
* For two pools of size m1 and m2, we can compute the combined likelihood as:
* Pr(D|AC=k) = Sum_{j=0}^k Pr(D|AC1=j) Pr(D|AC2=k-j) * choose(m1,j)*choose(m2,k-j)/choose(m1+m2,k)
* @param originalPool Pool likelihood vector, x[k] = Pr(AC_i = k) for alt allele i
* @param newPLVector Second GL vector
* @param ploidy1 Ploidy of first pool (# of chromosomes in it)
* @param ploidy2 Ploidy of second pool
* @param log10AlleleFrequencyPriors Array of biallelic priors
* @param result Af calculation result object
* @return Combined likelihood vector
*/
public static ProbabilityVector combineBiallelicPoolsNaively(final ProbabilityVector originalPool, final double[] newPLVector,
final int ploidy1, final int ploidy2, final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) {
final int newPloidy = ploidy1 + ploidy2;
final double[] combinedLikelihoods = new double[1+newPloidy];
/** Pre-fill result array and incorporate weights into input vectors
* Say L1(k) = Pr(D|AC1=k) * choose(m1,k)
* and L2(k) = Pr(D|AC2=k) * choose(m2,k)
* equation reduces to
* Pr(D|AC=k) = 1/choose(m1+m2,k) * Sum_{j=0}^k L1(k) L2(k-j)
* which is just plain convolution of L1 and L2 (with pre-existing vector)
*/
// intialize result vector to -infinity
Arrays.fill(combinedLikelihoods,Double.NEGATIVE_INFINITY);
// Pad the original pool's vector out to ploidy1+1 entries; the padding carries no mass.
final double[] x = Arrays.copyOf(originalPool.getProbabilityVector(),1+ploidy1);
for (int k=originalPool.getProbabilityVector().length; k< x.length; k++)
x[k] = Double.NEGATIVE_INFINITY;
final double[] y = newPLVector.clone();
// AF=0 case: both pools at AC=0.
final double log10Lof0 = x[0]+y[0];
result.setLog10LikelihoodOfAFzero(log10Lof0);
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
double maxElement = log10Lof0;
int maxElementIdx = 0;
int[] alleleCounts = new int[1];
for (int k= originalPool.getMinVal() ; k <= newPloidy; k++) {
// acc[j] accumulates the j-th term of the convolution for AC=k.
double[] acc = new double[k+1];
Arrays.fill(acc,Double.NEGATIVE_INFINITY);
double innerMax = Double.NEGATIVE_INFINITY;
for (int j=0; j <=k; j++) {
double x1,y1;
if (k-j>=0 && k-j < y.length)
y1 = y[k-j] + MathUtils.log10BinomialCoefficient(ploidy2,k-j);
else
continue;
if (j < x.length)
x1 = x[j] + MathUtils.log10BinomialCoefficient(ploidy1,j);
else
continue;
if (Double.isInfinite(x1) || Double.isInfinite(y1))
continue;
acc[j] = x1 + y1;
if (acc[j] > innerMax)
innerMax = acc[j];
else if (acc[j] < innerMax - MAX_LOG10_ERROR_TO_STOP_EARLY)
break;
}
combinedLikelihoods[k] = MathUtils.log10sumLog10(acc) - MathUtils.log10BinomialCoefficient(newPloidy,k);
maxElementIdx = k;
double maxDiff = combinedLikelihoods[k] - maxElement;
if (maxDiff > 0)
maxElement = combinedLikelihoods[k];
// NOTE(review): this early-break condition compares a difference (maxDiff) against an
// absolute level (maxElement - threshold); the analogous pruning elsewhere compares the
// difference against -MAX_LOG10_ERROR_TO_STOP_EARLY -- confirm the intended condition.
else if (maxDiff < maxElement - MAX_LOG10_ERROR_TO_STOP_EARLY) {
break;
}
alleleCounts[0] = k;
result.updateMLEifNeeded(combinedLikelihoods[k],alleleCounts);
result.updateMAPifNeeded(combinedLikelihoods[k] + log10AlleleFrequencyPriors[k],alleleCounts);
}
return new ProbabilityVector(MathUtils.normalizeFromLog10(Arrays.copyOf(combinedLikelihoods,maxElementIdx+1),false, true));
}
/**
* From a given variant context, extract a given subset of alleles, and update genotype context accordingly,
* including updating the PL's, and assign genotypes accordingly
* @param vc variant context with alleles and genotype likelihoods
* @param allelesToUse alleles to subset
* @param assignGenotypes true: assign hard genotypes, false: leave as no-call
* @param ploidy number of chromosomes per sample (pool)
* @return GenotypesContext with new PLs
*/
public GenotypesContext subsetAlleles(final VariantContext vc,
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy) {
// the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes();
// A no-call genotype for this ploidy: one NO_CALL allele per chromosome.
List<Allele> NO_CALL_ALLELES = new ArrayList<Allele>(ploidy);
for (int k=0; k < ploidy; k++)
NO_CALL_ALLELES.add(Allele.NO_CALL);
// samples
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
// the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create();
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final int numNewAltAlleles = allelesToUse.size() - 1;
// create the new genotypes
for ( int k = 0; k < oldGTs.size(); k++ ) {
final Genotype g = oldGTs.get(sampleIndices.get(k));
// Samples without likelihoods cannot be subsetted -- emit them as no-calls.
if ( !g.hasLikelihoods() ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
continue;
}
// create the new likelihoods array from the alleles we are allowed to use
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
double[] newLikelihoods;
if ( numOriginalAltAlleles == numNewAltAlleles) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
}
// if there is no mass on the (new) likelihoods, then just no-call the sample
// NOTE(review): sum(log10 likelihoods) > SUM_GL_THRESH_NOCALL means the normalized vector is
// nearly flat (all entries near 0), i.e. uninformative -- confirm the threshold's sign/semantics.
if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
}
else {
final GenotypeBuilder gb = new GenotypeBuilder(g);
if ( numNewAltAlleles == 0 )
gb.noPL();
else
gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL )
gb.alleles(NO_CALL_ALLELES);
else
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
newGTs.add(gb.make());
}
}
return newGTs;
}
/**
 * Greedily assigns a hard genotype (GT) to a single pool based on its PL vector.
 *
 * Picks the allele-count conformation with the highest likelihood, records the
 * per-pool MLE allele counts and fractions as genotype attributes, and expands
 * the winning conformation into the genotype's allele list.
 *
 * @param gb             builder for the genotype being filled in (mutated in place)
 * @param newLikelihoods the PL array for this pool
 * @param allelesToUse   the list of alleles to choose from (corresponding to the PLs)
 * @param numChromosomes number of chromosomes per pool
 */
private static void assignGenotype(final GenotypeBuilder gb,
                                   final double[] newLikelihoods,
                                   final List<Allele> allelesToUse,
                                   final int numChromosomes) {
    final int altAlleleCount = allelesToUse.size() - 1;
    // index of the most likely conformation; trivially 0 when only the reference is present
    final int bestPLindex = altAlleleCount == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
    final int[] bestConformation = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, bestPLindex);
    // gather per-alt-allele counts and fractions, skipping the reference at index 0
    final ArrayList<Integer> mleCounts = new ArrayList<Integer>();
    final ArrayList<Double> mleFractions = new ArrayList<Double>();
    for (int alleleIdx = 1; alleleIdx < bestConformation.length; alleleIdx++) {
        final int count = bestConformation[alleleIdx];
        mleCounts.add(count);
        mleFractions.add((double) count / (double) numChromosomes);
    }
    // per-pool logging of AC and AF; a single alt is stored as a scalar rather than a list
    gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, mleCounts.size() == 1 ? mleCounts.get(0) : mleCounts);
    gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, mleFractions.size() == 1 ? mleFractions.get(0) : mleFractions);
    // drop the PL field entirely when the vector is too long to be worth emitting
    if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)
        gb.noPL();
    // expand the winning conformation into an explicit allele list for the GT field
    final ArrayList<Allele> calledAlleles = new ArrayList<Allele>();
    for (int alleleIdx = 0; alleleIdx < bestConformation.length; alleleIdx++) {
        for (int copy = 0; copy < bestConformation[alleleIdx]; copy++)
            calledAlleles.add(allelesToUse.get(alleleIdx));
    }
    gb.alleles(calledAlleles);
    if (altAlleleCount > 0)
        gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(bestPLindex, newLikelihoods));
}
}

View File

@ -0,0 +1,656 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import java.util.*;
// Abstract base holding per-pool genotype likelihoods over all allele-count
// conformations for a given ploidy; subclasses supply the actual likelihood model.
public abstract class GeneralPloidyGenotypeLikelihoods {
    protected final int numChromosomes;   // total chromosome count in the pool (ploidy)
    private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
    protected static final boolean VERBOSE = false;   // enables stdout debugging output throughout this class
    // cache of 10^(-j/10) for every phred score j; filled in the static initializer below
    protected static final double qualVec[] = new double[SAMUtils.MAX_PHRED_SCORE+1];
    //
    // The fundamental data arrays associated with a Genotype Likelihoods object
    //
    protected double[] log10Likelihoods;               // one log10 likelihood per allele-count conformation
    protected double[][] logMismatchProbabilityArray;  // cached Q(i,j) values, see fillCache()
    protected final int nSamplesPerPool;               // numChromosomes / 2
    protected final HashMap<String, ErrorModel> perLaneErrorModels;   // per-lane error models; may be null or empty
    protected final int likelihoodDim;                 // number of conformations = length of log10Likelihoods
    protected final boolean ignoreLaneInformation;     // if true, lane structure is ignored by subclasses
    protected final double LOG10_PLOIDY;               // log10(numChromosomes), precomputed in the constructor
    protected boolean hasReferenceSampleData;          // true iff at least one lane's error model has data
    protected final int nAlleles;                      // alleles.size()
    protected final List<Allele> alleles;              // alleles under consideration, reference first
    private static final double MIN_LIKELIHOOD = Double.NEGATIVE_INFINITY;   // initial value for unset PL entries
    private static final int MAX_NUM_ALLELES_TO_CACHE = 20;
    private static final int MAX_NUM_SAMPLES_PER_POOL = 1000;
    private static final boolean FAST_GL_COMPUTATION = true;   // use the queue-based, pruned GL computation
// constructor with given logPL elements
/**
 * Builds a likelihoods object for one pool.
 *
 * @param alleles               alleles under consideration, reference allele first
 * @param logLikelihoods        initial log10 PL vector, or null to start every conformation at -Infinity
 * @param ploidy                total chromosome count in the pool
 * @param perLaneErrorModels    per-lane reference-sample error models (may be null or empty)
 * @param ignoreLaneInformation if true, lane structure is ignored downstream
 */
public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
                                        final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
    this.alleles = alleles;
    this.nAlleles = alleles.size();
    numChromosomes = ploidy;
    nSamplesPerPool = numChromosomes/2;   // assumes diploid individuals per pool (2 chromosomes each) -- TODO confirm
    this.perLaneErrorModels = perLaneErrorModels;
    this.ignoreLaneInformation = ignoreLaneInformation;
    // check if at least one lane has actual data
    if (perLaneErrorModels == null || perLaneErrorModels.isEmpty())
        hasReferenceSampleData = false;
    else {
        for (Map.Entry<String,ErrorModel> elt : perLaneErrorModels.entrySet()) {
            if (elt.getValue().hasData()) {
                hasReferenceSampleData = true;
                break;
            }
        }
    }
    // check sizes
    if (nAlleles > MAX_NUM_ALLELES_TO_CACHE)
        throw new UserException("No support for this number of alleles");
    if (nSamplesPerPool > MAX_NUM_SAMPLES_PER_POOL)
        throw new UserException("No support for such large number of samples per pool");
    likelihoodDim = GenotypeLikelihoods.numLikelihoods(nAlleles, numChromosomes);
    if (logLikelihoods == null){
        // no starting PLs supplied: initialize every conformation to -Infinity
        log10Likelihoods = new double[likelihoodDim];
        Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
    } else {
        if (logLikelihoods.length != likelihoodDim)
            throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
        log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
    }
    fillCache();
    LOG10_PLOIDY = Math.log10((double)numChromosomes);
}
/**
 * Crucial inner class that handles addressing elements of pool likelihoods. We store likelihoods as a map
 * of form int[] -> double (to be more precise, IntArrayWrapper -> Double).
 * For a given ploidy (chromosome count) and number of alleles, we need a form to iterate deterministically
 * across all possible allele conformations.
 * Problem equivalent to listing in deterministic order all possible ways in which N integers will sum to P,
 * where N is number of alleles and P is number of chromosomes.
 * There's an option to list all integers so that sum will be UP to P.
 * For example, with P=2,N=2, restrictSumTo = 2 iterator will produce
 * [2 0 ] [1 1] [ 0 2]
 */
protected static class SumIterator {
    private int[] currentState;        // current conformation vector
    private final int[] finalState;    // per-dimension upper bound for each count
    private final int restrictSumTo;   // if > 0, only emit vectors whose entries sum to this value
    private final int dim;             // number of dimensions (= finalState.length)
    private boolean hasNext;           // whether another conformation remains
    private int linearIndex;           // scalar index of the current conformation
    private int currentSum;            // running sum over dimensions >= 1, used when sum-restricted
    /**
     * Default constructor. Typical use case: restrictSumTo = -1 if there's no sum restriction, or will generate int[]
     * vectors so that all add to this value.
     *
     * @param finalState End state - typically we should set value to (P,P,P,...)
     * @param restrictSumTo See above
     */
    public SumIterator(final int[] finalState,final int restrictSumTo) {
        this.finalState = finalState;
        this.dim = finalState.length;
        this.restrictSumTo = restrictSumTo;
        currentState = new int[dim];
        reset();
    }
    /**
     * Shortcut constructor for common use case: iterator will produce
     * all vectors of length numAlleles whose sum = numChromosomes
     * @param numAlleles Number of alleles
     * @param numChromosomes Ploidy
     */
    public SumIterator(final int numAlleles, final int numChromosomes) {
        this(getInitialStateVector(numAlleles,numChromosomes), numChromosomes);
    }
    // builds the (P,P,...,P) bound vector used by the shortcut constructor
    private static int[] getInitialStateVector(final int nAlleles, final int numChromosomes) {
        int[] initialState = new int[nAlleles];
        Arrays.fill(initialState,numChromosomes);
        return initialState;
    }
    /**
     * Jumps the linear index to the position of a given conformation.
     * Only supported for sum-restricted iterators.
     * NOTE(review): only linearIndex is updated here; currentState and currentSum are
     * left untouched, so a subsequent next() continues from the previous state --
     * confirm that callers only read getLinearIndex() after using this.
     */
    public void setInitialStateVector(final int[] stateVector) {
        if (restrictSumTo > 0) {
            // check that desired vector is valid
            if (MathUtils.sum(stateVector) != restrictSumTo)
                throw new ReviewedStingException("BUG: initial state vector not compatible with sum iterator");
            final int numAlleles = currentState.length;
            final int ploidy = restrictSumTo;
            linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
        }
        else
            throw new ReviewedStingException("BUG: Not supported");
    }
    // advance to the next conformation (if any), keeping the linear index in sync
    public void next() {
        int initialDim = (restrictSumTo > 0)?1:0;
        hasNext = next(finalState, initialDim);
        if (hasNext)
            linearIndex++;
    }
    // odometer-style increment starting at initialDim; when sum-restricted, dimension 0
    // is derived afterwards so the entries always total restrictSumTo
    private boolean next(final int[] finalState, int initialDim) {
        boolean hasNextState = false;
        for (int currentDim=initialDim; currentDim < finalState.length; currentDim++) {
            final int x = currentState[currentDim]+1;
            if (x > finalState[currentDim] || (currentSum >= restrictSumTo && initialDim > 0)) {
                // update vector sum, and reset position
                currentSum -= currentState[currentDim];
                currentState[currentDim] = 0;
                if (currentDim >= dim-1) {
                    hasNextState = false;
                    break;
                }
            }
            else {
                currentState[currentDim] = x;
                hasNextState = true;
                currentSum++;
                break;
            }
        }
        if (initialDim > 0) {
            currentState[0] = restrictSumTo - currentSum;
        }
        return hasNextState;
    }
    // back to the first conformation: [restrictSumTo,0,...,0] when restricted, all zeros otherwise
    public void reset() {
        Arrays.fill(currentState, 0);
        if (restrictSumTo > 0)
            currentState[0] = restrictSumTo;
        hasNext = true;
        linearIndex = 0;
        currentSum = 0;
    }
    // NOTE: returns the iterator's internal array; callers must clone() to keep a copy
    public int[] getCurrentVector() {
        return currentState;
    }
    // current conformation without the reference (dimension 0) entry
    public int[] getCurrentAltVector() {
        return Arrays.copyOfRange(currentState,1,currentState.length);
    }
/*    public int getCurrentSum() {
        return currentSum;
    }
*/
    public int getLinearIndex() {
        return linearIndex;
    }
    public boolean hasNext() {
        return hasNext;
    }
}
// alleles under consideration, reference first (the iteration order of the PL vector depends on this list)
public List<Allele> getAlleles() { return alleles;}
/**
 * Returns an array of log10 likelihoods for each genotype conformation, with ordering determined by SumIterator class.
 *
 * @return likelihoods array
 */
public double[] getLikelihoods() {
    return log10Likelihoods;
}
/**
 * Set particular element of logPL vector
 * @param idx index of allele count conformation to modify
 * @param pl Likelihood to associate with map
 */
public void setLogPLs(final int idx, final double pl) {
    log10Likelihoods[idx] = pl;
}
// re-normalize the stored log10 PLs in-place (delegates to MathUtils.normalizeFromLog10)
public void renormalize() {
    log10Likelihoods = MathUtils.normalizeFromLog10(log10Likelihoods,false,true);
}
/**
 * Scans the stored log10 PL vector and returns the allele-count conformation with
 * the highest likelihood, paired with that likelihood value. Conformation entries
 * are ordered according to this object's allele list.
 *
 * @return (most likely allele-count vector, its log10 likelihood)
 */
public Pair<int[],Double> getMostLikelyACCount() {
    double bestLikelihood = Double.NEGATIVE_INFINITY;
    int[] bestConformation = null;
    int offset = 0;
    // walk every conformation in canonical SumIterator order, tracking the argmax
    for (final SumIterator conformations = new SumIterator(alleles.size(), numChromosomes); conformations.hasNext(); conformations.next()) {
        final double currentGL = log10Likelihoods[offset++];
        if (currentGL > bestLikelihood) {
            bestLikelihood = currentGL;
            // clone: the iterator mutates and reuses its internal state vector
            bestConformation = conformations.getCurrentVector().clone();
        }
    }
    if (VERBOSE) {
        System.out.println(VCFConstants.MLE_ALLELE_COUNT_KEY + ": " + Arrays.toString(bestConformation));
    }
    return new Pair<int[], Double>(bestConformation, bestLikelihood);
}
/**
 * Given set of alleles with corresponding vector of likelihoods, subset to a new set of alleles
 *
 * @param oldLikelihoods Vector of PL's corresponding to original alleles
 * @param numChromosomes Ploidy (number of chromosomes describing PL's)
 * @param originalAlleles List of original alleles
 * @param allelesToSubset Alleles to subset
 * @return Vector of new PL's, ordered according to SumIterator's ordering
 */
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
                                       final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {
    int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
    // NOTE(review): entries never written below keep the array default of 0.0 (log10 scale) --
    // confirm that downstream re-normalization accounts for this.
    double[] newPLs = new double[newPLSize];
    int idx = 0;
    // First fill boolean array stating whether each original allele is present in new mapping
    final boolean [] allelePresent = new boolean[originalAlleles.size()];
    for ( Allele allele : originalAlleles )
        allelePresent[idx++] = allelesToSubset.contains(allele);
    // compute mapping from old idx to new idx
    // This might be needed in case new allele set is not ordered in the same way as old set
    // Example. Original alleles: {T*,C,G,A}. New alleles: {G,C}. Permutation key = [2,1]
    int[] permutationKey = new int[allelesToSubset.size()];
    for (int k=0; k < allelesToSubset.size(); k++)
        // for each allele to subset, find corresponding index in original allele list
        permutationKey[k] = originalAlleles.indexOf(allelesToSubset.get(k));
    if (VERBOSE) {
        System.out.println("permutationKey:"+Arrays.toString(permutationKey));
    }
    final SumIterator iterator = new SumIterator(originalAlleles.size(),numChromosomes);
    while (iterator.hasNext()) {
        // for each entry in logPL table, associated originally with allele count stored in vec[],
        // see if this allele count conformation will be present in new logPL table.
        // For entry to be present, elements in dimensions not present in requested allele list have to have count = 0
        int[] pVec = iterator.getCurrentVector();
        double pl = oldLikelihoods[iterator.getLinearIndex()];
        boolean keyPresent = true;
        for (int k=0; k < allelePresent.length; k++)
            if ( pVec[k]>0 && !allelePresent[k] )
                keyPresent = false;
        if (keyPresent) {// skip to next entry in logPLs if this conformation is not present in subset
            final int[] newCount = new int[allelesToSubset.size()];
            // map from old allele mapping count to new allele mapping
            // In pseudo-Matlab notation: newCount = vec[permutationKey] for permutationKey vector
            for (idx = 0; idx < newCount.length; idx++)
                newCount[idx] = pVec[permutationKey[idx]];
            // get corresponding index from new count
            int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
            newPLs[outputIdx] = pl;
            if (VERBOSE) {
                System.out.println("Old Key:"+Arrays.toString(pVec));
                System.out.println("New Key:"+Arrays.toString(newCount));
            }
        }
        iterator.next();
    }
    return newPLs;
}
/**
 * Maps an allele-count conformation vector to its scalar position in the PL array,
 * matching the linear index SumIterator reports for the same vector.
 *
 * @param vectorIdx  allele-count conformation (length numAlleles, entries sum to ploidy)
 * @param numAlleles number of alleles
 * @param ploidy     total chromosome count
 * @return linear index into the likelihoods vector
 */
public static int getLinearIndex(int[] vectorIdx, int numAlleles, int ploidy) {
    if (ploidy <= 0)
        return 0;
    int linearIdx = 0;
    int cumSum = ploidy;
    // walk dimensions from last to second; dimension 0 is implied by the sum constraint
    for (int k=numAlleles-1;k>=1; k--) {
        int idx = vectorIdx[k];
        // how many blocks are before current position
        if (idx == 0)
            continue;
        for (int p=0; p < idx; p++)
            linearIdx += getNumLikelihoodElements( k, cumSum-p);
        cumSum -= idx;
    }
    return linearIdx;
}
/**
 * Inverse of getLinearIndex: recovers the allele-count conformation stored at a
 * given scalar position of the PL vector.
 * @param nAlleles Number of alleles
 * @param numChromosomes Ploidy
 * @param PLindex Index to query
 * @return Allele count conformation, according to iteration order from SumIterator
 */
public static int[] getAlleleCountFromPLIndex(final int nAlleles, final int numChromosomes, final int PLindex) {
    // brute-force walk of the canonical conformation order until the index matches;
    // a closed-form computation would be faster but this keeps the logic simple
    for (final SumIterator it = new SumIterator(nAlleles, numChromosomes); it.hasNext(); it.next()) {
        if (it.getLinearIndex() == PLindex)
            return it.getCurrentVector();
    }
    return null;   // index beyond the last conformation
}
/*
 * a cache of the PL vector sizes as a function of # of alleles and pool sizes:
 * getNumLikelihoodElements(a, p) = number of allele-count conformations for a alleles
 * at ploidy p. Callers must keep both arguments within the cached bounds below.
 */
public static int getNumLikelihoodElements(int numAlleles, int ploidy) {
    return GenotypeLikelihoodVectorSizes[numAlleles][ploidy];
}
// table precomputed once, sized by the class-wide allele and pool-size limits
private final static int[][] GenotypeLikelihoodVectorSizes = fillGLVectorSizeCache(MAX_NUM_ALLELES_TO_CACHE, 2*MAX_NUM_SAMPLES_PER_POOL);
/**
 * Builds the table of PL vector sizes: cache[n][p] = number of allele-count
 * conformations for n alleles at ploidy p (the "stars and bars" count).
 *
 * Fixes an off-by-one in the original: the table was sized [maxAlleles][maxPloidy]
 * and filled with exclusive loops, yet callers admit nAlleles == MAX_NUM_ALLELES_TO_CACHE
 * and ploidy == 2*MAX_NUM_SAMPLES_PER_POOL (the guards reject only strictly-greater
 * values), which would index past the end. The table is now inclusive of both maxima;
 * all previously valid lookups return identical values.
 *
 * @param maxAlleles largest allele count that must be queryable (inclusive)
 * @param maxPloidy  largest ploidy that must be queryable (inclusive)
 * @return lookup table indexed as [numAlleles][ploidy]
 */
private static int[][] fillGLVectorSizeCache(int maxAlleles, int maxPloidy) {
    int[][] cache = new int[maxAlleles + 1][maxPloidy + 1];
    for (int numAlleles = 1; numAlleles <= maxAlleles; numAlleles++) {
        for (int ploidy = 0; ploidy <= maxPloidy; ploidy++) {
            if (numAlleles == 1)
                cache[numAlleles][ploidy] = 1;            // one allele: single conformation
            else if (ploidy == 1)
                cache[numAlleles][ploidy] = numAlleles;   // one chromosome: one conformation per allele
            else {
                // recurrence: sum over the count k given to the last allele
                int acc = 0;
                for (int k = 0; k <= ploidy; k++)
                    acc += cache[numAlleles - 1][ploidy - k];
                cache[numAlleles][ploidy] = acc;
            }
        }
    }
    return cache;
}
/**
 * Renders the allele list followed by every finite GL entry, one allele-count
 * conformation per line.
 *
 * @return human-readable representation of this likelihoods object
 */
public String toString() {
    final StringBuilder out = new StringBuilder(1000);
    out.append("Alleles:");
    for (final Allele allele : this.alleles) {
        out.append(allele.getDisplayString());
        out.append(",");
    }
    out.append("\nGLs:\n");
    final SumIterator it = new SumIterator(nAlleles, numChromosomes);
    while (it.hasNext()) {
        final double gl = getLikelihoods()[it.getLinearIndex()];
        // conformations still at -Infinity (never computed) are omitted
        if (!Double.isInfinite(gl)) {
            out.append("Count [");
            final StringBuilder counts = new StringBuilder(it.getCurrentVector().length * 2);
            for (final int count : it.getCurrentVector()) {
                counts.append(count);
                counts.append(",");
            }
            out.append(counts.toString());
            out.append(String.format("] GL=%4.3f\n", gl));
        }
        it.next();
    }
    return out.toString();
}
/**
 * Fills the log10 PL vector for this pool given a site error model and per-allele observations.
 * With FAST_GL_COMPUTATION, conformations are explored breadth-first from the all-reference
 * state and pruned once they fall MAX_LOG10_ERROR_TO_STOP_EARLY below the running maximum;
 * otherwise every conformation is evaluated exhaustively. PLs are normalized on exit.
 *
 * @param errorModel      site-specific error model
 * @param alleleList      alleles under consideration
 * @param numObservations observation count per allele
 * @param pileup          reads at this site, passed through to the subclass model
 */
public void computeLikelihoods(ErrorModel errorModel,
                               List<Allele> alleleList, List<Integer> numObservations, ReadBackedPileup pileup) {
    if (FAST_GL_COMPUTATION) {
        // queue up elements to be computed. Assumptions:
        // GLs distributions are unimodal
        // GLs are continuous
        // Hence, once an AC conformation is computed, we queue up its immediate topological neighbors.
        // If neighbors fall below maximum - threshold, we don't queue up THEIR own neighbors
        // and we repeat until queue is empty
        // queue of AC conformations to process
        final LinkedList<AlleleFrequencyCalculationModel.ExactACset> ACqueue = new LinkedList<AlleleFrequencyCalculationModel.ExactACset>();
        // mapping of ExactACset indexes to the objects
        final HashMap<AlleleFrequencyCalculationModel.ExactACcounts, AlleleFrequencyCalculationModel.ExactACset> indexesToACset = new HashMap<AlleleFrequencyCalculationModel.ExactACcounts, AlleleFrequencyCalculationModel.ExactACset>(likelihoodDim);
        // add AC=0 to the queue
        final int[] zeroCounts = new int[nAlleles];
        zeroCounts[0] = numChromosomes;
        AlleleFrequencyCalculationModel.ExactACset zeroSet =
                new AlleleFrequencyCalculationModel.ExactACset(1, new AlleleFrequencyCalculationModel.ExactACcounts(zeroCounts));
        ACqueue.add(zeroSet);
        indexesToACset.put(zeroSet.ACcounts, zeroSet);
        // keep processing while we have AC conformations that need to be calculated
        double maxLog10L = Double.NEGATIVE_INFINITY;
        while ( !ACqueue.isEmpty() ) {
            // compute log10Likelihoods
            final AlleleFrequencyCalculationModel.ExactACset ACset = ACqueue.remove();
            final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, errorModel, alleleList, numObservations, maxLog10L, ACqueue, indexesToACset, pileup);
            // adjust max likelihood seen if needed
            maxLog10L = Math.max(maxLog10L, log10LofKs);
            // clean up memory
            indexesToACset.remove(ACset.ACcounts);
            if ( VERBOSE )
                System.out.printf(" *** removing used set=%s%n", ACset.ACcounts);
        }
    } else {
        // exhaustive path: evaluate every conformation in canonical SumIterator order
        int plIdx = 0;
        SumIterator iterator = new SumIterator(nAlleles, numChromosomes);
        while (iterator.hasNext()) {
            AlleleFrequencyCalculationModel.ExactACset ACset =
                    new AlleleFrequencyCalculationModel.ExactACset(1, new AlleleFrequencyCalculationModel.ExactACcounts(iterator.getCurrentVector()));
            // for observed base X, add Q(jX,k) to likelihood vector for all k in error model
            //likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k))
            getLikelihoodOfConformation(ACset, errorModel, alleleList, numObservations, pileup);
            setLogPLs(plIdx++, ACset.log10Likelihoods[0]);
            iterator.next();
        }
    }
    // normalize PL's
    renormalize();
}
/**
 * Evaluates one allele-count conformation and schedules its neighbors.
 *
 * Computes the conformation's log10 likelihood via the subclass model, records it in
 * the PL vector, and -- unless the value has already fallen more than
 * MAX_LOG10_ERROR_TO_STOP_EARLY below the best likelihood seen -- queues every
 * conformation reachable by adding one copy of an alternate allele.
 *
 * NOTE: the parameter type is written AlleleFrequencyCalculationModel.ExactACset for
 * consistency with the queue/map declarations used throughout this class (the method
 * previously used the equivalent inherited qualifier ExactAFCalculationModel.ExactACset).
 *
 * @param set             conformation to evaluate (full counts vector, reference included)
 * @param errorModel      site-specific error model
 * @param alleleList      alleles under consideration
 * @param numObservations observation counts per allele
 * @param maxLog10L       best log10 likelihood observed so far
 * @param ACqueue         queue of pending conformations
 * @param indexesToACset  map from conformation counts to their pending set objects
 * @param pileup          reads at this site, if the subclass model needs them
 * @return log10 likelihood of this conformation
 */
private double calculateACConformationAndUpdateQueue(final AlleleFrequencyCalculationModel.ExactACset set,
                                                     final ErrorModel errorModel,
                                                     final List<Allele> alleleList,
                                                     final List<Integer> numObservations,
                                                     final double maxLog10L,
                                                     final LinkedList<AlleleFrequencyCalculationModel.ExactACset> ACqueue,
                                                     final HashMap<AlleleFrequencyCalculationModel.ExactACcounts,
                                                     AlleleFrequencyCalculationModel.ExactACset> indexesToACset,
                                                     final ReadBackedPileup pileup) {
    // compute likelihood of set
    getLikelihoodOfConformation(set, errorModel, alleleList, numObservations, pileup);
    final double log10LofK = set.log10Likelihoods[0];
    // log result in PL vector
    int idx = getLinearIndex(set.ACcounts.getCounts(), nAlleles, numChromosomes);
    setLogPLs(idx, log10LofK);
    // can we abort early because the log10Likelihoods are so small?
    if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
        if ( VERBOSE )
            System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
        return log10LofK;
    }
    // iterate over higher frequencies if possible
    // by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.
    final int ACwiggle = numChromosomes - set.getACsum() + set.ACcounts.counts[0];
    if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
        return log10LofK;
    // add conformations for other cases
    for ( int allele = 1; allele < nAlleles; allele++ ) {
        final int[] ACcountsClone = set.ACcounts.getCounts().clone();
        ACcountsClone[allele]++;
        // is this a valid conformation?
        int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0];
        ACcountsClone[0] = numChromosomes - altSum;
        if (ACcountsClone[0] < 0)
            continue;
        updateACset(ACcountsClone, ACqueue, indexesToACset);
    }
    return log10LofK;
}
/**
 * Abstract methods, must be implemented in subclasses
 *
 * @param ACset Count to compute
 * @param errorModel Site-specific error model object
 * @param alleleList List of alleles
 * @param numObservations Number of observations for each allele
 * @param pileup Read backed pileup in case it's necessary
 */
public abstract void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset,
                                                 final ErrorModel errorModel,
                                                 final List<Allele> alleleList,
                                                 final List<Integer> numObservations,
                                                 final ReadBackedPileup pileup);
/**
 * Accumulates a pileup into this likelihoods object.
 * @param pileup reads for this pool at the current site
 * @param UAC    engine argument collection controlling behavior
 * @return number of good bases used from the pileup (callers treat 0 as "no data")
 */
public abstract int add(ReadBackedPileup pileup, UnifiedArgumentCollection UAC);
// Static methods
/**
 * Registers a new allele-count conformation for processing unless an equivalent
 * one has already been queued: creates its set object, indexes it, and appends
 * it to the work queue.
 *
 * @param newSetCounts   full allele-count vector (reference included)
 * @param ACqueue        queue of conformations awaiting evaluation
 * @param indexesToACset map from counts to their pending set objects
 */
public static void updateACset(final int[] newSetCounts,
                               final LinkedList<AlleleFrequencyCalculationModel.ExactACset> ACqueue,
                               final HashMap<AlleleFrequencyCalculationModel.ExactACcounts, AlleleFrequencyCalculationModel.ExactACset> indexesToACset) {
    final AlleleFrequencyCalculationModel.ExactACcounts key = new AlleleFrequencyCalculationModel.ExactACcounts(newSetCounts);
    if ( indexesToACset.containsKey(key) )
        return;   // conformation already queued or being processed
    final AlleleFrequencyCalculationModel.ExactACset pending = new AlleleFrequencyCalculationModel.ExactACset(1, key);
    indexesToACset.put(key, pending);
    ACqueue.add(pending);
    if (VERBOSE)
        System.out.println(" *** Adding set to queue:" + key.toString());
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// helper routines
//
//
// -----------------------------------------------------------------------------------------------------------------
//
// Constant static data
//
static {
    // cache 10^(-j/10): maps each phred score j to its error probability, consumed by fillCache()
    for (int j=0; j <= SAMUtils.MAX_PHRED_SCORE; j++)
        qualVec[j] = Math.pow(10.0,-(double)j/10.0);
}
// cache Q(i,j) = log10( (i/2N)*(1-e_j) + (1-i/2N)*e_j/3 ) for i = 0..2N, where e_j = 10^(-j/10)
// (the original comment omitted the /3 factor actually applied below -- presumably it splits
// the error probability across the three alternative bases; confirm against the subclass models)
private void fillCache() {
    logMismatchProbabilityArray = new double[1+numChromosomes][1+SAMUtils.MAX_PHRED_SCORE];
    for (int i=0; i <= numChromosomes; i++) {
        for (int j=0; j <= SAMUtils.MAX_PHRED_SCORE; j++) {
            // phi = fraction of the pool's chromosomes carrying the allele
            double phi = (double)i/numChromosomes;
            logMismatchProbabilityArray[i][j] = Math.log10(phi * (1.0-qualVec[j]) + qualVec[j]/3.0 * (1.0-phi));
        }
    }
}
}

View File

@ -0,0 +1,353 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
// Base class for pool-based (general ploidy) genotype likelihood calculation models;
// concrete SNP/indel subclasses supply the actual likelihood computation.
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
    //protected Set<String> laneIDs;
    // variant classes a calculation model can be configured for
    public enum Model {
        SNP,
        INDEL,
        POOLSNP,
        POOLINDEL,
        BOTH
    }
    final protected UnifiedArgumentCollection UAC;   // engine arguments, retained for subclass access
    /**
     * @param UAC    engine argument collection
     * @param logger logger forwarded to the superclass
     */
    protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
        super(UAC,logger);
        this.UAC = UAC;
    }
/*
   Get vc with alleles from reference sample. Can be null if there's no ref sample call or no ref sample coverage at this site.
*/
/**
 * Builds a minimal VariantContext describing the reference sample's true alleles at this site.
 *
 * @param tracker  metadata tracker used to look up the reference-sample ROD
 * @param ref      reference context, used as a fallback allele source
 * @param contexts alignment contexts keyed by sample name
 * @return a context carrying the reference sample's genotype; a ref-allele-only context
 *         when the ROD has no record here; or null when no reference sample is configured
 *         or it has no coverage/tracker data at this site
 */
protected VariantContext getTrueAlleles(final RefMetaDataTracker tracker,
                                        final ReferenceContext ref,
                                        Map<String,AlignmentContext> contexts) {
    // Get reference base from VCF or Reference
    if (UAC.referenceSampleName == null)
        return null;
    AlignmentContext context = contexts.get(UAC.referenceSampleName);
    ArrayList<Allele> trueReferenceAlleles = new ArrayList<Allele>();
    VariantContext referenceSampleVC;
    if (tracker != null && context != null)
        referenceSampleVC = tracker.getFirstValue(UAC.referenceSampleRod, context.getLocation());
    else
        return null;   // no tracker or no coverage for the reference sample at this site
    if (referenceSampleVC == null) {
        // ROD has no record here: fall back to a context containing only the reference base
        trueReferenceAlleles.add(Allele.create(ref.getBase(),true));
        return new VariantContextBuilder("pc",ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop(),trueReferenceAlleles).make();
    }
    else {
        // copy the reference sample's called genotype (and its GQ) into a fresh context
        Genotype referenceGenotype = referenceSampleVC.getGenotype(UAC.referenceSampleName);
        List<Allele> referenceAlleles = referenceGenotype.getAlleles();
        return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
                referenceSampleVC.getAlleles())
                .genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
                .make();
    }
}
/**
 * GATK Engine creates readgroups of the form XXX.Y.Z
 * XXX.Y is the unique lane identifier.
 * Z is the id of the sample to make the read group id unique
 * This function returns the set of distinct lane identifiers.
 *
 * @param readGroups A collection of read group strings (obtained from the alignment context pileup)
 * @return a collection of lane ids.
 */
public static Set<String> parseLaneIDs(Collection<String> readGroups) {
    final HashSet<String> laneIds = new HashSet<String>();
    for (final String readGroup : readGroups)
        laneIds.add(getLaneIDFromReadGroupString(readGroup));
    return laneIds;
}
/**
 * GATK Engine creates readgroups of the form XXX.Y.Z
 * XXX.Y is the unique lane identifier.
 * Z is the id of the sample to make the read group id unique
 *
 * @param readGroupID the read group id string
 * @return just the lane id (the XXX.Y string); ids with fewer than two dot-separated
 *         fields get a synthetic ".0" lane suffix
 */
public static String getLaneIDFromReadGroupString(String readGroupID) {
    final String[] pieces = readGroupID.split("\\.");
    return pieces.length > 1 ? pieces[0] + "." + pieces[1] : pieces[0] + ".0";
}
/** Wrapper class that encapsulates likelihood object and sample name
 *
 */
protected static class PoolGenotypeData {
    public final String name;                           // sample (pool) name
    public final GeneralPloidyGenotypeLikelihoods GL;   // computed likelihoods for this pool
    public final int depth;                             // read depth associated with the likelihoods
    public final List<Allele> alleles;                  // alleles the GLs are defined over
    public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
        this.name = name;
        this.GL = GL;
        this.depth = depth;
        this.alleles = alleles;
    }
}
// determines the alleles to use
/**
 * Selects the alternate alleles worth genotyping: for every pool whose most likely
 * allele-count conformation is not all-reference, each alt allele present in that
 * conformation accumulates the likelihood gap between the best conformation and the
 * all-reference one; alleles with positive accumulated support are kept.
 *
 * @param sampleDataList per-pool likelihood wrappers (all sharing one allele list)
 * @return alleles with positive support, in the shared allele-list order
 */
protected List<Allele> determineAlternateAlleles(final List<PoolGenotypeData> sampleDataList) {
    if (sampleDataList.isEmpty())
        return Collections.emptyList();
    final int REFERENCE_IDX = 0;
    final List<Allele> candidateAlleles = sampleDataList.get(0).GL.getAlleles();
    final double[] supportPerAllele = new double[candidateAlleles.size()];
    for (final PoolGenotypeData pool : sampleDataList) {
        final Pair<int[],Double> bestAC = pool.GL.getMostLikelyACCount();
        final double bestGL = bestAC.second;
        // sanity checks: every pool must share the same allele list, reference first
        if (pool.GL.getAlleles().size() != candidateAlleles.size())
            throw new ReviewedStingException("BUG: inconsistent size of alleles!");
        if (pool.GL.alleles.get(0).isNonReference())
            throw new ReviewedStingException("BUG: first allele in list is not reference!");
        final double allRefGL = pool.GL.getLikelihoods()[REFERENCE_IDX];
        // skip pools whose maximum-likelihood conformation is all-reference
        if (bestAC.first[REFERENCE_IDX] == pool.GL.numChromosomes)
            continue;
        // credit every alt allele appearing in the best conformation with the likelihood gap
        for (int alleleIdx = 0; alleleIdx < bestAC.first.length; alleleIdx++) {
            if (alleleIdx == REFERENCE_IDX)
                continue;
            if (bestAC.first[alleleIdx] > 0)
                supportPerAllele[alleleIdx] += bestGL - allRefGL;
        }
    }
    final List<Allele> allelesToUse = new ArrayList<Allele>();
    for (int alleleIdx = 0; alleleIdx < supportPerAllele.length; alleleIdx++) {
        if (supportPerAllele[alleleIdx] > 0.0)
            allelesToUse.add(candidateAlleles.get(alleleIdx));
    }
    return allelesToUse;
}
/**
 * Computes genotype likelihoods for every pool at the current site and assembles them
 * into a VariantContext.
 *
 * Flow: build per-lane error models from the reference sample (if configured); optionally
 * merge all reads into a single pool; select the allele set; compute per-pool likelihoods;
 * then emit a VariantContext with no-call genotypes carrying DP and PL annotations.
 *
 * @param tracker          metadata tracker at this locus
 * @param ref              reference context at this locus
 * @param contexts         per-sample alignment contexts (may be replaced by a single merged pool)
 * @param contextType      read orientation stratification to apply
 * @param allAllelesToUse  externally supplied alleles, or null/empty to discover them here
 * @param useBAQedPileup   whether the likelihood object should use a BAQ-capped pileup
 * @param locParser        genome-location parser passed through to allele discovery
 * @return the assembled VariantContext, or null if no error model, alleles, or usable data exist
 */
public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
                                     final ReferenceContext ref,
                                     Map<String, AlignmentContext> contexts,
                                     final AlignmentContextUtils.ReadOrientation contextType,
                                     final List<Allele> allAllelesToUse,
                                     final boolean useBAQedPileup,
                                     final GenomeLocParser locParser) {
    HashMap<String, ErrorModel> perLaneErrorModels = getPerLaneErrorModels(tracker, ref, contexts);
    // a reference sample was configured but no model could be built: can't call this site
    if (perLaneErrorModels == null && UAC.referenceSampleName != null)
        return null;
    if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
        // collapse every sample's reads into one dummy pool
        AlignmentContext mergedContext = AlignmentContextUtils.joinContexts(contexts.values());
        Map<String,AlignmentContext> newContext = new HashMap<String,AlignmentContext>();
        newContext.put(DUMMY_SAMPLE_NAME,mergedContext);
        contexts = newContext;
    }
    // get initial alleles to genotype: discover them unless the caller supplied a non-empty list
    final List<Allele> allAlleles = new ArrayList<Allele>();
    if (allAllelesToUse == null || allAllelesToUse.isEmpty())
        allAlleles.addAll(getInitialAllelesToUse(tracker, ref,contexts,contextType,locParser, allAllelesToUse));
    else
        allAlleles.addAll(allAllelesToUse);
    if (allAlleles.isEmpty())
        return null;
    final ArrayList<PoolGenotypeData> GLs = new ArrayList<PoolGenotypeData>(contexts.size());
    for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
        // skip reference sample: it is the source of the error model, not a pool to genotype
        if (UAC.referenceSampleName != null && sample.getKey().equals(UAC.referenceSampleName))
            continue;
        ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
        // create the GenotypeLikelihoods object (SNP or indel flavor, per subclass)
        final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
        // actually compute likelihoods from the pileup
        final int nGoodBases = GL.add(pileup, UAC);
        if ( nGoodBases > 0 )
            // create wrapper object for likelihoods and add to list
            GLs.add(new PoolGenotypeData(sample.getKey(), GL, getFilteredDepth(pileup), allAlleles));
    }
    // find the alternate allele(s) that we should be using (subclass-specific)
    final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
    if (alleles == null || alleles.isEmpty())
        return null;
    // start making the VariantContext
    final GenomeLoc loc = ref.getLocus();
    final int endLoc = getEndLocation(tracker, ref, alleles);
    final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles);
    builder.alleles(alleles);
    final HashMap<String, Object> attributes = new HashMap<String, Object>();
    if (UAC.referenceSampleName != null && perLaneErrorModels != null)
        attributes.put(VCFConstants.REFSAMPLE_DEPTH_KEY, ErrorModel.getTotalReferenceDepth(perLaneErrorModels));
    builder.attributes(attributes);
    // create the genotypes; no-call everyone for now, but attach DP and PL
    final GenotypesContext genotypes = GenotypesContext.create();
    final List<Allele> noCall = new ArrayList<Allele>();
    noCall.add(Allele.NO_CALL);
    for ( PoolGenotypeData sampleData : GLs ) {
        // project the full likelihood conformation down to the final allele subset
        final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
                allAlleles, alleles);
        // normalize in log space so that max element is zero.
        final GenotypeBuilder gb = new GenotypeBuilder(sampleData.name, noCall);
        gb.DP(sampleData.depth);
        gb.PL(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
        genotypes.add(gb.make());
    }
    return builder.genotypes(genotypes).make();
}
/**
 * Builds a site-specific error model for each lane present in the reference sample's
 * pileup. When lane information is ignored (or all reads are treated as one pool),
 * a single model is built under the dummy lane ID.
 *
 * @param tracker  metadata tracker at this locus
 * @param ref      reference context at this locus
 * @param contexts per-sample alignment contexts; the reference sample is looked up here
 * @return lane ID -> error model map, or null when no reference-sample context is available
 */
protected HashMap<String, ErrorModel> getPerLaneErrorModels(final RefMetaDataTracker tracker,
                                                            final ReferenceContext ref,
                                                            Map<String, AlignmentContext> contexts) {
    final VariantContext refVC = getTrueAlleles(tracker, ref, contexts);

    // locate the reference sample's alignment context, if one was configured
    AlignmentContext refContext = null;
    if (UAC.referenceSampleName != null)
        refContext = contexts.get(UAC.referenceSampleName);
    if (refContext == null)
        return null;

    final ReadBackedPileup refPileup = refContext.getBasePileup();
    final boolean collapseLanes = UAC.TREAT_ALL_READS_AS_SINGLE_POOL || UAC.IGNORE_LANE_INFO;

    // decide which lane IDs to model: a single dummy lane, or every lane seen in the ref pileup
    Set<String> laneIDs;
    if (collapseLanes) {
        laneIDs = new TreeSet<String>();
        laneIDs.add(DUMMY_LANE);
    } else {
        laneIDs = parseLaneIDs(refPileup.getReadGroups());
    }

    // build per-lane error model for all lanes present in the ref sample
    final HashMap<String, ErrorModel> perLaneErrorModels = new HashMap<String, ErrorModel>();
    for (final String laneID : laneIDs) {
        // subset the reference pileup to this lane unless lanes are collapsed
        ReadBackedPileup refLanePileup = refPileup;
        if (refPileup != null && !collapseLanes)
            refLanePileup = refPileup.getPileupForLane(laneID);
        perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref));
    }
    return perLaneErrorModels;
}
/*
    Abstract methods - must be implemented in derived classes
 */

/**
 * Creates the model-specific (SNP or indel) likelihood object for one pool.
 *
 * @param alleles              alleles to compute likelihoods over
 * @param logLikelihoods       precomputed log10 likelihoods, or null
 * @param ploidy               number of chromosomes in the pool
 * @param perLaneErrorModels   per-lane error models (may be null)
 * @param useBQAedPileup       whether to use a BAQ-capped pileup
 * @param ref                  reference context at the site
 * @param ignoreLaneInformation whether to pool reads across lanes
 * @return a new likelihood object ready to accept pileup data via add()
 */
protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
                                                                                   final double[] logLikelihoods,
                                                                                   final int ploidy,
                                                                                   final HashMap<String, ErrorModel> perLaneErrorModels,
                                                                                   final boolean useBQAedPileup,
                                                                                   final ReferenceContext ref,
                                                                                   final boolean ignoreLaneInformation);

/**
 * Produces the initial candidate allele list for the site (used when the caller
 * did not supply alleles).
 */
protected abstract List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
                                                       final ReferenceContext ref,
                                                       Map<String, AlignmentContext> contexts,
                                                       final AlignmentContextUtils.ReadOrientation contextType,
                                                       final GenomeLocParser locParser,
                                                       final List<Allele> allAllelesToUse);

/**
 * Produces the final allele list to genotype, given the per-pool likelihoods
 * already computed.
 */
protected abstract List<Allele> getFinalAllelesToUse(final RefMetaDataTracker tracker,
                                                     final ReferenceContext ref,
                                                     final List<Allele> allAllelesToUse,
                                                     final ArrayList<PoolGenotypeData> GLs);

/**
 * Computes the end coordinate of the variant record for the chosen alleles
 * (e.g. deletions span more than one reference base).
 */
protected abstract int getEndLocation(final RefMetaDataTracker tracker,
                                      final ReferenceContext ref,
                                      final List<Allele> alternateAllelesToUse);
}

View File

@ -0,0 +1,221 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 5/18/12
* Time: 10:06 AM
* To change this template use File | Settings | File Templates.
*/
/**
 * General-ploidy (pool) genotype likelihoods for indel alleles.
 *
 * Two computation paths exist: with a reference-sample error model, pileup elements are
 * matched against each allele and the lane error models drive the likelihoods; without
 * one, a pair-HMM scores each read against candidate haplotypes.
 */
public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
    // pair-HMM error model used to score reads against candidate haplotypes
    final PairHMMIndelErrorModel pairModel;
    // candidate allele -> haplotype map consumed by the pair-HMM
    final LinkedHashMap<Allele, Haplotype> haplotypeMap;
    // reference context at the site being genotyped
    final ReferenceContext refContext;
    // indel event length derived from the allele list
    final int eventLength;
    // per-read, per-allele log10 likelihoods; filled by the pair-HMM branch of add()
    double[][] readHaplotypeLikelihoods;
    // reference base at the current position
    final byte refBase;

    /**
     * Creates an indel likelihood object for a pool of the given ploidy.
     *
     * @param alleles               alleles associated with this likelihood object
     * @param logLikelihoods        precomputed log10 likelihoods, or null
     * @param ploidy                number of chromosomes in the pool
     * @param perLaneErrorModels    per-lane error models (may be null)
     * @param ignoreLaneInformation whether to pool reads across lanes
     * @param pairModel             pair-HMM indel error model
     * @param haplotypeMap          allele -> haplotype map for read scoring
     * @param referenceContext      reference bases around the site
     */
    public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
                                                 final double[] logLikelihoods,
                                                 final int ploidy,
                                                 final HashMap<String, ErrorModel> perLaneErrorModels,
                                                 final boolean ignoreLaneInformation,
                                                 final PairHMMIndelErrorModel pairModel,
                                                 final LinkedHashMap<Allele, Haplotype> haplotypeMap,
                                                 final ReferenceContext referenceContext) {
        super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
        this.pairModel = pairModel;
        this.haplotypeMap = haplotypeMap;
        this.refContext = referenceContext;
        this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
        // todo - not needed if indel alleles have base at current position
        this.refBase = referenceContext.getBase();
    }

    // -------------------------------------------------------------------------------------
    //
    // add() routines. These are the workhorse routines for calculating the overall genotype
    // likelihoods given observed bases and reads. Includes high-level operators all the
    // way down to single base and qual functions.
    //
    // -------------------------------------------------------------------------------------

    /**
     * Updates likelihoods to reflect the observations in the read-based pileup. When
     * reference-sample data exists, the pileup is processed per lane using each lane's
     * error model; otherwise the whole pileup is processed once with no error model.
     *
     * @param pileup read pileup
     * @param UAC    unified argument collection supplying caller settings
     * @return the number of reads/bases used from the pileup
     */
    public int add(ReadBackedPileup pileup, UnifiedArgumentCollection UAC) {
        int n = 0;
        if (!hasReferenceSampleData) {
            // no error models
            return add(pileup, (ErrorModel)null);
        }
        for (String laneID : perLaneErrorModels.keySet() ) {
            // get pileup for this lane (or the whole pileup when lanes are ignored)
            ReadBackedPileup perLanePileup;
            if (ignoreLaneInformation)
                perLanePileup = pileup;
            else
                perLanePileup = pileup.getPileupForLane(laneID);
            if (perLanePileup == null || perLanePileup.isEmpty())
                continue;
            ErrorModel errorModel = perLaneErrorModels.get(laneID);
            n += add(perLanePileup, errorModel);
            // when lanes are ignored the single dummy model covers everything; stop after one pass
            if (ignoreLaneInformation)
                break;
        }
        return n;
    }

    /**
     * Calculates the pool's probability for all possible allele-count conformations of the
     * observed indel alleles, based on the error model generated by the reference sample on
     * the same lane.
     *
     * For allele counts j1..jM over M alleles with pool size 2N, base counts n1..nM, and
     * per-quality error rate e[q]:
     *
     *   Q(j,q) = log10( j/2N * (1 - e[q]) + (2N - j)/2N * e[q]/3 )
     *
     *   log10 Pr(ac = j1..jM | D, errorModel)
     *       = log10 Pr(ac = j1..jM) + log10sum_q( log10 Pr(errorModel[q]) + sum_m n_m * Q(j_m, q) )
     *
     * If pileup data comes from several lanes (each with its own error model), the per-lane
     * results are combined by convolving over how the count splits across lanes:
     *   Pr(AC = j | D, E1, E2) = sum_{j1=0..j} Pr(AC1 = j1 | D, E1) * Pr(AC2 = j - j1 | D, E2)
     *
     * When no reference sample is available, read-vs-haplotype likelihoods from the pair-HMM
     * are used instead of base counts.
     *
     * @param pileup     base pileup
     * @param errorModel site error model (null in the pair-HMM path)
     * @return number of reads/bases added
     */
    private int add(ReadBackedPileup pileup, ErrorModel errorModel) {
        int n=0;
        // Number of observations per allele in pileup, in allele-list order
        List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size());
        if (!hasReferenceSampleData) {
            // pair-HMM path: score every read against every candidate haplotype
            final int numHaplotypes = haplotypeMap.size();   // note: currently unused
            final int readCounts[] = new int[pileup.getNumberOfElements()];
            readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
            n = readHaplotypeLikelihoods.length;
        } else {
            Allele refAllele = null;
            for (Allele a:alleles) {
                numSeenBases.add(0);
                if (a.isReference())
                    refAllele = a;
            }
            if (refAllele == null)
                throw new ReviewedStingException("BUG: no ref alleles in passed in allele list!");
            // count, for each allele, how many pileup elements match it
            for (PileupElement elt : pileup) {
                if (VERBOSE)
                    System.out.format("base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d\n",elt.getBase(), elt.isBeforeDeletionStart(),elt.isBeforeInsertion(),elt.getEventBases(),elt.getEventLength());
                int idx =0;
                for (Allele allele : alleles) {
                    int cnt = numSeenBases.get(idx);
                    numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0));
                }
                n++;
            }
        }
        computeLikelihoods(errorModel, alleles, numSeenBases, pileup);
        return n;
    }

    /**
     * Computes the log10 likelihood of one allele-count conformation and stores it in
     * {@code ACset.log10Likelihoods[0]}.
     *
     * @param ACset           conformation (allele counts) to evaluate
     * @param errorModel      site-specific error model (unused in the pair-HMM path)
     * @param alleleList      list of alleles
     * @param numObservations number of observations for each allele in alleleList
     * @param pileup          base pileup (unused here; present for interface symmetry)
     */
    public void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset,
                                            final ErrorModel errorModel,
                                            final List<Allele> alleleList,
                                            final List<Integer> numObservations,
                                            final ReadBackedPileup pileup) {
        final int[] currentCnt = Arrays.copyOf(ACset.ACcounts.counts, alleleList.size());
        double p1 = 0.0;
        if (!hasReferenceSampleData) {
            // no error model: use pair HMM likelihoods; marginalize each read over alleles
            // weighted by the conformation's allele frequencies (log10Cache[count] - log10(ploidy))
            for (int i=0; i < readHaplotypeLikelihoods.length; i++) {
                double acc[] = new double[alleleList.size()];
                for (int k=0; k < acc.length; k++ )
                    acc[k] = readHaplotypeLikelihoods[i][k] + MathUtils.log10Cache[currentCnt[k]]-LOG10_PLOIDY;
                p1 += MathUtils.log10sumLog10(acc);
            }
        } else {
            // error-model path: dot-product of the error model's quality distribution with
            // the per-quality mismatch log-likelihood accumulated over all alleles
            final int minQ = errorModel.getMinSignificantQualityScore();
            final int maxQ = errorModel.getMaxSignificantQualityScore();
            final double[] acVec = new double[maxQ - minQ + 1];
            for (int k=minQ; k<=maxQ; k++) {
                int idx=0;
                for (int n : numObservations)
                    acVec[k-minQ] += n*logMismatchProbabilityArray[currentCnt[idx++]][k];
            }
            p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ, maxQ), acVec);
        }
        ACset.log10Likelihoods[0] = p1;
    }
}

View File

@ -0,0 +1,125 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
/**
 * Indel flavor of the general-ploidy likelihoods calculation model: supplies the
 * pair-HMM-backed likelihood objects and the indel-specific allele selection logic.
 */
public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
    // cap on the number of alleles (ref + alternates) genotyped at one site
    private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
    // pair-HMM used to compute read-vs-haplotype likelihoods
    private PairHMMIndelErrorModel pairModel;
    /*
    private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
            new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
                protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
                    return new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
                }
            };
    */
    // candidate allele -> haplotype map, rebuilt per site by getInitialAllelesToUse()
    private LinkedHashMap<Allele, Haplotype> haplotypeMap;
    /*
    static {
        indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
    }
    */

    /**
     * @param UAC    unified argument collection (gap penalties, debug flags, etc.)
     * @param logger logger passed to the base class
     */
    protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
        super(UAC, logger);
        pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
                UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
        haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
    }

    /**
     * Creates an indel likelihood object wired to this model's pair-HMM and haplotype map.
     */
    protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
                                                                              final double[] logLikelihoods,
                                                                              final int ploidy,
                                                                              final HashMap<String, ErrorModel> perLaneErrorModels,
                                                                              final boolean useBQAedPileup,
                                                                              final ReferenceContext ref,
                                                                              final boolean ignoreLaneInformation){
        return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
    }

    /**
     * Discovers indel alleles at the site, caps the list at MAX_NUM_ALLELES_TO_GENOTYPE,
     * and (re)builds the allele -> haplotype map. If no haplotype map can be built the
     * allele list is cleared so the site is skipped.
     */
    protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
                                                  final ReferenceContext ref,
                                                  final Map<String, AlignmentContext> contexts,
                                                  final AlignmentContextUtils.ReadOrientation contextType,
                                                  final GenomeLocParser locParser,
                                                  final List<Allele> allAllelesToUse){
        List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
        if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
            alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
        if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
            // fresh site: clear cached per-read indel likelihoods and haplotypes
            IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
            haplotypeMap.clear();
        }
        IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap);
        // sanity check: if haplotype map couldn't be created, clear allele list
        if (haplotypeMap.isEmpty())
            alleles.clear();
        return alleles;
    }

    /**
     * For indels, the final allele set is simply the caller-supplied list, or the list the
     * likelihoods were computed over when none was supplied.
     */
    protected List<Allele> getFinalAllelesToUse(final RefMetaDataTracker tracker,
                                                final ReferenceContext ref,
                                                final List<Allele> allAllelesToUse,
                                                final ArrayList<PoolGenotypeData> GLs) {
        // find the alternate allele(s) that we should be using
        final List<Allele> alleles = new ArrayList<Allele>();
        if ( allAllelesToUse != null )
            alleles.addAll(allAllelesToUse);
        else if (!GLs.isEmpty())
            alleles.addAll(GLs.get(0).alleles);
        return alleles;
    }

    /**
     * End coordinate spans the length of the first (reference) allele, so deletions
     * cover all deleted reference bases.
     */
    protected int getEndLocation(final RefMetaDataTracker tracker,
                                 final ReferenceContext ref,
                                 final List<Allele> allelesToUse) {
        return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
    }
}

View File

@ -0,0 +1,356 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.*;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
/**
 * Stable, error checking version of the pool genotyper. Useful for calculating the likelihoods, priors,
 * and posteriors given a pile of bases and quality scores.
 */
public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
    // allele list padded out to all four bases (A,C,G,T) so every base has an index
    final List<Allele> myAlleles;
    // maps BaseUtils.BASES order -> index into myAlleles
    final int[] alleleIndices;
    // whether add() should wrap the pileup in a BAQ-capped view
    final boolean useBAQedPileup;
    // reference base (first base of the reference allele)
    final byte refByte;
    // minimum base quality, recorded from UAC on the first add() call
    int mbq;
    //final double[] PofDGivenBase;
    // cache of log10 P(observed base | chromosome base, qual), filled by the static initializer
    protected static final double[][][] qualLikelihoodCache;

    /**
     * Create a new GenotypeLikelihoods object with given priors and PCR error rate for each pool genotype.
     *
     * @param alleles Alleles associated with this likelihood object; first must be the reference allele
     * @param logLikelihoods Likelihoods (can be null if no likelihoods known)
     * @param ploidy Ploidy of sample (# of chromosomes)
     * @param perLaneErrorModels error model objects for each lane
     * @param useBQAedPileup Use BAQed pileup
     * @param ignoreLaneInformation If true, lane info is ignored
     */
    public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
                                               final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
        super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
        this.useBAQedPileup = useBQAedPileup;
        myAlleles = new ArrayList<Allele>(alleles);
        Allele refAllele = alleles.get(0);
        //sanity check: by construction, first allele should ALWAYS be the reference allele
        if (!refAllele.isReference())
            throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
        refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
        if (myAlleles.size() < BaseUtils.BASES.length) {
            // likelihood only defined for a subset of possible alleles; pad with the remaining bases so all four are present
            for (byte b : BaseUtils.BASES) {
                // if base is not included in myAlleles, add new allele
                boolean isRef = (b==refByte);
                if (!myAlleles.contains(Allele.create(b,isRef)))
                    myAlleles.add(Allele.create(b,isRef));
            }
        }
        // compute permutation vector to figure out mapping from base indices to allele positions
        int idx = 0;
        alleleIndices = new int[myAlleles.size()];
        for (byte b : BaseUtils.BASES) {
            boolean isRef = (b==refByte);
            alleleIndices[idx++] = myAlleles.indexOf(Allele.create(b,isRef));
        }
    }

    // -------------------------------------------------------------------------------------
    //
    // add() routines. These are the workhorse routines for calculating the overall genotype
    // likelihoods given observed bases and reads. Includes high-level operators all the
    // way down to single base and qual functions.
    //
    // -------------------------------------------------------------------------------------

    /**
     * Convenience entry point: records the caller's minimum base quality and delegates to
     * the full add() with bad-base filtering and mapping-quality capping enabled.
     *
     * @param pileup read pileup
     * @param UAC    unified argument collection supplying MIN_BASE_QUALTY_SCORE
     * @return the number of bases used from the pileup
     */
    public int add(ReadBackedPileup pileup, UnifiedArgumentCollection UAC) {
        mbq = UAC.MIN_BASE_QUALTY_SCORE; // record for later use
        return add(pileup, true, true, mbq);
    }

    /**
     * Updates likelihoods and posteriors to reflect the additional observations contained within the
     * read-based pileup. When reference-sample data exists, the pileup is processed per lane using
     * each lane's error model; otherwise the whole pileup is processed once with no error model.
     *
     * @param pileup read pileup
     * @param ignoreBadBases should we ignore bad bases?
     * @param capBaseQualsAtMappingQual should we cap a base's quality by its read's mapping quality?
     * @param minBaseQual the minimum base quality at which to consider a base valid
     * @return the number of good bases found in the pileup
     */
    public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
        int n = 0;
        if ( useBAQedPileup )
            pileup = createBAQedPileup( pileup );
        if (!hasReferenceSampleData) {
            return add(pileup, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual, null);
        }
        for (String laneID : perLaneErrorModels.keySet() ) {
            // get pileup for this lane (or the whole pileup when lanes are ignored)
            ReadBackedPileup perLanePileup;
            if (ignoreLaneInformation)
                perLanePileup = pileup;
            else
                perLanePileup = pileup.getPileupForLane(laneID);
            if (perLanePileup == null || perLanePileup.isEmpty())
                continue;
            ErrorModel errorModel = perLaneErrorModels.get(laneID);
            n += add(perLanePileup, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual, errorModel);
            // when lanes are ignored the single dummy model covers everything; stop after one pass
            if (ignoreLaneInformation)
                break;
        }
        return n;
    }

    /**
     * Calculates the pool's probability for all possible allele counts for all bases. Calculation is
     * based on the error model generated by the reference sample on the same lane.
     *
     * For allele counts (jA,jC,jG,jT) summing to 2N, observed base counts (nA,nC,nG,nT), and
     * per-quality error rate e[q]:
     *
     *   Q(j,q) = log10( j/2N * (1 - e[q]) + (2N - j)/2N * e[q]/3 )
     *
     *   log10 Pr(ac = jA,jC,jG,jT | D, errorModel)
     *       = log10 Pr(ac = jA,jC,jG,jT)
     *       + log10sum_q( log10 Pr(errorModel[q]) + nA*Q(jA,q) + nC*Q(jC,q) + nG*Q(jG,q) + nT*Q(jT,q) )
     *
     * If pileup data comes from several lanes (each with its own error model), per-lane results
     * are combined by convolving over how the count splits across lanes:
     *   Pr(AC = j | D, E1, E2) = sum_{j1=0..j} Pr(AC1 = j1 | D, E1) * Pr(AC2 = j - j1 | D, E2)
     *
     * To store the model one can iterate jA = 0..2N, jC = 0..2N-jA, jG = 0..2N-jA-jC,
     * jT = 2N-jA-jC-jG, evaluating the logsum over q = minSiteQual..maxSiteQual for each.
     *
     * @param pileup Base pileup
     * @param ignoreBadBases Whether to ignore bad bases
     * @param capBaseQualsAtMappingQual Cap base at mapping qual
     * @param minBaseQual Minimum base quality to consider
     * @param errorModel Site error model (null when no reference sample exists)
     * @return Number of bases added
     */
    private int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual, ErrorModel errorModel) {
        // Number of [A C G T]'s in pileup, in that order
        List<Integer> numSeenBases = new ArrayList<Integer>(BaseUtils.BASES.length);
        for (byte b: BaseUtils.BASES)
            numSeenBases.add(0);
        if (hasReferenceSampleData) {
            // count number of elements in pileup
            for (PileupElement elt : pileup) {
                byte obsBase = elt.getBase();
                byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
                if ( qual == 0 )
                    continue;
                int idx = 0;
                for (byte base:BaseUtils.BASES) {
                    int cnt = numSeenBases.get(idx);
                    numSeenBases.set(idx++,cnt + (base == obsBase?1:0));
                }
            }
            if (VERBOSE)
                System.out.format("numSeenBases: %d %d %d %d\n",numSeenBases.get(0),numSeenBases.get(1),numSeenBases.get(2),numSeenBases.get(3));
        }
        computeLikelihoods(errorModel, myAlleles, numSeenBases, pileup);
        // NOTE(review): returns the total pileup size, including bases skipped above for
        // low quality — possibly intentional (callers only test > 0); confirm before changing
        return pileup.getNumberOfElements();
    }

    /**
     * Compute likelihood of current conformation and store it in {@code ACset.log10Likelihoods[0]}.
     *
     * @param ACset Count to compute
     * @param errorModel Site-specific error model object (unused when no reference sample exists)
     * @param alleleList List of alleles
     * @param numObservations Number of observations for each allele in alleleList
     * @param pileup Base pileup (used only in the no-reference-sample path)
     */
    public void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset,
                                            final ErrorModel errorModel,
                                            final List<Allele> alleleList,
                                            final List<Integer> numObservations,
                                            final ReadBackedPileup pileup) {
        final int[] currentCnt = Arrays.copyOf(ACset.ACcounts.counts, BaseUtils.BASES.length);
        final int[] ac = new int[BaseUtils.BASES.length];
        // permute conformation counts from allele-list order into A,C,G,T order
        for (int k=0; k < BaseUtils.BASES.length; k++ )
            ac[k] = currentCnt[alleleIndices[k]];
        double p1 = 0.0;
        if (!hasReferenceSampleData) {
            // no error model: loop through pileup to compute likelihoods just from base qualities,
            // marginalizing each base over alleles weighted by the conformation's allele frequencies
            for (final PileupElement elt : pileup) {
                final byte obsBase = elt.getBase();
                final byte qual = qualToUse(elt, true, true, mbq);
                if ( qual == 0 )
                    continue;
                final double acc[] = new double[ACset.ACcounts.counts.length];
                for (int k=0; k < acc.length; k++ )
                    acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +MathUtils.log10Cache[ACset.ACcounts.counts[k]]
                            - LOG10_PLOIDY;
                p1 += MathUtils.log10sumLog10(acc);
            }
        }
        else {
            // error-model path: dot-product of the error model's quality distribution with the
            // per-quality mismatch log-likelihood accumulated over A,C,G,T counts
            final int minQ = errorModel.getMinSignificantQualityScore();
            final int maxQ = errorModel.getMaxSignificantQualityScore();
            final double[] acVec = new double[maxQ - minQ + 1];
            final int nA = numObservations.get(0);
            final int nC = numObservations.get(1);
            final int nG = numObservations.get(2);
            final int nT = numObservations.get(3);
            for (int k=minQ; k<=maxQ; k++)
                acVec[k-minQ] = nA*logMismatchProbabilityArray[ac[0]][k] +
                        nC*logMismatchProbabilityArray[ac[1]][k] +
                        nG*logMismatchProbabilityArray[ac[2]][k] +
                        nT*logMismatchProbabilityArray[ac[3]][k];
            p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ), acVec);
        }
        ACset.log10Likelihoods[0] = p1;
        /* System.out.println(Arrays.toString(ACset.ACcounts.getCounts())+" "+String.valueOf(p1));
       System.out.println(Arrays.toString(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ)));
        */
    }

    /**
     * Wraps every element of the pileup so that quality lookups go through BAQ.
     *
     * @param pileup original pileup
     * @return a new pileup of BAQedPileupElement wrappers at the same location
     */
    public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
        final List<PileupElement> BAQedElements = new ArrayList<PileupElement>();
        for( final PileupElement PE : pileup ) {
            final PileupElement newPE = new BAQedPileupElement( PE );
            BAQedElements.add( newPE );
        }
        return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
    }

    /**
     * Pileup element whose quality is recomputed from the read's BAQ tag.
     */
    public class BAQedPileupElement extends PileupElement {
        public BAQedPileupElement( final PileupElement PE ) {
            super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip());
        }

        @Override
        public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
    }

    /**
     * Helper function that returns the phred-scaled base quality score we should use for calculating
     * likelihoods for a pileup element. May return 0 to indicate that the observation is bad, and may
     * cap the quality score by the mapping quality of the read itself.
     *
     * @param p Pileup element
     * @param ignoreBadBases Flag to ignore bad bases
     * @param capBaseQualsAtMappingQual Whether to cap base Q at mapping quality
     * @param minBaseQual Min qual to use
     * @return New phred-scaled base quality (0 means "do not use this base")
     */
    private static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
        if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) )
            return 0;
        byte qual = p.getQual();
        if ( qual > SAMUtils.MAX_PHRED_SCORE )
            throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
        if ( capBaseQualsAtMappingQual )
            qual = (byte)Math.min((int)qual, p.getMappingQual());
        if ( (int)qual < minBaseQual )
            qual = (byte)0;
        return qual;
    }

    // precompute log10 P(observed base | chromosome base) for every base pair and quality
    static {
        qualLikelihoodCache = new double[BaseUtils.BASES.length][BaseUtils.BASES.length][1+SAMUtils.MAX_PHRED_SCORE];
        for (byte j=0; j <= SAMUtils.MAX_PHRED_SCORE; j++) {
            for (byte b1:BaseUtils.BASES) {
                for (byte b2:BaseUtils.BASES) {
                    qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(b1)][BaseUtils.simpleBaseToBaseIndex(b2)][j] = log10PofObservingBaseGivenChromosome(b1,b2,j);
                }
            }
        }
    }

    /**
     * Log10 probability of observing a base given the true chromosome base and its quality:
     * log10(1 - e) on a match, log10(e/3) on a mismatch, where e = 10^(-qual/10).
     *
     * @param observedBase observed base
     * @param chromBase target base
     * @param qual base quality
     * @return log10 likelihood
     */
    private static double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual) {
        final double log10_3 = log10(3.0);
        double logP;
        if ( observedBase == chromBase ) {
            // the base is consistent with the chromosome -- it's 1 - e
            //logP = oneMinusData[qual];
            double e = pow(10, (qual / -10.0));
            logP = log10(1.0 - e);
        } else {
            // the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error)
            logP = qual / -10.0 + (-log10_3);
        }
        //System.out.printf("%c %c %d => %f%n", observedBase, chromBase, qual, logP);
        return logP;
    }
}

View File

@ -0,0 +1,128 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
/**
 * SNP-specific specialization of the general-ploidy genotype likelihoods calculation model.
 * Supplies the per-pool SNP likelihood object and decides which reference/alternate
 * alleles to consider at a site, both before likelihood calculation (all possible
 * substitutions of the reference base) and after (based on computed likelihoods or
 * externally supplied alleles).
 */
public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
}
/**
 * Builds the SNP genotype-likelihoods object for one pool.
 *
 * NOTE(review): the logLikelihoods, ploidy, ref and ignoreLaneInformation parameters are
 * ignored here — UAC.samplePloidy and UAC.IGNORE_LANE_INFO are used instead, and null is
 * passed for the likelihood array. Confirm this is intentional for the SNP model.
 */
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation) {
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
}
/**
 * Returns the allele set to compute likelihoods over: the caller-supplied list when one
 * is given, otherwise the reference base plus all three possible substitutions.
 */
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
final ReferenceContext ref,
Map<String, AlignmentContext> contexts,
final AlignmentContextUtils.ReadOrientation contextType,
final GenomeLocParser locParser,
final List<Allele> allAllelesToUse) {
if (allAllelesToUse != null)
return allAllelesToUse;
final byte refBase = ref.getBase();
final List<Allele> allAlleles = new ArrayList<Allele>();
// first add ref allele
allAlleles.add(Allele.create(refBase, true));
// add all possible alt alleles
for (byte b: BaseUtils.BASES) {
if (refBase != b)
allAlleles.add(Allele.create(b));
}
return allAlleles;
}
/**
 * Selects the alleles to emit after likelihood computation. Priority: an explicit
 * caller-supplied list; in GENOTYPE_GIVEN_ALLELES mode, the alleles from the ROD
 * (returning null at non-SNP sites to skip them); otherwise ref plus the alternates
 * supported by the genotype likelihoods, with an arbitrary alternate added when none
 * is supported but all-sites output was requested.
 */
protected List<Allele> getFinalAllelesToUse(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final List<Allele> allAllelesToUse,
final ArrayList<PoolGenotypeData> GLs) {
// find the alternate allele(s) that we should be using
final List<Allele> alleles = new ArrayList<Allele>();
if ( allAllelesToUse != null ) {
alleles.addAll(allAllelesToUse);
} else if ( UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
// ignore places where we don't have a SNP
if ( vc == null || !vc.isSNP() )
return null;
alleles.addAll(vc.getAlleles());
} else {
alleles.add(Allele.create(ref.getBase(),true));
alleles.addAll(determineAlternateAlleles( GLs));
// if there are no non-ref alleles...
if ( alleles.size() == 1 ) {
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode != UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
// otherwise, choose any alternate allele (it doesn't really matter)
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0)));
}
}
return alleles;
}
/**
* @param tracker dummy parameter here
* @param ref Reference context
* @param alternateAllelesToUse alt allele list
* @return end location for vc to be created
*/
protected int getEndLocation(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final List<Allele> alternateAllelesToUse) {
// for SNPs, end loc is the same as start loc
return ref.getLocus().getStart();
}
}

View File

@ -0,0 +1,58 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
import org.broadinstitute.sting.utils.MathUtils;
/**
 * Genotype priors for pooled calling: every allele-count genotype (0..2N alternate
 * alleles for a pool of N samples) receives the same prior, log10(heterozygosity).
 */
public class PoolGenotypePriors implements GenotypePriors {
    private final double[] flatPriors;       // identical log10 prior for each allele-count genotype
    private final double heterozygosity;
    private final int samplesPerPool;
    private double[] priors = null;

    /**
     * Creates flat priors over the 2N+1 possible alternate-allele counts of a pool.
     *
     * @param heterozygosity per-site heterozygosity; each prior is log10 of this value
     * @param samplesPerPool number of samples in the pool
     */
    public PoolGenotypePriors(double heterozygosity, int samplesPerPool) {
        final double log10Het = Math.log10(heterozygosity);
        flatPriors = new double[2 * samplesPerPool + 1];
        for ( int idx = 0; idx < flatPriors.length; idx++ )
            flatPriors[idx] = log10Het;
        priors = flatPriors.clone();
        this.samplesPerPool = samplesPerPool;
        this.heterozygosity = heterozygosity;
    }

    /**
     * Returns the array of log10 priors, indexed by alternate-allele count.
     *
     * @return log10 prior as a double array
     */
    public double[] getPriors() {
        return priors;
    }

    public double getHeterozygosity() { return heterozygosity; }

    public int getNSamplesPerPool() { return samplesPerPool; }

    /**
     * Checks that every prior is a well-formed, non-positive log10 probability.
     *
     * @param throwException when true, a malformed prior raises a RuntimeException
     *                       (wrapping an IllegalStateException); otherwise false is returned
     * @return true when all priors are valid
     */
    public boolean validate(boolean throwException) {
        for ( int i = 0; i < priors.length; i++ ) {
            final boolean negativeOrZero = MathUtils.isNegativeOrZero(priors[i]);
            if ( MathUtils.wellFormedDouble(priors[i]) && negativeOrZero )
                continue;
            if ( !throwException )
                return false;
            final String bad = String.format("Prior %f is badly formed %b", priors[i], negativeOrZero);
            throw new RuntimeException(new IllegalStateException(String.format("At %d: %s", i, bad)));
        }
        return true;
    }
}

View File

@ -0,0 +1,159 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Arrays;
/**
 * A log10-probability (or likelihood) vector over an integer support [0, len-1] that can
 * optionally be "compressed" by trimming leading/trailing entries more than
 * LOG_DYNAMIC_RANGE below the maximum element. The original index of each retained entry
 * is preserved via [minVal, maxVal]; queries outside that range return -Infinity.
 *
 * User: delangel
 * Date: 4/11/12
 */
public class ProbabilityVector {
// retained log10 values; index i corresponds to original index i + minVal
private final double[] probabilityArray;
// first original index retained after compression (0 when uncompressed)
private final int minVal;
// last original index retained after compression (len-1 when uncompressed)
private final int maxVal;
final static double LOG_DYNAMIC_RANGE = 10; // values X below max vector value will be removed
/**
* Default constructor: take vector in log-space, with support from range [0,len-1]
* @param vec Probability (or likelihood) vector in log space
* @param compressRange If true, compress by eliminating edges with little support
* @throws ReviewedStingException if any element is positive (not a valid log10 probability)
*/
public ProbabilityVector(double[] vec, boolean compressRange) {
int maxValIdx = MathUtils.maxElementIndex(vec);
double maxv = vec[maxValIdx];
if (maxv > 0.0)
throw new ReviewedStingException("BUG: Attempting to create a log-probability vector with positive elements");
if (compressRange) {
minVal = getMinIdx(vec, maxValIdx);
maxVal = getMaxIdx(vec, maxValIdx);
probabilityArray = Arrays.copyOfRange(vec, minVal, maxVal+1);
} else {
// NOTE(review): the input array is aliased here, not copied — callers must not mutate it
probabilityArray = vec;
minVal = 0;
maxVal = vec.length-1;
}
}
/** Convenience constructor: compression enabled. */
public ProbabilityVector(double[] vec) {
this(vec,true);
}
/** Copy constructor with independently chosen compression setting. */
public ProbabilityVector(ProbabilityVector other, boolean compressRange) {
// create new probability vector from other.
this(other.getUncompressedProbabilityVector(), compressRange);
}
public int getMinVal() { return minVal;}
public int getMaxVal() { return maxVal;}
/** Returns the (possibly compressed) backing array; index 0 maps to original index minVal. */
public double[] getProbabilityVector() { return probabilityArray;}
// NOTE(review): the parameters deliberately shadow the fields of the same name here
public double[] getProbabilityVector(int minVal, int maxVal) {
// get vector in specified range. If range is outside of current vector, fill with negative infinities
double[] x = new double[maxVal - minVal + 1];
for (int k=minVal; k <= maxVal; k++)
x[k-minVal] = getLogProbabilityForIndex(k);
return x;
}
/**
 * Re-expands to original indexing, padding [0, minVal) with -Infinity.
 * NOTE(review): the returned array has length maxVal+1, so any entries trimmed from the
 * tail during compression are not restored — the original length is not retained.
 */
public double[] getUncompressedProbabilityVector() {
double x[] = new double[maxVal+1];
for (int i=0; i < minVal; i++)
x[i] = Double.NEGATIVE_INFINITY;
for (int i=minVal; i <=maxVal; i++)
x[i] = probabilityArray[i-minVal];
return x;
}
/**
* Return log Probability for original index i
* @param idx Index to probe
* @return log10(Pr X = i) ), or -Infinity when idx lies outside [minVal, maxVal]
*/
public double getLogProbabilityForIndex(int idx) {
if (idx < minVal || idx > maxVal)
return Double.NEGATIVE_INFINITY;
else
return probabilityArray[idx-minVal];
}
//public ProbabilityVector
/** Static convenience wrapper around the compressing constructor. */
public static ProbabilityVector compressVector(double[] vec ) {
return new ProbabilityVector(vec, true);
}
/**
* Determine left-most index where a vector exceeds (max Value - DELTA)
* @param vec Input vector
* @param maxValIdx Index to stop - usually index with max value in vector
* @return Min index where vector > vec[maxValIdx]-LOG_DYNAMIC_RANGE
*/
private static int getMinIdx(double[] vec, int maxValIdx) {
int edgeIdx;
for (edgeIdx=0; edgeIdx<=maxValIdx; edgeIdx++ ) {
if (vec[edgeIdx] > vec[maxValIdx]-LOG_DYNAMIC_RANGE)
break;
}
return edgeIdx;
}
/**
* Determine right-most index where a vector exceeds (max Value - DELTA)
* @param vec Input vector
* @param maxValIdx Index to stop - usually index with max value in vector
* @return Max index where vector > vec[maxValIdx]-LOG_DYNAMIC_RANGE
*/
private static int getMaxIdx(double[] vec, int maxValIdx) {
int edgeIdx;
for (edgeIdx=vec.length-1; edgeIdx>=maxValIdx; edgeIdx-- ) {
if (vec[edgeIdx] > vec[maxValIdx]-LOG_DYNAMIC_RANGE)
break;
}
return edgeIdx;
}
/**
* Computes log10 of the dot product of the two vectors interpreted as probabilities,
* i.e. log10( sum_k 10^this[k] * 10^other[k] ) over the overlap of their supports.
* @param other the other probability vector
* @return log10 of the dot product, or -Infinity when the supports do not overlap
*/
public double logDotProduct(ProbabilityVector other) {
// find overlap in range
int minRange = Math.max(this.minVal, other.getMinVal());
int maxRange = Math.min(this.maxVal, other.getMaxVal());
if (minRange > maxRange)
return Double.NEGATIVE_INFINITY;
// x = 0,1,2, y = 2,3,4. minRange = 2, maxRange = 2
double[] result = new double[maxRange - minRange+1];
for (int k=0; k <= maxRange-minRange; k++) {
int startI = minRange - this.minVal;
int startJ = minRange - other.getMinVal();
result[k] = this.probabilityArray[k+startI] + other.probabilityArray[k+startJ];
}
return MathUtils.approximateLog10SumLog10(result);
}
}

View File

@ -0,0 +1,60 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.jgrapht.graph.DefaultDirectedGraph;
/**
 * Simple edge class for connecting kmer nodes in the de Bruijn assembly graph.
 * Tracks how many observations support the edge and whether it lies on the
 * reference path.
 *
 * User: ebanks
 * Date: Mar 23, 2011
 */
// simple edge class for connecting nodes in the graph
public class DeBruijnEdge implements Comparable<DeBruijnEdge> {
    private int multiplicity;   // number of observations supporting this edge
    private boolean isRef;      // true when the edge is part of the reference path

    /** Creates a non-reference edge with multiplicity 1. */
    public DeBruijnEdge() {
        multiplicity = 1;
        isRef = false;
    }

    /** Creates an edge with multiplicity 1 and the given reference status. */
    public DeBruijnEdge( final boolean isRef ) {
        multiplicity = 1;
        this.isRef = isRef;
    }

    /** Creates an edge with the given reference status and multiplicity. */
    public DeBruijnEdge( final boolean isRef, final int multiplicity ) {
        this.multiplicity = multiplicity;
        this.isRef = isRef;
    }

    public int getMultiplicity() {
        return multiplicity;
    }

    public void setMultiplicity( final int value ) {
        multiplicity = value;
    }

    public boolean getIsRef() {
        return isRef;
    }

    public void setIsRef( final boolean isRef ) {
        this.isRef = isRef;
    }

    /**
     * Endpoint equality of two edges within the same graph.
     * NOTE: this overload does NOT override Object.equals (different signature).
     */
    public boolean equals( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
        return (graph.getEdgeSource(this).equals(graph.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph.getEdgeTarget(edge)));
    }

    /**
     * Endpoint equality of two edges drawn from two different graphs.
     * NOTE: this overload does NOT override Object.equals (different signature).
     */
    public boolean equals( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph2 ) {
        return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge)));
    }

    /** Orders edges by multiplicity, ascending. */
    @Override
    public int compareTo( final DeBruijnEdge that ) {
        // compare explicitly rather than via subtraction, which can overflow
        // and return a wrongly-signed result for extreme multiplicities
        if ( this.multiplicity < that.multiplicity )
            return -1;
        return this.multiplicity == that.multiplicity ? 0 : 1;
    }
}

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import java.util.Arrays;
/**
 * Simple node class storing a kmer sequence for the de Bruijn assembly graph.
 * Vertex identity (equals/hashCode) is defined by the byte sequence alone,
 * not by the kmer size.
 *
 * User: ebanks
 * Date: Mar 23, 2011
 */
// simple node class for storing kmer sequences
public class DeBruijnVertex {
    protected final byte[] sequence;  // the bases held by this vertex
    public final int kmer;            // kmer size used when building the graph

    public DeBruijnVertex( final byte[] sequence, final int kmer ) {
        this.sequence = sequence;
        this.kmer = kmer;
    }

    @Override
    public boolean equals( Object v ) {
        if ( !(v instanceof DeBruijnVertex) )
            return false;
        return Arrays.equals(sequence, ((DeBruijnVertex) v).sequence);
    }

    // necessary to override here so that graph.containsVertex() works the same way
    // as vertex.equals() as one might expect
    @Override
    public int hashCode() {
        return Arrays.hashCode(sequence);
    }

    @Override
    public String toString() {
        return new String(sequence);
    }

    public byte[] getSequence() {
        return sequence;
    }

    /** The bases unique to this vertex: everything past the shared (kmer-1)-base prefix. */
    public byte[] getSuffix() {
        return Arrays.copyOfRange( sequence, kmer - 1, sequence.length );
    }

    public String getSuffixString() {
        return new String( getSuffix() );
    }
}

View File

@ -0,0 +1,616 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class GenotypingEngine {
private final boolean DEBUG;                          // emit verbose per-haplotype diagnostics to stdout
private final int MNP_LOOK_AHEAD;                     // look-ahead window passed to event generation
private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE; // if true, emit whole-haplotype alleles instead of per-event records
private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
static {
    // Populate the shared no-call list exactly once. This was previously done in the
    // constructor, so constructing a second GenotypingEngine appended a duplicate
    // NO_CALL allele to this static list and corrupted every genotype built from it.
    noCall.add(Allele.NO_CALL);
}
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
/**
 * @param DEBUG                          enable verbose diagnostics
 * @param MNP_LOOK_AHEAD                 look-ahead window for merging SNPs into MNPs
 * @param OUTPUT_FULL_HAPLOTYPE_SEQUENCE emit full haplotype sequences instead of per-event records
 */
public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
    this.DEBUG = DEBUG;
    this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD;
    this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
}
// This function is the streamlined approach, currently not being used
/**
 * Genotypes every haplotype as one whole-haplotype allele via the exact model, prunes
 * haplotypes whose alleles the model discarded, then decomposes the survivors into
 * per-position variant events (or emits whole-haplotype records when
 * OUTPUT_FULL_HAPLOTYPE_SEQUENCE is set).
 *
 * NOTE(review): mutates the input haplotypes list (removeAll of pruned haplotypes) and
 * each haplotype's event map — callers observe the pruned/annotated state.
 *
 * @param UG_engine          UnifiedGenotyper engine used for the exact-model call
 * @param haplotypes         candidate haplotypes (all assumed to cover the same samples)
 * @param ref                reference bases of the region
 * @param refLoc             location spanned by ref
 * @param activeRegionWindow window to which emitted events are restricted
 * @param genomeLocParser    parser used for merging variant contexts
 * @return one (VariantContext, allele-to-haplotypes map) pair per emitted event; empty when the call fails confidence
 */
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) {
// Prepare the list of haplotype indices to genotype
final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
for( final Haplotype h : haplotypes ) {
allelesToGenotype.add( Allele.create(h.getBases(), h.isReference()) );
}
final int numHaplotypes = haplotypes.size();
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
// diploid GL vector over haplotype pairs: length H*(H+1)/2, lower-triangular order
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, sample);
int glIndex = 0;
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
}
}
genotypes.add(new GenotypeBuilder(sample, noCall).PL(genotypeLikelihoods).make());
}
final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder().loc(activeRegionWindow).alleles(allelesToGenotype).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
if( call == null ) { return Collections.emptyList(); } // exact model says that the call confidence is below the specified confidence threshold so nothing to do here
// Prepare the list of haplotypes that need to be run through Smith-Waterman for output to VCF
final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
for( final Haplotype h : haplotypes ) {
if( call.getAllele(h.getBases()) == null ) { // exact model removed this allele from the list so no need to run SW and output to VCF
haplotypesToRemove.add(h);
}
}
haplotypes.removeAll(haplotypesToRemove);
if( OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
// short-circuit: emit a single record whose alleles are the full haplotype sequences
final List<Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>> returnVCs = new ArrayList<Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>>();
// set up the default 1-to-1 haplotype mapping object
final HashMap<Allele,ArrayList<Haplotype>> haplotypeMapping = new HashMap<Allele,ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.put(call.getAllele(h.getBases()), list);
}
returnVCs.add( new Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>(call,haplotypeMapping) );
return returnVCs;
}
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
int count = 0;
if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
for( final Haplotype h : haplotypes ) {
if( DEBUG ) {
System.out.println( h.toString() );
System.out.println( "> Cigar = " + h.getCigar() );
}
// Walk along the alignment and turn any difference from the reference into an event
h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
startPosKeySet.addAll(h.getEventMap().keySet());
}
// Create the VC merge priority list
final ArrayList<String> priorityList = new ArrayList<String>();
for( int iii = 0; iii < haplotypes.size(); iii++ ) {
priorityList.add("HC" + iii);
}
// Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>();
for( final Haplotype h : haplotypes ) {
final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
final VariantContext vc = eventMap.get(loc);
if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
eventsAtThisLoc.add(vc);
}
}
// Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
// Merge the event to find a common reference representation
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
final HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
int aCount = 0;
for( final Allele a : mergedVC.getAlleles() ) {
alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
}
if( DEBUG ) {
System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
//System.out.println("Event/haplotype allele mapping = " + alleleMapper);
}
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
final GenotypesContext myGenotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
final int myNumHaplotypes = alleleMapper.size();
final double[] genotypeLikelihoods = new double[myNumHaplotypes * (myNumHaplotypes+1) / 2];
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper);
int glIndex = 0;
for( int iii = 0; iii < myNumHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
}
}
// using the allele mapping object translate the haplotype allele into the event allele
final Genotype g = new GenotypeBuilder(sample)
.alleles(findEventAllelesInSample(mergedVC.getAlleles(), call.getAlleles(), call.getGenotype(sample).getAlleles(), alleleMapper, haplotypes))
.phased(loc != startPosKeySet.first())
.PL(genotypeLikelihoods).make();
myGenotypes.add(g);
}
returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(
new VariantContextBuilder(mergedVC).log10PError(call.getLog10PError()).genotypes(myGenotypes).make(), alleleHashMap) );
}
}
return returnCalls;
}
/**
 * Decomposes each haplotype into individual variant events, optionally merges
 * consecutive events in strong LD into complex substitutions, then genotypes each
 * event position independently with the exact model. In genotype-given-alleles (GGA)
 * mode, only the externally supplied alleles/positions are genotyped.
 *
 * NOTE(review): mutates the input haplotypes list (via cleanUpSymbolicUnassembledEvents)
 * and each haplotype's event map.
 *
 * @param UG_engine               UnifiedGenotyper engine used for the exact-model calls
 * @param haplotypes              candidate haplotypes (all assumed to cover the same samples)
 * @param ref                     reference bases of the region
 * @param refLoc                  location spanned by ref
 * @param activeRegionWindow      window to which emitted events are restricted
 * @param genomeLocParser         parser used for merging variant contexts
 * @param activeAllelesToGenotype non-empty iff running in GGA mode; the alleles to genotype
 * @return one (VariantContext, allele-to-haplotypes map) pair per confidently called event
 */
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final ArrayList<VariantContext> activeAllelesToGenotype ) {
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
int count = 0;
if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
for( final Haplotype h : haplotypes ) {
// Walk along the alignment and turn any difference from the reference into an event
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
if( DEBUG ) {
System.out.println( h.toString() );
System.out.println( "> Cigar = " + h.getCigar() );
System.out.println( "> Left and right breaks = (" + h.leftBreakPoint + " , " + h.rightBreakPoint + ")");
System.out.println( ">> Events = " + h.getEventMap());
}
}
// Create the VC merge priority list
final ArrayList<String> priorityList = new ArrayList<String>();
for( int iii = 0; iii < haplotypes.size(); iii++ ) {
priorityList.add("HC" + iii);
}
cleanUpSymbolicUnassembledEvents( haplotypes, priorityList );
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 3 ) { // if not in GGA mode and have at least 3 samples try to create MNP and complex events by looking at LD structure
mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
}
if( !activeAllelesToGenotype.isEmpty() ) { // we are in GGA mode!
for( final VariantContext compVC : activeAllelesToGenotype ) {
startPosKeySet.add( compVC.getStart() );
}
}
// Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>();
if( activeAllelesToGenotype.isEmpty() ) {
// gather the distinct events the haplotypes produced at this position
for( final Haplotype h : haplotypes ) {
final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
final VariantContext vc = eventMap.get(loc);
if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
eventsAtThisLoc.add(vc);
}
}
} else { // we are in GGA mode!
// build one biallelic (ref + single alt) event per supplied alternate allele
for( final VariantContext compVC : activeAllelesToGenotype ) {
if( compVC.getStart() == loc ) {
priorityList.clear();
int alleleCount = 0;
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
HashSet<Allele> alleleSet = new HashSet<Allele>(2);
alleleSet.add(compVC.getReference());
alleleSet.add(compAltAllele);
priorityList.add("Allele" + alleleCount);
eventsAtThisLoc.add(new VariantContextBuilder(compVC).alleles(alleleSet).source("Allele"+alleleCount).make());
alleleCount++;
}
}
}
}
if( eventsAtThisLoc.isEmpty() ) { continue; }
// Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
// Merge the event to find a common reference representation
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
if( mergedVC == null ) { continue; }
final HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
int aCount = 0;
for( final Allele a : mergedVC.getAlleles() ) {
alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
}
if( DEBUG ) {
System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
//System.out.println("Event/haplotype allele mapping = " + alleleMapper);
}
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
final int numHaplotypes = alleleMapper.size();
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper);
int glIndex = 0;
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
}
}
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
}
final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
if( call != null ) {
returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(call, alleleHashMap) );
}
}
}
return returnCalls;
}
/**
 * Removes haplotypes carrying a symbolic (unassembled) event that collides with a
 * concrete indel starting at the same position on another haplotype, and drops the
 * corresponding source tags from the merge priority list. Both input lists are
 * modified in place.
 */
protected static void cleanUpSymbolicUnassembledEvents( final ArrayList<Haplotype> haplotypes, final ArrayList<String> priorityList ) {
    final ArrayList<Haplotype> dropHaplotypes = new ArrayList<Haplotype>();
    final ArrayList<String> dropSources = new ArrayList<String>();
    for( final Haplotype candidate : haplotypes ) {
        for( final VariantContext event : candidate.getEventMap().values() ) {
            if( !event.isSymbolic() )
                continue;
            for( final Haplotype other : haplotypes ) {
                for( final VariantContext otherEvent : other.getEventMap().values() ) {
                    if( otherEvent.isIndel() && event.getStart() == otherEvent.getStart() ) {
                        dropHaplotypes.add(candidate);
                        dropSources.add(event.getSource());
                        break; // one collision per (event, other-haplotype) pair is enough
                    }
                }
            }
        }
    }
    haplotypes.removeAll(dropHaplotypes);
    priorityList.removeAll(dropSources);
}
/**
 * Repeatedly scans adjacent event start positions (closer than MAX_SIZE_TO_COMBINE bp)
 * and, when both sites are biallelic across the haplotypes and their haplotype-level
 * co-occurrence R^2 exceeds MERGE_EVENTS_R2_THRESHOLD, replaces the pair with a single
 * merged complex event on every haplotype that carries both. Mutates each haplotype's
 * event map and the startPosKeySet in place; restarts the scan after every merge.
 */
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
final int MAX_SIZE_TO_COMBINE = 15;
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
if( startPosKeySet.size() <= 1 ) { return; }
boolean mapWasUpdated = true;
while( mapWasUpdated ) {
mapWasUpdated = false;
// loop over the set of start locations and consider pairs that start near each other
final Iterator<Integer> iter = startPosKeySet.iterator();
int thisStart = iter.next();
while( iter.hasNext() ) {
final int nextStart = iter.next();
if( nextStart - thisStart < MAX_SIZE_TO_COMBINE) {
boolean isBiallelic = true;
VariantContext thisVC = null;
VariantContext nextVC = null;
// log10-space co-occurrence accumulators: x11 = neither event, x12 = next only,
// x21 = this only, x22 = both, summed over samples and haplotypes
double x11 = Double.NEGATIVE_INFINITY;
double x12 = Double.NEGATIVE_INFINITY;
double x21 = Double.NEGATIVE_INFINITY;
double x22 = Double.NEGATIVE_INFINITY;
for( final Haplotype h : haplotypes ) {
// only make complex substitutions out of consecutive biallelic sites
final VariantContext thisHapVC = h.getEventMap().get(thisStart);
if( thisHapVC != null && !thisHapVC.isSymbolic() ) { // something was found at this location on this haplotype
if( thisVC == null ) {
thisVC = thisHapVC;
} else if( !thisHapVC.hasSameAllelesAs( thisVC ) ) {
isBiallelic = false;
break;
}
}
final VariantContext nextHapVC = h.getEventMap().get(nextStart);
if( nextHapVC != null && !nextHapVC.isSymbolic() ) { // something was found at the next location on this haplotype
if( nextVC == null ) {
nextVC = nextHapVC;
} else if( !nextHapVC.hasSameAllelesAs( nextVC ) ) {
isBiallelic = false;
break;
}
}
// count up the co-occurrences of the events for the R^2 calculation
final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
haplotypeList.add(h);
for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
} else {
if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); }
else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); }
}
}
}
if( thisVC == null || nextVC == null ) {
// NOTE(review): this continue skips the "thisStart = nextStart" advance below,
// so the next pair considered is (thisStart, following-start) — confirm intended
continue;
}
if( isBiallelic ) {
final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) );
if( DEBUG ) {
System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
System.out.println("-- " + thisVC);
System.out.println("-- " + nextVC);
}
if( R2 > MERGE_EVENTS_R2_THRESHOLD ) {
final VariantContext mergedVC = createMergedVariantContext(thisVC, nextVC, ref, refLoc);
// remove the old event from the eventMap on every haplotype and the start pos key set, replace with merged event
for( final Haplotype h : haplotypes ) {
final HashMap<Integer, VariantContext> eventMap = h.getEventMap();
if( eventMap.containsKey(thisStart) && eventMap.containsKey(nextStart) ) {
eventMap.remove(thisStart);
eventMap.remove(nextStart);
eventMap.put(mergedVC.getStart(), mergedVC);
}
}
startPosKeySet.add(mergedVC.getStart());
// only retire the original start positions if no haplotype still carries an event there
boolean containsStart = false;
boolean containsNext = false;
for( final Haplotype h : haplotypes ) {
final HashMap<Integer, VariantContext> eventMap = h.getEventMap();
if( eventMap.containsKey(thisStart) ) { containsStart = true; }
if( eventMap.containsKey(nextStart) ) { containsNext = true; }
}
if(!containsStart) { startPosKeySet.remove(thisStart); }
if(!containsNext) { startPosKeySet.remove(nextStart); }
if( DEBUG ) { System.out.println("====> " + mergedVC); }
mapWasUpdated = true;
break; // break out of tree set iteration since it was just updated, start over from the beginning and keep merging events
}
}
}
thisStart = nextStart;
}
}
}
// BUGBUG: make this merge function more general
/**
 * Merge two consecutive biallelic events into a single combined VariantContext.
 *
 * The merged reference allele is built by concatenating thisVC's reference bases,
 * any intervening reference bases between the two events, and nextVC's reference
 * bases; the merged alternate allele is built the same way from the alt alleles
 * (using allele index 0 — callers must ensure both events are biallelic).
 * When the merged ref and alt alleles end up the same length (insertion + deletion
 * of equal size), the shared prefix is trimmed so the result is a clean MNP.
 *
 * @param thisVC  the earlier event (by start position)
 * @param nextVC  the later event
 * @param ref     reference bases spanning the region
 * @param refLoc  genomic location corresponding to ref[0]
 * @return a new VariantContext named "merged" spanning both events
 */
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
final int thisStart = thisVC.getStart();
final int nextStart = nextVC.getStart();
byte[] refBases = new byte[]{};
byte[] altBases = new byte[]{};
// start both merged alleles with the first event's alleles
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
int locus;
// append the reference bases that lie between the end of the first event and the start of the second
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
final byte refByte = ref[locus - refLoc.getStart()];
refBases = ArrayUtils.add(refBases, refByte);
altBases = ArrayUtils.add(altBases, refByte);
}
// if the first event's ref allele already covered nextVC's padding base (locus > nextStart), skip that base
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
int iii = 0;
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
}
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
mergedAlleles.add( Allele.create( ArrayUtils.subarray(refBases, iii, refBases.length), true ) );
mergedAlleles.add( Allele.create( ArrayUtils.subarray(altBases, iii, altBases.length), false ) );
// start is shifted right by the number of trimmed prefix bases
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
}
/**
 * Compute the r^2 linkage-disequilibrium statistic between two biallelic sites
 * from the four haplotype co-occurrence weights.
 *
 * Inputs x11..x22 are the (possibly fractional) counts of haplotypes carrying
 * each combination of alleles at the two sites (site A allele 1/2 crossed with
 * site B allele 1/2). The result is D^2 / (pA(1-pA) * pB(1-pB)) where
 * D = p11 - pA*pB.
 *
 * @return r^2 in [0, 1] for valid inputs (NaN if any marginal frequency is 0 or 1)
 */
protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) {
    final double sum = x11 + x12 + x21 + x22;
    // joint frequency of allele 1 at both sites
    final double p11 = x11 / sum;
    // marginal frequencies of allele 1 at each site
    final double pA = (x11 + x12) / sum;
    final double pB = (x11 + x21) / sum;
    // classic LD coefficient D, squared and normalized by the marginal variances
    final double d = p11 - pA * pB;
    return (d * d) / ( pA * (1.0 - pA) * pB * (1.0 - pB) );
}
@Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"})
@Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
/**
 * Partition the haplotypes into allele buckets for the events at a single locus.
 *
 * The returned list has exactly eventsAtThisLoc.size() + 1 entries: index 0 holds
 * the reference-supporting haplotypes, and index i+1 holds the haplotypes whose
 * event at loc matches eventsAtThisLoc.get(i) (by allele identity).
 *
 * Improvements over the previous version: the per-haplotype event lookup
 * (h.getEventMap().get(loc)) is hoisted into a local, and the event-matching scan
 * breaks out as soon as a match is found instead of continuing through the list.
 *
 * @param loc              the reference coordinate being genotyped
 * @param eventsAtThisLoc  the genotype-able events at loc
 * @param haplotypes       all candidate haplotypes
 * @return per-allele lists of supporting haplotypes (reference list first)
 */
protected static ArrayList<ArrayList<Haplotype>> createAlleleMapper( final int loc, final ArrayList<VariantContext> eventsAtThisLoc, final ArrayList<Haplotype> haplotypes ) {
    final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();

    // Reference bucket: haplotypes with no event at loc, plus haplotypes whose event
    // isn't one of the genotype-able options (e.g. during GGA mode).
    final ArrayList<Haplotype> refList = new ArrayList<Haplotype>();
    for( final Haplotype h : haplotypes ) {
        final VariantContext eventAtLoc = h.getEventMap().get(loc);
        if( eventAtLoc == null ) { // no event at this location so this is a reference-supporting haplotype
            refList.add(h);
        } else {
            boolean foundInEventList = false;
            for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
                if( eventAtLoc.hasSameAllelesAs(vcAtThisLoc) ) {
                    foundInEventList = true;
                    break; // match found; no need to scan the remaining events
                }
            }
            if( !foundInEventList ) { // event at this location isn't one of the genotype-able options (during GGA) so this is a reference-supporting haplotype
                refList.add(h);
            }
        }
    }
    alleleMapper.add(refList);

    // One bucket per alternate event, holding every haplotype carrying that exact event at loc
    for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
        final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
        for( final Haplotype h : haplotypes ) {
            final VariantContext eventAtLoc = h.getEventMap().get(loc);
            if( eventAtLoc != null && eventAtLoc.hasSameAllelesAs(vcAtThisLoc) ) {
                list.add(h);
            }
        }
        alleleMapper.add(list);
    }
    return alleleMapper;
}
@Ensures({"result.size() == haplotypeAllelesForSample.size()"})
/**
 * Translate a sample's called haplotype-space alleles into event-space alleles.
 *
 * Each haplotype allele maps (via its index in haplotypeAlleles) to a Haplotype
 * object; the allele mapper buckets are then scanned to find which event allele
 * that haplotype supports. If the sample's haplotype genotype is a no-call, the
 * shared no-call allele list is returned unchanged.
 *
 * @return one event allele per haplotype allele in the sample's genotype
 */
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final ArrayList<ArrayList<Haplotype>> alleleMapper, final ArrayList<Haplotype> haplotypes ) {
    // an uncalled haplotype genotype translates directly into an uncalled event genotype
    if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }
    final ArrayList<Allele> translated = new ArrayList<Allele>();
    for( final Allele sampleAllele : haplotypeAllelesForSample ) {
        // map the haplotype-space allele back to the haplotype it represents
        final Haplotype hap = haplotypes.get(haplotypeAlleles.indexOf(sampleAllele));
        // find the first allele bucket containing that haplotype and emit its event allele
        int index = 0;
        while( index < alleleMapper.size() ) {
            if( alleleMapper.get(index).contains(hap) ) {
                translated.add(eventAlleles.get(index));
                break;
            }
            index++;
        }
    }
    return translated;
}
/**
 * Test whether any VariantContext in the list shares the same alleles as vcToTest.
 *
 * @param list      contexts to scan
 * @param vcToTest  the context whose alleles are compared
 * @return true iff some element of list has the same alleles as vcToTest
 */
protected static boolean containsVCWithMatchingAlleles( final List<VariantContext> list, final VariantContext vcToTest ) {
    // linear scan; stop as soon as a matching context is seen
    boolean found = false;
    final Iterator<VariantContext> iter = list.iterator();
    while( !found && iter.hasNext() ) {
        found = iter.next().hasSameAllelesAs(vcToTest);
    }
    return found;
}
/**
 * Convenience overload of {@link #generateVCsFromAlignment} that passes a null
 * haplotype, which disables the breakpoint-based symbolic-insertion handling in
 * the full version. Kept for compatibility with the HaplotypeResolver code.
 *
 * @return map from reference start position to the discovered VariantContext,
 *         or null when the alignment start is negative (SW failure)
 */
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code
}
/**
 * Walk the cigar of a haplotype-vs-reference alignment and emit one
 * VariantContext per discovered event, keyed by its reference start position.
 *
 * Insertions (I) and deletions (D) are emitted with a single padding reference
 * base; mismatches inside M elements are emitted as SNPs, with consecutive
 * mismatches separated by up to MNP_LOOK_AHEAD matching bases merged into a
 * single MNP. When a non-null haplotype is supplied and one of its assembly
 * breakpoints coincides with the end of an insertion, a symbolic
 * "unassembled event" allele is emitted instead of the inserted bases.
 *
 * @param haplotype              haplotype carrying assembly breakpoints, or null to disable symbolic alleles
 * @param alignmentStartHapwrtRef offset of the alignment start within ref; negative signals an SW failure
 * @param cigar                  alignment cigar of alignment vs ref
 * @param ref                    reference bases, with ref[0] at refLoc.getStart()
 * @param alignment              aligned (haplotype/read) bases
 * @param refLoc                 genomic location of ref
 * @param sourceNameToAdd        source name stamped on each emitted VariantContext
 * @param MNP_LOOK_AHEAD         max matching bases allowed between mismatches merged into one MNP
 * @return map from reference start position to event, or null on SW failure
 * @throws ReviewedStingException on cigar operators that SW should never produce (N/H/P)
 */
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
final HashMap<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
int refPos = alignmentStartHapwrtRef;
if( refPos < 0 ) { return null; } // Protection against SW failures
int alignmentPos = 0;
for( final CigarElement ce : cigar.getCigarElements() ) {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
// insertion: ref allele is the single padding base before the event
case I:
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
// if an assembly breakpoint sits exactly at the insertion's end, the inserted sequence
// was not fully assembled -- emit the symbolic allele instead of concrete bases
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
} else {
byte[] insertionBases = new byte[]{};
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
insertionAlleles.add( Allele.create(insertionBases, false) );
}
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
alignmentPos += elementLength;
break;
// soft clip: consumes alignment bases only, no event emitted
case S:
alignmentPos += elementLength;
break;
// deletion: ref allele spans the deleted bases plus the padding base before them
case D:
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1;
// BUGBUG: how often does this symbolic deletion allele case happen?
//if( haplotype != null && ( (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength)
//   || (haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength) ) ) {
// deletionAlleles.add( Allele.create(ref[refPos-1], true) );
// deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
//} else {
deletionAlleles.add( Allele.create(deletionBases, true) );
deletionAlleles.add( Allele.create(ref[refPos-1], false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
//}
refPos += elementLength;
break;
// match/mismatch: scan for mismatch runs, merging nearby SNPs into MNPs
case M:
int numSinceMismatch = -1;
int stopOfMismatch = -1;
int startOfMismatch = -1;
int refPosStartOfMismatch = -1;
for( int iii = 0; iii < elementLength; iii++ ) {
// 'N' alignment bases never count as mismatches
if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) {
// SNP or start of possible MNP
if( stopOfMismatch == -1 ) {
startOfMismatch = alignmentPos;
stopOfMismatch = alignmentPos;
numSinceMismatch = 0;
refPosStartOfMismatch = refPos;
} else {
stopOfMismatch = alignmentPos;
}
}
if( stopOfMismatch != -1) {
numSinceMismatch++;
}
// flush the pending mismatch run when the look-ahead window is exceeded
// or the cigar element ends with a run still open
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refBases, true ) );
snpAlleles.add( Allele.create( mismatchBases, false ) );
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
numSinceMismatch = -1;
stopOfMismatch = -1;
startOfMismatch = -1;
refPosStartOfMismatch = -1;
}
refPos++;
alignmentPos++;
}
break;
case N:
case H:
case P:
default:
throw new ReviewedStingException( "Unsupported cigar operator created during SW alignment: " + ce.getOperator() );
}
}
return vcs;
}
}

View File

@ -0,0 +1,567 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;
/**
* Call SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region. Haplotypes are evaluated using an affine gap penalty Pair HMM.
*
* <h2>Input</h2>
* <p>
* Input bam file(s) from which to make calls
* </p>
*
* <h2>Output</h2>
* <p>
* VCF file with raw, unrecalibrated SNP and indel calls.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T HaplotypeCaller
* -R reference/human_g1k_v37.fasta
* -I sample1.bam [-I sample2.bam ...] \
* --dbsnp dbSNP.vcf \
* -stand_call_conf [50.0] \
* -stand_emit_conf 10.0 \
* [-L targets.interval_list]
* -o output.raw.snps.indels.vcf
* </pre>
*
* <h2>Caveats</h2>
* <ul>
* <li>The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.</li>
* </ul>
*
* @author rpoplin
* @since 8/22/11
*/
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.LOCUS)
@ActiveRegionExtension(extension=65, maxRegion=300)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/**
* A raw, unfiltered, highly sensitive callset in VCF format.
*/
@Output(doc="File to which variants should be written", required = true)
protected VariantContextWriter vcfWriter = null;
@Output(fullName="graphOutput", shortName="graph", doc="File to which debug assembly graph information should be written", required = false)
protected PrintStream graphWriter = null;
@Hidden
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
protected String keepRG = null;
@Hidden
@Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false)
protected int MNP_LOOK_AHEAD = 0;
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
protected int MIN_PRUNE_FACTOR = 1;
@Advanced
@Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
@Advanced
@Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
@Advanced
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false)
protected int gcpHMM = 10;
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
@Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
protected boolean USE_ALLELES_TRIGGER = false;
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
* dbSNP is not used in any way for the calculations themselves.
*/
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
// AnnotatorCompatible interface: expose the dbSNP rod binding to the annotation engine
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
/**
* If a call overlaps with a record from the provided comp track, the INFO field will be annotated
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
* Records that are filtered in the comp track will be ignored.
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
*/
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
// AnnotatorCompatible interface: expose the comparison-track rod bindings to the annotation engine
public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
// The following are not used by the Unified Genotyper
// AnnotatorCompatible stubs: this walker has no snpEff or resource tracks and never force-appends dbSNP IDs
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
public boolean alwaysAppendDbsnpId() { return false; }
/**
* Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
*/
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected List<String> annotationsToUse = new ArrayList<String>(Arrays.asList(new String[]{"ClippingRankSumTest"}));
/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments,
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<String>(Arrays.asList(new String[]{"HaplotypeScore", "MappingQualityZero", "SpanningDeletions", "TandemRepeatAnnotator"}));
/**
* Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
*/
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationClassesToUse = { "Standard" };
@ArgumentCollection
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
// the calculation arguments
private UnifiedGenotyperEngine UG_engine = null;
private UnifiedGenotyperEngine UG_engine_simple_genotyper = null;
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false)
protected boolean DEBUG;
// the assembly engine
LocalAssemblyEngine assemblyEngine = null;
// the likelihoods engine
LikelihoodCalculationEngine likelihoodCalculationEngine = null;
// the genotyping engine
GenotypingEngine genotypingEngine = null;
// the annotation engine
private VariantAnnotatorEngine annotationEngine;
// fasta reference reader to supplement the edges of the reference sequence
private IndexedFastaSequenceFile referenceReader;
// reference base padding size
private static final int REFERENCE_PADDING = 900;
// bases with quality less than or equal to this value are trimmed off the tails of the reads
private static final byte MIN_TAIL_QUALITY = 20;
private ArrayList<String> samplesList = new ArrayList<String>();
private final static double LOG_ONE_HALF = -Math.log10(2.0);
private final static double LOG_ONE_THIRD = -Math.log10(3.0);
private final ArrayList<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
//---------------------------------------------------------------------------------------------------------------
//
// initialize
//
//---------------------------------------------------------------------------------------------------------------
/**
 * One-time walker setup: collects sample names, builds the two UnifiedGenotyper
 * engines (full-confidence calling vs. a lowered-confidence engine used only by
 * isActive), writes the VCF header, opens the reference reader, and constructs
 * the assembly, likelihood, and genotyping engines.
 */
public void initialize() {
super.initialize();
// get all of the unique sample names
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
// note: the full-confidence engine gets a clone of UAC *before* the mutations below
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING);
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
// initialize the output VCF header
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// all annotation fields from VariantAnnotatorEngine
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
// all callers need to add these standard annotation header lines
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true,
VCFConstants.DOWNSAMPLED_KEY,
VCFConstants.MLE_ALLELE_COUNT_KEY,
VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
// all callers need to add these standard FORMAT field header lines
VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
VCFConstants.GENOTYPE_KEY,
VCFConstants.GENOTYPE_QUALITY_KEY,
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_PL_KEY);
// header lines for the experimental HaplotypeCaller-specific annotations
headerInfo.add(new VCFInfoHeaderLine("NVH", 1, VCFHeaderLineType.Integer, "Number of variants found on the haplotype that contained this variant"));
headerInfo.add(new VCFInfoHeaderLine("NumHapEval", 1, VCFHeaderLineType.Integer, "Number of haplotypes that were chosen for evaluation in this active region"));
headerInfo.add(new VCFInfoHeaderLine("NumHapAssembly", 1, VCFHeaderLineType.Integer, "Number of haplotypes created during the assembly of this active region"));
headerInfo.add(new VCFInfoHeaderLine("ActiveRegionSize", 1, VCFHeaderLineType.Integer, "Number of base pairs that comprise this active region"));
headerInfo.add(new VCFInfoHeaderLine("EVENTLENGTH", 1, VCFHeaderLineType.Integer, "Max length of all the alternate alleles"));
headerInfo.add(new VCFInfoHeaderLine("TYPE", 1, VCFHeaderLineType.String, "Type of event: SNP or INDEL"));
headerInfo.add(new VCFInfoHeaderLine("extType", 1, VCFHeaderLineType.String, "Extended type of event: SNP, MNP, INDEL, or COMPLEX"));
headerInfo.add(new VCFInfoHeaderLine("QDE", 1, VCFHeaderLineType.Float, "QD value divided by the number of variants found on the haplotype that contained this variant"));
vcfWriter.writeHeader(new VCFHeader(headerInfo, samples));
try {
// fasta reference reader to supplement the edges of the reference sequence
referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
} catch( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
}
// construct the three worker engines used in map()
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false );
genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
}
//---------------------------------------------------------------------------------------------------------------
//
// isActive
//
//---------------------------------------------------------------------------------------------------------------
// enable deletions in the pileup
// Override: deletion-spanning reads must be visible so isActive can score them as non-ref evidence
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
// enable non primary reads in the active region
// Override: non-primary alignments are kept so the assembly sees all available reads
@Override
public boolean wantsNonPrimaryReads() { return true; }
@Override
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
/**
 * Decide whether this locus should seed an active region.
 *
 * In GGA mode any overlapping given allele forces probability 1.0 (and the allele
 * is stashed for later use by map()). Otherwise a fast per-sample ref-vs-any-event
 * genotype likelihood is computed from the pileup and fed to the lowered-confidence
 * UG engine; the resulting variant QUAL is converted to the activity probability.
 * High-quality soft-clip evidence is averaged and reported alongside the result.
 */
public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
// GGA mode: stash overlapping given alleles and trigger unconditionally when any are present
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
if( !allelesToGenotype.contains(vc) ) {
allelesToGenotype.add(vc); // save for later for processing during the ActiveRegion's map call. Should be folded into a ReadMetaDataTracker object
}
}
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
return new ActivityProfileResult(1.0);
}
}
// optional trigger mode: active iff an external allele overlaps this locus
if( USE_ALLELES_TRIGGER ) {
return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
}
if( context == null ) { return new ActivityProfileResult(0.0); }
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
noCall.add(Allele.NO_CALL);
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
for( final Map.Entry<String, AlignmentContext> sample : splitContexts.entrySet() ) {
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
Arrays.fill(genotypeLikelihoods, 0.0);
// only pileup elements that are deletions or pass the base-quality cutoff contribute
for( final PileupElement p : sample.getValue().getBasePileup() ) {
final byte qual = p.getQual();
if( p.isDeletion() || qual > (byte) 18) {
int AA = 0; final int AB = 1; int BB = 2;
// any indication of a non-reference event swaps the hom-ref / hom-var indices
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
AA = 2;
BB = 0;
if( p.isNextToSoftClip() ) {
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
}
}
genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
}
}
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
}
// run the exact model over fake ref/alt alleles and convert the QUAL into an activity probability
final ArrayList<Allele> alleles = new ArrayList<Allele>();
alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
}
//---------------------------------------------------------------------------------------------------------------
//
// map
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Processes one active region: assembles candidate haplotypes from the reads,
 * evaluates each sample's reads against those haplotypes, genotypes the
 * resulting events, annotates the calls, and writes them to the VCF writer.
 *
 * @param activeRegion    the active region to process
 * @param metaDataTracker reference metadata tracker for the region (not read here)
 * @return 1 if the region was processed (even when only the reference haplotype
 *         survives), 0 if the region was skipped (inactive, empty, or no GGA alleles)
 */
@Override
public Integer map( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker ) {
// In GGA mode, pull out the supplied alleles that overlap this region; they drive assembly and genotyping below
final ArrayList<VariantContext> activeAllelesToGenotype = new ArrayList<VariantContext>();
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : allelesToGenotype ) {
if( activeRegion.getLocation().overlapsP( getToolkit().getGenomeLocParser().createGenomeLoc(vc) ) ) {
activeAllelesToGenotype.add(vc); // do something with these VCs during GGA mode
}
}
allelesToGenotype.removeAll( activeAllelesToGenotype ); // consumed here; these alleles are never revisited by later regions
}
if( !activeRegion.isActive ) { return 0; } // Not active so nothing to do!
if( activeRegion.size() == 0 && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { return 0; } // No reads here so nothing to do!
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && activeAllelesToGenotype.isEmpty() ) { return 0; } // No alleles found in this region so nothing to do!
finalizeActiveRegion( activeRegion ); // merge overlapping fragments, clip adapter and low qual tails
// Build the reference haplotype and run local assembly to discover candidate haplotypes
final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader)); // Create the reference haplotype which is the bases from the reference that make up the active region
referenceHaplotype.setIsReference(true);
final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING);
//int PRUNE_FACTOR = Math.max(MIN_PRUNE_FACTOR, determinePruneFactorFromCoverage( activeRegion ));
final ArrayList<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, getPaddedLoc(activeRegion), MIN_PRUNE_FACTOR, activeAllelesToGenotype );
if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do!
activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region
final List<GATKSAMRecord> filteredReads = filterNonPassingReads( activeRegion ); // filter out reads from genotyping which fail mapping quality based criteria
if( activeRegion.size() == 0 ) { return 1; } // no reads remain after filtering so nothing else to do!
// evaluate each sample's reads against all haplotypes
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList = splitReadsBySample( activeRegion.getReads() );
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList );
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
// Genotype the surviving haplotypes and emit one annotated VCF record per call
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() )
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
// add some custom annotations to the calls
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
// Calculate the number of variants on the haplotype
int maxNumVar = 0;
for( final Allele allele : callResult.getFirst().getAlleles() ) {
if( !allele.isReference() ) {
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
final int numVar = haplotype.getEventMap().size();
if( numVar > maxNumVar ) { maxNumVar = numVar; }
}
}
}
// Calculate the event length
int maxLength = 0;
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
// positive = insertion, negative = deletion; keep the one with the largest magnitude
final int length = a.length() - annotatedCall.getReference().length();
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
}
myAttributes.put("NVH", maxNumVar);
myAttributes.put("NumHapEval", bestHaplotypes.size());
myAttributes.put("NumHapAssembly", haplotypes.size());
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
myAttributes.put("EVENTLENGTH", maxLength);
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
myAttributes.put("extType", annotatedCall.getType().toString() );
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
//}
// QDE = quality-by-depth normalized by the number of variants on the haplotype
if( annotatedCall.hasAttribute("QD") ) {
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
}
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
}
if( DEBUG ) { System.out.println("----------------------------------------------------------------------------------"); }
return 1; // One active region was processed during this map call
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Supplies the initial value for the reduce step: no active regions have been
 * processed when the traversal begins.
 *
 * @return zero, the starting count of processed active regions
 */
@Override
public Integer reduceInit() {
    final int initialRegionCount = 0;
    return initialRegionCount;
}
/**
 * Folds the count produced by the latest map call into the running total.
 *
 * @param cur the number of active regions processed by the latest map call
 * @param sum the accumulated count so far
 * @return the updated total number of processed active regions
 */
@Override
public Integer reduce(Integer cur, Integer sum) {
    final int updatedTotal = sum + cur;
    return updatedTotal;
}
/**
 * Called once after the traversal completes; logs how many active regions
 * were run through local assembly.
 *
 * @param result the total number of active regions processed (the final reduce value)
 */
@Override
public void onTraversalDone(Integer result) {
logger.info("Ran local assembly on " + result + " active regions");
}
//---------------------------------------------------------------------------------------------------------------
//
// private helper functions
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Prepares the reads in an active region for assembly: merges overlapping
 * read pairs into single longer fragments, then hard clips adaptor sequence
 * and low quality tails, keeping only non-empty clipped reads subject to a
 * per-sample downsampling cap. The region's read list is rebuilt in place.
 *
 * @param activeRegion the region whose reads are finalized
 */
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
activeRegion.clearReads();
// Join overlapping paired reads to create a single longer read
finalizedReadList.addAll( fragmentCollection.getSingletonReads() );
for( final List<GATKSAMRecord> overlappingPair : fragmentCollection.getOverlappingPairs() ) {
finalizedReadList.addAll( FragmentUtils.mergeOverlappingPairedFragments(overlappingPair) );
}
// shuffle so the downsampling cap below drops a random subset of reads rather than a positional one
Collections.shuffle(finalizedReadList, GenomeAnalysisEngine.getRandomGenerator());
// Loop through the reads hard clipping the adaptor and low quality tails
for( final GATKSAMRecord myRead : finalizedReadList ) {
// unmapped reads cannot carry adaptor coordinates, so they skip adaptor clipping
final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) );
if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
// protect against INTERVALS with abnormally high coverage
if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
activeRegion.add(clippedRead);
}
}
}
}
/**
 * Removes reads failing mapping-quality-based criteria from the active region.
 * A read fails when it is shorter than 24 bases, has mapping quality below 20,
 * has a bad mate, or (when a read-group filter is set) belongs to a read group
 * other than keepRG. The region is modified in place.
 *
 * @param activeRegion the region whose reads are filtered
 * @return the reads that were removed from the region
 */
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
    final ArrayList<GATKSAMRecord> failingReads = new ArrayList<GATKSAMRecord>();
    for( final GATKSAMRecord read : activeRegion.getReads() ) {
        final boolean fails = read.getReadLength() < 24
                || read.getMappingQuality() < 20
                || BadMateFilter.hasBadMate(read)
                || (keepRG != null && !read.getReadGroup().getId().equals(keepRG));
        if( fails ) {
            failingReads.add(read);
        }
    }
    activeRegion.removeAll( failingReads );
    return failingReads;
}
/**
 * Computes the reference span of the active region expanded by REFERENCE_PADDING
 * bases on each side, clamped to the boundaries of the contig.
 *
 * @param activeRegion the region whose padded location is requested
 * @return a GenomeLoc covering the padded region
 */
private GenomeLoc getPaddedLoc( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
    final GenomeLoc referenceLoc = activeRegion.getReferenceLoc();
    final String contig = referenceLoc.getContig();
    final int contigLength = referenceReader.getSequenceDictionary().getSequence(contig).getSequenceLength();
    // clamp to [1, contigLength] so the padding never runs off either end of the contig
    final int paddedStart = Math.max(referenceLoc.getStart() - REFERENCE_PADDING, 1);
    final int paddedStop = Math.min(referenceLoc.getStop() + REFERENCE_PADDING, contigLength);
    return getToolkit().getGenomeLocParser().createGenomeLoc(contig, paddedStart, paddedStop);
}
/**
 * Partitions reads by the sample they belong to, guaranteeing an entry
 * (possibly an empty list) for every sample known to the walker.
 *
 * @param reads the reads to partition
 * @return a map from sample name to that sample's reads
 */
private HashMap<String, ArrayList<GATKSAMRecord>> splitReadsBySample( final List<GATKSAMRecord> reads ) {
    final HashMap<String, ArrayList<GATKSAMRecord>> readsBySample = new HashMap<String, ArrayList<GATKSAMRecord>>();
    // seed the map so samples with no reads still map to an empty list
    for( final String sample : samplesList) {
        if( !readsBySample.containsKey(sample) ) {
            readsBySample.put(sample, new ArrayList<GATKSAMRecord>());
        }
    }
    for( final GATKSAMRecord read : reads ) {
        readsBySample.get(read.getReadGroup().getSample()).add(read);
    }
    return readsBySample;
}
/*
private int determinePruneFactorFromCoverage( final ActiveRegion activeRegion ) {
final ArrayList<Integer> readLengthDistribution = new ArrayList<Integer>();
for( final GATKSAMRecord read : activeRegion.getReads() ) {
readLengthDistribution.add(read.getReadLength());
}
final double meanReadLength = MathUtils.average(readLengthDistribution);
final double meanCoveragePerSample = (double) activeRegion.getReads().size() / ((double) activeRegion.getExtendedLoc().size() / meanReadLength) / (double) samplesList.size();
int PRUNE_FACTOR = 0;
if( meanCoveragePerSample > 8.5 ) {
PRUNE_FACTOR = (int) Math.floor( Math.sqrt( meanCoveragePerSample - 5.0 ) );
} else if( meanCoveragePerSample > 3.0 ) {
PRUNE_FACTOR = 1;
}
if( DEBUG ) { System.out.println(String.format("Mean coverage per sample = %.1f --> prune factor = %d", meanCoveragePerSample, PRUNE_FACTOR)); }
return PRUNE_FACTOR;
}
*/
}

View File

@ -0,0 +1,444 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.util.*;
/**
* Haplotype-based resolution of variants in 2 different eval files.
*
* <p>
* HaplotypeResolver is a tool that takes 2 VCF files and constructs haplotypes based on the variants inside them.
* From that, it can resolve potential differences in variant calls that are inherently the same (or similar) variants.
* Records are annotated with the set and status attributes.
*
* <h2>Input</h2>
* <p>
* 2 variant files to resolve.
* </p>
*
* <h2>Output</h2>
* <p>
* A single consensus VCF.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx1g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T HaplotypeResolver \
* -V:v1 input1.vcf \
* -V:v2 input2.vcf \
* -o output.vcf
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
public class HaplotypeResolver extends RodWalker<Integer, Integer> {

// value written to the "set" INFO field when both inputs agree on a record
protected static final String INTERSECTION_SET = "intersection";

// values written to the "status" INFO field describing how two records relate
protected static final String SAME_STATUS = "same";
protected static final String SOME_ALLELES_MATCH_STATUS = "someAllelesMatch";
protected static final String SAME_START_DIFFERENT_ALLELES_STATUS = "sameStartDifferentAlleles";
protected static final String SAME_BY_HAPLOTYPE_STATUS = "sameByHaplotype";
protected static final String ONE_ALLELE_SUBSET_OF_OTHER_STATUS = "OneAlleleSubsetOfOther";
protected static final String OVERLAPPING_EVENTS_STATUS = "overlappingEvents";

// records further apart than this are never accumulated into the same queue batch
protected final static int MAX_DISTANCE_BETWEEN_MERGED_RECORDS = 50;
// maximum span (bp) of a queued batch before it is purged and resolved
protected final static int MAX_HAPLOTYPE_TO_CONSIDER = 1000;
// maximum size (bp) of a single variant considered for haplotype resolution
protected final static int MAX_VARIANT_SIZE_TO_CONSIDER = 100;
// reference window size used by the @Reference annotation on this class
protected final static int ACTIVE_WINDOW = MAX_HAPLOTYPE_TO_CONSIDER + MAX_VARIANT_SIZE_TO_CONSIDER;

@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public List<RodBinding<VariantContext>> variants;

@Output(doc="File to which variants should be written", required=true)
protected VariantContextWriter baseWriter = null;

// sort-on-the-fly wrapper around baseWriter; records can be emitted out of order within ACTIVE_WINDOW
private VariantContextWriter writer;

/**
 * Set to 'null' if you don't want the set field emitted.
 */
@Argument(fullName="setKey", shortName="setKey", doc="Key used in the INFO key=value tag emitted describing which set the combined VCF record came from", required=false)
protected String SET_KEY = "set";

/**
 * Set to 'null' if you don't want the status field emitted.
 */
@Argument(fullName="statusKey", shortName="statusKey", doc="Key used in the INFO key=value tag emitted describing the extent to which records match", required=false)
protected String STATUS_KEY = "status";

// nearby variant contexts accumulated here until they can be resolved as one batch
private final LinkedList<VCcontext> queue = new LinkedList<VCcontext>();

// names of the two input rods, taken from the command-line bindings in initialize()
private String source1, source2;

// working lists holding a purged batch split by source; cleared after each purge
private final List<VariantContext> sourceVCs1 = new ArrayList<VariantContext>();
private final List<VariantContext> sourceVCs2 = new ArrayList<VariantContext>();

// bundles the variant contexts seen at one locus with their location and reference context
private class VCcontext {

public final Collection<VariantContext> vcs;
public final GenomeLoc loc;
public final ReferenceContext ref;

// the location is taken from the first VC in the collection; all VCs here share a locus
public VCcontext(final Collection<VariantContext> vcs, final ReferenceContext ref) {
this.vcs = vcs;
this.loc = getToolkit().getGenomeLocParser().createGenomeLoc(vcs.iterator().next());
this.ref = ref;
}
}

/**
 * Validates that exactly 2 inputs were given, records their names, builds the
 * output VCF header, and wraps the writer so that out-of-order records within
 * the active window are sorted on the fly.
 */
public void initialize() {
if ( variants.size() != 2 ) {
throw new UserException.BadArgumentValue("variant", "this tool requires exactly 2 input variant files");
}
source1 = variants.get(0).getName();
source2 = variants.get(1).getName();
// the literal string "null" (any case) disables the corresponding INFO annotation
if ( SET_KEY.toLowerCase().equals("null") )
SET_KEY = null;
if ( STATUS_KEY.toLowerCase().equals("null") )
STATUS_KEY = null;
// for now, INFO and FORMAT fields are not propagated to the output VCF (so they aren't put into the header)
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
if ( SET_KEY != null )
headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record"));
if ( STATUS_KEY != null )
headerLines.add(new VCFInfoHeaderLine(STATUS_KEY, 1, VCFHeaderLineType.String, "Extent to which records match"));
final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.<String>emptySet());
baseWriter.writeHeader(vcfHeader);
writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, ACTIVE_WINDOW);
}

/**
 * Queues the variant contexts at each locus; when the incoming record is too
 * far from the queued batch (different contig, gap > MAX_DISTANCE_BETWEEN_MERGED_RECORDS,
 * or batch span > MAX_HAPLOTYPE_TO_CONSIDER) the batch is purged first.
 *
 * @return always 0; output is produced by purgeQueue(), not by the reduce count
 */
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return 0;
final Collection<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
if ( VCs.size() == 0 )
return 0;
// genotypes are dropped; this tool resolves sites only
final VCcontext vc = new VCcontext(VariantContextUtils.sitesOnlyVariantContexts(VCs), ref);
// TODO -- what should we do about filtered records?
if ( !queue.isEmpty() ) {
final VCcontext previous = queue.getLast();
if ( !previous.loc.onSameContig(vc.loc) ||
previous.loc.distance(vc.loc) > MAX_DISTANCE_BETWEEN_MERGED_RECORDS ||
queue.getFirst().loc.distance(vc.loc) > MAX_HAPLOTYPE_TO_CONSIDER ) {
purgeQueue();
}
}
queue.addLast(vc);
return 0;
}

public Integer reduceInit() { return 0; }

public Integer reduce(Integer value, Integer sum) {
return sum + value;
}

/** Flushes any remaining queued records and closes the (sorting) writer. */
public void onTraversalDone(Integer result) {
if ( !queue.isEmpty() )
purgeQueue();
writer.close();
}

/**
 * Drains the queue: splits the batch by source, writes out the trivially equal
 * records, and hands any remaining disagreement to haplotype-based resolution.
 * The reference context of the first queued record anchors the haplotypes.
 */
private void purgeQueue() {
final ReferenceContext refContext = queue.getFirst().ref;
// divide them up by source
while ( !queue.isEmpty() ) {
VCcontext context = queue.removeFirst();
for ( final VariantContext vc: context.vcs ) {
if ( vc.getSource().equals(source1) )
sourceVCs1.add(vc);
else
sourceVCs2.add(vc);
}
}
writeAndPurgeAllEqualVariants(sourceVCs1, sourceVCs2, SAME_STATUS);
if ( sourceVCs1.isEmpty() ) {
writeAll(sourceVCs2, source2, null);
} else if ( sourceVCs2.isEmpty() ) {
writeAll(sourceVCs1, source1, null);
} else {
resolveByHaplotype(refContext);
}
// allow for GC of the data
sourceVCs1.clear();
sourceVCs2.clear();
}

/** Writes every record in the list with the given set/status annotations. */
private void writeAll(final List<VariantContext> sourceVCs, final String set, final String status) {
for ( final VariantContext vc : sourceVCs ) {
writeOne(vc, set, status);
}
}

/**
 * Writes a single record, attaching the set/status INFO annotations when both
 * the corresponding key is enabled and a value was supplied.
 */
private void writeOne(final VariantContext vc, final String set, final String status) {
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
if ( SET_KEY != null && set != null )
attrs.put(SET_KEY, set);
if ( STATUS_KEY != null && status != null )
attrs.put(STATUS_KEY, status);
writer.add(new VariantContextBuilder(vc).attributes(attrs).make());
}

/**
 * Walks the two position-sorted lists with two pointers; records at the same
 * locus (or same start when either is multi-allelic) are compared allele-wise,
 * and matching pairs are written once and removed from both lists in place.
 */
private void writeAndPurgeAllEqualVariants(final List<VariantContext> sourceVCs1, final List<VariantContext> sourceVCs2, final String status) {
int currentIndex1 = 0, currentIndex2 = 0;
int size1 = sourceVCs1.size(), size2 = sourceVCs2.size();
VariantContext current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null);
VariantContext current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2): null);
while ( current1 != null && current2 != null ) {
final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);
if ( loc1.equals(loc2) ||
(loc1.getStart() == loc2.getStart() && (current1.getAlternateAlleles().size() > 1 || current2.getAlternateAlleles().size() > 1)) ) {
// test the alleles
if ( determineAndWriteOverlap(current1, current2, status) ) {
sourceVCs1.remove(currentIndex1);
sourceVCs2.remove(currentIndex2);
size1--;
size2--;
} else {
currentIndex1++;
currentIndex2++;
}
current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null);
current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2): null);
} else if ( loc1.isBefore(loc2) ) {
currentIndex1++;
current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null);
} else {
currentIndex2++;
current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2): null);
}
}
}

/**
 * Compares the alternate alleles of two co-located records, writes the
 * appropriate consensus record/status, and reports whether any overlap was
 * found (true means both records were consumed by the caller).
 */
private boolean determineAndWriteOverlap(final VariantContext vc1, final VariantContext vc2, final String status) {
final int allelesFrom1In2 = findOverlap(vc1, vc2);
final int allelesFrom2In1 = findOverlap(vc2, vc1);
final int totalAllelesIn1 = vc1.getAlternateAlleles().size();
final int totalAllelesIn2 = vc2.getAlternateAlleles().size();
final boolean allAllelesFrom1Overlap = allelesFrom1In2 == totalAllelesIn1;
final boolean allAllelesFrom2Overlap = allelesFrom2In1 == totalAllelesIn2;
boolean thereIsOverlap = true;
if ( allAllelesFrom1Overlap && allAllelesFrom2Overlap ) {
writeOne(vc1, INTERSECTION_SET, status);
} else if ( allAllelesFrom1Overlap ) {
// record 2 carries every allele of record 1 plus more, so emit the superset record
writeOne(vc2, INTERSECTION_SET, source1 + "IsSubsetOf" + source2);
} else if ( allAllelesFrom2Overlap ) {
writeOne(vc1, INTERSECTION_SET, source2 + "IsSubsetOf" + source1);
} else if ( allelesFrom1In2 > 0 ) {
writeOne(vc1, INTERSECTION_SET, SOME_ALLELES_MATCH_STATUS);
} else if ( totalAllelesIn1 > 1 || totalAllelesIn2 > 1 ) { // we don't handle multi-allelics in the haplotype-based reconstruction
writeOne(vc1, INTERSECTION_SET, SAME_START_DIFFERENT_ALLELES_STATUS);
} else {
thereIsOverlap = false;
}
return thereIsOverlap;
}

/** Counts how many of target's alternate alleles also appear in comparison. */
private static int findOverlap(final VariantContext target, final VariantContext comparison) {
int overlap = 0;
for ( final Allele allele : target.getAlternateAlleles() ) {
if ( comparison.hasAlternateAllele(allele) )
overlap++;
}
return overlap;
}

// Smith-Waterman scoring parameters used to realign each source haplotype against the reference
private static final double SW_MATCH = 4.0;
private static final double SW_MISMATCH = -10.0;
private static final double SW_GAP = -25.0;
private static final double SW_GAP_EXTEND = -1.3;

/**
 * Builds one haplotype per source from its remaining variants, realigns each
 * haplotype to the reference with Smith-Waterman, re-derives the variants from
 * the alignments, and writes the matches and differences between the two sets.
 * Aborts quietly (debug log only) when either alignment looks unreliable.
 */
private void resolveByHaplotype(final ReferenceContext refContext) {
final byte[] source1Haplotype = generateHaplotype(sourceVCs1, refContext);
final byte[] source2Haplotype = generateHaplotype(sourceVCs2, refContext);
final SWPairwiseAlignment swConsensus1 = new SWPairwiseAlignment( refContext.getBases(), source1Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( refContext.getBases(), source2Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
// protect against SW failures: soft clips or a very short reference span signal a bad alignment
if( swConsensus1.getCigar().toString().contains("S") || swConsensus1.getCigar().getReferenceLength() < 20 ||
swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() < 20 ) {
// TODO -- handle errors appropriately
logger.debug("Bad SW alignment; aborting at " + refContext.getLocus());
return;
}
// order results by start position
final TreeMap<Integer, VariantContext> source1Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0));
final TreeMap<Integer, VariantContext> source2Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0));
if ( source1Map.size() == 0 || source2Map.size() == 0 ) {
// TODO -- handle errors appropriately
logger.debug("No source alleles; aborting at " + refContext.getLocus());
return;
}
// create lists and test for equality
final List<VariantContext> source1Alleles = new ArrayList<VariantContext>(source1Map.values());
final List<VariantContext> source2Alleles = new ArrayList<VariantContext>(source2Map.values());
writeAndPurgeAllEqualVariants(source1Alleles, source2Alleles, SAME_BY_HAPLOTYPE_STATUS);
if ( source1Alleles.isEmpty() ) {
writeAll(source2Alleles, source2, null);
} else if ( source2Alleles.isEmpty() ) {
writeAll(source1Alleles, source1, null);
} else {
writeDifferences(source1Alleles, source2Alleles);
}
}

/**
 * Splices the first alternate allele of each variant into the reference bases
 * of the current window, producing the haplotype sequence implied by the list.
 * Assumes the variants are position-sorted and non-overlapping within the
 * window — TODO confirm; overlapping records would corrupt the haplotype.
 */
private byte[] generateHaplotype(final List<VariantContext> sourceVCs, final ReferenceContext refContext) {
final StringBuilder sb = new StringBuilder();
final int startPos = refContext.getWindow().getStart();
int currentPos = startPos;
final byte[] reference = refContext.getBases();
for ( final VariantContext vc : sourceVCs ) {
// add any missing reference context
int vcStart = vc.getStart();
final int refAlleleLength = vc.getReference().length();
if ( refAlleleLength == vc.getEnd() - vc.getStart() ) // this is a deletion (whereas for other events the padding base isn't part of the position)
vcStart++;
while ( currentPos < vcStart )
sb.append((char)reference[currentPos++ - startPos]);
// add the alt allele
sb.append(vc.getAlternateAllele(0).getBaseString());
// skip the reference allele
currentPos += refAlleleLength;
}
// add any missing reference context
final int stopPos = refContext.getWindow().getStop();
while ( currentPos < stopPos )
sb.append((char)reference[currentPos++ - startPos]);
return sb.toString().getBytes();
}

/**
 * Walks the two remaining (unequal) allele lists with two pointers and writes
 * every record: co-starting or overlapping pairs get an intersection record
 * with a descriptive status, unmatched records are written under their source.
 */
private void writeDifferences(final List<VariantContext> source1Alleles, final List<VariantContext> source2Alleles) {
int currentIndex1 = 0, currentIndex2 = 0;
final int size1 = source1Alleles.size(), size2 = source2Alleles.size();
VariantContext current1 = source1Alleles.get(0);
VariantContext current2 = source2Alleles.get(0);
while ( currentIndex1 < size1 || currentIndex2 < size2 ) {
if ( current1 == null ) {
writeOne(current2, source2, null);
currentIndex2++;
current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null);
} else if ( current2 == null ) {
writeOne(current1, source1, null);
currentIndex1++;
current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1): null);
} else {
final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);
if ( loc1.getStart() == loc2.getStart() || loc1.overlapsP(loc2) ) {
String status;
if ( loc1.getStart() == loc2.getStart() ) {
final String allele1 = current1.getAlternateAllele(0).getBaseString();
final String allele2 = current2.getAlternateAllele(0).getBaseString();
if ( allele1.indexOf(allele2) != -1 || allele2.indexOf(allele1) != -1 )
status = ONE_ALLELE_SUBSET_OF_OTHER_STATUS;
else
status = SAME_START_DIFFERENT_ALLELES_STATUS;
} else {
status = OVERLAPPING_EVENTS_STATUS;
}
writeOne(current1, INTERSECTION_SET, status);
currentIndex1++;
currentIndex2++;
current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1): null);
current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null);
} else if ( loc1.isBefore(loc2) ) {
writeOne(current1, source1, null);
currentIndex1++;
current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1): null);
} else {
writeOne(current2, source2, null);
currentIndex2++;
current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null);
}
}
}
}
}

View File

@ -0,0 +1,149 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: ebanks
* Date: Mar 23, 2011
*/
/**
 * Finds the K best paths (ranked by the sum of edge multiplicities) in a de Bruijn graph.
 * This is different from most graph traversals because candidate paths may run from any
 * source node (in-degree 0) to any sink node (a vertex with no unvisited outgoing edges).
 */
public class KBestPaths {

    // static access only
    protected KBestPaths() { }

    // upper bound on the number of candidate paths held in memory during the search
    private static final int MAX_PATHS_TO_HOLD = 100;

    // number of low-scoring paths evicted when the holding list fills up; must be << MAX_PATHS_TO_HOLD
    private static final int PATHS_TO_EVICT_WHEN_FULL = 20;

    // cap on the number of edges expanded during the DFS, bounding runtime on dense graphs
    private static final int MAX_EDGES_TO_EXPAND = 10000;

    /** Simple mutable integer used to count DFS expansions across recursive calls. */
    protected static class MyInt { public int val = 0; }

    /**
     * A path through the graph: the ordered list of edges walked so far, the vertex the
     * path currently ends at, and two scores (total and minimum edge multiplicity).
     */
    protected static class Path {

        // the last vertex seen in the path
        private DeBruijnVertex lastVertex;

        // the list of edges comprising the path
        private ArrayList<DeBruijnEdge> edges;

        // sum of edge multiplicities, and the smallest single-edge multiplicity (-1 until an edge is added)
        private int totalScore = 0, lowestEdge = -1;

        /** Starts a new, empty path anchored at the given vertex. */
        public Path( final DeBruijnVertex initialVertex ) {
            lastVertex = initialVertex;
            edges = new ArrayList<DeBruijnEdge>(0);
        }

        /** Extends path p by one edge, updating the total and lowest-edge scores. */
        public Path( final Path p, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
            lastVertex = graph.getEdgeTarget(edge);
            edges = new ArrayList<DeBruijnEdge>(p.edges);
            edges.add(edge);
            totalScore = p.totalScore + edge.getMultiplicity();
            lowestEdge = ( p.lowestEdge == -1 ) ? edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity());
        }

        /**
         * Returns true if this path already contains the given edge, or already visits the
         * edge's target vertex — the vertex check is what prevents cycles during the DFS.
         */
        public boolean containsEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
            final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge);
            for( final DeBruijnEdge e : edges ) {
                if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) {
                    return true;
                }
            }
            return false;
        }

        public ArrayList<DeBruijnEdge> getEdges() { return edges; }

        public int getScore() { return totalScore; }

        public int getLowestEdge() { return lowestEdge; }

        public DeBruijnVertex getLastVertexInPath() { return lastVertex; }

        /**
         * Reconstructs the base sequence spelled by this path: the first vertex's full
         * sequence followed by each subsequent vertex's suffix bases.
         */
        public byte[] getBases( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
            if( edges.size() == 0 ) { return lastVertex.getSequence(); }
            byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence();
            for( final DeBruijnEdge e : edges ) {
                bases = ArrayUtils.addAll(bases, graph.getEdgeTarget( e ).getSuffix());
            }
            return bases;
        }
    }

    /**
     * Orders paths by ascending total score. Uses an explicit three-way comparison rather
     * than subtraction, so extreme score values cannot overflow int and violate the
     * Comparator contract.
     */
    protected static class PathComparatorTotalScore implements Comparator<Path> {
        public int compare(final Path path1, final Path path2) {
            // overflow-safe equivalent of: path1.totalScore - path2.totalScore
            return path1.totalScore < path2.totalScore ? -1 : (path1.totalScore == path2.totalScore ? 0 : 1);
        }
    }

    //protected static class PathComparatorLowestEdge implements Comparator<Path> {
    //    public int compare(final Path path1, final Path path2) {
    //        return path2.lowestEdge - path1.lowestEdge;
    //    }
    //}

    /**
     * Returns up to k of the best-scoring paths in the graph, ordered best first.
     *
     * @param graph the de Bruijn graph to search
     * @param k     maximum number of paths to return; must not exceed MAX_PATHS_TO_HOLD / 2
     * @return at most k highest-scoring paths found by the bounded DFS
     * @throws ReviewedStingException if k exceeds MAX_PATHS_TO_HOLD / 2
     */
    public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
        if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }

        final ArrayList<Path> bestPaths = new ArrayList<Path>();

        // run a DFS for best paths, rooted at every source vertex (in-degree 0)
        for( final DeBruijnVertex v : graph.vertexSet() ) {
            if( graph.inDegreeOf(v) == 0 ) {
                findBestPaths(graph, new Path(v), bestPaths);
            }
        }

        Collections.sort(bestPaths, new PathComparatorTotalScore() );
        Collections.reverse(bestPaths);
        return bestPaths.subList(0, Math.min(k, bestPaths.size()));
    }

    /** Entry point for the recursive DFS, seeding the shared expansion counter. */
    private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths ) {
        findBestPaths(graph, path, bestPaths, new MyInt());
    }

    /**
     * Recursive DFS that extends path along unvisited outgoing edges (highest multiplicity
     * first), records completed paths in bestPaths, and stops expanding once the shared
     * edge budget (MAX_EDGES_TO_EXPAND) is exhausted.
     */
    private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths, MyInt n ) {
        // did we hit the end of a path?
        if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
            if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
                // evict the lowest-scoring paths in one bulk operation
                // (subList(...).clear() is O(n), unlike repeated remove(0) which is O(n^2))
                Collections.sort(bestPaths, new PathComparatorTotalScore() );
                bestPaths.subList(0, PATHS_TO_EVICT_WHEN_FULL).clear();
            }
            bestPaths.add(path);
        } else if( n.val > MAX_EDGES_TO_EXPAND) {
            // search budget exhausted; stop expanding this branch
        } else {
            // recursively run DFS, trying the highest-multiplicity edges first
            final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>();
            edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex));
            Collections.sort(edgeArrayList);
            Collections.reverse(edgeArrayList);
            for ( final DeBruijnEdge edge : edgeArrayList ) {
                // make sure the edge is not already in the path
                if ( path.containsEdge(graph, edge) )
                    continue;

                final Path newPath = new Path(path, graph, edge);
                n.val++;
                findBestPaths(graph, newPath, bestPaths, n);
            }
        }
    }

    /** Returns true when every outgoing edge of the path's last vertex is already on the path (i.e. the path can go no further). */
    private static boolean allOutgoingEdgesHaveBeenVisited( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path ) {
        for( final DeBruijnEdge edge : graph.outgoingEdgesOf(path.lastVertex) ) {
            if( !path.containsEdge(graph, edge) ) {
                return false;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,386 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
 * Computes per-read, per-haplotype likelihoods via a PairHMM, combines them into diploid
 * haplotype-pair likelihoods, selects the most likely haplotypes, and partitions reads by
 * their best-supported allele.
 */
public class LikelihoodCalculationEngine {

    // log10(1/2), used to average the two haplotype likelihoods of a diploid genotype in log space
    private static final double LOG_ONE_HALF = -Math.log10(2.0);
    // minimum log10-likelihood gap between alleles for a read to be considered informative
    private static final double BEST_LIKELIHOOD_THRESHOLD = 0.1;
    // constant gap-continuation penalty applied to every read base (quality-scale byte)
    private final byte constantGCP;
    private final boolean DEBUG;
    private final PairHMM pairHMM;

    /**
     * @param constantGCP gap-continuation penalty applied uniformly to all read bases
     * @param debug       if true, print diagnostic output
     * @param noBanded    passed through to the PairHMM; presumably disables banded computation — confirm against PairHMM
     */
    public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final boolean noBanded ) {
        pairHMM = new PairHMM( noBanded );
        this.constantGCP = constantGCP;
        DEBUG = debug;
    }

    /**
     * Computes read-vs-haplotype likelihoods for every sample's reads and stores them on the
     * haplotypes themselves (via Haplotype.addReadLikelihoods in the private overload).
     * Sizes the shared HMM matrices by the longest read (X) and longest haplotype (Y).
     */
    public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
        // X dimension = longest read across all samples
        int X_METRIC_LENGTH = 0;
        for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
            for( final GATKSAMRecord read : sample.getValue() ) {
                final int readLength = read.getReadLength();
                if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
            }
        }
        // Y dimension = longest haplotype
        int Y_METRIC_LENGTH = 0;
        for( final Haplotype h : haplotypes ) {
            final int haplotypeLength = h.getBases().length;
            if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; }
        }
        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
        X_METRIC_LENGTH += 2;
        Y_METRIC_LENGTH += 2;
        // initial arrays to hold the probabilities of being in the match, insertion and deletion cases
        final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
        // for each sample's reads
        for( final String sample : perSampleReadList.keySet() ) {
            //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
            // evaluate the likelihood of the reads given those haplotypes
            computeReadLikelihoods( haplotypes, perSampleReadList.get(sample), sample, matchMetricArray, XMetricArray, YMetricArray );
        }
    }

    /**
     * Runs the PairHMM for every (haplotype, read) pair of one sample and attaches the resulting
     * likelihood and representative-read-count arrays to each haplotype under the sample's key.
     * The HMM matrices are reused across calls; haplotypeStart lets the HMM skip the prefix shared
     * with the previously evaluated haplotype.
     * NOTE(review): mutates the array returned by read.getBaseQualities() in place when capping
     * qualities — assumes that array is a fresh copy per call; confirm against GATKSAMRecord.
     */
    private void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample,
                                         final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
        final int numHaplotypes = haplotypes.size();
        final int numReads = reads.size();
        final double[][] readLikelihoods = new double[numHaplotypes][numReads];
        final int[][] readCounts = new int[numHaplotypes][numReads];
        for( int iii = 0; iii < numReads; iii++ ) {
            final GATKSAMRecord read = reads.get(iii);
            final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
            final byte[] overallGCP = new byte[read.getReadLength()];
            Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
            Haplotype previousHaplotypeSeen = null;
            final byte[] readQuals = read.getBaseQualities();
            final byte[] readInsQuals = read.getBaseInsertionQualities();
            final byte[] readDelQuals = read.getBaseDeletionQualities();
            for( int kkk = 0; kkk < readQuals.length; kkk++ ) {
                readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
                //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
                //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
                readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); // floor qualities below 18 to the minimum usable score
            }
            for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
                final Haplotype haplotype = haplotypes.get(jjj);
                // start the HMM at the first base where this haplotype differs from the previous one
                final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
                previousHaplotypeSeen = haplotype;
                readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(),
                        readQuals, readInsQuals, readDelQuals, overallGCP,
                        haplotypeStart, matchMetricArray, XMetricArray, YMetricArray);
                readCounts[jjj][iii] = readCount;
            }
        }
        for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
            haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj], readCounts[jjj] );
        }
    }

    /**
     * Returns the index of the first position at which the two byte arrays differ,
     * or b1.length if they agree over the shorter of the two lengths.
     */
    private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
        for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
            if( b1[iii] != b2[iii] ) {
                return iii;
            }
        }
        return b1.length;
    }

    /**
     * Diploid likelihoods for one sample over the given haplotypes, using the trivial
     * 1-to-1 haplotype-to-allele mapping.
     */
    @Requires({"haplotypes.size() > 0"})
    @Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
    public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final String sample ) {
        // set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
        for( final Haplotype h : haplotypes ) {
            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
            list.add(h);
            haplotypeMapping.add(list);
        }
        return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
    }

    // This function takes just a single sample and a haplotypeMapping
    @Requires({"haplotypeMapping.size() > 0"})
    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
    public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
        final TreeSet<String> sampleSet = new TreeSet<String>();
        sampleSet.add(sample);
        return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping);
    }

    /**
     * This function takes a set of samples to pool over and a haplotypeMapping (each entry is the
     * list of haplotypes representing one allele). Returns a lower-triangular, normalized
     * log10-likelihood matrix over unordered diploid pairs of mapping entries; upper triangle
     * stays NEGATIVE_INFINITY.
     * NOTE(review): read counts are taken only from the iii-mapped haplotype — assumes both
     * haplotypes saw the same reads in the same order; confirm against computeReadLikelihoods.
     */
    @Requires({"haplotypeMapping.size() > 0"})
    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
    public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
        final int numHaplotypes = haplotypeMapping.size();
        final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
        for( int iii = 0; iii < numHaplotypes; iii++ ) {
            Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
        }
        // compute the diploid haplotype likelihoods
        // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
        for( int iii = 0; iii < numHaplotypes; iii++ ) {
            for( int jjj = 0; jjj <= iii; jjj++ ) {
                for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
                    for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
                        double haplotypeLikelihood = 0.0;
                        for( final String sample : samples ) {
                            final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
                            final int[] readCounts_iii = iii_mapped.getReadCounts(sample);
                            final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
                            for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
                                // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
                                // log10(10^(a*x1) + 10^(b*x2)) ???
                                // First term is approximated by Jacobian log with table lookup.
                                haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
                            }
                        }
                        // keep the best-scoring pair of concrete haplotypes for this allele pair
                        haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
                    }
                }
            }
        }
        // normalize the diploid likelihoods matrix
        return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
    }

    /**
     * Normalizes the lower triangle of a log10 diploid likelihood matrix in place (flattens it,
     * normalizes with MathUtils.normalizeFromLog10, and writes the values back) and returns the
     * same matrix instance.
     */
    @Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"})
    @Ensures({"result.length == result[0].length", "result.length == likelihoodMatrix.length"})
    protected static double[][] normalizeDiploidLikelihoodMatrixFromLog10( final double[][] likelihoodMatrix ) {
        final int numHaplotypes = likelihoodMatrix.length;
        double[] genotypeLikelihoods = new double[numHaplotypes*(numHaplotypes+1)/2];
        int index = 0;
        for( int iii = 0; iii < numHaplotypes; iii++ ) {
            for( int jjj = 0; jjj <= iii; jjj++ ){
                genotypeLikelihoods[index++] = likelihoodMatrix[iii][jjj];
            }
        }
        genotypeLikelihoods = MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
        index = 0;
        for( int iii = 0; iii < numHaplotypes; iii++ ) {
            for( int jjj = 0; jjj <= iii; jjj++ ){
                likelihoodMatrix[iii][jjj] = genotypeLikelihoods[index++];
            }
        }
        return likelihoodMatrix;
    }

    /*
    @Requires({"haplotypes.size() > 0"})
    @Ensures({"result.size() <= haplotypes.size()"})
    public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {
        // BUGBUG: This function needs a lot of work. Need to use 4-gamete test or Tajima's D to decide to break up events into separate pieces for genotyping
        final int numHaplotypes = haplotypes.size();
        final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
        final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
        bestHaplotypesIndexList.add(0); // always start with the reference haplotype
        final double[][][] haplotypeLikelihoodMatrix = new double[sampleKeySet.size()][numHaplotypes][numHaplotypes];
        int sampleCount = 0;
        for( final String sample : sampleKeySet ) {
            haplotypeLikelihoodMatrix[sampleCount++] = computeDiploidHaplotypeLikelihoods( haplotypes, sample );
        }
        int hap1 = 0;
        int hap2 = 0;
        int chosenSample = 0;
        //double bestElement = Double.NEGATIVE_INFINITY;
        final int maxChosenHaplotypes = Math.min( 15, sampleKeySet.size() * 2 + 1 );
        while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
            double maxElement = Double.NEGATIVE_INFINITY;
            for( int kkk = 0; kkk < sampleCount; kkk++ ) {
                for( int iii = 0; iii < numHaplotypes; iii++ ) {
                    for( int jjj = 0; jjj <= iii; jjj++ ) {
                        if( haplotypeLikelihoodMatrix[kkk][iii][jjj] > maxElement ) {
                            maxElement = haplotypeLikelihoodMatrix[kkk][iii][jjj];
                            hap1 = iii;
                            hap2 = jjj;
                            chosenSample = kkk;
                        }
                    }
                }
            }
            if( maxElement == Double.NEGATIVE_INFINITY ) { break; }
            if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
            if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }
            for( int iii = 0; iii < numHaplotypes; iii++ ) {
                for( int jjj = 0; jjj <= iii; jjj++ ) {
                    haplotypeLikelihoodMatrix[chosenSample][iii][jjj] = Double.NEGATIVE_INFINITY;
                }
            }
        }
        if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
        final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
        for( final int hIndex : bestHaplotypesIndexList ) {
            bestHaplotypes.add( haplotypes.get(hIndex) );
        }
        return bestHaplotypes;
    }
    */

    /**
     * Greedily selects the most likely haplotypes by repeatedly taking the best-scoring diploid
     * pair from the pooled (all-samples) likelihood matrix, until up to
     * min(13, 2*numSamples + 1) haplotypes are chosen. The reference haplotype (index 0) is
     * always included first.
     */
    @Requires({"haplotypes.size() > 0"})
    @Ensures({"result.size() <= haplotypes.size()"})
    public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {
        final int numHaplotypes = haplotypes.size();
        final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
        final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
        bestHaplotypesIndexList.add(0); // always start with the reference haplotype
        // set up the default 1-to-1 haplotype mapping object
        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
        for( final Haplotype h : haplotypes ) {
            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
            list.add(h);
            haplotypeMapping.add(list);
        }
        final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together
        int hap1 = 0;
        int hap2 = 0;
        //double bestElement = Double.NEGATIVE_INFINITY;
        final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 );
        while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
            // find the current best-scoring diploid pair in the lower triangle
            double maxElement = Double.NEGATIVE_INFINITY;
            for( int iii = 0; iii < numHaplotypes; iii++ ) {
                for( int jjj = 0; jjj <= iii; jjj++ ) {
                    if( haplotypeLikelihoodMatrix[iii][jjj] > maxElement ) {
                        maxElement = haplotypeLikelihoodMatrix[iii][jjj];
                        hap1 = iii;
                        hap2 = jjj;
                    }
                }
            }
            if( maxElement == Double.NEGATIVE_INFINITY ) { break; }
            if( DEBUG ) { System.out.println("Chose haplotypes " + hap1 + " and " + hap2 + " with diploid likelihood = " + haplotypeLikelihoodMatrix[hap1][hap2]); }
            // mark the chosen cell so the next iteration picks a different pair
            haplotypeLikelihoodMatrix[hap1][hap2] = Double.NEGATIVE_INFINITY;
            if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
            if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }
        }
        if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
        final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
        for( final int hIndex : bestHaplotypesIndexList ) {
            bestHaplotypes.add( haplotypes.get(hIndex) );
        }
        return bestHaplotypes;
    }

    /**
     * Assigns each read (per sample) to the allele it best supports, based on the maximum
     * haplotype likelihood over all haplotypes mapped to that allele. A read is "informative"
     * only if its best allele beats at least one other allele by more than
     * BEST_LIKELIHOOD_THRESHOLD; otherwise it is assigned the NO_CALL allele. Reads that do not
     * overlap the call location are dropped entirely; filtered reads that overlap go into the
     * NO_CALL list since they were never given likelihoods.
     */
    public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
        final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
        final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
        for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
            final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
            final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
            for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
                final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
                    final double likelihoods[] = new double[call.getFirst().getAlleles().size()];
                    int count = 0;
                    for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
                        double maxLikelihood = Double.NEGATIVE_INFINITY;
                        for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
                            final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
                            if( likelihood > maxLikelihood ) {
                                maxLikelihood = likelihood;
                            }
                        }
                        likelihoods[count++] = maxLikelihood;
                    }
                    final int bestAllele = MathUtils.maxElementIndex(likelihoods);
                    final double bestLikelihood = likelihoods[bestAllele];
                    Allele allele = Allele.NO_CALL;
                    boolean isInformativeRead = false;
                    // informative iff the best allele exceeds some other allele by more than the threshold
                    for( final double likelihood : likelihoods ) {
                        if( bestLikelihood - likelihood > BEST_LIKELIHOOD_THRESHOLD ) {
                            isInformativeRead = true;
                            break;
                        }
                    }
                    // uninformative reads get the no call Allele
                    if( isInformativeRead ) {
                        allele = call.getFirst().getAlleles().get(bestAllele);
                    }
                    List<GATKSAMRecord> readList = alleleReadMap.get(allele);
                    if( readList == null ) {
                        readList = new ArrayList<GATKSAMRecord>();
                        alleleReadMap.put(allele, readList);
                    }
                    readList.add(read);
                }
            }
            // add all filtered reads to the NO_CALL list because they weren't given any likelihoods
            List<GATKSAMRecord> readList = alleleReadMap.get(Allele.NO_CALL);
            if( readList == null ) {
                readList = new ArrayList<GATKSAMRecord>();
                alleleReadMap.put(Allele.NO_CALL, readList);
            }
            for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
                    readList.add(read);
                }
            }
            returnMap.put(sample.getKey(), alleleReadMap);
        }
        return returnMap;
    }
}

View File

@ -0,0 +1,25 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
/**
* Created by IntelliJ IDEA.
* User: ebanks
* Date: Mar 14, 2011
*/
/**
 * Base class for local assembly engines: given the reads of an active region and the reference
 * haplotype, an implementation produces the set of candidate haplotypes to genotype.
 */
public abstract class LocalAssemblyEngine {

    // the available assembler implementations
    public enum ASSEMBLER {
        SIMPLE_DE_BRUIJN
    }

    protected LocalAssemblyEngine() {
    }

    /**
     * Assembles the active region's reads into candidate haplotypes.
     *
     * @param activeRegion             region whose reads are assembled
     * @param refHaplotype             the reference haplotype over the (padded) region
     * @param fullReferenceWithPadding reference bases including padding
     * @param refLoc                   genomic location of the padded reference
     * @param PRUNE_FACTOR             graph-pruning threshold (implementation-defined use)
     * @param activeAllelesToGenotype  alleles that must be represented (GGA mode); may be empty
     * @return the list of assembled haplotypes
     */
    public abstract ArrayList<Haplotype> runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, ArrayList<VariantContext> activeAllelesToGenotype);
}

View File

@ -0,0 +1,384 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.PrintStream;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: ebanks, rpoplin
* Date: Mar 14, 2011
*/
/**
 * Local assembly engine that builds de Bruijn graphs from the region's reads at several kmer
 * sizes, prunes and merges them, and extracts the best-scoring paths as candidate haplotypes.
 */
public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {

    private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
    private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11; // how many top-scoring paths to pull from each kmer graph
    private static final byte MIN_QUALITY = (byte) 17; // kmers containing any base below this quality are skipped

    // Smith-Waterman parameters originally copied from IndelRealigner
    private static final double SW_MATCH = 5.0;      // 1.0;
    private static final double SW_MISMATCH = -10.0; //-1.0/3.0;
    private static final double SW_GAP = -22.0;      //-1.0-1.0/3.0;
    private static final double SW_GAP_EXTEND = -1.2; //-1.0/.0;

    private final boolean DEBUG;
    // DOT-format output stream for graph visualization; null disables graph printing
    private final PrintStream GRAPH_WRITER;
    // one graph per kmer size attempted
    private final ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
    // set per-run from the runLocalAssembly argument; also consulted by printGraphs
    private int PRUNE_FACTOR = 1;

    /**
     * @param debug       if true, print diagnostic output
     * @param graphWriter destination for DOT-format graphs, or null to disable
     */
    public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter ) {
        super();
        DEBUG = debug;
        GRAPH_WRITER = graphWriter;
    }
/**
 * Assembles the active region's reads into candidate haplotypes: builds de Bruijn graphs at
 * several kmer sizes, prunes low-weight non-reference edges, collapses linear chains, optionally
 * dumps the graphs in DOT format, and finally extracts the best paths as haplotypes.
 */
public ArrayList<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList<VariantContext> activeAllelesToGenotype ) {
    this.PRUNE_FACTOR = PRUNE_FACTOR;

    // build one graph per kmer size from the region's reads plus the reference
    createDeBruijnGraphs( activeRegion.getReads(), refHaplotype );

    // tidy each graph: drop weak non-reference edges, then collapse unambiguous chains
    for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> assemblyGraph : graphs ) {
        pruneGraph( assemblyGraph, PRUNE_FACTOR );
        //eliminateNonRefPaths( assemblyGraph );
        mergeNodes( assemblyGraph );
    }

    if( GRAPH_WRITER != null ) {
        printGraphs();
    }

    // walk the cleaned graphs and return the best-scoring haplotypes
    return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
}
/**
 * Rebuilds the list of de Bruijn graphs from the given reads, trying kmer sizes
 * 31, 37, 43, ..., 75. Graphs whose reference path contains a cycle are discarded
 * (createGraphFromSequences returns false for those).
 */
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
    graphs.clear();
    int kmer = 31;
    while( kmer <= 75 ) {
        final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> candidateGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        final boolean referencePathIsAcyclic = createGraphFromSequences( candidateGraph, reads, kmer, refHaplotype, DEBUG );
        if( referencePathIsAcyclic ) {
            graphs.add(candidateGraph);
        }
        kmer += 6;
    }
}
/**
 * Collapses unambiguous linear chains in the graph: whenever an edge connects a vertex with a
 * single outgoing edge to a distinct vertex with a single incoming edge, the two vertices are
 * fused into one (the source's sequence plus the target's suffix) and the surrounding edges are
 * re-attached. Edge multiplicities of the neighbors are bumped to account for the removed edge.
 * The scan restarts from scratch after every merge (break out of the edge loop) so the edge set
 * is never modified while being iterated.
 */
protected static void mergeNodes( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
    boolean foundNodesToMerge = true;
    while( foundNodesToMerge ) {
        foundNodesToMerge = false;
        for( final DeBruijnEdge e : graph.edgeSet() ) {
            final DeBruijnVertex outgoingVertex = graph.getEdgeTarget(e);
            final DeBruijnVertex incomingVertex = graph.getEdgeSource(e);
            // mergeable iff this edge is the only way out of its source and the only way into its target
            if( !outgoingVertex.equals(incomingVertex) && graph.inDegreeOf(outgoingVertex) == 1 && graph.outDegreeOf(incomingVertex) == 1) {
                final Set<DeBruijnEdge> outEdges = graph.outgoingEdgesOf(outgoingVertex);
                final Set<DeBruijnEdge> inEdges = graph.incomingEdgesOf(incomingVertex);
                // fold the removed edge's multiplicity into the neighboring edges
                if( inEdges.size() == 1 && outEdges.size() == 1 ) {
                    inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
                    outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
                } else if( inEdges.size() == 1 ) {
                    inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
                } else if( outEdges.size() == 1 ) {
                    outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
                }
                // fused vertex = source sequence + target suffix, keeping the target's kmer size
                final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer );
                graph.addVertex(addedVertex);
                // re-wire the neighbors of the two old vertices onto the fused vertex
                for( final DeBruijnEdge edge : outEdges ) {
                    graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
                }
                for( final DeBruijnEdge edge : inEdges ) {
                    graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
                }
                graph.removeVertex( incomingVertex );
                graph.removeVertex( outgoingVertex );
                foundNodesToMerge = true;
                break; // edge set was modified; restart the scan
            }
        }
    }
}
/**
 * Prunes the graph: removes every non-reference edge whose multiplicity is at or below the
 * pruning factor, then removes any vertex left with no edges at all.
 */
protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
    // collect, then remove, the weak non-reference edges (reference edges are always kept)
    final List<DeBruijnEdge> weakEdges = new ArrayList<DeBruijnEdge>();
    for( final DeBruijnEdge candidate : graph.edgeSet() ) {
        final boolean belowThreshold = candidate.getMultiplicity() <= pruneFactor;
        if( belowThreshold && !candidate.getIsRef() ) {
            weakEdges.add(candidate);
        }
    }
    graph.removeAllEdges(weakEdges);

    // then sweep away any vertex that the edge removal left completely disconnected
    final List<DeBruijnVertex> orphanedVertices = new ArrayList<DeBruijnVertex>();
    for( final DeBruijnVertex candidate : graph.vertexSet() ) {
        final boolean disconnected = graph.inDegreeOf(candidate) == 0 && graph.outDegreeOf(candidate) == 0;
        if( disconnected ) {
            orphanedVertices.add(candidate);
        }
    }
    graph.removeAllVertices(orphanedVertices);
}
/**
 * Iteratively removes dangling non-reference vertices: any source or sink vertex none of whose
 * incident edges is a reference edge is deleted. Repeats until a full pass removes nothing,
 * since each removal can expose new dangling vertices.
 */
protected static void eliminateNonRefPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
    final List<DeBruijnVertex> danglingNonRefVertices = new ArrayList<DeBruijnVertex>();
    boolean removedSomething = true;
    while( removedSomething ) {
        removedSomething = false;
        for( final DeBruijnVertex vertex : graph.vertexSet() ) {
            // only sources (no in-edges) and sinks (no out-edges) are candidates
            if( graph.inDegreeOf(vertex) != 0 && graph.outDegreeOf(vertex) != 0 ) {
                continue;
            }
            boolean touchesReference = false;
            for( final DeBruijnEdge incident : graph.edgesOf(vertex) ) {
                if( incident.getIsRef() ) {
                    touchesReference = true;
                    break;
                }
            }
            if( !touchesReference ) {
                danglingNonRefVertices.add(vertex);
                removedSomething = true;
            }
        }
        graph.removeAllVertices(danglingNonRefVertices);
        danglingNonRefVertices.clear();
    }
}
/**
 * Populates the graph with kmer pairs from the reference haplotype and from the reads.
 * Reference kmers are added first and marked as reference edges; a cycle detected on the
 * reference path aborts the whole graph (returns false). Read kmers are skipped when any base
 * in the kmer span falls below MIN_QUALITY; for reduced reads each kmer pair is added once per
 * (rounded) mean consensus count so edge multiplicities reflect the underlying read depth.
 *
 * NOTE(review): the reference uses length >= KMER_LENGTH + KMER_OVERLAP but reads use a strict
 * '>' — possibly intentional, possibly an off-by-one; confirm.
 *
 * @return true if the graph was built, false if the reference path contained a cycle
 */
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Collection<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
    final byte[] refSequence = refHaplotype.getBases();
    if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
        final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
        for (int i = 0; i < kmersInSequence - 1; i++) {
            // get the kmers
            final byte[] kmer1 = new byte[KMER_LENGTH];
            System.arraycopy(refSequence, i, kmer1, 0, KMER_LENGTH);
            final byte[] kmer2 = new byte[KMER_LENGTH];
            System.arraycopy(refSequence, i+1, kmer2, 0, KMER_LENGTH);
            if( !addKmersToGraph(graph, kmer1, kmer2, true) ) {
                if( DEBUG ) {
                    System.out.println("Cycle detected in reference graph for kmer = " + KMER_LENGTH + " ...skipping");
                }
                return false;
            }
        }
    }
    for( final GATKSAMRecord read : reads ) {
        final byte[] sequence = read.getReadBases();
        final byte[] qualities = read.getBaseQualities();
        final byte[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not reduced
        if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) {
            final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
            for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {
                // if the qualities of all the bases in the kmers are high enough
                boolean badKmer = false;
                for( int jjj = iii; jjj < iii + KMER_LENGTH + 1; jjj++) { // KMER_LENGTH + 1 bases span both kmers of the pair
                    if( qualities[jjj] < MIN_QUALITY ) {
                        badKmer = true;
                        break;
                    }
                }
                int countNumber = 1;
                if (read.isReducedRead()) {
                    // compute mean number of reduced read counts in current kmer span
                    final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
                    // precise rounding can make a difference with low consensus counts
                    countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
                }
                if( !badKmer ) {
                    // get the kmers
                    final byte[] kmer1 = new byte[KMER_LENGTH];
                    System.arraycopy(sequence, iii, kmer1, 0, KMER_LENGTH);
                    final byte[] kmer2 = new byte[KMER_LENGTH];
                    System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH);
                    // add once per consensus count so multiplicity reflects depth
                    for (int k=0; k < countNumber; k++)
                        addKmersToGraph(graph, kmer1, kmer2, false);
                }
            }
        }
    }
    return true;
}
/**
 * Inserts the kmer pair into the graph as a (source -> target) edge, creating the vertices if
 * necessary. An existing edge has its multiplicity bumped (and is promoted to a reference edge
 * when isRef is set). Returns false only when a reference kmer pair adds no new vertex, which
 * signals a cycle on the reference path.
 */
protected static boolean addKmersToGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final byte[] kmer1, final byte[] kmer2, final boolean isRef ) {
    final int vertexCountBeforeInsert = graph.vertexSet().size();
    final DeBruijnVertex source = new DeBruijnVertex( kmer1, kmer1.length );
    graph.addVertex(source);
    final DeBruijnVertex target = new DeBruijnVertex( kmer2, kmer2.length );
    graph.addVertex(target);

    // if a reference kmer pair introduced nothing new, the reference path revisits a vertex: cycle
    if( isRef && graph.vertexSet().size() == vertexCountBeforeInsert ) {
        return false;
    }

    final DeBruijnEdge existingEdge = graph.getEdge(source, target);
    if ( existingEdge == null ) {
        graph.addEdge(source, target, new DeBruijnEdge( isRef ));
        return true;
    }
    if( isRef ) {
        existingEdge.setIsRef( true );
    }
    existingEdge.setMultiplicity(existingEdge.getMultiplicity() + 1);
    return true;
}
/**
 * Writes each assembly graph to GRAPH_WRITER in Graphviz DOT format: one digraph per kmer
 * graph, labeling kept edges with their multiplicity, coloring reference edges red, and
 * labeling each non-root vertex with its suffix string.
 *
 * Fix: the original edge-label line contained the ternary
 * (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : label) inside a branch
 * already guarded by edge.getMultiplicity() > PRUNE_FACTOR, so the dotted arm was unreachable
 * dead code. It is removed; the emitted output is unchanged.
 */
protected void printGraphs() {
    int count = 0;
    for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
        GRAPH_WRITER.println("digraph kmer" + count++ +" {");
        for( final DeBruijnEdge edge : graph.edgeSet() ) {
            // only edges surviving the prune threshold get drawn with a multiplicity label
            if( edge.getMultiplicity() > PRUNE_FACTOR ) {
                GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [label=\"" + edge.getMultiplicity() + "\"];");
            }
            // reference edges are additionally drawn in red
            if( edge.getIsRef() ) {
                GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
            }
            // a surviving non-reference edge at or below the prune factor should have been pruned already
            if( !edge.getIsRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
        }
        for( final DeBruijnVertex v : graph.vertexSet() ) {
            // root vertices show their full kmer; all others show just the suffix
            final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
            GRAPH_WRITER.println("\t" + v.toString() + " [label=\"" + label + "\"]");
        }
        GRAPH_WRITER.println("}");
    }
}
/**
 * Extracts candidate haplotypes from the cleaned assembly graphs. The reference haplotype is
 * always added first (aligned to the padded reference via Smith-Waterman). In GGA mode the
 * requested alleles are force-inserted into the reference haplotype, and also into every
 * assembled haplotype that does not already carry them. Then the top-scoring paths from each
 * kmer graph are converted to haplotypes and added if novel.
 */
@Ensures({"result.contains(refHaplotype)"})
private ArrayList<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final ArrayList<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
    final ArrayList<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();

    // add the reference haplotype separately from all the others
    final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( fullReferenceWithPadding, refHaplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
    refHaplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
    refHaplotype.setCigar( swConsensus.getCigar() );
    if( !returnHaplotypes.add( refHaplotype ) ) {
        throw new ReviewedStingException("Unable to add reference haplotype during assembly: " + refHaplotype);
    }

    // the active region's span in haplotype coordinates, derived from the reference alignment
    final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
    final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength();

    for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype
        for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
            final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart());
            if( !addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
                return returnHaplotypes;
                //throw new ReviewedStingException("Unable to add reference+allele haplotype during GGA-enabled assembly: " + insertedRefHaplotype);
            }
        }
    }

    // pull the best-scoring paths out of every kmer graph
    for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
        for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
            final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
            if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
                if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
                    final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
                    for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
                        final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
                        if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
                            for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
                                addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
                            }
                        }
                    }
                }
            }
        }
    }

    if( DEBUG ) {
        if( returnHaplotypes.size() > 1 ) {
            System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes to evaluate every read against.");
        } else {
            System.out.println("Found only the reference haplotype in the assembly graph.");
        }
        for( final Haplotype h : returnHaplotypes ) {
            System.out.println( h.toString() );
            System.out.println( "> Cigar = " + h.getCigar() );
        }
    }
    return returnHaplotypes;
}
/**
 * Aligns a candidate haplotype to the padded reference, trims/extends it so that it
 * spans exactly the active region, and appends it to haplotypeList if it survives
 * the sanity checks and is not already present.
 *
 * @param haplotype         candidate haplotype; may be null, in which case nothing is added
 * @param ref               padded reference bases the haplotype is aligned against
 * @param haplotypeList     output list, modified in place
 * @param activeRegionStart active region start in coordinates of ref
 * @param activeRegionStop  active region stop in coordinates of ref
 * @return true iff the (trimmed/extended) haplotype was added to the list
 */
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
if( haplotype == null ) { return false; }
// align the raw haplotype against the padded reference and left-align any indels
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );
if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 ) { // protect against SW failures
return false;
}
// map the active region boundaries onto haplotype (read) coordinates
final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true );
int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true );
if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) {
hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal
}
byte[] newHaplotypeBases;
// extend partial haplotypes to contain the full active region sequence
// left/rightBreakPoint mark offsets where assembled bases meet reference padding (0 = no padding on that side)
int leftBreakPoint = 0;
int rightBreakPoint = 0;
if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
// neither region boundary maps onto the haplotype: pad with reference bases on both sides
newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()),
haplotype.getBases()),
ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
leftBreakPoint = swConsensus.getAlignmentStart2wrt1() - activeRegionStart;
rightBreakPoint = leftBreakPoint + haplotype.getBases().length;
//newHaplotypeBases = haplotype.getBases();
//return false; // piece of haplotype isn't anchored within the active region so don't build a haplotype out of it
} else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
// haplotype starts after the region start: pad the left side with reference bases
//return false;
newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) );
//newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), 0, hapStop);
leftBreakPoint = swConsensus.getAlignmentStart2wrt1() - activeRegionStart;
} else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
// haplotype ends before the region stop: pad the right side with reference bases
//return false;
newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
//newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length);
rightBreakPoint = haplotype.getBases().length - hapStart;
} else {
// haplotype covers the whole active region: trim it down to the region boundaries
newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop);
}
// re-align the trimmed/extended haplotype; it must now span the active region exactly
final Haplotype h = new Haplotype( newHaplotypeBases );
final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
h.leftBreakPoint = leftBreakPoint;
h.rightBreakPoint = rightBreakPoint;
if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures
return false;
}
// only keep novel haplotypes
if( !haplotypeList.contains(h) ) {
haplotypeList.add(h);
return true;
} else {
return false;
}
}
}

View File

@ -0,0 +1,134 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* @author ebanks
* @since 7/16/12
*/
public class BQSRIntegrationTest extends WalkerTest {
// Bundles one BaseRecalibrator invocation: inputs, extra arguments and the expected output MD5.
private static class BQSRTest {
final String reference;
final String interval;
final String bam;
final String args;
final String md5;
private BQSRTest(String reference, String bam, String interval, String args, String md5) {
this.reference = reference;
this.bam = bam;
this.interval = interval;
this.args = args;
this.md5 = md5;
}
// Builds the full BaseRecalibrator command line; the known-sites file is
// chosen to match the reference build (b36 -> dbSNP129, otherwise hg18 dbSNP132).
public String getCommandLine() {
return " -T BaseRecalibrator" +
" -R " + reference +
" -I " + bam +
" -L " + interval +
args +
" --no_plots" +
" -knownSites " + (reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) +
" -o %s";
}
@Override
public String toString() {
return String.format("BQSR(bam='%s', args='%s')", bam, args);
}
}
// Each row pairs a BaseRecalibrator configuration with the MD5 of its expected recal table.
@DataProvider(name = "BQSRTest")
public Object[][] createBQSRTestData() {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")},
};
}
// Runs every configuration twice: single-threaded and with -nt 2, expecting identical MD5s.
@Test(dataProvider = "BQSRTest")
public void testBQSR(BQSRTest params) {
WalkerTestSpec spec = new WalkerTestSpec(
params.getCommandLine(),
Arrays.asList(params.md5));
executeTest("testBQSR-"+params.args, spec).getFirst();
WalkerTestSpec specNT2 = new WalkerTestSpec(
params.getCommandLine() + " -nt 2",
Arrays.asList(params.md5));
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
}
// BaseRecalibrator without any -knownSites must be rejected as a command line error.
@Test
public void testBQSRFailWithoutDBSNP() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
" -T BaseRecalibrator" +
" -R " + b36KGReference +
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
" -L 1:10,000,000-10,200,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.CommandLineException.class);
executeTest("testBQSRFailWithoutDBSNP", spec);
}
// Bundles PrintReads -BQSR arguments with the expected output MD5.
private static class PRTest {
final String args;
final String md5;
private PRTest(String args, String md5) {
this.args = args;
this.md5 = md5;
}
@Override
public String toString() {
return String.format("PrintReads(args='%s')", args);
}
}
@DataProvider(name = "PRTest")
public Object[][] createPRTestData() {
return new Object[][]{
{new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")},
{new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")},
{new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")},
{new PRTest(" -DIQ", "66aa65223f192ee39c1773aa187fd493")}
};
}
// Applies an existing recalibration table via PrintReads -BQSR and checks the output BAM MD5.
@Test(dataProvider = "PRTest")
public void testPR(PRTest params) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T PrintReads" +
" -R " + hg18Reference +
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
" -BQSR " + privateTestDir + "HiSeq.1mb.1RG.table" +
params.args +
" -o %s",
Arrays.asList(params.md5));
executeTest("testPrintReads-"+params.args, spec).getFirst();
}
}

View File

@ -11,6 +11,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
final String DELETION_BAM = validationDataLocation + "filtered_deletion_for_reduce_reads.bam";
final String STASH_BAM = validationDataLocation + "ReduceReadsStashBug.bam";
final String STASH_L = " -L 14:73718184-73718284 -L 14:73718294-73718330 -L 14:73718360-73718556";
final String DIVIDEBYZERO_BAM = validationDataLocation + "ReduceReadsDivideByZeroBug.bam";
final String DIVIDEBYZERO_L = " -L " + validationDataLocation + "ReduceReadsDivideByZeroBug.intervals";
final String L = " -L 20:10,100,000-10,120,000 ";
private void RRTest(String testName, String args, String md5) {
@ -21,28 +23,28 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testDefaultCompression() {
RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f");
RRTest("testDefaultCompression ", L, "72eb6db9d7a09a0cc25eaac1aafa97b7");
}
@Test(enabled = true)
public void testMultipleIntervals() {
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76");
RRTest("testMultipleIntervals ", intervals, "104b1a1d9fa5394c6fea95cd32967b78");
}
@Test(enabled = true)
public void testHighCompression() {
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047");
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "c55140cec60fa8c35161680289d74d47");
}
@Test(enabled = true)
public void testLowCompression() {
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "afd39459c841b68a442abdd5ef5f8f27");
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "0f2e57b7f6de03cc4da1ffcc8cf8f1a7");
}
@Test(enabled = true)
public void testIndelCompression() {
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973");
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "dda0c95f56f90e5f633c2437c2b21031");
}
@Test(enabled = true)
@ -64,5 +66,16 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6")));
}
/**
 * Regression test for a divide-by-zero reported by GdA and forum users: it is
 * triggered when the downsampler walks a region in which every read gets
 * filtered out.
 */
@Test(enabled = true)
public void testDivideByZero() {
    final String commandLine =
            String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
    final WalkerTestSpec spec = new WalkerTestSpec(commandLine, Arrays.asList("137505c3efd1e9f8d9209dbdf8419ff9"));
    executeTest("testDivideByZero", spec);
}
}

View File

@ -0,0 +1,156 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 3/28/12
* Time: 7:44 AM
* To change this template use File | Settings | File Templates.
*/
public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
// log10 likelihood test vectors, initialized in before(); the suffix encodes
// the genotype the vector peaks at (e.g. AB1 peaks at het A/B, diploid case 1)
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
static double[] A4_1, B4_1, C4_1, D4_1, E4_1,F4_1;
static double[] A4_400, B4_310, C4_220, D4_130, E4_121, F4_013;
static final int numSamples = 4;
static final int samplePloidy = 4; // = 2*samplesPerPool
@BeforeSuite
public void before() {
// legacy diploid cases
AA1 = new double[]{-5.0, -20.0, -20.0};
AB1 = new double[]{-20.0, 0.0, -20.0};
BB1 = new double[]{-20.0, -20.0, 0.0};
// diploid, nAlleles = 3. Ordering is [2 0 0] [1 1 0] [0 2 0] [1 0 1] [0 1 1] [0 0 2], ie AA AB BB AC BC CC
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0};
AC2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
BB2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0};
// pool (i.e. polyploid cases)
// NAlleles = 2, ploidy=4
// ordering is [4 0] [3 1] [2 2 ] [1 3] [0 4]
A4_1 = new double[]{-3.0, -20.0, -20.0, -20.0, -20.0};
B4_1 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0};
C4_1 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0};
// NOTE(review): D4_1 and E4_1 are identical and peak at two indices (2 and 3),
// unlike the one-hot pattern of the other vectors; for that pattern D4_1 would
// be expected to peak only at index 3. Both are currently unused by the data
// provider below - confirm intended values before using them.
D4_1 = new double[]{-20.0, -20.0, 0.0, 0.0, -20.0};
E4_1 = new double[]{-20.0, -20.0, 0.0, 0.0, -20.0};
F4_1 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0};
// NAlleles = 3, ploidy = 4
// ordering is [4 0 0] [3 1 0] [2 2 0] [1 3 0] [0 4 0] [3 0 1] [2 1 1] [1 2 1] [0 3 1] [2 0 2] [1 1 2] [0 2 2] [1 0 3] [0 1 3] [0 0 4]
A4_400 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0};
B4_310 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0};
C4_220 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0};
D4_130 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0};
E4_121 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0};
F4_013 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
}
// One test case: a set of sample genotypes plus the allele count / ploidy context.
// The expected per-allele MAP counts are encoded in the test name (see testGLs).
private class GetGLsTest extends TestDataProvider {
GenotypesContext GLs;
int numAltAlleles;
String name;
int ploidy;
private GetGLsTest(String name, int numAltAlleles, int ploidy, Genotype... arg) {
super(GetGLsTest.class, name);
GLs = GenotypesContext.create(arg);
this.name = name;
this.numAltAlleles = numAltAlleles;
this.ploidy = ploidy;
}
public String toString() {
return String.format("%s input=%s", super.toString(), GLs);
}
}
// Builds a no-call genotype of the given ploidy that carries the supplied PL vector.
private static Genotype createGenotype(String name, double[] gls, int ploidy) {
Allele[] alleles = new Allele[ploidy];
for (int i=0; i < ploidy; i++)
alleles[i] = Allele.NO_CALL;
return new GenotypeBuilder(name, Arrays.asList(alleles)).PL(gls).make();
}
@DataProvider(name = "getGLs")
public Object[][] createGLsData() {
// bi-allelic diploid case
new GetGLsTest("B0", 1, 2, createGenotype("AA1", AA1,2), createGenotype("AA2", AA1,2), createGenotype("AA3", AA1,2));
new GetGLsTest("B1", 1, 2, createGenotype("AA1", AA1,2), createGenotype("AA2", AA1,2), createGenotype("AB", AB1,2));
new GetGLsTest("B2", 1, 2, createGenotype("AA1", AA1,2), createGenotype("BB", BB1,2), createGenotype("AA2", AA1,2));
new GetGLsTest("B3a", 1, 2, createGenotype("AB", AB1,2), createGenotype("AA", AA1,2), createGenotype("BB", BB1,2));
new GetGLsTest("B3b", 1, 2, createGenotype("AB1", AB1,2), createGenotype("AB2", AB1,2), createGenotype("AB3", AB1,2));
new GetGLsTest("B4", 1, 2, createGenotype("BB1", BB1,2), createGenotype("BB2", BB1,2), createGenotype("AA", AA1,2));
new GetGLsTest("B5", 1, 2, createGenotype("BB1", BB1,2), createGenotype("AB", AB1,2), createGenotype("BB2", BB1,2));
new GetGLsTest("B6", 1, 2, createGenotype("BB1", BB1,2), createGenotype("BB2", BB1,2), createGenotype("BB3", BB1,2));
// tri-allelic diploid case
new GetGLsTest("B1C0", 2, 2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("AB", AB2,2));
new GetGLsTest("B0C1", 2, 2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("AC", AC2,2));
new GetGLsTest("B1C1a", 2,2, createGenotype("AA", AA2,2), createGenotype("AB", AB2,2), createGenotype("AC", AC2,2));
new GetGLsTest("B1C1b", 2,2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("BC", BC2,2));
new GetGLsTest("B2C1", 2, 2, createGenotype("AB1", AB2,2), createGenotype("AB2", AB2,2), createGenotype("AC", AC2,2));
new GetGLsTest("B3C2a", 2, 2, createGenotype("AB", AB2,2), createGenotype("BC1", BC2,2), createGenotype("BC2", BC2,2));
new GetGLsTest("B3C2b", 2, 2, createGenotype("AB", AB2,2), createGenotype("BB", BB2,2), createGenotype("CC", CC2,2));
// bi-allelic pool case
new GetGLsTest("P0", 1, samplePloidy, createGenotype("A4_1", A4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy));
new GetGLsTest("P1", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("B4_1", B4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy));
new GetGLsTest("P2a", 1,samplePloidy, createGenotype("A4_1", A4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy));
new GetGLsTest("P2b", 1, samplePloidy,createGenotype("B4_1", B4_1,samplePloidy), createGenotype("B4_1", B4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy));
new GetGLsTest("P4", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy));
new GetGLsTest("P6", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy));
new GetGLsTest("P8", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy));
// multi-allelic pool case
new GetGLsTest("B1C3", 2, samplePloidy,createGenotype("A4_400", A4_400,samplePloidy), createGenotype("A4_400", A4_400,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy));
new GetGLsTest("B3C9", 2, samplePloidy,createGenotype("F4_013", F4_013,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy));
new GetGLsTest("B6C0", 2, samplePloidy,createGenotype("B4_310", B4_310,samplePloidy), createGenotype("C4_220", C4_220,samplePloidy), createGenotype("D4_130", D4_130,samplePloidy));
new GetGLsTest("B6C4", 2, samplePloidy,createGenotype("D4_130", D4_130,samplePloidy), createGenotype("E4_121", E4_121,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy));
new GetGLsTest("B4C7", 2, samplePloidy,createGenotype("F4_013", F4_013,samplePloidy), createGenotype("E4_121", E4_121,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy));
new GetGLsTest("B2C3", 2, samplePloidy,createGenotype("A4_400", A4_400,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy), createGenotype("B4_310", B4_310,samplePloidy));
return GetGLsTest.getTests(GetGLsTest.class);
}
// Combines the per-sample pools with flat priors and checks that the MAP allele
// counts match those encoded at the odd character positions of the test name
// (e.g. "B6C4" expects 6 copies of B and 4 copies of C).
@Test(dataProvider = "getGLs")
public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors
GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[allele];
// System.out.format( "%s Expected:%d Calc:%d\n",cfg.toString(),expectedAlleleCount, calculatedAlleleCount);
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
}
}
}

View File

@ -0,0 +1,514 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
import java.io.PrintStream;
import java.util.*;
public class GeneralPloidyGenotypeLikelihoodsUnitTest {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class);
private static final boolean VERBOSE = false;
private static final boolean SIMULATE_NOISY_PILEUP = false;
private static final int NUM_SIMULATED_OBS = 10;
// NOTE(review): this is named like a constructor but declared as a void method,
// so it is never invoked automatically (neither by construction nor by TestNG);
// the UAC settings below only take effect if something calls it explicitly.
// It looks like it was meant to be the class constructor or a @BeforeClass
// setup method - confirm before relying on these values in the tests.
void PoolGenotypeLikelihoodsUnitTest() {
UAC.minQualityScore = 5;
UAC.maxQualityScore = 40;
UAC.phredScaledPrior = (byte)20;
UAC.minPower = 0.0;
}
@Test
public void testStoringLikelihoodElements() {
    // Round-trip check: a PL vector stored in a GeneralPloidyGenotypeLikelihoods
    // object must come back unchanged from getLikelihoods().
    final int ploidy = 20;
    final int numAlleles = 4;
    final List<Allele> alleles = new ArrayList<Allele>(Arrays.asList(
            Allele.create("T", true),
            Allele.create("C", false),
            Allele.create("A", false),
            Allele.create("G", false)));
    final int numElements = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy);
    // System.out.format("Alt Alleles: %d, Ploidy: %d, #Likelihoods: %d\n", numAltAlleles, ploidy, res);
    final double[] storedGLs = new double[numElements];
    for (int idx = 0; idx < storedGLs.length; idx++)
        storedGLs[idx] = (double) idx;
    final GeneralPloidyGenotypeLikelihoods container = new GeneralPloidySNPGenotypeLikelihoods(alleles, storedGLs, ploidy, null, false, true);
    Assert.assertEquals(storedGLs, container.getLikelihoods());
}
@Test
public void testElementStorageCache() {
    // The cached element-count table must agree with the direct combinatorial
    // computation for every (allele count, ploidy) pair in range.
    for (int nAlleles = 2; nAlleles < 10; nAlleles++) {
        for (int ploidy = 2; ploidy < 10; ploidy++) {
            Assert.assertEquals(
                    GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
                    GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
        }
    }
}
@Test
public void testVectorToLinearIndex() {
    // Walk every genotype-count vector with the iterator and verify that the
    // closed-form getLinearIndex() reproduces the iterator's own linear index.
    // (The original version only printed the two values - it stated this
    // comparison in its comment but never asserted it, and its debug output
    // was not guarded by VERBOSE like the rest of this class.)
    final int numAlleles = 4;
    final int ploidy = 2;
    final GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
    while (iterator.hasNext()) {
        final int[] vector = iterator.getCurrentVector();
        final int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(vector, numAlleles, ploidy);
        if (VERBOSE) {
            System.out.format("\n%d:", iterator.getLinearIndex());
            for (int count : vector)
                System.out.format("%d ", count);
            System.out.format("Computed idx = %d\n", computedIdx);
        }
        // the closed-form index must match the iterator's position
        Assert.assertEquals(computedIdx, iterator.getLinearIndex());
        iterator.next();
    }
}
@Test
public void testSubsetToAlleles() {
    // Build a 4-allele diploid likelihood vector with distinct entries, then
    // subset it down to alleles {A, C} and check which elements are extracted.
    final int ploidy = 2;
    final int numAlleles = 4;
    final List<Allele> originalAlleles = new ArrayList<Allele>(Arrays.asList(
            Allele.create("T", true),
            Allele.create("C", false),
            Allele.create("A", false),
            Allele.create("G", false)));
    final int numElements = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy);
    // System.out.format("Alt Alleles: %d, Ploidy: %d, #Likelihoods: %d\n", numAltAlleles, ploidy, res);
    final double[] oldLikelihoods = new double[numElements];
    for (int idx = 0; idx < oldLikelihoods.length; idx++)
        oldLikelihoods[idx] = (double) idx;
    final List<Allele> allelesToSubset = new ArrayList<Allele>(Arrays.asList(
            Allele.create("A", false),
            Allele.create("C", false)));
    final double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
            originalAlleles, allelesToSubset);
    /*
     For P=2, N=4 the default iteration order is:
       0: 2 0 0 0    1: 1 1 0 0    2: 0 2 0 0    3: 1 0 1 0    4: 0 1 1 0
       5: 0 0 2 0    6: 1 0 0 1    7: 0 1 0 1    8: 0 0 1 1    9: 0 0 0 2
     and for P=2, N=2 it is:
       0: 2 0        1: 1 1        2: 0 2
     Extracting alleles 2 and 1 keeps exactly the elements with zeros at
     positions 0 and 3, i.e. {2, 4, 5}. Since the subset list flips alleles
     2 and 1, the order comes out reversed.
    */
    Assert.assertEquals(newGLs, new double[]{5.0, 4.0, 2.0});
}
@Test
public void testIndexIterator() {
    // Unrestricted iteration: after exhausting the iterator its final linear
    // index equals the number of vectors in the box (prod of seed[i]+1) minus one.
    int[] seed = new int[]{1, 2, 3, 4};
    Assert.assertEquals(runIterator(seed, -1).getLinearIndex(), prod(seed) - 1);
    seed = new int[]{1, 0, 1, 1};
    Assert.assertEquals(runIterator(seed, -1).getLinearIndex(), prod(seed) - 1);
    seed = new int[]{5};
    Assert.assertEquals(runIterator(seed, -1).getLinearIndex(), prod(seed) - 1);
    // Sum-restricted iteration: the final index is (#genotypes - 1) for the
    // given ploidy (restrictSumTo) and allele count (seed length).
    Assert.assertEquals(runIterator(new int[]{2, 2, 2, 2}, 2).getLinearIndex(), 9);          // diploid, 4 alleles
    Assert.assertEquals(runIterator(new int[]{2, 2}, 2).getLinearIndex(), 2);                // diploid, 2 alleles
    Assert.assertEquals(runIterator(new int[]{2, 2, 2}, 2).getLinearIndex(), 5);             // diploid, 3 alleles
    Assert.assertEquals(runIterator(new int[]{3, 3}, 3).getLinearIndex(), 3);                // triploid, 2 alleles
    Assert.assertEquals(runIterator(new int[]{3, 3, 3}, 3).getLinearIndex(), 9);             // triploid, 3 alleles
    Assert.assertEquals(runIterator(new int[]{3, 3, 3, 3}, 3).getLinearIndex(), 19);         // triploid, 4 alleles
    Assert.assertEquals(runIterator(new int[]{8, 8, 8, 8, 8, 8}, 8).getLinearIndex(), 1286); // 8-ploid, 6 alleles
}
/**
 * Exhausts a SumIterator built from the given seed vector and sum restriction,
 * optionally (when VERBOSE) dumping each vector together with the iterator's
 * linear index and the closed-form index, and returns the spent iterator.
 */
private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
    final GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
    while (iterator.hasNext()) {
        final int[] vector = iterator.getCurrentVector();
        final int closedFormIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(vector, vector.length, restrictSumTo);
        if (VERBOSE) {
            System.out.format("%d:", iterator.getLinearIndex());
            for (int position = 0; position < seed.length; position++)
                System.out.format("%d ", vector[position]);
            System.out.format(" LI:%d\n", closedFormIdx);
        }
        iterator.next();
    }
    return iterator;
}
/**
 * Returns the product of (x[i] + 1) over all entries - i.e. the number of
 * integer vectors v with 0 <= v[i] <= x[i] in every coordinate.
 */
private static int prod(int[] x) {
    int result = 1;
    for (int i = 0; i < x.length; i++)
        result *= x[i] + 1;
    return result;
}
@Test
public void testErrorModel() {
    // Exercise the reference-sample SNP error model: given `matches` reference
    // observations and `mismatches` alt observations, the error-probability
    // vector should peak at the phred-scaled mismatch fraction.
    final ArtificialReadPileupTestProvider pileupProvider = new ArtificialReadPileupTestProvider(1, "ref");
    final byte refBase = pileupProvider.getRefByte();
    final byte altBase = refBase == (byte) 'T' ? (byte) 'C' : (byte) 'T';
    final String refSample = pileupProvider.getSampleNames().get(0);
    final List<Allele> trueAlleles = new ArrayList<Allele>();
    trueAlleles.add(Allele.create(refBase, true));
    final VariantContext refVC = new VariantContextBuilder("test", "chr1", 5, 5,
            trueAlleles).genotypes(GenotypeBuilder.create(refSample, trueAlleles)).make();
    final int[] matchCounts = {95, 995, 9995, 10000};
    final int[] mismatchCounts = {1, 5, 10, 20};
    if (VERBOSE) System.out.println("Running SNP error model test");
    for (final int matches : matchCounts) {
        for (final int mismatches : mismatchCounts) {
            // artificial alignment context for the ref sample - no noise
            final Map<String, AlignmentContext> refContext = pileupProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altBase}), new int[]{matches, mismatches}, false, 30);
            final ReadBackedPileup refPileup = refContext.get(refSample).getBasePileup();
            final ErrorModel errorModel = new ErrorModel(UAC, refPileup, refVC, pileupProvider.getReferenceContext());
            final double[] errorVec = errorModel.getErrorModelVector().getProbabilityVector();
            // maximum-likelihood phred-scaled error rate from the raw counts
            final double mlEst = -10.0 * Math.log10((double) mismatches / (double) (matches + mismatches));
            final int peakIdx = (int) Math.round(mlEst);
            if (VERBOSE) System.out.format("Matches:%d Mismatches:%d maxV:%d peakIdx:%d\n", matches, mismatches, MathUtils.maxElementIndex(errorVec), peakIdx);
            Assert.assertEquals(MathUtils.maxElementIndex(errorVec), peakIdx);
        }
    }
}
@Test
public void testIndelErrorModel() {
    // Single reference sample, no other samples in the pileup provider.
    final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
    final byte refByte = refPileupTestProvider.getRefByte();
    // Bases inserted into the simulated pileup reads (deliberately different
    // from the truth insertion below, so extra bases count as mismatches).
    final String altBases = "TCA";
    final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
    // Truth call for the ref sample: het "TC" insertion after the ref base.
    final List<Allele> trueAlleles = new ArrayList<Allele>();
    trueAlleles.add(Allele.create(refByte, true));
    trueAlleles.add(Allele.create((char)refByte + "TC", false));
    final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
    final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
            refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
            genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
    final int[] matchArray = {95, 995, 9995, 10000};
    final int[] mismatchArray = {1,5,10,20};
    if (VERBOSE) System.out.println("Running indel error model test");
    for (int matches: matchArray) {
        for (int mismatches: mismatchArray) {
            // get artificial alignment context for ref sample - no noise
            // CASE 1: Test HET insertion
            // Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
            Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
            final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
            final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
            final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
            // ML error estimate in Phred space: -10*log10(mismatch fraction).
            final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
            final int peakIdx = (int)Math.round(mlEst);
            if (VERBOSE) System.out.format("Matches:%d Mismatches:%d peakIdx:%d\n",matches, mismatches, peakIdx);
            // Error-probability vector should peak at the ML estimate.
            Assert.assertEquals(MathUtils.maxElementIndex(errorVec),peakIdx);
            // CASE 2: Test HET deletion
        }
    }
    // create deletion VC
    final int delLength = 4;
    final List<Allele> delAlleles = new ArrayList<Allele>();
    // Ref allele spans the deleted bases plus anchor; alt is the anchor base alone.
    delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
    delAlleles.add(Allele.create(refByte, false));
    final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
            refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
            genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
    for (int matches: matchArray) {
        for (int mismatches: mismatchArray) {
            // get artificial alignment context for ref sample - no noise
            // CASE 1: Test HET deletion
            // Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches
            Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30);
            final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
            final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext());
            final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
            final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
            final int peakIdx = (int)Math.round(mlEst);
            if (VERBOSE) System.out.format("Matches:%d Mismatches:%d peakIdx:%d\n",matches, mismatches, peakIdx);
            Assert.assertEquals(MathUtils.maxElementIndex(errorVec),peakIdx);
            // CASE 2: Test HET deletion
        }
    }
}
/**
 * Verifies that pooled SNP genotype likelihoods recover the true allele count:
 * for each (depth, pool size, AC) combination a noiseless artificial pileup is
 * built and the most likely AC conformation must match the simulated one.
 * When SIMULATE_NOISY_PILEUP is enabled, additionally simulates noisy pileups
 * and writes the estimates to a table for offline analysis.
 */
@Test
public void testAddPileupToPoolGL() {
    // dummy error model - Q=infinity FAPP so that there's no source of uncertainty
    final double[] emv = new double[SAMUtils.MAX_PHRED_SCORE+1];
    // error rate for noisy tests
    final int PHRED_SITE_ERROR_RATE = 20;
    Arrays.fill(emv, Double.NEGATIVE_INFINITY);
    emv[SAMUtils.MAX_PHRED_SCORE] = 0;
    final int numSamples = 1;
    // have a high quality site say Q40 site, and create artificial pileups for one single sample, at coverage N, with given
    // true pool AC = x.
    final ArtificialReadPileupTestProvider readPileupTestProvider = new ArtificialReadPileupTestProvider(numSamples,"sample", (byte)SAMUtils.MAX_PHRED_SCORE);
    final ErrorModel noiselessErrorModel = new ErrorModel(emv);
    // Noisy error model: all probability mass at PHRED_SITE_ERROR_RATE.
    final double[] emverr = new double[SAMUtils.MAX_PHRED_SCORE+1];
    Arrays.fill(emverr, Double.NEGATIVE_INFINITY);
    emverr[PHRED_SITE_ERROR_RATE] = 0;
    final ErrorModel Q30ErrorModel = new ErrorModel(emverr);
    final int eventLength = 0; // test snp only
    final byte refByte = readPileupTestProvider.getRefByte();
    final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
    final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
    final Set<String> laneIDs = new TreeSet<String>();
    laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
    final HashMap<String, ErrorModel> noiselessErrorModels = new HashMap<String, ErrorModel>();
    // build per-lane error model for all lanes present in ref sample
    for (String laneID : laneIDs)
        noiselessErrorModels.put(laneID, noiselessErrorModel);
    final HashMap<String, ErrorModel> noisyErrorModels = new HashMap<String, ErrorModel>();
    // build per-lane error model for all lanes present in ref sample
    for (String laneID : laneIDs)
        noisyErrorModels.put(laneID, Q30ErrorModel);
    // all first ref allele
    allAlleles.add(Allele.create(refByte,true));
    for (byte b: BaseUtils.BASES) {
        if (refByte != b)
            allAlleles.add(Allele.create(b, false));
    }
    // Locate the ref and alt allele positions inside allAlleles.
    final int refIdx = 0;
    int altIdx = -1;
    for (int k=0; k < allAlleles.size(); k++)
        if (altByte == allAlleles.get(k).getBases()[0]) {
            altIdx = k;
            break;
        }
    PrintStream out = null;
    if (SIMULATE_NOISY_PILEUP) {
        try {
            out = new PrintStream(new File("GLUnitTest.table"));
            // out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
        }
        catch (Exception e) {
            // BUGFIX: this exception used to be silently swallowed, which left
            // `out` null and produced a NullPointerException at the very next
            // out.format() call. Fail fast with the real cause instead.
            throw new RuntimeException("Unable to open GLUnitTest.table for noisy pileup simulation", e);
        }
        // write header
        out.format("Depth\tPoolPloidy\tACTrue\tACEst\tREF\tALTTrue\tALTEst\n");
    }
    final int[] depthVector = {1000,10000};
    //final double[] alleleFrequencyVector = {0.01,0.1,0.5,1.0};
    final int[] spVector = {10,100};
    //final int[] spVector = {1};
    for (int depth : depthVector) {
        for (int nSamplesPerPool : spVector) {
            final int ploidy = 2*nSamplesPerPool;
            for (int ac =2; ac <=ploidy; ac++) {
                // simulate pileup with given AC and depth
                int altDepth = (int)Math.round( (double)ac/(double)ploidy * (double)depth);
                final int[] numReadsPerAllele = {depth-altDepth,altDepth};
                final Map<String,AlignmentContext> alignmentContextMap =
                        readPileupTestProvider.getAlignmentContextFromAlleles(eventLength, new String(new byte[]{altByte}), numReadsPerAllele);
                // get now likelihoods for this
                final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
                final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
                if (VERBOSE) {
                    System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
                    System.out.println(GL.toString());
                }
                // Every simulated base is high quality, so none may be filtered out.
                Assert.assertEquals(nGoodBases, depth);
                Pair<int[],Double> mlPair = GL.getMostLikelyACCount();
                // Most likely element has to be conformation REF = nSamples-AC,ALT = AC
                if (ac == 0) {
                    // NOTE: ac starts at 2 above, so this branch is currently never taken.
                    Assert.assertEquals(mlPair.first[refIdx],ploidy);
                } else {
                    Assert.assertEquals(mlPair.first[altIdx],ac);
                    Assert.assertEquals(mlPair.first[refIdx],ploidy-ac);
                }
                // simulate now pileup with base error rate
                if (SIMULATE_NOISY_PILEUP) {
                    System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\n", depth,ac,altDepth, nSamplesPerPool);
                    for (int k=0; k < NUM_SIMULATED_OBS; k++) {
                        final Map<String,AlignmentContext> noisyAlignmentContextMap =
                                readPileupTestProvider.getAlignmentContextFromAlleles(eventLength, new String(new byte[]{altByte}), numReadsPerAllele,
                                        true, PHRED_SITE_ERROR_RATE);
                        // get now likelihoods for this
                        final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
                        noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
                        mlPair = noisyGL.getMostLikelyACCount();
                        // Most likely element has to be conformation REF = nSamples-AC,ALT = AC
                        int acEst;
                        if (ac == 0) {
                            acEst = mlPair.first[refIdx];
                        } else {
                            acEst = mlPair.first[altIdx];
                        }
                        byte altEst = BaseUtils.baseIndexToSimpleBase(MathUtils.maxElementIndex(mlPair.first));
                        out.format("%d\t%d\t%d\t%d\t%c\t%c\t%c\n",depth, ploidy, ac, acEst, refByte, altByte, altEst);
                    }
                }
            }
        }
    }
    if (SIMULATE_NOISY_PILEUP)
        out.close();
}
}

View File

@ -0,0 +1,78 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.WalkerTest;
import java.util.Arrays;
import org.testng.annotations.Test;
/**
 * Integration tests for the UnifiedGenotyper's general-ploidy (pooled) calling modes,
 * covering SNP and indel discovery and genotype-given-alleles runs.
 *
 * @author delangel
 * @since 4/5/12
 */
public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
    // Reference and input data locations shared by all tests below.
    final static String REF = b37KGReference;
    final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
    final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
    final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
    final String REFSAMPLE_NAME = "NA12878";
    final String MTINTERVALS = "MT:1-3000";
    final String LSVINTERVALS = "20:40,000,000-41,000,000";
    final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
    final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
    final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
    /**
     * Runs the pooled UnifiedGenotyper over the mitochondrial interval using the
     * given BAM and NA12878 reference-sample calls, and checks the output MD5.
     */
    private void PC_MT_Test(String bam, String args, String name, String md5) {
        final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
                REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
        final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
        executeTest("testPoolCaller:"+name+" args=" + args, spec);
    }
    /**
     * Runs the pooled UnifiedGenotyper over the large-scale-validation interval
     * (chr20 40-41Mb pools BAM) with a reference sample, for the given -glm model.
     */
    private void PC_LSV_Test(String args, String name, String model, String md5) {
        final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
                REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
        final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
        executeTest("testPoolCaller:"+name+" args=" + args, spec);
    }
    /**
     * Same as PC_LSV_Test but without a reference sample (no --reference_sample_calls).
     */
    private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
        final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
                REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
        final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
        executeTest("testPoolCaller:"+name+" args=" + args, spec);
    }
    // GGA mode, both SNPs and indels, ploidy-24 pools.
    @Test
    public void testBOTH_GGA_Pools() {
        PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93");
    }
    // GGA mode, indels only, ploidy-24 pools.
    @Test
    public void testINDEL_GGA_Pools() {
        PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b");
    }
    // Indel discovery without a reference sample, ploidy 3.
    @Test
    public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
        PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d");
    }
    // Indel discovery without a reference sample, haploid.
    @Test
    public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
        PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4");
    }
    // Mitochondrial SNP discovery, 4 samples per pool (ploidy 8).
    @Test
    public void testMT_SNP_DISCOVERY_sp4() {
        PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459");
    }
    // Mitochondrial SNP GGA against NA12891 calls, 10 samples per pool (ploidy 20).
    @Test
    public void testMT_SNP_GGA_sp10() {
        PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7");
    }
}

View File

@ -0,0 +1,400 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 3/15/12
*/
import net.sf.picard.reference.ReferenceSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
* Unit tests for GenotypingEngine
*/
/**
 * Unit tests for GenotypingEngine: per-sample event allele lookup,
 * haplotype-to-VCF conversion, R^2 linkage calculation, and merging of
 * adjacent variant contexts.
 */
public class GenotypingEngineUnitTest extends BaseTest {
    private static ReferenceSequenceFile seq;
    private GenomeLocParser genomeLocParser;
    @BeforeClass
    public void init() throws FileNotFoundException {
        // sequence
        seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
        genomeLocParser = new GenomeLocParser(seq);
    }
    /**
     * Sample carries only C-bearing haplotypes, so the event alleles found for
     * the sample must be hom-var [C, C].
     */
    @Test
    public void testFindHomVarEventAllelesInSample() {
        final List<Allele> eventAlleles = new ArrayList<Allele>();
        eventAlleles.add( Allele.create("A", true) );
        eventAlleles.add( Allele.create("C", false) );
        final List<Allele> haplotypeAlleles = new ArrayList<Allele>();
        haplotypeAlleles.add( Allele.create("AATA", true) );
        haplotypeAlleles.add( Allele.create("AACA", false) );
        haplotypeAlleles.add( Allele.create("CATA", false) );
        haplotypeAlleles.add( Allele.create("CACA", false) );
        final ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
        haplotypes.add(new Haplotype("AATA".getBytes()));
        haplotypes.add(new Haplotype("AACA".getBytes()));
        haplotypes.add(new Haplotype("CATA".getBytes()));
        haplotypes.add(new Haplotype("CACA".getBytes()));
        final List<Allele> haplotypeAllelesForSample = new ArrayList<Allele>();
        haplotypeAllelesForSample.add( Allele.create("CATA", false) );
        haplotypeAllelesForSample.add( Allele.create("CACA", false) );
        // Map each event allele to the haplotypes that carry it: index 0 -> A, index 1 -> C.
        final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
        ArrayList<Haplotype> Aallele = new ArrayList<Haplotype>();
        Aallele.add(haplotypes.get(0));
        Aallele.add(haplotypes.get(1));
        ArrayList<Haplotype> Callele = new ArrayList<Haplotype>();
        Callele.add(haplotypes.get(2));
        Callele.add(haplotypes.get(3));
        alleleMapper.add(Aallele);
        alleleMapper.add(Callele);
        final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
        eventAllelesForSample.add( Allele.create("C", false) );
        eventAllelesForSample.add( Allele.create("C", false) );
        // Log the mismatch detail before the hard assertion for easier debugging.
        if(!compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) {
            logger.warn("calc alleles = " + GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes));
            logger.warn("expected alleles = " + eventAllelesForSample);
        }
        Assert.assertTrue(compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)));
    }
    /**
     * Sample carries one ref (A) and one T-bearing haplotype, so the event
     * alleles found for the sample must be het [A, T].
     */
    @Test
    public void testFindHetEventAllelesInSample() {
        final List<Allele> eventAlleles = new ArrayList<Allele>();
        eventAlleles.add( Allele.create("A", true) );
        eventAlleles.add( Allele.create("C", false) );
        eventAlleles.add( Allele.create("T", false) );
        final List<Allele> haplotypeAlleles = new ArrayList<Allele>();
        haplotypeAlleles.add( Allele.create("AATA", true) );
        haplotypeAlleles.add( Allele.create("AACA", false) );
        haplotypeAlleles.add( Allele.create("CATA", false) );
        haplotypeAlleles.add( Allele.create("CACA", false) );
        haplotypeAlleles.add( Allele.create("TACA", false) );
        haplotypeAlleles.add( Allele.create("TTCA", false) );
        haplotypeAlleles.add( Allele.create("TTTA", false) );
        final ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
        haplotypes.add(new Haplotype("AATA".getBytes()));
        haplotypes.add(new Haplotype("AACA".getBytes()));
        haplotypes.add(new Haplotype("CATA".getBytes()));
        haplotypes.add(new Haplotype("CACA".getBytes()));
        haplotypes.add(new Haplotype("TACA".getBytes()));
        haplotypes.add(new Haplotype("TTCA".getBytes()));
        haplotypes.add(new Haplotype("TTTA".getBytes()));
        final List<Allele> haplotypeAllelesForSample = new ArrayList<Allele>();
        haplotypeAllelesForSample.add( Allele.create("TTTA", false) );
        haplotypeAllelesForSample.add( Allele.create("AATA", true) );
        // Map each event allele to the haplotypes that carry it: A, C, then T.
        final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
        ArrayList<Haplotype> Aallele = new ArrayList<Haplotype>();
        Aallele.add(haplotypes.get(0));
        Aallele.add(haplotypes.get(1));
        ArrayList<Haplotype> Callele = new ArrayList<Haplotype>();
        Callele.add(haplotypes.get(2));
        Callele.add(haplotypes.get(3));
        ArrayList<Haplotype> Tallele = new ArrayList<Haplotype>();
        Tallele.add(haplotypes.get(4));
        Tallele.add(haplotypes.get(5));
        Tallele.add(haplotypes.get(6));
        alleleMapper.add(Aallele);
        alleleMapper.add(Callele);
        alleleMapper.add(Tallele);
        final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
        eventAllelesForSample.add( Allele.create("A", true) );
        eventAllelesForSample.add( Allele.create("T", false) );
        if(!compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) {
            logger.warn("calc alleles = " + GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes));
            logger.warn("expected alleles = " + eventAllelesForSample);
        }
        Assert.assertTrue(compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)));
    }
    /**
     * Order-insensitive multiset comparison of two allele lists.
     *
     * BUGFIX: the previous implementation only checked that each element of l1
     * appeared somewhere in l2, so equal-size lists with different duplicate
     * multiplicities (e.g. expected [C,C] vs calculated [C,T]) compared equal.
     * Removing each matched element from a working copy enforces counts too.
     */
    private boolean compareAlleleLists(List<Allele> l1, List<Allele> l2) {
        if( l1.size() != l2.size() ) {
            return false; // sanity check
        }
        final List<Allele> unmatched = new ArrayList<Allele>(l2);
        for( int i=0; i < l1.size(); i++ ){
            // remove() returns false once all copies of this allele are used up
            if ( !unmatched.remove(l1.get(i)) )
                return false;
        }
        return true;
    }
    /**
     * Data-provider helper: aligns a haplotype against a reference via
     * Smith-Waterman and converts the alignment to variant contexts, to be
     * compared against the expected {position -> event type} map.
     */
    private class BasicGenotypingTestProvider extends TestDataProvider {
        byte[] ref;
        byte[] hap;
        HashMap<Integer,Byte> expected;
        GenotypingEngine ge = new GenotypingEngine(false, 0, false);
        public BasicGenotypingTestProvider(String refString, String hapString, HashMap<Integer, Byte> expected) {
            super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString));
            ref = refString.getBytes();
            hap = hapString.getBytes();
            this.expected = expected;
        }
        public HashMap<Integer,VariantContext> calcAlignment() {
            final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap);
            return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0);
        }
    }
    /**
     * Builds SNP/MNP/insertion/deletion scenarios at several flanking-context
     * sizes; map values are 'M' (SNP/MNP), 'I' (insertion), 'D' (deletion)
     * keyed by expected event position.
     */
    @DataProvider(name = "BasicGenotypingTestProvider")
    public Object[][] makeBasicGenotypingTests() {
        // Single SNP at position 1 (after the context prefix).
        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
            map.put(1 + contextSize, (byte)'M');
            final String context = Utils.dupString('G', contextSize);
            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACTAGCCGATAG", map);
        }
        // Two separated SNPs.
        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
            map.put(2 + contextSize, (byte)'M');
            map.put(21 + contextSize, (byte)'M');
            final String context = Utils.dupString('G', contextSize);
            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG", "ATCTCGCATCGCGAGCATCGCCTAGCCGATAG", map);
        }
        // SNP plus insertion.
        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
            map.put(1 + contextSize, (byte)'M');
            map.put(20 + contextSize, (byte)'I');
            final String context = Utils.dupString('G', contextSize);
            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACACTAGCCGATAG", map);
        }
        // SNP plus deletion.
        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
            map.put(1 + contextSize, (byte)'M');
            map.put(20 + contextSize, (byte)'D');
            final String context = Utils.dupString('G', contextSize);
            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCGATAG", map);
        }
        // SNP plus deletion, context only on the right side (expected positions fixed).
        for( int contextSize : new int[]{1,5,9,24,36} ) {
            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
            map.put(1, (byte)'M');
            map.put(20, (byte)'D');
            final String context = Utils.dupString('G', contextSize);
            new BasicGenotypingTestProvider("AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCGATAG", map);
        }
        // SNP + insertion + deletion combined.
        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
            map.put(2 + contextSize, (byte)'M');
            map.put(20 + contextSize, (byte)'I');
            map.put(30 + contextSize, (byte)'D');
            final String context = Utils.dupString('G', contextSize);
            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "ACCTCGCATCGCGAGCATCGTTACTAGCCGATG", map);
        }
        // SNP + deletion + another SNP downstream of the deletion.
        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
            map.put(1 + contextSize, (byte)'M');
            map.put(20 + contextSize, (byte)'D');
            map.put(28 + contextSize, (byte)'M');
            final String context = Utils.dupString('G', contextSize);
            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCCATAG", map);
        }
        return BasicGenotypingTestProvider.getTests(BasicGenotypingTestProvider.class);
    }
    @Test(dataProvider = "BasicGenotypingTestProvider", enabled = true)
    public void testHaplotypeToVCF(BasicGenotypingTestProvider cfg) {
        HashMap<Integer,VariantContext> calculatedMap = cfg.calcAlignment();
        HashMap<Integer,Byte> expectedMap = cfg.expected;
        logger.warn(String.format("Test: %s", cfg.toString()));
        // Log the mismatch detail before the hard assertion for easier debugging.
        if(!compareVCMaps(calculatedMap, expectedMap)) {
            logger.warn("calc map = " + calculatedMap);
            logger.warn("expected map = " + expectedMap);
        }
        Assert.assertTrue(compareVCMaps(calculatedMap, expectedMap));
    }
    /**
     * Tests that we get the right values from the R^2 calculation
     */
    @Test
    public void testCalculateR2LD() {
        logger.warn("Executing testCalculateR2LD");
        Assert.assertEquals(GenotypingEngine.calculateR2LD(1,1,1,1), 0.0, 0.00001);
        Assert.assertEquals(GenotypingEngine.calculateR2LD(100,100,100,100), 0.0, 0.00001);
        Assert.assertEquals(GenotypingEngine.calculateR2LD(1,0,0,1), 1.0, 0.00001);
        Assert.assertEquals(GenotypingEngine.calculateR2LD(100,0,0,100), 1.0, 0.00001);
        // Hand-computed R^2 for haplotype counts (1,2,3,4).
        Assert.assertEquals(GenotypingEngine.calculateR2LD(1,2,3,4), (0.1 - 0.12) * (0.1 - 0.12) / (0.3 * 0.7 * 0.4 * 0.6), 0.00001);
    }
    /**
     * Exercises createMergedVariantContext across every pairing of event types
     * (SNP/insertion/deletion/complex), checking alleles, start, and end of the
     * merged context against a hand-built truth VC.
     */
    @Test
    public void testCreateMergedVariantContext() {
        logger.warn("Executing testCreateMergedVariantContext");
        final byte[] ref = "AATTCCGGAATTCCGGAATT".getBytes();
        final GenomeLoc refLoc = genomeLocParser.createGenomeLoc("2", 1700, 1700 + ref.length);
        // SNP + SNP = simple MNP
        VariantContext thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
        VariantContext nextVC = new VariantContextBuilder().loc("2", 1704, 1704).alleles("C","G").make();
        VariantContext truthVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","GG").source("merged").make();
        VariantContext mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // SNP + ref + SNP = MNP with ref base gap
        thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
        truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCG").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // insertion + SNP
        thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
        truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // SNP + insertion
        thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
        truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // deletion + SNP
        thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
        truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // SNP + deletion
        thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
        truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // insertion + deletion = MNP
        thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
        truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // insertion + deletion
        thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
        truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // insertion + insertion
        thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
        truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // deletion + deletion
        thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
        nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
        truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // deletion + insertion (abutting)
        thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
        nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
        truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
        // complex + complex
        thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
        nextVC = new VariantContextBuilder().loc("2", 1706, 1707).alleles("GG","AC").make();
        truthVC = new VariantContextBuilder().loc("2", 1703, 1707).alleles("TCCGG","AAACAC").source("merged").make();
        mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
        logger.warn(truthVC + " == " + mergedVC);
        Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
        Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
        Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
    }
    /**
     * Private function to compare HashMap of VCs, it only checks the types and start locations of the VariantContext
     */
    private boolean compareVCMaps(HashMap<Integer, VariantContext> calc, HashMap<Integer, Byte> expected) {
        if( !calc.keySet().equals(expected.keySet()) ) { return false; } // sanity check
        for( Integer loc : expected.keySet() ) {
            Byte type = expected.get(loc);
            switch( type ) {
                case 'I':
                    if( !calc.get(loc).isSimpleInsertion() ) { return false; }
                    break;
                case 'D':
                    if( !calc.get(loc).isSimpleDeletion() ) { return false; }
                    break;
                case 'M':
                    // 'M' accepts either an SNP or an MNP at the expected position.
                    if( !(calc.get(loc).isMNP() || calc.get(loc).isSNP()) ) { return false; }
                    break;
                default:
                    return false;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,47 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Integration tests for the HaplotypeCaller walker, pinned by md5 of the
 * output VCF (no_cmdline_in_header keeps the output stable across runs).
 */
public class HaplotypeCallerIntegrationTest extends WalkerTest {
    final static String REF = b37KGReference;
    final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
    final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
    final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
    //final String RECAL_FILE = validationDataLocation + "NA12878.kmer.8.subset.recal_data.bqsr";

    // Runs the caller over the shared interval list plus extra args, checking the md5.
    // Note: %%s below yields a literal "%s" that WalkerTestSpec fills with the output file.
    private void HCTest(String bam, String args, String md5) {
        final String cmdline = String.format(
                "-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header -o %%s -minPruning 3",
                REF, bam, INTERVALS_FILE);
        final WalkerTestSpec spec = new WalkerTestSpec(cmdline + " " + args, Arrays.asList(md5));
        executeTest("testHaplotypeCaller: args=" + args, spec);
    }

    // Multi-sample trio calling with default arguments.
    @Test
    public void testHaplotypeCallerMultiSample() {
        HCTest(CEUTRIO_BAM, "", "29ebfabcd4a42d4c5c2a576219cffb3d");
    }

    // Single-sample calling with default arguments.
    @Test
    public void testHaplotypeCallerSingleSample() {
        HCTest(NA12878_BAM, "", "9732313b8a12faa347f6ebe96518c5df");
    }

    // Genotype-given-alleles mode against a fixed indel sites VCF.
    @Test
    public void testHaplotypeCallerMultiSampleGGA() {
        HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "5e1d49d4110cd96c2e25f8e1da217e9e");
    }

    // Same as HCTest but over three hard-coded regions containing complex variants.
    private void HCTestComplexVariants(String bam, String args, String md5) {
        final String cmdline = String.format(
                "-T HaplotypeCaller -R %s -I %s -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %%s -minPruning 3",
                REF, bam);
        executeTest("testHaplotypeCallerComplexVariants: args=" + args,
                new WalkerTestSpec(cmdline + " " + args, Arrays.asList(md5)));
    }

    @Test
    public void testHaplotypeCallerMultiSampleComplex() {
        HCTestComplexVariants(CEUTRIO_BAM, "", "53df51e6071664725f6e7497f5ee5adf");
    }
}

View File

@ -0,0 +1,174 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 3/14/12
*/
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Unit tests for LikelihoodCalculationEngine
*/
/**
 * Unit tests for LikelihoodCalculationEngine.
 */
public class LikelihoodCalculationEngineUnitTest extends BaseTest {

    /**
     * normalizeDiploidLikelihoodMatrixFromLog10 should shift the meaningful
     * (lower-triangular) log10 entries so the largest becomes 0.0; the expected
     * data shows the 0 placeholders above the diagonal are left untouched.
     */
    @Test
    public void testNormalizeDiploidLikelihoodMatrixFromLog10() {
        double[][] likelihoodMatrix = {
                {-90.2, 0, 0},
                {-190.1, -2.1, 0},
                {-7.0, -17.5, -35.9}
        };
        // max lower-triangular entry is -2.1, so every meaningful entry shifts up by 2.1
        double[][] normalizedMatrix = {
                {-88.1, 0, 0},
                {-188.0, 0.0, 0},
                {-4.9, -15.4, -33.8}
        };
        Assert.assertTrue(compareDoubleArrays(LikelihoodCalculationEngine.normalizeDiploidLikelihoodMatrixFromLog10(likelihoodMatrix), normalizedMatrix));

        double[][] likelihoodMatrix2 = {
                {-90.2, 0, 0, 0},
                {-190.1, -2.1, 0, 0},
                {-7.0, -17.5, -35.9, 0},
                {-7.0, -17.5, -35.9, -1000.0},
        };
        double[][] normalizedMatrix2 = {
                {-88.1, 0, 0, 0},
                {-188.0, 0.0, 0, 0},
                {-4.9, -15.4, -33.8, 0},
                {-4.9, -15.4, -33.8, -997.9},
        };
        Assert.assertTrue(compareDoubleArrays(LikelihoodCalculationEngine.normalizeDiploidLikelihoodMatrixFromLog10(likelihoodMatrix2), normalizedMatrix2));
    }

    /**
     * Test-case provider: a single read with per-haplotype log10 likelihoods.
     * The third haplotype likelihood is optional (null means a two-haplotype case).
     */
    private class BasicLikelihoodTestProvider extends TestDataProvider {
        public Double readLikelihoodForHaplotype1;
        public Double readLikelihoodForHaplotype2;
        public Double readLikelihoodForHaplotype3; // null => only two haplotypes in play

        public BasicLikelihoodTestProvider(double a, double b) {
            super(BasicLikelihoodTestProvider.class, String.format("Diploid haplotype likelihoods for reads %f / %f",a,b));
            readLikelihoodForHaplotype1 = a;
            readLikelihoodForHaplotype2 = b;
            readLikelihoodForHaplotype3 = null;
        }

        public BasicLikelihoodTestProvider(double a, double b, double c) {
            super(BasicLikelihoodTestProvider.class, String.format("Diploid haplotype likelihoods for reads %f / %f / %f",a,b,c));
            readLikelihoodForHaplotype1 = a;
            readLikelihoodForHaplotype2 = b;
            readLikelihoodForHaplotype3 = c;
        }

        /**
         * Analytically-expected diploid genotype matrix, normalized so the max
         * entry is 0. Heterozygous cells average the two haplotype likelihoods
         * 50/50 in real space; unused upper cells are NEGATIVE_INFINITY (the
         * comparison helper skips infinite entries).
         */
        public double[][] expectedDiploidHaplotypeMatrix() {
            if( readLikelihoodForHaplotype3 == null ) {
                double maxValue = Math.max(readLikelihoodForHaplotype1,readLikelihoodForHaplotype2);
                double[][] normalizedMatrix = {
                        {readLikelihoodForHaplotype1 - maxValue, Double.NEGATIVE_INFINITY},
                        {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype2)) - maxValue, readLikelihoodForHaplotype2 - maxValue}
                };
                return normalizedMatrix;
            } else {
                double maxValue = MathUtils.max(readLikelihoodForHaplotype1,readLikelihoodForHaplotype2,readLikelihoodForHaplotype3);
                double[][] normalizedMatrix = {
                        {readLikelihoodForHaplotype1 - maxValue, Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY},
                        {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype2)) - maxValue, readLikelihoodForHaplotype2 - maxValue, Double.NEGATIVE_INFINITY},
                        {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype3)) - maxValue,
                                Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype2) + 0.5*Math.pow(10,readLikelihoodForHaplotype3)) - maxValue, readLikelihoodForHaplotype3 - maxValue}
                };
                return normalizedMatrix;
            }
        }

        /**
         * The matrix as computed by the engine: builds one synthetic haplotype per
         * non-null likelihood, attaches the single read's likelihood to it, and
         * asks the engine for the diploid genotype likelihoods.
         */
        public double[][] calcDiploidHaplotypeMatrix() {
            ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
            for( int iii = 1; iii <= 3; iii++) {
                Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) );
                int readCount = 1;
                if( readLikelihood != null ) {
                    // distinct dummy bases per haplotype so they are distinguishable
                    Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() );
                    haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}, new int[]{readCount});
                    haplotypes.add(haplotype);
                }
            }
            return LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, "myTestSample");
        }
    }

    @DataProvider(name = "BasicLikelihoodTestProvider")
    public Object[][] makeBasicLikelihoodTests() {
        // two-haplotype cases
        new BasicLikelihoodTestProvider(-1.1, -2.2);
        new BasicLikelihoodTestProvider(-2.2, -1.1);
        new BasicLikelihoodTestProvider(-1.1, -1.1);
        new BasicLikelihoodTestProvider(-9.7, -15.0);
        new BasicLikelihoodTestProvider(-1.1, -2000.2);
        new BasicLikelihoodTestProvider(-1000.1, -2.2);
        new BasicLikelihoodTestProvider(0, 0);
        new BasicLikelihoodTestProvider(-1.1, 0);
        new BasicLikelihoodTestProvider(0, -2.2);
        new BasicLikelihoodTestProvider(-100.1, -200.2);
        // three-haplotype cases, third haplotype maximally likely
        new BasicLikelihoodTestProvider(-1.1, -2.2, 0);
        new BasicLikelihoodTestProvider(-2.2, -1.1, 0);
        new BasicLikelihoodTestProvider(-1.1, -1.1, 0);
        new BasicLikelihoodTestProvider(-9.7, -15.0, 0);
        new BasicLikelihoodTestProvider(-1.1, -2000.2, 0);
        new BasicLikelihoodTestProvider(-1000.1, -2.2, 0);
        new BasicLikelihoodTestProvider(0, 0, 0);
        new BasicLikelihoodTestProvider(-1.1, 0, 0);
        new BasicLikelihoodTestProvider(0, -2.2, 0);
        new BasicLikelihoodTestProvider(-100.1, -200.2, 0);
        // three-haplotype cases, third haplotype moderately unlikely
        new BasicLikelihoodTestProvider(-1.1, -2.2, -12.121);
        new BasicLikelihoodTestProvider(-2.2, -1.1, -12.121);
        new BasicLikelihoodTestProvider(-1.1, -1.1, -12.121);
        new BasicLikelihoodTestProvider(-9.7, -15.0, -12.121);
        new BasicLikelihoodTestProvider(-1.1, -2000.2, -12.121);
        new BasicLikelihoodTestProvider(-1000.1, -2.2, -12.121);
        new BasicLikelihoodTestProvider(0, 0, -12.121);
        new BasicLikelihoodTestProvider(-1.1, 0, -12.121);
        new BasicLikelihoodTestProvider(0, -2.2, -12.121);
        new BasicLikelihoodTestProvider(-100.1, -200.2, -12.121);
        return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
    }

    @Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true)
    public void testOneReadWithTwoOrThreeHaplotypes(BasicLikelihoodTestProvider cfg) {
        double[][] calculatedMatrix = cfg.calcDiploidHaplotypeMatrix();
        double[][] expectedMatrix = cfg.expectedDiploidHaplotypeMatrix();
        logger.warn(String.format("Test: %s", cfg.toString()));
        Assert.assertTrue(compareDoubleArrays(calculatedMatrix, expectedMatrix));
    }

    /**
     * Compares two 2-d arrays entry-by-entry with MathUtils.compareDoubles.
     * Cells where either value is infinite are skipped — those are the
     * impossible genotype slots filled with NEGATIVE_INFINITY above.
     *
     * @return true iff shapes match and all finite entries compare equal
     */
    private boolean compareDoubleArrays(double[][] b1, double[][] b2) {
        if( b1.length != b2.length ) {
            return false; // sanity check: same number of rows
        }
        for( int i=0; i < b1.length; i++ ){
            if( b1[i].length != b2[i].length) {
                return false; // sanity check: same row length
            }
            // BUGFIX: iterate over the row length (b1[i].length), not the number of
            // rows (b1.length); the old bound was only correct for square matrices.
            for( int j=0; j < b1[i].length; j++ ){
                if ( MathUtils.compareDoubles(b1[i][j], b2[i][j]) != 0 && !Double.isInfinite(b1[i][j]) && !Double.isInfinite(b2[i][j]))
                    return false;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,298 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 3/27/12
*/
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.jgrapht.graph.DefaultDirectedGraph;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;
/**
 * Unit tests for SimpleDeBruijnAssembler graph operations: node merging,
 * pruning, and elimination of non-reference paths.
 */
public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {

    /**
     * Builds a de Bruijn graph from overlapping kmers of a variation-free
     * sequence; after mergeNodes() the whole chain should collapse into a
     * single vertex holding the full sequence.
     */
    private class MergeNodesWithNoVariationTestProvider extends TestDataProvider {
        public byte[] sequence;   // full sequence the kmers are drawn from
        public int KMER_LENGTH;   // kmer size used to build the graph

        public MergeNodesWithNoVariationTestProvider(String seq, int kmer) {
            super(MergeNodesWithNoVariationTestProvider.class, String.format("Merge nodes with no variation test. kmer = %d, seq = %s", kmer, seq));
            sequence = seq.getBytes();
            KMER_LENGTH = kmer;
        }

        // Expected result: one vertex containing the entire sequence, no edges.
        public DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph() {
            DeBruijnVertex v = new DeBruijnVertex(sequence, 0);
            DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
            graph.addVertex(v);
            return graph;
        }

        // Builds the graph one adjacent kmer pair at a time, then merges
        // non-branching nodes.
        public DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> calcGraph() {
            DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
            final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
            for (int i = 0; i < kmersInSequence - 1; i++) {
                // get the kmers
                final byte[] kmer1 = new byte[KMER_LENGTH];
                System.arraycopy(sequence, i, kmer1, 0, KMER_LENGTH);
                final byte[] kmer2 = new byte[KMER_LENGTH];
                System.arraycopy(sequence, i+1, kmer2, 0, KMER_LENGTH);
                SimpleDeBruijnAssembler.addKmersToGraph(graph, kmer1, kmer2, false);
            }
            SimpleDeBruijnAssembler.mergeNodes(graph);
            return graph;
        }
    }

    @DataProvider(name = "MergeNodesWithNoVariationTestProvider")
    public Object[][] makeMergeNodesWithNoVariationTests() {
        // short sequences across a range of kmer sizes
        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 3);
        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 4);
        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 5);
        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 6);
        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 7);
        new MergeNodesWithNoVariationTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", 6);
        // long realistic sequences with large kmer sizes
        new MergeNodesWithNoVariationTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 66);
        new MergeNodesWithNoVariationTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 76);
        return MergeNodesWithNoVariationTestProvider.getTests(MergeNodesWithNoVariationTestProvider.class);
    }

    @Test(dataProvider = "MergeNodesWithNoVariationTestProvider", enabled = true)
    public void testMergeNodesWithNoVariation(MergeNodesWithNoVariationTestProvider cfg) {
        logger.warn(String.format("Test: %s", cfg.toString()));
        Assert.assertTrue(graphEquals(cfg.calcGraph(), cfg.expectedGraph()));
    }

    /**
     * pruneGraph with a prune factor of 2: the expected graphs encode the
     * contract that low-multiplicity edges (and the vertices they strand) are
     * removed, while reference edges (isRef == true) are kept regardless of
     * multiplicity.
     */
    @Test(enabled = true)
    public void testPruneGraph() {
        DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);

        DeBruijnVertex v = new DeBruijnVertex("ATGG".getBytes(), 0);
        DeBruijnVertex v2 = new DeBruijnVertex("ATGGA".getBytes(), 0);
        DeBruijnVertex v3 = new DeBruijnVertex("ATGGT".getBytes(), 0);
        DeBruijnVertex v4 = new DeBruijnVertex("ATGGG".getBytes(), 0);
        DeBruijnVertex v5 = new DeBruijnVertex("ATGGC".getBytes(), 0);
        DeBruijnVertex v6 = new DeBruijnVertex("ATGGCCCCCC".getBytes(), 0);

        // Scenario 1: all edges non-reference; multiplicities 1 and 2 at the
        // chain's ends should be pruned along with the stranded vertices v and v6.
        graph.addVertex(v);
        graph.addVertex(v2);
        graph.addVertex(v3);
        graph.addVertex(v4);
        graph.addVertex(v5);
        graph.addVertex(v6);
        graph.addEdge(v, v2, new DeBruijnEdge(false, 1));
        graph.addEdge(v2, v3, new DeBruijnEdge(false, 3));
        graph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
        graph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
        graph.addEdge(v5, v6, new DeBruijnEdge(false, 2));
        expectedGraph.addVertex(v2);
        expectedGraph.addVertex(v3);
        expectedGraph.addVertex(v4);
        expectedGraph.addVertex(v5);
        expectedGraph.addEdge(v2, v3, new DeBruijnEdge(false, 3));
        expectedGraph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
        expectedGraph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
        SimpleDeBruijnAssembler.pruneGraph(graph, 2);
        Assert.assertTrue(graphEquals(graph, expectedGraph));

        // Scenario 2: the multiplicity-1 edge is a reference edge, so it (and
        // its vertices) survive pruning; only the disconnected v6 disappears.
        graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        graph.addVertex(v);
        graph.addVertex(v2);
        graph.addVertex(v3);
        graph.addVertex(v4);
        graph.addVertex(v5);
        graph.addVertex(v6);
        graph.addEdge(v, v2, new DeBruijnEdge(true, 1));
        graph.addEdge(v2, v3, new DeBruijnEdge(false, 3));
        graph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
        graph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
        expectedGraph.addVertex(v);
        expectedGraph.addVertex(v2);
        expectedGraph.addVertex(v3);
        expectedGraph.addVertex(v4);
        expectedGraph.addVertex(v5);
        expectedGraph.addEdge(v, v2, new DeBruijnEdge(true, 1));
        expectedGraph.addEdge(v2, v3, new DeBruijnEdge(false, 3));
        expectedGraph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
        expectedGraph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
        SimpleDeBruijnAssembler.pruneGraph(graph, 2);
        Assert.assertTrue(graphEquals(graph, expectedGraph));
    }

    @Test(enabled=false)
    // not ready yet
    public void testBasicGraphCreation() {
        // Builds a pileup with a 50/50 ref/alt split and feeds its reads to the
        // assembler's graph construction. Assertions are still TODO.
        final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
        final byte refBase = refPileupTestProvider.getReferenceContext().getBase();
        final String altBase = (refBase==(byte)'A'?"C":"A");
        final int matches = 50;
        final int mismatches = 50;
        Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30);
        PrintStream graphWriter = null;
        // NOTE(review): failure to open the debug file is silently ignored here
        try{
            graphWriter = new PrintStream("du.txt");
        } catch (Exception e) {}
        SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter);
        final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases());
        refHaplotype.setIsReference(true);
        assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype);
        /* // clean up the graphs by pruning and merging
        for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
        SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR );
        //eliminateNonRefPaths( graph );
        SimpleDeBruijnAssembler.mergeNodes( graph );
        }
        */
        if( graphWriter != null ) {
            assembler.printGraphs();
        }
        int k=2; // NOTE(review): unused local — leftover from in-progress work
        // find the best paths in the graphs
        // return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
    }

    /**
     * eliminateNonRefPaths: the expected graphs encode that only vertices and
     * edges along reference (isRef == true) edges survive; everything reachable
     * only via non-reference edges is removed, including back-edges into the
     * reference path (third scenario).
     */
    @Test(enabled = true)
    public void testEliminateNonRefPaths() {
        DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);

        DeBruijnVertex v = new DeBruijnVertex("ATGG".getBytes(), 0);
        DeBruijnVertex v2 = new DeBruijnVertex("ATGGA".getBytes(), 0);
        DeBruijnVertex v3 = new DeBruijnVertex("ATGGT".getBytes(), 0);
        DeBruijnVertex v4 = new DeBruijnVertex("ATGGG".getBytes(), 0);
        DeBruijnVertex v5 = new DeBruijnVertex("ATGGC".getBytes(), 0);
        DeBruijnVertex v6 = new DeBruijnVertex("ATGGCCCCCC".getBytes(), 0);

        // Scenario 1: non-ref edges at both ends of a ref chain get trimmed.
        graph.addVertex(v);
        graph.addVertex(v2);
        graph.addVertex(v3);
        graph.addVertex(v4);
        graph.addVertex(v5);
        graph.addVertex(v6);
        graph.addEdge(v, v2, new DeBruijnEdge(false));
        graph.addEdge(v2, v3, new DeBruijnEdge(true));
        graph.addEdge(v3, v4, new DeBruijnEdge(true));
        graph.addEdge(v4, v5, new DeBruijnEdge(true));
        graph.addEdge(v5, v6, new DeBruijnEdge(false));
        expectedGraph.addVertex(v2);
        expectedGraph.addVertex(v3);
        expectedGraph.addVertex(v4);
        expectedGraph.addVertex(v5);
        expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
        expectedGraph.addEdge(v3, v4, new DeBruijnEdge());
        expectedGraph.addEdge(v4, v5, new DeBruijnEdge());
        SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
        Assert.assertTrue(graphEquals(graph, expectedGraph));

        // Scenario 2: a disconnected non-ref component (v4->v5->v6) is removed
        // entirely; the ref component v->v2->v3 survives.
        graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        graph.addVertex(v);
        graph.addVertex(v2);
        graph.addVertex(v3);
        graph.addVertex(v4);
        graph.addVertex(v5);
        graph.addVertex(v6);
        graph.addEdge(v, v2, new DeBruijnEdge(true));
        graph.addEdge(v2, v3, new DeBruijnEdge(true));
        graph.addEdge(v4, v5, new DeBruijnEdge(false));
        graph.addEdge(v5, v6, new DeBruijnEdge(false));
        expectedGraph.addVertex(v);
        expectedGraph.addVertex(v2);
        expectedGraph.addVertex(v3);
        expectedGraph.addEdge(v, v2, new DeBruijnEdge());
        expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
        SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
        Assert.assertTrue(graphEquals(graph, expectedGraph));

        // Scenario 3: like scenario 2 but with an extra non-ref back-edge into
        // the ref path (v4->v2); the result must be identical.
        graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
        graph.addVertex(v);
        graph.addVertex(v2);
        graph.addVertex(v3);
        graph.addVertex(v4);
        graph.addVertex(v5);
        graph.addVertex(v6);
        graph.addEdge(v, v2, new DeBruijnEdge(true));
        graph.addEdge(v2, v3, new DeBruijnEdge(true));
        graph.addEdge(v4, v5, new DeBruijnEdge(false));
        graph.addEdge(v5, v6, new DeBruijnEdge(false));
        graph.addEdge(v4, v2, new DeBruijnEdge(false));
        expectedGraph.addVertex(v);
        expectedGraph.addVertex(v2);
        expectedGraph.addVertex(v3);
        expectedGraph.addEdge(v, v2, new DeBruijnEdge());
        expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
        SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
        Assert.assertTrue(graphEquals(graph, expectedGraph));
    }

    /**
     * Graph equality: identical vertex sets plus symmetric edge containment,
     * where edge equivalence is delegated to DeBruijnEdge.equals(g1, e2, g2).
     * O(E^2), which is fine for these tiny test graphs.
     */
    private boolean graphEquals(DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> g1, DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> g2) {
        if( !(g1.vertexSet().containsAll(g2.vertexSet()) && g2.vertexSet().containsAll(g1.vertexSet())) ) {
            return false;
        }
        // every edge of g1 must have an equivalent in g2 ...
        for( DeBruijnEdge e1 : g1.edgeSet() ) {
            boolean found = false;
            for( DeBruijnEdge e2 : g2.edgeSet() ) {
                if( e1.equals(g1, e2, g2) ) { found = true; break; }
            }
            if( !found ) { return false; }
        }
        // ... and vice versa
        for( DeBruijnEdge e2 : g2.edgeSet() ) {
            boolean found = false;
            for( DeBruijnEdge e1 : g1.edgeSet() ) {
                if( e2.equals(g2, e1, g1) ) { found = true; break; }
            }
            if( !found ) { return false; }
        }
        return true;
    }
}

View File

@ -1,121 +0,0 @@
#!/bin/env Rscript

# Plots base-quality recalibration diagnostics for a single covariate.
# Usage: script <table-file> <covariate-name>
# Reads a whitespace-delimited table (expects at least columns Covariate,
# nBases, Qreported, Qempirical) and writes three PDFs next to the input:
# residual error vs covariate, mean reported quality vs covariate, and a
# covariate histogram.
library(tools)
args <- commandArgs(TRUE)
verbose = TRUE   # NOTE(review): never referenced below
input = args[1]
covariateName = args[2]

outfile = paste(input, ".qual_diff_v_", covariateName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
# NOTE(review): 'c' masks base::c from here on; kept as-is to preserve behavior
c <- read.table(input, header=T)
c <- c[sort.list(c[,1]),]   # sort rows by the covariate column

#
# Plot residual error as a function of the covariate
#
# Split bins into well-populated (>= 1000 observations) and sparse ones so
# they can be drawn in different colors.
d.good <- c[c$nBases >= 1000,]
d.1000 <- c[c$nBases < 1000,]
rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh
rmseAll = sqrt( sum(as.numeric((c$Qempirical-c$Qreported)^2 * c$nBases)) / sum(as.numeric(c$nBases)) )
theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3))
# when every bin is well-populated the two RMSEs coincide; report just one
if( length(d.good$nBases) == length(c$nBases) ) {
theTitle = paste("RMSE =", round(rmseAll,digits=3))
}
# Don't let residual error go off the edge of the plot
d.good$residualError = d.good$Qempirical-d.good$Qreported
d.good$residualError[which(d.good$residualError > 10)] = 10
d.good$residualError[which(d.good$residualError < -10)] = -10
d.1000$residualError = d.1000$Qempirical-d.1000$Qreported
d.1000$residualError[which(d.1000$residualError > 10)] = 10
d.1000$residualError[which(d.1000$residualError < -10)] = -10
c$residualError = c$Qempirical-c$Qreported
c$residualError[which(c$residualError > 10)] = 10
c$residualError[which(c$residualError < -10)] = -10
# few covariate values: connect points with lines for readability
pointType = "p"
if( length(c$Covariate) <= 20 ) {
pointType = "o"
}
if( is.numeric(c$Covariate) ) {
plot(d.good$Covariate, d.good$residualError, type=pointType, main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(-10, 10), xlim=c(min(c$Covariate),max(c$Covariate)))
points(d.1000$Covariate, d.1000$residualError, type=pointType, col="cornflowerblue", pch=20)
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
plot(c$Covariate, c$residualError, type="l", main=theTitle, ylab="Empirical - Reported Quality", xlab=covariateName, col="blue", ylim=c(-10, 10))
points(d.1000$Covariate, d.1000$residualError, type="l", col="cornflowerblue")
}
dev.off()
# compactPDF only exists in tools for R >= 2.13
if (exists('compactPDF')) {
compactPDF(outfile)
}

#
# Plot mean quality versus the covariate
#
outfile = paste(input, ".reported_qual_v_", covariateName, ".pdf", sep="")
pdf(outfile, height=7, width=7)
par(cex=1.1)
pointType = "p"
if( length(c$Covariate) <= 20 ) {
pointType = "o"
}
theTitle = paste("Quality By", covariateName);
if( is.numeric(c$Covariate) ) {
plot(d.good$Covariate, d.good$Qreported, type=pointType, main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", pch=20, ylim=c(0, 40), xlim=c(min(c$Covariate),max(c$Covariate)))
points(d.1000$Covariate, d.1000$Qreported, type=pointType, col="cornflowerblue", pch=20)
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
plot(c$Covariate, c$Qreported, type="l", main=theTitle, ylab="Mean Reported Quality", xlab=covariateName, col="blue", ylim=c(0, 40))
points(d.1000$Covariate, d.1000$Qreported, type="l", col="cornflowerblue")
}
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

#
# Plot histogram of the covariate
#
e = d.good
f = d.1000
outfile = paste(input, ".", covariateName,"_hist.pdf", sep="")
pdf(outfile, height=7, width=7)
# drop empty bins before plotting
hst=subset(data.frame(e$Covariate, e$nBases), e.nBases != 0)
hst2=subset(data.frame(f$Covariate, f$nBases), f.nBases != 0)
# thicker bars when there are few covariate values
lwdSize=2
if( length(c$Covariate) <= 20 ) {
lwdSize=7
} else if( length(c$Covariate) <= 70 ) {
lwdSize=4
}
if( is.numeric(c$Covariate) ) {
if( length(hst$e.Covariate) == 0 ) {
plot(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue", main=paste(covariateName,"histogram"), ylim=c(0, max(hst2$f.nBases)), xlab=covariateName, ylab="Count",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate)))
} else {
plot(hst$e.Covariate, hst$e.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), xlab=covariateName, ylim=c(0, max(hst$e.nBases)),ylab="Number of Bases",yaxt="n",xlim=c(min(c$Covariate),max(c$Covariate)))
points(hst2$f.Covariate, hst2$f.nBases, type="h", lwd=lwdSize, col="cornflowerblue")
}
# non-scientific y-axis labels
axis(2,axTicks(2), format(axTicks(2), scientific=F))
} else { # Dinuc (and other non-numeric covariates) are different to make their plots look nice
hst=subset(data.frame(c$Covariate, c$nBases), c.nBases != 0)
plot(1:length(hst$c.Covariate), hst$c.nBases, type="h", lwd=lwdSize, main=paste(covariateName,"histogram"), ylim=c(0, max(hst$c.nBases)),xlab=covariateName, ylab="Number of Bases",yaxt="n",xaxt="n")
# thin x-axis labels when there are many categories
if( length(hst$c.Covariate) > 9 ) {
axis(1, at=seq(1,length(hst$c.Covariate),2), labels = hst$c.Covariate[seq(1,length(hst$c.Covariate),2)])
} else {
axis(1, at=seq(1,length(hst$c.Covariate),1), labels = hst$c.Covariate)
}
axis(2,axTicks(2), format(axTicks(2), scientific=F))
}
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -1,84 +0,0 @@
#!/bin/env Rscript

# Plots empirical vs reported base quality diagnostics.
# Usage: script <table-file> <Qcutoff> <maxQ> <maxHist>
# Reads a table with columns nBases, Qreported, Qempirical and writes three
# PDFs next to the input: empirical-vs-stated scatter, empirical quality
# histogram, and reported quality histogram. maxHist == 0 means auto y-limit.
library(tools)
args <- commandArgs(TRUE)
input = args[1]
Qcutoff = as.numeric(args[2])
maxQ = as.numeric(args[3])
maxHist = as.numeric(args[4])
t=read.table(input, header=T)

#
# Plot of reported quality versus empirical quality
#
outfile = paste(input, ".quality_emp_v_stated.pdf", sep="")
pdf(outfile, height=7, width=7)
# Stratify bins by how well-populated they are; f holds bins below the
# reported-quality cutoff, drawn separately in maroon.
d.good <- t[t$nBases >= 10000 & t$Qreported >= Qcutoff,]
d.1000 <- t[t$nBases < 1000 & t$Qreported >= Qcutoff,]
d.10000 <- t[t$nBases < 10000 & t$nBases >= 1000 & t$Qreported >= Qcutoff,]
f <- t[t$Qreported < Qcutoff,]
e <- rbind(d.good, d.1000, d.10000)
rmseGood = sqrt( sum(as.numeric((d.good$Qempirical-d.good$Qreported)^2 * d.good$nBases)) / sum(as.numeric(d.good$nBases)) ) # prevent integer overflow with as.numeric, ugh
rmseAll = sqrt( sum(as.numeric((e$Qempirical-e$Qreported)^2 * e$nBases)) / sum(as.numeric(e$nBases)) )
theTitle = paste("RMSE_good =", round(rmseGood,digits=3), ", RMSE_all =", round(rmseAll,digits=3))
# when all above-cutoff bins are well-populated the two RMSEs coincide
if( length(t$nBases) - length(f$nBases) == length(d.good$nBases) ) {
theTitle = paste("RMSE =", round(rmseAll,digits=3));
}
plot(d.good$Qreported, d.good$Qempirical, type="p", col="blue", main=theTitle, xlim=c(0,maxQ), ylim=c(0,maxQ), pch=16, xlab="Reported quality score", ylab="Empirical quality score")
points(d.1000$Qreported, d.1000$Qempirical, type="p", col="lightblue", pch=16)
points(d.10000$Qreported, d.10000$Qempirical, type="p", col="cornflowerblue", pch=16)
points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
abline(0,1, lty=2)   # perfect-calibration diagonal
dev.off()
# compactPDF only exists in tools for R >= 2.13
if (exists('compactPDF')) {
compactPDF(outfile)
}

#
# Plot Q empirical histogram
#
outfile = paste(input, ".quality_emp_hist.pdf", sep="")
pdf(outfile, height=7, width=7)
hst=subset(data.frame(e$Qempirical, e$nBases), e.nBases != 0)
hst2=subset(data.frame(f$Qempirical, f$nBases), f.nBases != 0)
percentBases=hst$e.nBases / sum(as.numeric(hst$e.nBases))
entropy = -sum(log2(percentBases)*percentBases)
yMax = max(hst$e.nBases)
if(maxHist != 0) {
yMax = maxHist
}
plot(hst$e.Qempirical, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Empirical quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Empirical quality score", ylab="Number of Bases",yaxt="n")
points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=F))
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

#
# Plot Q reported histogram
#
outfile = paste(input, ".quality_rep_hist.pdf", sep="")
pdf(outfile, height=7, width=7)
hst=subset(data.frame(e$Qreported, e$nBases), e.nBases != 0)
hst2=subset(data.frame(f$Qreported, f$nBases), f.nBases != 0)
yMax = max(hst$e.nBases)
if(maxHist != 0) {
yMax = maxHist
}
# NOTE(review): 'entropy' in this title is the value computed from the
# *empirical* histogram above — confirm this reuse is intentional
plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yMax), main=paste("Reported quality score histogram, entropy = ",round(entropy,digits=3)), xlab="Reported quality score", ylab="Number of Bases",yaxt="n")
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=F))
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -0,0 +1,101 @@
# BQSR report plotting prelude: loads the recalibration CSV (argument 1) and
# defines a helper for stacking ggplot graphs vertically on one page.
library("ggplot2")
library("tools") #For compactPDF in R 2.13+
args <- commandArgs(TRUE)
data <- read.csv(args[1])
# reverse the EventType factor levels so facets appear in the desired order
data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
numRG = length(unique(data$ReadGroup))
# shared minimalist theme: no grid, no panel background, no axis ticks
blankTheme = opts(panel.grid.major = theme_blank(), panel.grid.minor = theme_blank(), panel.background = theme_blank(), axis.ticks = theme_blank())

# Viewport (layout 2 graphs top to bottom)
# Prints the given ggplot objects stacked in one column, with optional
# relative row heights; NA entries in 'graphs' are dropped (along with
# their heights) before layout.
distributeGraphRows <- function(graphs, heights = c()) {
if (length(heights) == 0) {
# default: equal height for every graph
heights <- rep.int(1, length(graphs))
}
heights <- heights[!is.na(graphs)]
graphs <- graphs[!is.na(graphs)]
numGraphs <- length(graphs)
Layout <- grid.layout(nrow = numGraphs, ncol = 1, heights=heights)
grid.newpage()
pushViewport(viewport(layout = Layout))
subplot <- function(x) viewport(layout.pos.row = x, layout.pos.col = 1)
for (i in 1:numGraphs) {
print(graphs[[i]], vp = subplot(i))
}
}
# Build one pair of plots per covariate, stashing them in loop-carried variables:
#   a = QualityScore accuracy, b = Cycle accuracy, c = Context accuracy,
#   d = QualityScore observation histogram, e = Cycle mean quality, f = Context mean quality.
# NOTE(review): this relies on the alphabetical factor-level order
# ("Context" < "Cycle" < "QualityScore") so that all six variables are assigned
# before the distributeGraphRows() calls below; it also shadows base::c and
# reuses `d` for both the data subset and a plot — fragile, but R's function
# lookup still resolves c(1,1,1) to base::c. Confirm before restructuring.
for(cov in levels(data$CovariateName)) { # for each covariate in turn
    d = data[data$CovariateName==cov,] # pull out just the data for this covariate so we can treat the non-numeric values appropriately
    if( cov == "Context" ) {
        # Context values are strings; keep only the last 3 bases (the suffix)
        d$CovariateValue = as.character(d$CovariateValue)
        d$CovariateValue = substring(d$CovariateValue,nchar(d$CovariateValue)-2,nchar(d$CovariateValue))
    } else {
        d$CovariateValue = as.numeric(levels(d$CovariateValue))[as.integer(d$CovariateValue)] # efficient way to convert factors back to their real values
    }
    #d=subset(d,Observations>2000) # only show bins which have enough data to actually estimate the quality
    # Split by event type and downsample each to at most 2000 points
    dSub=subset(d,EventType=="Base Substitution")
    dIns=subset(d,EventType=="Base Insertion")
    dDel=subset(d,EventType=="Base Deletion")
    dSub=dSub[sample.int(length(dSub[,1]),min(length(dSub[,1]),2000)),] # don't plot too many values because it makes the PDFs too massive
    dIns=dIns[sample.int(length(dIns[,1]),min(length(dIns[,1]),2000)),] # don't plot too many values because it makes the PDFs too massive
    dDel=dDel[sample.int(length(dDel[,1]),min(length(dDel[,1]),2000)),] # don't plot too many values because it makes the PDFs too massive
    d=rbind(dSub, dIns, dDel)
    if( cov != "QualityScore" ) {
        # Accuracy-vs-covariate scatter, faceted by event type, alpha-scaled by observation count
        p <- ggplot(d, aes(x=CovariateValue,y=Accuracy,alpha=log10(Observations))) +
            geom_abline(intercept=0, slope=0, linetype=2) +
            xlab(paste(cov,"Covariate")) +
            ylab("Quality Score Accuracy") +
            blankTheme
        if(cov == "Cycle") {
            # b: cycle accuracy; e: cycle mean reported quality
            b <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
                opts(axis.text.x=theme_text(angle=90, hjust=0))
            p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) +
                xlab(paste(cov,"Covariate")) +
                ylab("Mean Quality Score") +
                blankTheme
            e <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
                opts(axis.text.x=theme_text(angle=90, hjust=0))
        } else {
            # c: context accuracy; f: context mean reported quality
            # (NOTE(review): `c` shadows base::c inside this scope)
            c <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
                opts(axis.text.x=theme_text(angle=90, hjust=0)) + xlab(paste(cov,"Covariate (3 base suffix)"))
            p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) +
                xlab(paste(cov,"Covariate (3 base suffix)")) +
                ylab("Mean Quality Score") +
                blankTheme
            f <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
                opts(axis.text.x=theme_text(angle=90, hjust=0))
        }
    } else {
        # a: empirical vs reported quality with the y=x reference line
        p <- ggplot(d, aes(x=AverageReportedQuality,y=EmpiricalQuality,alpha=log10(Observations))) +
            geom_abline(intercept=0, slope=1, linetype=2) +
            xlab("Reported Quality Score") +
            ylab("Empirical Quality Score") +
            blankTheme
        a <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType)
        # d: histogram of observations per quality score, weighted by Observations
        # (NOTE(review): this overwrites the data-frame `d`; works only because
        # QualityScore is the last covariate level iterated)
        p <- ggplot(d, aes(x=CovariateValue)) +
            xlab(paste(cov,"Covariate")) +
            ylab("Number of Observations") +
            blankTheme
        d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
            scale_y_continuous(formatter="comma")
    }
}
# Two pages: accuracy plots (a,b,c), then histogram/mean-quality plots (d,e,f)
pdf(args[2],height=9,width=15)
distributeGraphRows(list(a,b,c), c(1,1,1))
distributeGraphRows(list(d,e,f), c(1,1,1))
dev.off()
# Shrink the PDF if the tools package provides compactPDF (R >= 2.13)
if (exists('compactPDF')) {
    compactPDF(args[2])
}

View File

@ -19,9 +19,9 @@ Medical and Population Genetics Program
Maintainer: Kiran Garimella
}
\references{
GSA wiki page: http://www.broadinstitute.org/gsa/wiki
GSA wiki page: http://www.broadinstitute.org/gatk
GATK help forum: http://www.getsatisfaction.com/gsa
GATK help forum: http://www.broadinstitute.org/gatk
}
\examples{
## get script arguments in interactive and non-interactive mode

View File

@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
@ -46,7 +48,8 @@ import java.util.Iterator;
* @author mhanna
* @version 0.1
*/
public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class AlignmentValidation extends ReadWalker<Integer,Integer> {
/**
* The supporting BWT index generated using BWT.
*/

View File

@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -50,6 +52,7 @@ import java.io.File;
* @author mhanna
* @version 0.1
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@WalkerName("Align")
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +

View File

@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
@ -48,7 +50,8 @@ import java.util.TreeMap;
* @author mhanna
* @version 0.1
*/
public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountBestAlignments extends ReadWalker<Integer,Integer> {
/**
* The supporting BWT index generated using BWT.
*/

View File

@ -1,113 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.analyzecovariates;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import java.util.ArrayList;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Dec 1, 2009
*
* The difference between this AnalysisDataManager and the RecalDataManager used by the Recalibration walkers is that here the collapsed data tables are indexed
* by only read group and the given covariate, while in the recalibrator the collapsed tables are indexed by read group, reported quality, and the given covariate.
*/
/**
 * Holds the collapsed recalibration data tables used by AnalyzeCovariates.
 * The read-group table is keyed by read group only; each per-covariate table
 * is keyed by (read group, covariate value) — unlike the recalibrator's
 * tables, reported quality is not part of the key here.
 */
public class AnalysisDataManager {
    private NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
    private ArrayList<NestedHashMap> dataCollapsedByCovariate; // Tables where everything except read group and given covariate has been collapsed

    // No-arg constructor leaves the tables null; callers are expected to use
    // the sized constructor below before adding data.
    AnalysisDataManager() {
    }

    // Allocate one collapsed table per covariate (excluding read group, which
    // gets its own dedicated table).
    AnalysisDataManager( final int numCovariates ) {
        dataCollapsedReadGroup = new NestedHashMap();
        dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
        for( int iii = 0; iii < numCovariates - 1; iii++ ) { // readGroup isn't counted here, its table is separate
            dataCollapsedByCovariate.add( new NestedHashMap() );
        }
    }

    /**
     * Add the given mapping to all of the collapsed hash tables
     * @param key The list of comparables that is the key for this mapping
     *            (key[0] = read group, key[1] = reported quality score, key[i+1] = covariate i)
     * @param fullDatum The RecalDatum which is the data for this mapping
     * @param IGNORE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
     */
    public final void addToAllTables( final Object[] key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) {
        final int qscore = Integer.parseInt( key[1].toString() );
        RecalDatum collapsedDatum;
        final Object[] readGroupCollapsedKey = new Object[1];
        final Object[] covariateCollapsedKey = new Object[2];
        // Only qualities at or above the ignore threshold contribute to the read-group aggregate
        if( !(qscore < IGNORE_QSCORES_LESS_THAN) ) {
            // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
            readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
            collapsedDatum = (RecalDatum)dataCollapsedReadGroup.get( readGroupCollapsedKey );
            if( collapsedDatum == null ) {
                dataCollapsedReadGroup.put( new RecalDatum(fullDatum), readGroupCollapsedKey );
            } else {
                collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported
            }
        }
        // Create dataCollapsedByCovariate's, the tables where everything except read group and given covariate has been collapsed
        for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) {
            if( iii == 0 || !(qscore < IGNORE_QSCORES_LESS_THAN) ) { // use all data for the plot versus reported quality, but not for the other plots versus cycle and etc.
                covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
                Object theCovariateElement = key[iii + 1]; // and the given covariate
                if( theCovariateElement != null ) {
                    covariateCollapsedKey[1] = theCovariateElement;
                    collapsedDatum = (RecalDatum)dataCollapsedByCovariate.get(iii).get( covariateCollapsedKey );
                    if( collapsedDatum == null ) {
                        dataCollapsedByCovariate.get(iii).put( new RecalDatum(fullDatum), covariateCollapsedKey );
                    } else {
                        collapsedDatum.combine( fullDatum );
                    }
                }
            }
        }
    }

    /**
     * Get the appropriate collapsed table out of the set of all the tables held by this Object
     * @param covariate Which covariate indexes the desired collapsed HashMap (0 = read-group table)
     * @return The desired collapsed HashMap
     */
    public final NestedHashMap getCollapsedTable( final int covariate ) {
        if( covariate == 0) {
            return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
        } else {
            return dataCollapsedByCovariate.get( covariate - 1 ); // Table where everything except read group, quality score, and given covariate has been collapsed
        }
    }
}

View File

@ -1,383 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.analyzecovariates;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.regex.Pattern;
/**
* Call R scripts to plot residual error versus the various covariates.
*
* <p>
* After counting covariates in either the initial BAM File or again in the recalibrated BAM File, an analysis tool is available which
* reads the .csv file and outputs several PDF (and .dat) files for each read group in the given BAM. These PDF files graphically
* show the various metrics and characteristics of the reported quality scores (often in relation to the empirical qualities).
* In order to show that any biases in the reported quality scores have been generally fixed through recalibration one should run
* CountCovariates again on a bam file produced by TableRecalibration. In this way users can compare the analysis plots generated
* by pre-recalibration and post-recalibration .csv files. Our usual chain of commands that we use to generate plots of residual
* error is: CountCovariates, TableRecalibrate, samtools index on the recalibrated bam file, CountCovariates again on the recalibrated
* bam file, and then AnalyzeCovariates on both the before and after recal_data.csv files to see the improvement in recalibration.
*
* <p>
* The color coding along with the RMSE is included in the plots to give some indication of the number of observations that went into
* each of the quality score estimates. It is defined as follows for N, the number of observations:
*
* <ul>
* <li>light blue means N < 1,000</li>
* <li>cornflower blue means 1,000 <= N < 10,000</li>
* <li>dark blue means N >= 10,000</li>
* <li>The pink dots indicate points whose quality scores are special codes used by the aligner and which are mathematically
* meaningless and so aren't included in any of the numerical calculations.</li>
* </ul>
*
* <p>
* NOTE: Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
*
* <p>
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
* <a target="gatkwiki" href="http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration"
* >http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration</a>
*
* <h2>Input</h2>
* <p>
* The recalibration table file in CSV format that was generated by the CountCovariates walker.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx4g -jar AnalyzeCovariates.jar \
* -recalFile /path/to/recal.table.csv \
* -outputDir /path/to/output_dir/ \
* -ignoreQ 5
* </pre>
*
*/
@DocumentedGATKFeature(
        groupName = "AnalyzeCovariates",
        summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
public class AnalyzeCovariates extends CommandLineProgram {
    final private static Logger logger = Logger.getLogger(AnalyzeCovariates.class);

    // Bundled R scripts, loaded as classpath resources next to this class.
    // NOTE(review): "RESDIUAL" is a misspelling of "RESIDUAL" in these constant
    // names; they are private so renaming would be safe, but is not done here.
    private static final String PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE = "plot_residualError_QualityScoreCovariate.R";
    private static final String PLOT_RESDIUAL_ERROR_OTHER_COVARIATE = "plot_residualError_OtherCovariate.R";
    private static final String PLOT_INDEL_QUALITY_RSCRIPT = "plot_indelQuality.R";

    /////////////////////////////
    // Command Line Arguments
    /////////////////////////////
    /**
     * After the header, data records occur one per line until the end of the file. The first several items on a line are the
     * values of the individual covariates and will change depending on which covariates were specified at runtime. The last
     * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
     * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
     */
    @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
    private String RECAL_FILE = "output.recal_data.csv";
    @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
    private File OUTPUT_DIR = new File("analyzeCovariates");
    @Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
    private int IGNORE_QSCORES_LESS_THAN = 5;
    @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
    private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
    /**
     * Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
     * by capping at the specified value. We've found that Q40 is too low when using a more complete database of known variation like dbSNP build 132 or later.
     */
    @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50")
    private int MAX_QUALITY_SCORE = 50;
    /**
     * This argument is useful for comparing before/after plots and you want the axes to match each other.
     */
    @Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
    private int MAX_HISTOGRAM_VALUE = 0;
    @Hidden
    @Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting")
    private boolean DO_INDEL_QUALITY = false;

    /////////////////////////////
    // Private Member Variables
    /////////////////////////////
    private AnalysisDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
    private ArrayList<Covariate> requestedCovariates; // List of covariates to be used in this calculation
    // Recognizers for the three kinds of non-data lines in the recal CSV
    private final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
    private final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
    private final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
    protected static final String EOF_MARKER = "EOF";

    /**
     * Tool entry point: read the recal CSV, write per-read-group data tables,
     * then invoke the bundled R scripts to produce the plots.
     * @return 0 on success (errors are thrown as runtime exceptions)
     */
    protected int execute() {
        // create the output directory where all the data tables and plots will go
        if (!OUTPUT_DIR.exists() && !OUTPUT_DIR.mkdirs())
            throw new UserException.BadArgumentValue("--output_dir/-outDir", "Unable to create output directory: " + OUTPUT_DIR);
        if (!RScriptExecutor.RSCRIPT_EXISTS)
            Utils.warnUser(logger, "Rscript not found in environment path. Plots will not be generated.");
        // initialize all the data from the csv file and allocate the list of covariates
        logger.info("Reading in input csv file...");
        initializeData();
        logger.info("...Done!");
        // output data tables for Rscript to read in
        logger.info("Writing out intermediate tables for R...");
        writeDataTables();
        logger.info("...Done!");
        // perform the analysis using Rscript and output the plots
        logger.info("Calling analysis R scripts and writing out figures...");
        callRScripts();
        logger.info("...Done!");
        return 0;
    }

    /**
     * Parse the recal CSV: skip comments/old headers/EOF markers, instantiate
     * the covariate classes named in the header line, then load every data
     * line into the collapsed tables via addCSVData().
     */
    private void initializeData() {
        // Get a list of all available covariates
        Collection<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
        int lineNumber = 0;
        boolean foundAllCovariates = false;
        // Read in the covariates that were used from the input file
        requestedCovariates = new ArrayList<Covariate>();
        try {
            for ( final String line : new XReadLines(new File( RECAL_FILE )) ) {
                lineNumber++;
                if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() || line.equals(EOF_MARKER) ) {
                    ; // Skip over the comment lines, (which start with '#')
                }
                else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
                    if( foundAllCovariates ) {
                        throw new RuntimeException( "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
                    } else { // Found the covariate list in input file, loop through all of them and instantiate them
                        String[] vals = line.split(",");
                        for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
                            boolean foundClass = false;
                            // Match the header name (plus "Covariate" suffix) against the discovered plugin classes
                            for( Class<?> covClass : classes ) {
                                if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
                                    foundClass = true;
                                    try {
                                        Covariate covariate = (Covariate)covClass.newInstance();
                                        requestedCovariates.add( covariate );
                                    } catch (Exception e) {
                                        throw new DynamicClassResolutionException(covClass, e);
                                    }
                                }
                            }
                            if( !foundClass ) {
                                throw new RuntimeException( "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
                            }
                        }
                    }
                } else { // Found a line of data
                    if( !foundAllCovariates ) {
                        foundAllCovariates = true;
                        // At this point all the covariates should have been found and initialized
                        if( requestedCovariates.size() < 2 ) {
                            throw new RuntimeException( "Malformed input recalibration file. Covariate names can't be found in file: " + RECAL_FILE );
                        }
                        // Initialize any covariate member variables using the shared argument collection
                        for( Covariate cov : requestedCovariates ) {
                            cov.initialize( new RecalibrationArgumentCollection() );
                        }
                        // Initialize the data hashMaps
                        dataManager = new AnalysisDataManager( requestedCovariates.size() );
                    }
                    addCSVData(line); // Parse the line and add the data to the HashMap
                }
            }
        } catch ( FileNotFoundException e ) {
            throw new RuntimeException("Can not find input file: " + RECAL_FILE);
        } catch ( NumberFormatException e ) {
            throw new RuntimeException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
        }
    }

    /**
     * Parse one CSV data line into a RecalDatum and add it to every collapsed table.
     * Layout: one field per covariate, then nObservations, nMismatch, Qempirical.
     */
    private void addCSVData(String line) {
        String[] vals = line.split(",");
        // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
        if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
            throw new RuntimeException("Malformed input recalibration file. Found data line with too many fields: " + line +
                    " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
        }
        Object[] key = new Object[requestedCovariates.size()];
        Covariate cov;
        int iii;
        for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
            cov = requestedCovariates.get( iii );
            key[iii] = cov.getValue( vals[iii] );
        }
        // Create a new datum using the number of observations, number of mismatches, and reported quality score
        // (vals[1] is the reported quality score column; iii now points at nObservations)
        final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
        // Add that datum to all the collapsed tables which will be used in the sequential calculation
        dataManager.addToAllTables( key, datum, IGNORE_QSCORES_LESS_THAN );
    }

    /**
     * For each read group (up to NUM_READ_GROUPS_TO_PROCESS) and each covariate,
     * write a tab-separated .dat file of Qreported/Qempirical/mismatch counts
     * into OUTPUT_DIR for the R scripts to consume.
     */
    private void writeDataTables() {
        int numReadGroups = 0;
        // for each read group
        for( final Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
            if( NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS ) {
                final String readGroup = readGroupKey.toString();
                final RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
                logger.info(String.format(
                        "Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f",
                        readGroup, readGroupDatum.getNumObservations(),
                        readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported()));
                // for each covariate
                for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
                    Covariate cov = requestedCovariates.get(iii);
                    // Create a PrintStream
                    File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
                    PrintStream output;
                    try {
                        output = new PrintStream(FileUtils.openOutputStream(outputFile));
                    } catch (IOException e) {
                        throw new UserException.CouldNotCreateOutputFile(outputFile, e);
                    }
                    try {
                        // Output the header
                        output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
                        for( final Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet() ) {
                            output.print( covariateKey.toString() + "\t" ); // Covariate
                            final RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
                            output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
                            output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical
                            output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
                            output.println( thisDatum.getNumObservations() ); // nBases
                        }
                    } finally {
                        // Close the PrintStream
                        IOUtils.closeQuietly(output);
                    }
                }
            } else {
                break;
            }
        }
    }

    /**
     * Invoke the appropriate bundled R script on each .dat file written by
     * writeDataTables(): the quality-score script for covariate index 1, the
     * generic covariate script otherwise, or the indel-quality script when
     * DO_INDEL_QUALITY is set.
     */
    private void callRScripts() {
        int numReadGroups = 0;
        // for each read group
        for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
            if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
                String readGroup = readGroupKey.toString();
                logger.info("Analyzing read group: " + readGroup);
                // for each covariate
                for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
                    final Covariate cov = requestedCovariates.get(iii);
                    final File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
                    if (DO_INDEL_QUALITY) {
                        RScriptExecutor executor = new RScriptExecutor();
                        executor.addScript(new Resource(PLOT_INDEL_QUALITY_RSCRIPT, AnalyzeCovariates.class));
                        // The second argument is the name of the covariate in order to make the plots look nice
                        executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
                        executor.exec();
                    } else {
                        if( iii == 1 ) {
                            // Analyze reported quality
                            RScriptExecutor executor = new RScriptExecutor();
                            executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE, AnalyzeCovariates.class));
                            // The second argument is the Q scores that should be turned pink in the plot because they were ignored
                            executor.addArgs(outputFile, IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE);
                            executor.exec();
                        } else { // Analyze all other covariates
                            RScriptExecutor executor = new RScriptExecutor();
                            executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_OTHER_COVARIATE, AnalyzeCovariates.class));
                            // The second argument is the name of the covariate in order to make the plots look nice
                            executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
                            executor.exec();
                        }
                    }
                }
            } else { // at the maximum number of read groups so break out
                break;
            }
        }
    }

    /**
     * Standalone entry point; delegates to the CommandLineProgram lifecycle
     * and exits with its result code.
     */
    public static void main(String args[]) {
        try {
            AnalyzeCovariates clp = new AnalyzeCovariates();
            start(clp, args);
            System.exit(CommandLineProgram.result);
        } catch (Exception e) {
            exitSystemWithError(e);
        }
    }
}

View File

@ -1,4 +0,0 @@
/**
* Package to plot residual accuracy versus error covariates for the base quality score recalibrator.
*/
package org.broadinstitute.sting.analyzecovariates;

View File

@ -287,8 +287,8 @@ public abstract class CommandLineProgram {
* a function used to indicate an error occurred in the command line tool
*/
private static void printDocumentationReference() {
errorPrintf("Visit our wiki for extensive documentation http://www.broadinstitute.org/gsa/wiki%n");
errorPrintf("Visit our forum to view answers to commonly asked questions http://getsatisfaction.com/gsa%n");
errorPrintf("Visit our website and forum for extensive documentation and answers to %n");
errorPrintf("commonly asked questions http://www.broadinstitute.org/gatk%n");
}
@ -369,9 +369,9 @@ public abstract class CommandLineProgram {
System.exit(1);
}
public static void exitSystemWithSamError(final Exception e) {
if ( e.getMessage() == null )
throw new ReviewedStingException("SamException found with no message!", e);
public static void exitSystemWithSamError(final Throwable t) {
if ( t.getMessage() == null )
throw new ReviewedStingException("SamException found with no message!", t);
errorPrintf("------------------------------------------------------------------------------------------%n");
errorPrintf("A BAM ERROR has occurred (version %s): %n", CommandLineGATK.getVersionNumber());
@ -383,7 +383,7 @@ public abstract class CommandLineProgram {
errorPrintf("Also, please ensure that your BAM index is not corrupted: delete the current one and regenerate it with 'samtools index'%n");
printDocumentationReference();
errorPrintf("%n");
errorPrintf("MESSAGE: %s%n", e.getMessage().trim());
errorPrintf("MESSAGE: %s%n", t.getMessage().trim());
errorPrintf("------------------------------------------------------------------------------------------%n");
System.exit(1);
}

View File

@ -130,8 +130,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
if ( getArgumentCollection().gatkKeyFile == null ) {
throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
"for more information and instructions on how to obtain a key.");
"Please see " + GATKRunReport.PHONE_HOME_DOCS_URL +
" for more information and instructions on how to obtain a key.");
}
else {
PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();

View File

@ -36,22 +36,23 @@ import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Attribution;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.*;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*;
/**
* All command line parameters accepted by all tools in the GATK.
*
* The GATK engine itself. Manages map/reduce data access and runs walkers.
*
* We run command line GATK programs using this class. It gets the command line args, parses them, and hands the
* gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here,
* the gatk engine should deal with any data related information.
*/
@DocumentedGATKFeature(
groupName = "GATK Engine",
summary = "Features and arguments for the GATK engine itself, available to all walkers.",
extraDocs = { UserException.class })
@DocumentedGATKFeature(groupName = "GATK Engine")
public class CommandLineGATK extends CommandLineExecutable {
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
private String analysisName = null;
@ -101,20 +102,41 @@ public class CommandLineGATK extends CommandLineExecutable {
// TODO: Should Picard exceptions be, in general, UserExceptions or ReviewedStingExceptions?
exitSystemWithError(e);
} catch (SAMException e) {
checkForTooManyOpenFilesProblem(e.getMessage());
checkForMaskedUserErrors(e);
exitSystemWithSamError(e);
} catch (OutOfMemoryError e) {
exitSystemWithUserError(new UserException.NotEnoughMemory());
} catch (Throwable t) {
checkForTooManyOpenFilesProblem(t.getMessage());
checkForMaskedUserErrors(t);
exitSystemWithError(t);
}
}
private static void checkForTooManyOpenFilesProblem(String message) {
// Special case the "Too many open files" error because it's a common User Error for which we know what to do
if ( message != null && message.indexOf("Too many open files") != -1 )
protected static final String PICARD_TEXT_SAM_FILE_ERROR_1 = "Cannot use index file with textual SAM file";
protected static final String PICARD_TEXT_SAM_FILE_ERROR_2 = "Cannot retrieve file pointers within SAM text files";
private static void checkForMaskedUserErrors(final Throwable t) {
final String message = t.getMessage();
if ( message == null )
return;
// we know what to do about the common "Too many open files" error
if ( message.indexOf("Too many open files") != -1 )
exitSystemWithUserError(new UserException.TooManyOpenFiles());
// malformed BAM looks like a SAM file
if ( message.indexOf(PICARD_TEXT_SAM_FILE_ERROR_1) != -1 ||
message.indexOf(PICARD_TEXT_SAM_FILE_ERROR_2) != -1 )
exitSystemWithSamError(t);
// can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 )
exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
}
/**
@ -126,8 +148,7 @@ public class CommandLineGATK extends CommandLineExecutable {
List<String> header = new ArrayList<String>();
header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime()));
header.add("Copyright (c) 2010 The Broad Institute");
header.add("Please view our documentation at http://www.broadinstitute.org/gsa/wiki");
header.add("For support, please view our support site at http://getsatisfaction.com/gsa");
header.add("For support and documentation go to http://www.broadinstitute.org/gatk");
return header;
}

View File

@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.collections.Pair;
@ -197,7 +198,16 @@ public class GenomeAnalysisEngine {
private BaseRecalibration baseRecalibration = null;
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean noIndelQuals) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, noIndelQuals); }
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) {
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan, emitOriginalQuals);
}
/**
* Utility method to determine whether this is the lite version of the GATK
*/
public boolean isGATKLite() {
return GATKLiteUtils.isGATKLite();
}
/**
* Actually run the GATK with the specified walker.
@ -209,8 +219,10 @@ public class GenomeAnalysisEngine {
//monitor.start();
setStartTime(new java.util.Date());
final GATKArgumentCollection args = this.getArguments();
// validate our parameters
if (this.getArguments() == null) {
if (args == null) {
throw new ReviewedStingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
}
@ -218,16 +230,16 @@ public class GenomeAnalysisEngine {
if (this.walker == null)
throw new ReviewedStingException("The walker passed to GenomeAnalysisEngine can not be null.");
if (this.getArguments().nonDeterministicRandomSeed)
if (args.nonDeterministicRandomSeed)
resetRandomGenerator(System.currentTimeMillis());
// TODO -- REMOVE ME WHEN WE STOP BCF testing
if ( this.getArguments().USE_SLOW_GENOTYPES )
if ( args.USE_SLOW_GENOTYPES )
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
if (this.getArguments().BQSR_RECAL_FILE != null)
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels, this.getArguments().noIndelQuals);
if (args.BQSR_RECAL_FILE != null)
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals);
// Determine how the threads should be divided between CPU vs. IO.
determineThreadAllocation();
@ -262,6 +274,38 @@ public class GenomeAnalysisEngine {
//return result;
}
// TODO -- Let's move this to a utility class in unstable - but which one?
// **************************************************************************************
// * Handle Deprecated Walkers *
// **************************************************************************************
// Mapping from walker name to major version number where the walker first disappeared
private static Map<String, String> deprecatedGATKWalkers = new HashMap<String, String>();
static {
deprecatedGATKWalkers.put("CountCovariates", "2.0");
deprecatedGATKWalkers.put("TableRecalibration", "2.0");
}
/**
* Utility method to check whether a given walker has been deprecated in a previous GATK release
*
* @param walkerName the walker class name (not the full package) to check
*/
public static boolean isDeprecatedWalker(final String walkerName) {
return deprecatedGATKWalkers.containsKey(walkerName);
}
/**
* Utility method to check whether a given walker has been deprecated in a previous GATK release
*
* @param walkerName the walker class name (not the full package) to check
*/
public static String getDeprecatedMajorVersionNumber(final String walkerName) {
return deprecatedGATKWalkers.get(walkerName);
}
// **************************************************************************************
/**
* Retrieves an instance of the walker based on the walker name.
*
@ -269,7 +313,17 @@ public class GenomeAnalysisEngine {
* @return An instance of the walker.
*/
public Walker<?, ?> getWalkerByName(String walkerName) {
return walkerManager.createByName(walkerName);
try {
return walkerManager.createByName(walkerName);
} catch ( UserException e ) {
if ( isGATKLite() && GATKLiteUtils.isAvailableOnlyInFullGATK(walkerName) ) {
e = new UserException.NotSupportedInGATKLite("the " + walkerName + " walker is available only in the full version of the GATK");
}
else if ( isDeprecatedWalker(walkerName) ) {
e = new UserException.DeprecatedWalker(walkerName, getDeprecatedMajorVersionNumber(walkerName));
}
throw e;
}
}
/**
@ -743,6 +797,14 @@ public class GenomeAnalysisEngine {
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
if (argCollection.keepProgramRecords)
removeProgramRecords = false;
return new SAMDataSource(
samReaderIDs,
threadAllocation,
@ -759,7 +821,8 @@ public class GenomeAnalysisEngine {
getWalkerBAQQualityMode(),
refReader,
getBaseRecalibration(),
argCollection.defaultBaseQualities);
argCollection.defaultBaseQualities,
removeProgramRecords);
}
/**

View File

@ -49,7 +49,7 @@ public class WalkerManager extends PluginManager<Walker> {
private ResourceBundle helpText;
public WalkerManager() {
super(Walker.class,"walker","Walker");
super(Walker.class,"walker","");
helpText = TextFormattingUtils.loadResourceBundle("StingText");
}

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
@ -65,10 +66,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public File gatkKeyFile = null;
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
@ -190,37 +191,70 @@ public class GATKArgumentCollection {
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
public Boolean useOriginalBaseQualities = false;
// --------------------------------------------------------------------------------------------------------------
//
// BQSR arguments
//
// --------------------------------------------------------------------------------------------------------------
/**
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
* Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
* Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
*/
@Input(fullName="BQSR", shortName="BQSR", required=false, doc="Filename for the input covariates table recalibration .csv file which enables on the fly base quality score recalibration")
public File BQSR_RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously
@Input(fullName="BQSR", shortName="BQSR", required=false, doc="The input covariates table file which enables on-the-fly base quality score recalibration")
public File BQSR_RECAL_FILE = null;
/**
* Turns on the base quantization module. It requires a recalibration report (-BQSR).
*
* A value of 0 here means "do not quantize".
* Any value greater than zero will be used to recalculate the quantization using this many levels.
* Negative values do nothing (i.e. quantize using the recalibration report's quantization level -- same as not providing this parameter at all)
* Any value greater than zero will be used to recalculate the quantization using that many levels.
* Negative values mean that we should quantize using the recalibration report's quantization level.
*/
@Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false)
public int quantizationLevels = -1;
@Hidden
@Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels (with -BQSR)", required=false)
public int quantizationLevels = 0;
/**
* Turns off printing of the base insertion and base deletion tags when using the -BQSR argument. Only the base substitution qualities will be produced.
* Turns off printing of the base insertion and base deletion tags when using the -BQSR argument and only the base substitution qualities will be produced.
*/
@Argument(fullName="no_indel_quals", shortName = "NIQ", doc = "If true, inhibits printing of base insertion and base deletion tags.", required=false)
public boolean noIndelQuals = false;
@Argument(fullName="disable_indel_quals", shortName = "DIQ", doc = "If true, disables printing of base insertion and base deletion tags (with -BQSR)", required=false)
public boolean disableIndelQuals = false;
/**
* By default, the OQ tag in not emitted when using the -BQSR argument.
*/
@Argument(fullName="emit_original_quals", shortName = "EOQ", doc = "If true, enables printing of the OQ tag with the original base qualities (with -BQSR)", required=false)
public boolean emitOriginalQuals = false;
/**
* Do not modify quality scores less than this value but rather just write them out unmodified in the recalibrated BAM file.
* In general it's unsafe to change qualities scores below < 6, since base callers use these values to indicate random or bad bases.
* For example, Illumina writes Q2 bases when the machine has really gone wrong. This would be fine in and of itself,
* but when you select a subset of these reads based on their ability to align to the reference and their dinucleotide effect,
* your Q2 bin can be elevated to Q8 or Q10, leading to issues downstream.
*/
@Argument(fullName = "preserve_qscores_less_than", shortName = "preserveQ", doc = "Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)", required = false)
public int PRESERVE_QSCORES_LESS_THAN = QualityUtils.MIN_USABLE_Q_SCORE;
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
public byte defaultBaseQualities = -1;
// --------------------------------------------------------------------------------------------------------------
//
// Other utility arguments
//
// --------------------------------------------------------------------------------------------------------------
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
public boolean removeProgramRecords = false;
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
public boolean keepProgramRecords = false;
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe;

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -300,7 +301,7 @@ public class GATKBAMIndex {
fileChannel = fileStream.getChannel();
}
catch (IOException exc) {
throw new ReviewedStingException("Unable to open index file " + mFile, exc);
throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
}
}

View File

@ -89,6 +89,11 @@ public class SAMDataSource {
*/
private final SAMFileReader.ValidationStringency validationStringency;
/**
* Do we want to remove the program records from this data source?
*/
private final boolean removeProgramRecords;
/**
* Store BAM indices for each reader present.
*/
@ -200,7 +205,8 @@ public class SAMDataSource {
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1);
(byte) -1,
false);
}
/**
@ -233,7 +239,8 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) {
byte defaultBaseQualities,
boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser;
@ -249,6 +256,7 @@ public class SAMDataSource {
dispatcher = null;
validationStringency = strictness;
this.removeProgramRecords = removeProgramRecords;
if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize);
else {
@ -748,7 +756,7 @@ public class SAMDataSource {
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
allResources.add(readers);
availableResources.add(readers);
}
@ -777,9 +785,11 @@ public class SAMDataSource {
/**
* Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load.
* TODO: validationStringency is not used here
* @param validationStringency validation stringency.
* @param removeProgramRecords indicate whether to clear program records from the readers
*/
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start();
@ -790,6 +800,9 @@ public class SAMDataSource {
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}

View File

@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -17,8 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Computes the coverage per sample.
* Computes the coverage per sample for every position (use with -L argument!).
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CoverageBySample extends LocusWalker<Integer, Integer> {
@Output
protected PrintStream out;

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
@ -46,6 +48,7 @@ import java.io.PrintStream;
*
* @author aaron
*/
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
// the possible diploid genotype strings
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }

View File

@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
import java.util.Collection;
@ -41,6 +40,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
private final Queue<TreeReduceTask> reduceTasks = new LinkedList<TreeReduceTask>();
/**
* An exception that's occurred in this traversal. If null, no exception has occurred.
*/
private RuntimeException error = null;
/**
* Queue of incoming shards.
*/
@ -91,13 +95,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
ReduceTree reduceTree = new ReduceTree(this);
initializeWalker(walker);
//
// exception handling here is a bit complex. We used to catch and rethrow exceptions all over
// the place, but that just didn't work well. Now we have a specific execution exception (inner class)
// to use for multi-threading specific exceptions. All RuntimeExceptions that occur within the threads are rethrown
// up the stack as their underlying causes
//
while (isShardTraversePending() || isTreeReducePending()) {
// Check for errors during execution.
if(hasTraversalErrorOccurred())
throw getTraversalError();
// Too many files sitting around taking up space? Merge them.
if (isMergeLimitExceeded())
mergeExistingOutput(false);
@ -113,6 +115,9 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
queueNextShardTraverse(walker, reduceTree);
}
if(hasTraversalErrorOccurred())
throw getTraversalError();
threadPool.shutdown();
// Merge any lingering output files. If these files aren't ready,
@ -123,9 +128,14 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
try {
result = reduceTree.getResult().get();
notifyTraversalDone(walker,result);
} catch (ReviewedStingException ex) {
throw ex;
} catch ( ExecutionException ex ) {
// the thread died and we are failing to get the result, rethrow it as a runtime exception
throw toRuntimeException(ex.getCause());
} catch (Exception ex) {
throw new ReviewedStingException("Unable to retrieve result", ex);
}
catch( InterruptedException ex ) { handleException(ex); }
catch( ExecutionException ex ) { handleException(ex); }
// do final cleanup operations
outputTracker.close();
@ -328,39 +338,35 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
/**
* Handle an exception that occurred in a worker thread as needed by this scheduler.
*
* The way to use this function in a worker is:
*
* try { doSomeWork();
* catch ( InterruptedException ex ) { hms.handleException(ex); }
* catch ( ExecutionException ex ) { hms.handleException(ex); }
*
* @param ex the exception that occurred in the worker thread
* Detects whether an execution error has occurred.
* @return True if an error has occurred. False otherwise.
*/
protected final void handleException(InterruptedException ex) {
throw new HierarchicalMicroScheduler.ExecutionFailure("Hierarchical reduce interrupted", ex);
private synchronized boolean hasTraversalErrorOccurred() {
return error != null;
}
private synchronized RuntimeException getTraversalError() {
if(!hasTraversalErrorOccurred())
throw new ReviewedStingException("User has attempted to retrieve a traversal error when none exists");
return error;
}
/**
* Handle an exception that occurred in a worker thread as needed by this scheduler.
*
* The way to use this function in a worker is:
*
* try { doSomeWork();
* catch ( InterruptedException ex ) { hms.handleException(ex); }
* catch ( ExecutionException ex ) { hms.handleException(ex); }
*
* @param ex the exception that occurred in the worker thread
* Allows other threads to notify of an error during traversal.
*/
protected final void handleException(ExecutionException ex) {
if ( ex.getCause() instanceof RuntimeException )
// if the cause was a runtime exception that's what we want to send up the stack
throw (RuntimeException )ex.getCause();
else
throw new HierarchicalMicroScheduler.ExecutionFailure("Hierarchical reduce failed", ex);
protected synchronized RuntimeException notifyOfTraversalError(Throwable error) {
// If the error is already a Runtime, pass it along as is. Otherwise, wrap it.
this.error = toRuntimeException(error);
return this.error;
}
private final RuntimeException toRuntimeException(final Throwable error) {
// If the error is already a Runtime, pass it along as is. Otherwise, wrap it.
if (error instanceof RuntimeException)
return (RuntimeException)error;
else
return new ReviewedStingException("An error occurred during the traversal. Message=" + error.getMessage(), error);
}
/** A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics. */
@ -381,17 +387,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
}
/**
* A specific exception class for HMS-specific failures such as
* Interrupted or ExecutionFailures that aren't clearly the fault
* of the underlying walker code
*/
public static class ExecutionFailure extends ReviewedStingException {
public ExecutionFailure(final String s, final Throwable throwable) {
super(s, throwable);
}
}
/**
* Used by the ShardTraverser to report time consumed traversing a given shard.
*

View File

@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;

View File

@ -6,11 +6,11 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
/**
* User: hanna
* Date: Apr 29, 2009
@ -79,6 +79,9 @@ public class ShardTraverser implements Callable {
microScheduler.reportShardTraverseTime(endTime-startTime);
return accumulator;
} catch(Throwable t) {
// Notify that an exception has occurred and rethrow it.
throw microScheduler.notifyOfTraversalError(t);
} finally {
synchronized(this) {
complete = true;

View File

@ -79,8 +79,14 @@ public class TreeReducer implements Callable {
else
result = walker.treeReduce( lhs.get(), rhs.get() );
}
catch( InterruptedException ex ) { microScheduler.handleException(ex); }
catch( ExecutionException ex ) { microScheduler.handleException(ex); }
catch( InterruptedException ex ) {
microScheduler.notifyOfTraversalError(ex);
throw new ReviewedStingException("Hierarchical reduce interrupted", ex);
}
catch( ExecutionException ex ) {
microScheduler.notifyOfTraversalError(ex);
throw new ReviewedStingException("Hierarchical reduce failed", ex);
}
final long endTime = System.currentTimeMillis();

View File

@ -28,11 +28,10 @@ import net.sf.samtools.SAMRecord;
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Dec 9, 2009
*
* Filter out duplicate reads.
*
* @author rpoplin
* @since Dec 9, 2009
*/
public class DuplicateReadFilter extends ReadFilter {

View File

@ -27,11 +27,10 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jul 19, 2010
* Filter out reads that fail the vendor quality check.
*
* Filter out FailsVendorQualityCheck reads.
* @author rpoplin
* @since Jul 19, 2010
*/
public class FailsVendorQualityCheckFilter extends ReadFilter {

View File

@ -3,11 +3,10 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 5/18/11
* Time: 4:25 PM
* To change this template use File | Settings | File Templates.
* Filter out reads that are not paired, have their mate unmapped, are duplicates, fail vendor quality check or both mate and read are in the same strand.
*
* @author chartl
* @since 5/18/11
*/
public class MateSameStrandFilter extends ReadFilter {

View File

@ -4,11 +4,10 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 5/2/11
* Time: 12:20 PM
* To change this template use File | Settings | File Templates.
* Filter out reads that exceed a given max insert size
*
* @author chartl
* @since 5/2/11
*/
public class MaxInsertSizeFilter extends ReadFilter {
@Argument(fullName = "maxInsertSize", shortName = "maxInsert", doc="Discard reads with insert size greater than the specified value, defaults to 1000000", required=false)

View File

@ -28,11 +28,10 @@ import net.sf.samtools.SAMRecord;
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 19, 2009
 * Filter out reads that don't have an original base quality score tag (usually added by BQSR)
*
* Filter out reads that don't have Original Quality scores inside.
* @author rpoplin
* @since Nov 19, 2009
*/
public class NoOriginalQualityScoresFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) {

View File

@ -27,11 +27,10 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Dec 9, 2009
*
 * Filter out reads that are not primary alignments.
*
* @author rpoplin
* @since Dec 9, 2009
*/
public class NotPrimaryAlignmentFilter extends ReadFilter {

View File

@ -8,11 +8,10 @@ import java.util.HashSet;
import java.util.Set;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Sep 21, 2009
* Time: 2:54:23 PM
* To change this template use File | Settings | File Templates.
* Filter out reads that have blacklisted platform unit tags. (See code documentation for how to create the blacklist).
*
* @author asivache
* @since Sep 21, 2009
*/
public class PlatformUnitFilter extends ReadFilter {
// a hack: use static in order to be able to fill it with the data from command line at runtime

View File

@ -1,17 +1,13 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 9/19/11
* Time: 4:09 PM
* To change this template use File | Settings | File Templates.
* Filter out all reads except those with this read name
*
* @author chartl
* @since 9/19/11
*/
public class ReadNameFilter extends ReadFilter {
@Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)

View File

@ -31,6 +31,9 @@ import org.broadinstitute.sting.commandline.Argument;
import java.util.Set;
/**
* Filter out all reads except those with this sample
*/
public class SampleFilter extends ReadFilter {
@Argument(fullName = "sample_to_keep", shortName = "goodSM", doc="The name of the sample(s) to keep, filtering out all others", required=true)
private Set SAMPLES_TO_KEEP = null;

View File

@ -30,11 +30,11 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 27, 2009
*
* Only use reads from the specified read group.
*
* @author rpoplin
* @since Nov 27, 2009
*
*/
public class SingleReadGroupFilter extends ReadFilter {

View File

@ -27,11 +27,10 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Dec 9, 2009
* Filter out unmapped reads.
*
* Filter out duplicate reads.
* @author rpoplin
* @since Dec 9, 2009
*/
public class UnmappedReadFilter extends ReadFilter {

View File

@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
if (stub.getGenerateMD5())
factory.setCreateMd5File(true);
// Adjust max records in RAM.
// TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
if(stub.getMaxRecordsInRam() != null)
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());

View File

@ -279,7 +279,6 @@ public class LocusIteratorByState extends LocusIterator {
*/
private void lazyLoadNextAlignmentContext() {
while (nextAlignmentContext == null && readStates.hasNext()) {
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
readStates.collectPendingReads();
final GenomeLoc location = getLocation();
@ -378,7 +377,7 @@ public class LocusIteratorByState extends LocusIterator {
CigarOperator op = state.stepForwardOnGenome();
if (op == null) {
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
it.remove(); // we've stepped off the end of the object
}

View File

@ -52,6 +52,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.GZIPOutputStream;
@ -84,6 +85,11 @@ public class GATKRunReport {
*/
private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");
// number of milliseconds before the S3 put operation times out:
private static final long S3PutTimeOut = 10 * 1000;
public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest";
/**
* our log
*/
@ -263,6 +269,58 @@ public class GATKRunReport {
}
}
private class S3PutRunnable implements Runnable {
public AtomicBoolean isSuccess;
private final String key;
private final byte[] report;
public S3Object s3Object;
public String errorMsg;
public Throwable errorThrow;
public S3PutRunnable(String key, byte[] report){
isSuccess = new AtomicBoolean();
this.key = key;
this.report = report;
}
public void run() {
try {
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
// are stored in an AWSCredentials object:
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
String awsSecretKey = "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"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
S3Service s3Service = new RestS3Service(awsCredentials);
// Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(key, report);
//logger.info("Created S3Object" + fileObject);
//logger.info("Uploading " + localFile + " to AWS bucket");
s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
isSuccess.set(true);
} catch ( S3ServiceException e ) {
setException("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {
setException("Couldn't calculate MD5", e);
} catch ( IOException e ) {
setException("Couldn't read report file", e);
}
}
private void setException(String msg, Throwable e){
errorMsg=msg;
errorThrow=e;
}
}
private void postReportToAWSS3() {
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
@ -280,32 +338,32 @@ public class GATKRunReport {
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
mimeTypeLogger.setLevel(Level.FATAL);
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
// are stored in an AWSCredentials object:
// Set the S3 upload on its own thread with timeout:
S3PutRunnable s3run = new S3PutRunnable(key,report);
Thread s3thread = new Thread(s3run);
s3thread.setDaemon(true);
s3thread.setName("S3Put-Thread");
s3thread.start();
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
String awsSecretKey = "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"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
s3thread.join(S3PutTimeOut);
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
S3Service s3Service = new RestS3Service(awsCredentials);
// Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(key, report);
//logger.info("Created S3Object" + fileObject);
//logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.debug("Uploaded to AWS: " + s3Object);
logger.info("Uploaded run statistics report to AWS S3");
} catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e);
if(s3thread.isAlive()){
s3thread.interrupt();
exceptDuringRunReport("Run statistics report upload to AWS S3 timed-out");
} else if(s3run.isSuccess.get()) {
logger.info("Uploaded run statistics report to AWS S3");
logger.debug("Uploaded to AWS: " + s3run.s3Object);
} else {
if((s3run.errorMsg != null) && (s3run.errorThrow != null)){
exceptDuringRunReport(s3run.errorMsg,s3run.errorThrow);
} else {
exceptDuringRunReport("Run statistics report upload to AWS S3 failed");
}
}
} catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e);
} catch ( InterruptedException e) {
exceptDuringRunReport("Run statistics report upload interrupted", e);
}
}

View File

@ -163,43 +163,58 @@ public class VariantContextAdaptors {
@Override
public VariantContext convert(String name, Object input, ReferenceContext ref) {
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
// add all of the alt alleles
boolean sawNullAllele = refAllele.isNull();
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
Allele altAllele = Allele.create(alt, false);
alleles.add(altAllele);
if ( altAllele.isNull() )
sawNullAllele = true;
}
final byte refBaseForIndel = ref.getBases()[index];
Map<String, Object> attributes = new HashMap<String, Object>();
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
builder.alleles(alleles);
builder.referenceBaseForIndel(refBaseForIndel);
return builder.make();
} else
boolean addPaddingBase;
if ( isSNP(dbsnp) || isMNP(dbsnp) )
addPaddingBase = false;
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
else
return null; // can't handle anything else
Allele refAllele;
if ( dbsnp.getNCBIRefBase().equals("-") )
refAllele = Allele.create(refBaseForIndel, true);
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
else
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add all of the alt alleles
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( Allele.wouldBeNullAllele(alt.getBytes()))
alt = "";
else if ( ! Allele.acceptableAlleleBases(alt) )
return null;
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
}
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
builder.alleles(alleles);
return builder.make();
}
private static List<String> stripNullDashes(final List<String> alleles) {
final List<String> newAlleles = new ArrayList<String>(alleles.size());
for ( final String allele : alleles ) {
if ( allele.equals("-") )
newAlleles.add("");
else
newAlleles.add(allele);
}
return newAlleles;
}
}
@ -294,7 +309,6 @@ public class VariantContextAdaptors {
int index = hapmap.getStart() - ref.getWindow().getStart();
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
HashSet<Allele> alleles = new HashSet<Allele>();
Allele refSNPAllele = Allele.create(ref.getBase(), true);
@ -351,7 +365,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd();
if ( deletionLength > 0 )
end += deletionLength;
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make();
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
return vc;
}
}

View File

@ -89,9 +89,9 @@ public class GATKReport {
reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine();
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file);
throw new UserException.CouldNotReadInputFile(file, "it does not exist");
} catch (IOException e) {
throw new ReviewedStingException("Could not read file : " + file);
throw new UserException.CouldNotReadInputFile(file, e);
}

View File

@ -208,11 +208,23 @@ public class GATKReportTable {
}
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param name the name of the table or column
* @return true if the name is valid, false if otherwise
* Create a new GATKReportTable with the same structure
* @param tableToCopy
*/
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID);
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
addColumn(column.getColumnName(), column.getFormat());
if ( copyData )
throw new IllegalArgumentException("sorry, copying data in GATKReportTable isn't supported");
}
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param name the name of the table or column
* @return true if the name is valid, false if otherwise
*/
private boolean isValidName(String name) {
Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
Matcher m = p.matcher(name);
@ -490,6 +502,17 @@ public class GATKReportTable {
return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName));
}
/**
* Get a value from the given position in the table
*
* @param rowIndex the row ID
* @param columnName the name of the column
* @return the value stored at the specified position in the table
*/
public Object get(final int rowIndex, final String columnName) {
return get(rowIndex, columnNameToIndex.get(columnName));
}
/**
* Get a value from the given position in the table
*

View File

@ -6,11 +6,14 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -26,9 +29,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
/**
* our log, which we want to capture anything from this class
*/
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
@Override
@ -67,8 +70,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
profile.add(fakeLoc, isActiveProb);
profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
}
}
}
@ -84,8 +86,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
profile.add(location, isActiveProb);
profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
}
// Grab all the previously unseen reads from this pileup and add them to the massive read list
@ -109,18 +110,18 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// add these blocks of work to the work queue
// band-pass filter the list of isActive probabilities and turn into active regions
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
final List<org.broadinstitute.sting.utils.activeregion.ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
// add active regions to queue of regions to process
// first check if can merge active regions over shard boundaries
if( !activeRegions.isEmpty() ) {
if( !workQueue.isEmpty() ) {
final ActiveRegion last = workQueue.getLast();
final ActiveRegion first = activeRegions.get(0);
final org.broadinstitute.sting.utils.activeregion.ActiveRegion last = workQueue.getLast();
final org.broadinstitute.sting.utils.activeregion.ActiveRegion first = activeRegions.get(0);
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
workQueue.removeLast();
activeRegions.remove(first);
workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
workQueue.add( new org.broadinstitute.sting.utils.activeregion.ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
}
}
workQueue.addAll( activeRegions );
@ -142,11 +143,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
//
// --------------------------------------------------------------------------------
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext locus, final GenomeLoc location) {
if ( walker.hasPresetActiveRegions() ) {
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
} else {
return walker.isActive( tracker, refContext, locus );
}
@ -183,7 +184,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
*/
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
// Just want to output the active regions to a file, not actually process them
for( final ActiveRegion activeRegion : workQueue ) {
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion : workQueue ) {
if( activeRegion.isActive ) {
walker.activeRegionOutStream.println( activeRegion.getLocation() );
}
@ -196,7 +197,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
while( workQueue.peek() != null ) {
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
final ActiveRegion activeRegion = workQueue.remove();
final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion = workQueue.remove();
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
} else {
break;
@ -206,15 +207,15 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
return sum;
}
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
private T processActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord read : reads ) {
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc );
ActiveRegion bestRegion = activeRegion;
for( final ActiveRegion otherRegionToTest : workQueue ) {
org.broadinstitute.sting.utils.activeregion.ActiveRegion bestRegion = activeRegion;
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion otherRegionToTest : workQueue ) {
if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( readLoc );
bestRegion = otherRegionToTest;
@ -227,7 +228,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
if( !bestRegion.equals(activeRegion) ) {
activeRegion.add( read );
}
for( final ActiveRegion otherRegionToTest : workQueue ) {
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion otherRegionToTest : workQueue ) {
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
otherRegionToTest.add( read );
}

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -32,6 +32,7 @@ import java.util.List;
@PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
@ -73,10 +74,10 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
}
// Determine probability of active status over the AlignmentContext
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion
public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionExtension.class).extension();

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.clipping.ClippingOp;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -148,8 +150,9 @@ import java.util.regex.Pattern;
* @author Mark DePristo
* @since 2010
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithData, ClipReadsWalker.ClippingData> {
public class ClipReads extends ReadWalker<ClipReads.ReadClipperWithData, ClipReads.ClippingData> {
/**
* If provided, ClipReads will write summary statistics about the clipping operations applied
* to the reads to this file.
@ -571,7 +574,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
}
}
public class ReadClipperWithData extends ReadClipper {
public static class ReadClipperWithData extends ReadClipper {
private ClippingData data;
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {

View File

@ -1,115 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashSet;
import java.util.Set;
/**
 * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
 * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting
 * in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter
 * command line argument.  Only reads whose names appear in the readNamesToKeep file are emitted.
 */
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class FindReadsWithNamesWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
    /** an optional argument to dump the reads out to a BAM file */
    @Output(doc="Write output to this BAM filename instead of STDOUT")
    SAMFileWriter out;

    // file containing one read name per line; only reads named there pass the filter
    @Argument(fullName = "readNamesToKeep", shortName = "rn", doc="names to keep", required = true)
    File readNamesFile = null;

    // the read-name whitelist loaded from readNamesFile in initialize()
    Set<String> namesToKeep;

    /**
     * The initialize function.  Loads the read-name whitelist from readNamesFile.
     * Throws UserException.CouldNotReadInputFile if the file cannot be opened.
     */
    public void initialize() {
        try {
            namesToKeep = new HashSet<String>(new XReadLines(readNamesFile).readLines());
        } catch (FileNotFoundException e) {
            throw new UserException.CouldNotReadInputFile(readNamesFile, e);
        }
    }

    /**
     * The reads filter function.  Keeps only reads whose name is in the whitelist.
     *
     * @param ref the reference bases that correspond to our read, if a reference was provided
     * @param read the read itself, as a SAMRecord
     * @return true if the read passes the filter, false if it doesn't
     */
    public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
        return namesToKeep.contains(read.getReadName());
    }

    /**
     * The reads map function.  Passes each surviving read through unchanged.
     *
     * @param ref the reference bases that correspond to our read, if a reference was provided
     * @param read the read itself, as a SAMRecord
     * @return the read itself
     */
    public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
        return read;
    }

    /**
     * reduceInit is called once before any calls to the map function. We use it here to setup the output
     * bam file, if it was specified on the command line
     * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
     */
    public SAMFileWriter reduceInit() {
        return out;
    }

    /**
     * given a read and a output location, reduce by emitting the read
     * @param read the read itself
     * @param output the output source
     * @return the SAMFileWriter, so that the next reduce can emit to the same source
     */
    public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
        output.addAlignment(read);
        return output;
    }
}

View File

@ -1,8 +1,10 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
@ -41,13 +43,14 @@ import java.text.NumberFormat;
* reads with QC failure flag set, number of duplicates, percentage mapped, etc.
* @author aaron
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
public class FlagStatWalker extends ReadWalker<Integer, Integer> {
public class FlagStat extends ReadWalker<Integer, Integer> {
@Output
PrintStream out;
// what comes out of the flagstat
static class FlagStat {
static class FlagStatus {
long readCount = 0L;
long QC_failure = 0L;
long duplicates = 0L;
@ -117,7 +120,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
}
private FlagStat myStat = new FlagStat();
private FlagStatus myStat = new FlagStatus();
public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
myStat.readCount++;

View File

@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -30,10 +30,12 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -60,7 +62,8 @@ import java.util.List;
* Associated command:
* samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>
*/
public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
@Output
PrintStream out;

View File

@ -29,9 +29,11 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
@ -39,7 +41,8 @@ import java.io.PrintStream;
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
* of the given ROD.
*/
public class PrintRODsWalker extends RodWalker<Integer, Integer> {
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class PrintRODs extends RodWalker<Integer, Integer> {
@Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
public RodBinding<Feature> input;

View File

@ -29,11 +29,13 @@ import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -88,9 +90,10 @@ import java.util.TreeSet;
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReadsWalker extends ReadWalker<GATKSAMRecord, SAMFileWriter> {
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> {
@Output(doc="Write output to this BAM filename instead of STDOUT", required = true)
SAMFileWriter out;

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.walkers;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 8/2/12
* Time: 1:58 PM
* To change this template use File | Settings | File Templates.
*/
import java.lang.annotation.*;
/**
 * Marker annotation indicating that program records should be removed from SAM
 * headers by default for the annotated walker. Retained at runtime (so the engine
 * can discover it reflectively) and {@code @Inherited}, so subclasses of an
 * annotated walker type carry the same behavior.
 */
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface RemoveProgramRecords {
}

View File

@ -31,8 +31,10 @@ import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -45,16 +47,17 @@ import java.util.Map;
* Divides the input data set into separate BAM files, one for each sample in the input data set. The split
* files are named concatenating the sample name to the end of the provided outputRoot command-line argument.
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@WalkerName("SplitSamFile")
@Requires({DataSource.READS})
public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFileWriter>> {
public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWriter>> {
@Argument(fullName="outputRoot", doc="output BAM file", required=false)
public String outputRoot = null;
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
public Integer BAMcompression = 5;
private static Logger logger = Logger.getLogger(SplitSamFileWalker.class);
private static Logger logger = Logger.getLogger(SplitSamFile.class);
private static String VERSION = "0.0.1";
public void initialize() {

View File

@ -49,10 +49,7 @@ import java.util.List;
@ReadFilters(MalformedReadFilter.class)
@PartitionBy(PartitionType.NONE)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@DocumentedGATKFeature(
groupName = "GATK walkers",
summary = "General tools available for running on the command line as part of the GATK package",
extraDocs = {CommandLineGATK.class})
@DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class})
public abstract class Walker<MapType, ReduceType> {
final protected static Logger logger = Logger.getLogger(Walker.class);
private GenomeAnalysisEngine toolkit;

View File

@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -51,7 +51,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
char[] BASES = {'A','C','G','T'};
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
@ -14,7 +14,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
/**
@ -22,7 +24,7 @@ import java.util.*;
*/
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, final GenotypeBuilder gb) {
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, final GenotypeBuilder gb) {
Double ratio = annotateSNP(stratifiedContext, vc, g);
if (ratio == null)
return;

View File

@ -34,7 +34,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -52,7 +52,7 @@ import java.util.Map;
*/
public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@ -15,7 +16,7 @@ import java.util.*;
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele).
* Note that the base quality rank sum test can not be calculated for homozygous sites.
*/
public class BaseQualityRankSumTest extends RankSumTest {
public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); }
@ -64,12 +65,12 @@ public class BaseQualityRankSumTest extends RankSumTest {
// by design, first element in LinkedHashMap was ref allele
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) {
for (Map.Entry<Allele, Double> entry : el.entrySet()) {
if (a.isReference())
refLikelihood =el.get(a);
if (entry.getKey().isReference())
refLikelihood = entry.getValue();
else {
double like = el.get(a);
double like = entry.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}

View File

@ -29,18 +29,17 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -62,14 +61,14 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
private Set<String> founderIds = new HashSet<String>();
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.hasGenotypes() )
return null;
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true,founderIds);
}
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ){
public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ){
//If families were given, get the founders ids
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
}

View File

@ -18,7 +18,7 @@ import java.util.*;
* Date: 6/28/12
*/
public class ClippingRankSumTest /*extends RankSumTest*/ {
public class ClippingRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("ClippingRankSum"); }

View File

@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@ -38,7 +38,7 @@ import java.util.Map;
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -14,7 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
/**
@ -38,21 +42,17 @@ import java.util.*;
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static final String REF_ALLELE = "REF";
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
if ( g == null || !g.isCalled() )
return;
if ( vc.isSNP() )
annotateSNP(stratifiedContext, vc, gb);
else if ( vc.isIndel() )
annotateIndel(stratifiedContext, vc, gb);
annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
}
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlleles() )
@ -73,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
gb.AD(counts);
}
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
if ( pileup == null )
return;
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
alleleCounts.put(REF_ALLELE, 0);
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
final Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
}
for ( PileupElement p : pileup ) {
if ( p.isBeforeInsertion() ) {
final String b = p.getEventBases();
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
if ( alleleCounts.containsKey(insertion) ) {
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
}
} else if ( p.isBeforeDeletionStart() ) {
if ( p.getEventLength() == refAllele.length() ) {
// this is indeed the deletion allele recorded in VC
final String b = DEL;
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
if ( p.getEventLength() == refAllele.length() - 1 ) {
// this is indeed the deletion allele recorded in VC
final Allele deletion = Allele.create(refBase);
if ( alleleCounts.containsKey(deletion) ) {
alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
}
}
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1);
alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
}
}
int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(REF_ALLELE);
final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(refAllele);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
gb.AD(counts);
}
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }

Some files were not shown because too many files have changed in this diff Show More