Merge pull request #1186 from broadinstitute/ms_RankSum

Removes Dithering from Rank Sum Test
2016-02-29 14:10:37 -05:00 · 2016-02-29 14:10:37 -05:00 · 32bd02aa9d
parent 3b327ac0e4 c7e0f5b225
commit 32bd02aa9d
17 changed files with 677 additions and 650 deletions
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java
@ -59,11 +59,7 @@ import org.apache.log4j.Logger;
 import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
 import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
 import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ReducibleAnnotation;
-import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
-import org.broadinstitute.gatk.tools.walkers.variantutils.CombineGVCFs;
-import org.broadinstitute.gatk.tools.walkers.variantutils.GenotypeGVCFs;
 import org.broadinstitute.gatk.utils.MannWhitneyU;
-import org.broadinstitute.gatk.utils.collections.Pair;
 import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
 import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
 import org.broadinstitute.gatk.utils.exceptions.GATKException;
@ -298,14 +294,16 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn
        for (final Allele alt : perAlleleValues.keySet()) {
            if (alt.equals(ref, false))
                continue;
-            final MannWhitneyU mannWhitneyU = new MannWhitneyU(useDithering);
-            //load alts
+            final MannWhitneyU mannWhitneyU = new MannWhitneyU();
+            //load alts (series 1)
+            List<Double> alts = new ArrayList<>();
            for (final Number qual : perAlleleValues.get(alt)) {
-                mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
+                alts.add((double) qual.intValue());
            }
-            //load refs
+            //load refs (series 2)
+            List<Double> refs = new ArrayList<>();
            for (final Number qual : perAlleleValues.get(ref)) {
-                mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
+                refs.add((double) qual.intValue());
            }

            if (DEBUG) {
@ -320,8 +318,8 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn

            }
            // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
-            final Pair<Double, Double> testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1);
-            perAltRankSumResults.put(alt, testResults.first);
+            final MannWhitneyU.Result result = mannWhitneyU.test(convertToArray(alts), convertToArray(refs), MannWhitneyU.TestType.FIRST_DOMINATES);
+            perAltRankSumResults.put(alt, result.getZ());
        }
        return perAltRankSumResults;
    }
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RankSumTest.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RankSumTest.java
@ -51,7 +51,6 @@

 package org.broadinstitute.gatk.tools.walkers.annotator;

-import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
 import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.*;
 import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
 import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
@ -61,8 +60,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
 import org.broadinstitute.gatk.utils.MannWhitneyU;
 import org.broadinstitute.gatk.utils.QualityUtils;
 import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
-import htsjdk.variant.vcf.VCFHeaderLine;
-import org.broadinstitute.gatk.utils.collections.Pair;
 import org.broadinstitute.gatk.utils.pileup.PileupElement;
 import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
 import htsjdk.variant.variantcontext.Allele;
@ -79,7 +76,6 @@ import java.util.*;
 //TODO: will eventually implement ReducibleAnnotation in order to preserve accuracy for CombineGVCFs and GenotypeGVCFs -- see RMSAnnotation.java for an example of an abstract ReducibleAnnotation
 public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation {
    static final boolean DEBUG = false;
-    protected boolean useDithering = true;

    public Map<String, Object> annotate(final RefMetaDataTracker tracker,
                                        final AnnotatorCompatible walker,
@ -122,13 +118,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
        if ( refQuals.isEmpty() && altQuals.isEmpty() )
            return null;

-        final MannWhitneyU mannWhitneyU = new MannWhitneyU(useDithering);
-        for (final Double qual : altQuals) {
-            mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
-        }
-        for (final Double qual : refQuals) {
-            mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
-        }
+        final MannWhitneyU mannWhitneyU = new MannWhitneyU();

        if (DEBUG) {
            System.out.format("%s, REF QUALS:", this.getClass().getName());
@ -142,14 +132,26 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR

        }
        // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
-        final Pair<Double, Double> testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1);
+        final MannWhitneyU.Result result = mannWhitneyU.test(convertToArray(altQuals), convertToArray(refQuals), MannWhitneyU.TestType.FIRST_DOMINATES);
+        final double zScore = result.getZ();
+

        final Map<String, Object> map = new HashMap<>();
-        if (!Double.isNaN(testResults.first))
-            map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
+        if (!Double.isNaN(zScore))
+            map.put(getKeyNames().get(0), String.format("%.3f", zScore));
        return map;
    }

+    public static double[] convertToArray(List<Double> list){
+        double[] ret = new double[list.size()];
+        Iterator<Double> iterator = list.iterator();
+        for (int i = 0; i < ret.length; i++)
+        {
+            ret[i] = iterator.next().doubleValue();
+        }
+        return ret;
+    }
+
    private void fillQualsFromPileup(final List<Allele> alleles,
                                     final ReadBackedPileup pileup,
                                     final List<Double> refQuals,
@ -255,14 +257,4 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
                read.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE );
    }

-    /**
-     * Initialize the rank sum test annotation using walker and engine information. Right now this checks to see if
-     * engine randomization is turned off, and if so does not dither.
-     * @param walker            the walker
-     * @param toolkit           the GATK engine
-     * @param headerLines       the header lines
-     */
-    public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
-        useDithering = ! toolkit.getArguments().disableDithering;
-    }
 }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/RankSumUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/RankSumUnitTest.java
@ -65,7 +65,7 @@ import java.util.List;

 public class RankSumUnitTest {

-    List<Integer> distribution20, distribution30, distribution20_40;
+    List<Double> distribution20, distribution30, distribution20_40;
    static final int observations = 100;

    @BeforeClass
@ -86,11 +86,11 @@ public class RankSumUnitTest {
        Collections.shuffle(distribution20_40, Utils.getRandomGenerator());
    }

-    private static void makeDistribution(final List<Integer> result, final int target, final int skew, final int numObservations) {
+    private static void makeDistribution(final List<Double> result, final int target, final int skew, final int numObservations) {
        final int rangeStart = target - skew;
        final int rangeEnd = target + skew;

-        int current = rangeStart;
+        double current = rangeStart;
        for ( int i = 0; i < numObservations; i++ ) {
            result.add(current++);
            if ( current > rangeEnd )
@ -118,40 +118,33 @@ public class RankSumUnitTest {
    }

    @Test(enabled = true, dataProvider = "DistributionData")
-    public void testDistribution(final List<Integer> distribution1, final List<Integer> distribution2, final int numToReduceIn2, final boolean distributionsShouldBeEqual, final String debugString) {
-        final MannWhitneyU mannWhitneyU = new MannWhitneyU(true);
+    public void testDistribution(final List<Double> distribution1, final List<Double> distribution2, final int numToReduceIn2, final boolean distributionsShouldBeEqual, final String debugString) {
+        final MannWhitneyU mannWhitneyU = new MannWhitneyU();

-        for ( final Integer num : distribution1 )
-            mannWhitneyU.add(num, MannWhitneyU.USet.SET1);
-
-        final List<Integer> dist2 = new ArrayList<>(distribution2);
+        final List<Double> dist2 = new ArrayList<>(distribution2);
        if ( numToReduceIn2 > 0 ) {
-            int counts = 0;
-            int quals = 0;
+            Double counts = 0.0;
+            Double quals = 0.0;

            for ( int i = 0; i < numToReduceIn2; i++ ) {
                counts++;
                quals += dist2.remove(0);
            }

-            final int qual = quals / counts;
+            final Double qual = quals / counts;
            for ( int i = 0; i < numToReduceIn2; i++ )
                dist2.add(qual);
        }

-        for ( final Integer num : dist2 )
-            mannWhitneyU.add(num, MannWhitneyU.USet.SET2);
-
-        final Double result = mannWhitneyU.runTwoSidedTest().second;
+        final Double result = mannWhitneyU.test(RankSumTest.convertToArray(distribution1),RankSumTest.convertToArray(dist2), MannWhitneyU.TestType.TWO_SIDED).getP();
        Assert.assertFalse(Double.isNaN(result));

        if ( distributionsShouldBeEqual ) {
-            // TODO -- THIS IS THE FAILURE POINT OF USING REDUCED READS WITH RANK SUM TESTS
            if ( numToReduceIn2 >= observations / 2 )
                return;
-            Assert.assertTrue(result > 0.1, String.format("%f %d %d", result, numToReduceIn2, dist2.get(0)));
+            Assert.assertTrue(result > 0.1, String.format("%f %d %f", result, numToReduceIn2, dist2.get(0)));
        } else {
-            Assert.assertTrue(result < 0.01, String.format("%f %d %d", result, numToReduceIn2, dist2.get(0)));
+            Assert.assertTrue(result < 0.01, String.format("%f %d %f", result, numToReduceIn2, dist2.get(0)));
        }
    }
 }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java
@ -100,7 +100,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
    public void testHasAnnotsAsking1() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
-                Arrays.asList("d6cd81fc2f483f29d44fbb27d1772841"));
+                Arrays.asList("832861ecdc6344cfb7097d02903ffd4d"));
        executeTest("test file has annotations, asking for annotations, #1", spec);
    }

@ -108,7 +108,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
    public void testHasAnnotsAsking2() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
-                Arrays.asList("300836de4e2c8424734d2ee0ca4261c1"));
+                Arrays.asList("5120f5f419cb32b37d5c56101f4c5da2"));
        executeTest("test file has annotations, asking for annotations, #2", spec);
    }

@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
    public void testNoAnnotsAsking1() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
-                Arrays.asList("192f393da4e28aecf16112562e65083a"));
+                Arrays.asList("95a4be70c46496fb6791fdd0c3bcf8a3"));
        executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
    }

@ -142,7 +142,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
    public void testNoAnnotsAsking2() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
-                Arrays.asList("52baff55535f7c87545a7818052a2d5c"));
+                Arrays.asList("8436bf4acb81f2c03bb5ecf8a514d90a"));
+
        executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
    }

@ -150,7 +151,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
    public void testExcludeAnnotations() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString() + STANDARD_ANNOTATIONS + "-XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
-                Arrays.asList("334915d90fa92ee9fa07d4647912ceac"));
+                Arrays.asList("4995f77bedf7f476a15d46b5a8a392bd"));
        executeTest("test exclude annotations", spec);
    }

@ -183,7 +184,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
    public void testOverwritingHeader() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
-                Arrays.asList("ab84654ac412a0aaaec99e86e357f0fd"));
+                Arrays.asList("de96b1e62f414b107430d179a154534d"));
        executeTest("test overwriting header", spec);
    }

--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java
@ -74,12 +74,12 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe

    @Test(enabled = true)
    public void testBOTH_GGA_Pools() {
-        executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "217d9108c3014261dbe8befa383a2226");
+        executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "1b834608171f67e52a4e7617458a3ba6");
    }

    @Test(enabled = true)
    public void testINDEL_GGA_Pools() {
-        executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES  -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "6a7f00e7f26cbc1891f40c9ed8b6579b");
+        executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES  -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "5b9e08bb141c48f826dc513066cb8a13");
    }

    @Test(enabled = true)
@ -88,6 +88,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
        //TODO the old MD5 is kept for the record.
        //TODO this should be revisit once we get into addressing inaccuracies by the independent allele approach.
 //        executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b5ff7530827f4b9039a58bdc8a3560d2");
-        executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "c0271a4c281991a86490c1955456af26");
+        executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "8449ea4fa85eac8834b049a0e3daee5a");
    }
 }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java
@ -63,16 +63,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe

    @Test(enabled = true)
    public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
-        executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","c445bcd828c540c009bc8fc9f48916bc");
+        executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","2a0dcd92f3a07f45b7927e66404bc37e");
    }

    @Test(enabled = true)
    public void testMT_SNP_DISCOVERY_sp4() {
-        executor.PC_MT_Test(CEUTRIO_BAM, "-A AlleleCountBySample -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","8d8c7926ca9c68251c41eb03b8efaba2");
+        executor.PC_MT_Test(CEUTRIO_BAM, "-A AlleleCountBySample -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","b9f5d1db0d2f5eb00eeb72ea29130de6");
    }

    @Test(enabled = true)
    public void testMT_SNP_GGA_sp10() {
-        executor.PC_MT_Test(CEUTRIO_BAM, String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES  -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "34242c0ae59b0645fb6df5c322e92f01");
+        executor.PC_MT_Test(CEUTRIO_BAM, String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES  -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "d90e41081b4a910a50116ff18c311245");
    }
 }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java
@ -78,7 +78,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
                        " -o %s" +
                        " -L 1:10,000,000-10,500,000",
                1,
-                Arrays.asList("b132da42e1017c8825d84a06cf79a1e9"));
+                Arrays.asList("f78975d22ac27a841d807765242d3c3f"));
        executeTest(String.format("test indel caller in SLX"), spec);
    }

@ -92,7 +92,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
                        " -minIndelCnt 1" +
                        " -L 1:10,000,000-10,100,000",
                1,
-                Arrays.asList("23e32822d81f198ee52676f24ef74343"));
+                Arrays.asList("75381df9490adf4206ebbfbe0f49226e"));

        executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
    }
@ -105,7 +105,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
                        " -o %s" +
                        " -L 1:10,000,000-10,500,000",
                1,
-                Arrays.asList("93580986b871890a4fc86ebf98877efa"));
+                Arrays.asList("e89c2a0d30a409087af25fc06fd46404"));

        executeTest(String.format("test indel calling, multiple technologies"), spec);
    }
@ -115,7 +115,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
                        "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-                Arrays.asList("4da9878ee36d0fb387d5a58631383ffa"));
+                Arrays.asList("af5475e45b6ffca990dd035448e6b0fe"));
        executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
    }

@ -125,7 +125,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
                baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
                        + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
                        "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-                Arrays.asList("9ab4f5306438b5a64cb2cb90020b7b82"));
+                Arrays.asList("f9b4ed435a53198d30199e8a5c98252d"));
        executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
    }

@ -140,7 +140,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
                        "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
-                Arrays.asList("6de8d740908e22b46785d5eba6278eb2"));
+                Arrays.asList("feef70172d1edf2f9782143c8bbbf1d0"));
        executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
    }

@ -181,7 +181,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
    public void testMinIndelFraction0() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                assessMinIndelFraction + " -minIndelFrac 0.0", 1,
-                Arrays.asList("56673c166ba25f625d026e593d5cc667"));
+                Arrays.asList("0369f9c03df86486709a35a5a2f23192"));
        executeTest("test minIndelFraction 0.0", spec);
    }

@ -189,7 +189,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
    public void testMinIndelFraction25() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                assessMinIndelFraction + " -minIndelFrac 0.25", 1,
-                Arrays.asList("2ce1754313561b9cb134152b8f98bf43"));
+                Arrays.asList("8bd1f903445255f24bd9ef677e1ba9a8"));
        executeTest("test minIndelFraction 0.25", spec);
    }

--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testMinBaseQualityScore() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
-                Arrays.asList("21369e50334d2b77b0e638e47e1b8c64"));
+                Arrays.asList("b4201a4d30b6ed5c6fc80248676be5ff"));
        executeTest("test min_base_quality_score 26", spec);
    }

@ -94,7 +94,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testSLOD() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
-                Arrays.asList("e58d9a5758c5b11f86558608260d93d5"));
+                Arrays.asList("45f867c73dc3008e874eb3c009cd9674"));
        executeTest("test SLOD", spec);
    }

@ -102,7 +102,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testNDA() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
-                Arrays.asList("fc8cdf9eeb475773303809c077f83c65"));
+                Arrays.asList("905e9a2ae183fc152101314774707372"));
        executeTest("test NDA", spec);
    }

@ -110,7 +110,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testCompTrack() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
-                Arrays.asList("c98294d321bde3e1e3c4fcee3e88d6d9"));
+                Arrays.asList("ac14f19fa1e9cecc8246814eac393662"));
        executeTest("test using comp track", spec);
    }

@ -124,17 +124,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {

    @Test
    public void testOutputParameterSitesOnly() {
-        testOutputParameters("-sites_only", "5b8938ed55a2b7ae8a52056c9130367b");
+        testOutputParameters("-sites_only", "be100d3f04592d0865d907177ee8c7e4");
    }

    @Test
    public void testOutputParameterAllConfident() {
-        testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f353b36db7305f47963446220e39debe");
+        testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9024aa3ccf7339b873444b792ff5cb91");
    }

    @Test
    public void testOutputParameterAllSites() {
-        testOutputParameters("--output_mode EMIT_ALL_SITES", "364aec53db79d20698fe0d088828736f");
+        testOutputParameters("--output_mode EMIT_ALL_SITES", "0d1e1fcc70fb4c159eac016f3085e5e9");
    }

    private void testOutputParameters(final String args, final String md5) {
@ -148,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testConfidence() {
        WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
-                Arrays.asList("ec59b34bedf40d70850ab5ffe42bbddd"));
+                Arrays.asList("13d2cc1e0871f85a97fb1f73c052ea3e"));
        executeTest("test confidence 1", spec1);
    }

@ -156,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testNoPrior() {
        WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.33333 -inputPrior 0.33333", 1,
-                Arrays.asList("7e8a51e658debdaadbcf17761ed011da"));
+                Arrays.asList("d94ab69be622fb23c97f524234eb05b3"));
        executeTest("test no prior 1", spec1);

    }
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testUserPrior() {
        WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.001 -inputPrior 0.495", 1,
-                Arrays.asList("b3514f5b3510b6667fd2c85ecc529de7"));
+                Arrays.asList("2f3c5fa06537c8a3f440fb70837702f6"));
        executeTest("test user prior 1", spec1);

    }
@ -174,7 +174,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void emitPLsAtAllSites() {
        WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1,
-                Arrays.asList("6fd14930acb08f0dd9749a8c4d7df831"));
+                Arrays.asList("76f7a8508ad40cf35b68b07bc7e29a72"));
        // GDA: TODO: BCF encoder/decoder doesn't seem to support non-standard values in genotype fields. IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail
        spec1.disableShadowBCF();

@ -190,12 +190,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {

    @Test
    public void testHeterozyosity1() {
-        testHeterozosity( 0.01, "815f01ef28bf576beb5528ac6fdd5248" );
+        testHeterozosity( 0.01, "e3c708bfeea99835512c1fac477d02e4" );
    }

    @Test
    public void testHeterozyosity2() {
-        testHeterozosity( 1.0 / 1850, "8dd14ba8ef6314a99921849b2544b8c6" );
+        testHeterozosity( 1.0 / 1850, "1226aaa771b8f057646474d306a84866" );
    }

    private void testHeterozosity(final double arg, final String md5) {
@ -274,7 +274,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                        " -o %s" +
                        " -L 1:10,000,000-10,100,000",
                1,
-                Arrays.asList("34980dbce4fcd2aa21c46ea0e1897422"));
+                Arrays.asList("589f9dd567bb86ce74e5d48fd7cae0b2"));

        executeTest(String.format("test multiple technologies"), spec);
    }
@ -293,7 +293,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                        " -L 1:10,000,000-10,100,000" +
                        " -baq CALCULATE_AS_NECESSARY",
                1,
-                Arrays.asList("5f02a7449305b26aab3ff994dfb53fda"));
+                Arrays.asList("be978fc8807fcbd514b2678adcdc2a28"));

        executeTest(String.format("test calling with BAQ"), spec);
    }
@ -310,7 +310,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " +
                "-A SnpEff",
                1,
-                Arrays.asList("b61c0dece2d77544f9313c24191e0089"));
+                Arrays.asList("52bac28a85b85d9596d668d3f934f792"));

        executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
    }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java
@ -70,7 +70,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
    public void testMultiSamplePilot1() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
-                Arrays.asList("0f9eff0ad2f8cb8e277922bd037825f7"));
+                Arrays.asList("46dd44e69ff078a48057ff08d278a293"));
        executeTest("test MultiSample Pilot1", spec);
    }

@ -78,7 +78,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
    public void testWithAllelesPassedIn1() {
        WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
-                Arrays.asList("196979c91f84c01fb5d89e73339c09fa"));
+                Arrays.asList("a19e5f74606cd980c3083d41d23b4acb"));
        executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
    }

@ -86,7 +86,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
    public void testWithAllelesPassedIn2() {
        WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
-                Arrays.asList("391c4e1437a3fa5c99584b466a56d7bb"));
+                Arrays.asList("fe3aec195ceb9180b6ffc474a4bb37b3"));
        executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
    }

@ -94,7 +94,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
    public void testSingleSamplePilot2() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
-                Arrays.asList("41b1d92cdea4581f7843b714345547d6"));
+                Arrays.asList("21b346710ccdbbfc904eafda2b0c7443"));
        executeTest("test SingleSample Pilot2", spec);
    }

@ -102,7 +102,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
    public void testMultipleSNPAlleles() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
-                Arrays.asList("4ae7c090dfb85f6286f1187a746def58"));
+                Arrays.asList("03e6cb2c81775b94c81af45560062705"));
        executeTest("test Multiple SNP alleles", spec);
    }

@ -118,7 +118,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
    public void testReverseTrim() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
-                Arrays.asList("5c2c49bb5e276df933b2d264e9d4a327"));
+                Arrays.asList("710d308a32012911fc275042b612e3d0"));
        executeTest("test reverse trim", spec);
    }

@ -126,7 +126,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
    public void testMismatchedPLs() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
-                Arrays.asList("ed5f90897c9a348e4f861eff8992b4e2"));
+                Arrays.asList("55f9bd98155a7a4b768fba97f90de282"));
        executeTest("test mismatched PLs", spec);
    }
 }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
@ -72,7 +72,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa

    @Test
    public void testHaplotypeCallerMultiSampleComplex1() {
-        HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "066f1ce9e9826bcfedf6cd80bc560ab8");
+        HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "6be5c8cbedea0a571c98fb1f4718042f");
    }

    private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -96,13 +96,13 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
    @Test
    public void testHaplotypeCallerMultiSampleGGAComplex() {
        HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
-                "2a5de432f04198737732064206c7d63d");
+                "af3a490e5f41e890c59927426ac0fe9a");
    }

    @Test
    public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
        HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
-                "afaa41101f492d37f57bb18cc638c6bc");
+                "12b1826ee23243c64a002c3cbdfa569a");
    }

    private void HCTestComplexConsensusMode(String bam, String args, String md5) {
@ -114,7 +114,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
    @Test
    public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
        HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
-                "d42ba795fe346d8372cae3c00c9c2f23");
+                "9765065daf3a008bd92c755d882c5f07");
    }

 }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
@ -79,12 +79,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
        //TODO this might need to be addressed at some point.
        //TODO the following test is commented out for the record
        //tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "7f09c261950bf86e435edfa69ed2ec71"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "02300a1f64e085cc0f4420d8160743c1"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "f71ea433b1334a2d146cc6ad76b46d98"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "daddb5349c34e9190f0563e220894748"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "03e5815cc351f1ec2feed89d2aed8268"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "5d5cca382bdf6987b2aef87918ed374c"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "532888aff96c356397a21ef790636818"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "1c1ca2d76bc5d7dd45a5e7aef756ad8f"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7ddfd33f2efcd2617d896826434fb43c"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "30accd7a171e9a13969fa543fde3fed1"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "20d896bbf750739f937ccd2fb152d902"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "99dd66a5b36ba9e2d1b0f408ecfbb50b"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "72d90fe0f5641b373744f75a21d4d14c"});

        return tests.toArray(new Object[][]{});
    }
@ -98,13 +98,13 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
        final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;

        // this functionality can be adapted to provide input data for whatever you might want in your data
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "cfe629c5a3be3b6524258ad1f9145488"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "12fbfffa4bb2b8d520f8021a40b37d19"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a5ea6d4052bbf9e8bba9011bc6f0d203"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "31da2254620f4a9c34ccf7c311cc133f"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "c95f52fe395392331dc3102902d54408"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "9b2914cf121c411aad3a65cfdc98c1d4"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "259110565155a3a27ab3e98113bedef8"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "8058ce63339701d33425769217bffed1"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "3abdde9d9ddbe9ec7cef28c65844a409"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "f5f0cef38a529e1542a8fd8bcce7ab1e"});

-        final String NA12878bandedResolutionMD5 = "fbe6099d138a069a65e4713bcae1e873";
+        final String NA12878bandedResolutionMD5 = "0f848ee7240ca48827bdfb85b455c1ad";
        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
        tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878",
                ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
@ -121,12 +121,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
        final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;

        // this functionality can be adapted to provide input data for whatever you might want in your data
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "5088d6cf80a6da7c769f97b1ab44c745"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "0895f09731d0ef89ec131c9f75aafe70"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "195017f498d3508e6eee492eb00da97b"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "4f8e3e249509a24da21d5dd8e3594f92"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "ef96f5295b048ef31f5ba82d078a44a2"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "cd809158689ddbbfd18a4eaae016f9a0"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "1f99396292974df55ef6497be79b1917"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "eabb44785fd4a84ade3c674a0bda16d9"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "904feadac0e4a2a0c6b2cc7e55718f3b"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "b430a401310b3812346d7496f9c62011"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "6e481747c50734b7fb0c4de39405044f"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "cff5e49456d5ecc0dafd31cd014def4f"});

        return tests.toArray(new Object[][]{});
    }
@ -139,12 +139,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
        final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;

        // this functionality can be adapted to provide input data for whatever you might want in your data
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "bb85582fcc2ce45407640fa8a70421ab"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "e01603c5d4c3142978a91b8cd6a98618"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "b9e471935b960a7dba3eb2b13939ccaf"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "c724ecdaea7eab5a6239ff4daaa6e034"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "194d27bd2321ff1a8b895a4e9a8d2938"});
-        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "4a3dfcfc2f5d27b75725346d63e0b83a"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50156c819a0096fa22ed1b9749affecc"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d91aace2100dc040659d77f366053a2e"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d86dc51757e69aa3f2608fbbfaa90069"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "27c73b8b1ec384a880bf60daf0b0f80e"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "a6c70203e43d62b42c4b751fe9018410"});
+        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "7ebbbcab78d090d70a24819093812748"});

        return tests.toArray(new Object[][]{});
    }
@ -270,7 +270,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
    public void testWrongGVCFNonVariantRecordOrderBugFix() {
        final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
                HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9b3b232ceeb109f2624826ea20825a82"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("a22bafa2f91c197ea1ab13e02a0c08fb"));
        spec.disableShadowBCF();
        executeTest("testMissingGVCFIndexingStrategyException", spec);
    }
@ -320,7 +320,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
    public void testAlleleSpecificAnnotations() {
        final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering",
                HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("a53ac6cb2379d8adccfca6495e32e225"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9db19d4706e32ce883c6e3f71e5f4178"));
        spec.disableShadowBCF();
        executeTest(" testAlleleSpecificAnnotations", spec);
    }
@ -329,7 +329,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
    public void testASMQMateRankSumAnnotation() {
        final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A AS_MQMateRankSumTest --disableDithering",
                HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("b3c0eccf035c0c5d3ac09dc93b8ee97a"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("85b26109c4586ba33c44e253297050be"));
        spec.disableShadowBCF();
        executeTest(" testASMQMateRankSumAnnotation", spec);
    }
@ -338,7 +338,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
    public void testASInsertSizeRankSum() {
        final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering -A AS_InsertSizeRankSum",
                HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("207e9c996f5c289a33d178c16d854bd0"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("f1531ed03d6f9538d4d6af7630ff2502"));
        spec.disableShadowBCF();
        executeTest(" testASInsertSizeRankSum", spec);
    }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@ -96,82 +96,82 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerMultiSample() throws IOException {
-        HCTest(CEUTRIO_BAM, "", "bdc5a942a7df2d4c61388ecc74a163cf");
+        HCTest(CEUTRIO_BAM, "", "b86311f6a0b4b5f2cfd05662f03fbe6c");
    }

    @Test
    public void testHaplotypeCallerSingleSample() throws IOException {

-        HCTest(NA12878_BAM, "", "5941f88fa1372eb598880a4711ac08de");
+        HCTest(NA12878_BAM, "", "1ceedd80a36e042a93d60e25b94c0b03");
    }

    @Test
    public void testHaplotypeCallerMultiSampleHaploid() throws IOException {
-        HCTest(CEUTRIO_BAM, "-ploidy 1", "4c32640204a11fe46a23d97b1012bca2");
+        HCTest(CEUTRIO_BAM, "-ploidy 1", "a7e83df05d8ef3558fa98e8c6a86e12d");
    }

    @Test
    public void testHaplotypeCallerSingleSampleHaploid() throws IOException {
-        HCTest(NA12878_BAM, "-ploidy 1", "e09c5fd862f86f69289d847e3b293e19");
+        HCTest(NA12878_BAM, "-ploidy 1", "6e9855e22ed78a60eaaf0a06ed967b77");
    }

    @Test
    public void testHaplotypeCallerSingleSampleTetraploid() throws IOException {
-        HCTest(NA12878_BAM, "-ploidy 4", "bbae9f77683591200b2034d5461e6425");
+        HCTest(NA12878_BAM, "-ploidy 4", "ae463cceb8cb0937440cfe5b56904feb");
    }

    @Test
    public void testHaplotypeCallerMinBaseQuality() throws IOException {
-        HCTest(NA12878_BAM, "-mbq 15", "5941f88fa1372eb598880a4711ac08de");
+        HCTest(NA12878_BAM, "-mbq 15", "1ceedd80a36e042a93d60e25b94c0b03");
    }

    @Test
    public void testHaplotypeCallerMinBaseQualityHaploid() throws IOException {
-        HCTest(NA12878_BAM, "-mbq 15 -ploidy 1", "e09c5fd862f86f69289d847e3b293e19");
+        HCTest(NA12878_BAM, "-mbq 15 -ploidy 1", "6e9855e22ed78a60eaaf0a06ed967b77");
    }

    @Test
    public void testHaplotypeCallerMinBaseQualityTetraploid() throws IOException {
-        HCTest(NA12878_BAM, "-mbq 15 -ploidy 4", "bbae9f77683591200b2034d5461e6425");
+        HCTest(NA12878_BAM, "-mbq 15 -ploidy 4", "ae463cceb8cb0937440cfe5b56904feb");
    }

    @Test
    public void testHaplotypeCallerGraphBasedSingleSample() throws IOException {
-        HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "9a4ba54146449914b1fbaed5465b692e");
+        HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "58abaabab25b9a75aaf17ab48478f42a");
    }

    @Test
    public void testHaplotypeCallerGraphBasedMultiSampleHaploid() throws IOException {
-        HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "5333e3ff4d7fc53624eab801250be4f0");
+        HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "d7c7ff3883e8a50be38ec32b1a8fa5b4");
    }

    @Test
    public void testHaplotypeCallerGraphBasedMultiSample() throws IOException {
-        HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "e9759bf254b7b4d06e4a68c94a417cf1");
+        HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "ccec6b941d41ac2642450964c3ab85d7");
    }

    @Test
    public void testHaplotypeCallerSingleSampleWithDbsnp() throws IOException {
-        HCTest(NA12878_BAM, "-D " + b37dbSNP132, "7466c4d9ce6a1d23bcb428fc9f446843");
+        HCTest(NA12878_BAM, "-D " + b37dbSNP132, "48a008f41644c8e4bb256141d095a140");
    }

    @Test
    public void testHaplotypeCallerMultiSampleGGA() throws IOException {
        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" +
                " -isr INTERSECTION -L " + GGA_INTERVALS_FILE,
-                "c2fe156912e59626c0393b9b47c9419e");
+                "b7d2576ac00c175f4136b2bfbffc4dcd");
    }

    @Test
    public void testHaplotypeCallerMultiSampleGGAHaploid() throws IOException {
        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -isr INTERSECTION -L 20:10080000-10100000",
-                "49f737983375c740361daa2d48ed0249");
+                "07e0a87e762173bcd6bba5e7af704159");
    }

    @Test
    public void testHaplotypeCallerMultiSampleGGATetraploid() throws IOException {
        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -isr INTERSECTION -L 20:10080000-10100000",
-                "6ba0fa930899f70fee9a7b1161508f93");
+                "be7315e2866eccfb951ed63090f9119c");
    }

    @Test
@ -187,7 +187,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerSingleSampleIndelQualityScores() {
-        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "22028b104dd60b23a399e5e3a877a1fb");
+        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6f5678f8fb6a4c0b21df1e6b0d1019ee");
    }

    private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -224,7 +224,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerNearbySmallIntervals() {
-        HCTestNearbySmallIntervals(NA12878_BAM, "", "6e55de7bf49ecdc71f6d4c9565a19853");
+        HCTestNearbySmallIntervals(NA12878_BAM, "", "796f4a44a29fc3c1a1461f90aef45846");
    }

    // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
@ -234,7 +234,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    @Test
    public void HCTestProblematicReadsModifiedInActiveRegions() {
        final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f8774326a268f7d394b333818d15d05c"));
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("392ac1966cfa1cae78e8d674065a0d28"));
        executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
    }

@ -296,7 +296,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void HCTestDBSNPAnnotationWGS() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub " +  HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
-                Arrays.asList("ab816ff6facc08acb19c55bd6e828f02"));
+                Arrays.asList("5430b91902813cf64f5bb781b25d76c6"));
        executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
    }

@ -305,7 +305,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub " +  HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,100,000-11,000,000 -D " + b37dbSNP132
                        + " -L " + hg19Intervals + " -isr INTERSECTION", 1,
-                Arrays.asList("fd1a539e14902f6957eb939aac1412f0"));
+                Arrays.asList("8b775d84ea981f1e8207fa69a92b5e1f"));
        executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
    }

@ -313,7 +313,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void HCTestDBSNPAnnotationWGSGraphBased() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -pairHMMSub " +  HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
-                Arrays.asList("1810c35e8298dff4aa1b7b04fb5f4962"));
+                Arrays.asList("f6f5b7e9348fc6dccd63110de0371d78"));
        executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
    }

@ -322,7 +322,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -pairHMMSub " +  HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
                        + " -L " + hg19Intervals + " -isr INTERSECTION", 1,
-                Arrays.asList("8a06a53388d73ec667b353379f3b351e"));
+                Arrays.asList("82cd0142146521fd659905737dc6192c"));
        executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
    }

@ -345,7 +345,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void HCTestAggressivePcrIndelModelWGS() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -pairHMMSub " + HMM_SUB_IMPLEMENTATION +  " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,270,000-10,300,000", 1,
-                Arrays.asList("be71ab16d55437cbe3005ea3b93cece6"));
+                Arrays.asList("c43caebd09b69dbc9d1e4b5f5f449712"));
        executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
    }

@ -353,7 +353,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void HCTestConservativePcrIndelModelWGS() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -pairHMMSub " + HMM_SUB_IMPLEMENTATION +  " " + ALWAYS_LOAD_VECTOR_HMM  + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,270,000-10,300,000", 1,
-                Arrays.asList("8a5185d0a9400c8b3f4b12da65181c4b"));
+                Arrays.asList("54e7595f6d82a69566f4c0163045688d"));
        executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
    }

@ -382,7 +382,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
        final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
                HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("02ec3090c4a6359fa10e6d8b30e1d5a2"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("8d44a2e1967e6e844f221960d6ea42bb"));
        spec.disableShadowBCF();
        executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
    }
@ -391,7 +391,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void testMissingKeyAlternativeHaplotypesBugFix() {
        final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s -L %s --no_cmdline_in_header ",
                HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("13c9f094b9c54960dc2fd3a1815a2645"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cf8912f84153b2357a4d1ddb361f786f"));
        spec.disableShadowBCF();
        executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
    }
@ -468,12 +468,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerTandemRepeatAnnotator() throws IOException{
-        HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A TandemRepeatAnnotator -XA MappingQualityZero -XA SpanningDeletions", "03738462e7f0b6f149f40b790a3a7261");
+        HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A TandemRepeatAnnotator -XA MappingQualityZero -XA SpanningDeletions", "abc2daf88b9c5b3c0989f5d06df84ab3");
    }

    @Test
    public void testHBaseCountsBySample() throws IOException{
-        HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A BaseCountsBySample", "8739d92898113436666f1cff3bc39bc5");
+        HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A BaseCountsBySample", "3e0ee74ec384ae38bda8ac15d8d67cca");
    }
 }

--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
@ -260,7 +260,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
        final WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(" -V " + gVCF.getAbsolutePath(), b37KGReference),
                1,
-                Arrays.asList("eeff965cd79c0b7085c7d4d7ecf82b68"));
+                Arrays.asList("d8eb4a64a2ae7e7dad1efc4fe8b4b3ed"));
        spec.disableShadowBCF();  //TODO: Remove when BaseTest.assertAttributesEquals() works with SAC
        executeTest("testStrandAlleleCountsBySample", spec);
    }
@ -566,7 +566,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
    public void testAlleleSpecificAnnotations() {
        final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
                + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf";
-        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("35daaea8dea591d35ca99854c8d36e5f"));
+        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("7261cde50d4556c6dc190b10222a8538"));
        spec.disableShadowBCF();
        executeTest("testAlleleSpecificAnnotations", spec);
    }
@ -575,7 +575,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
    public void testASMateRankSumAnnotation() {
        final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest --disableDithering -V "
                + privateTestDir + "NA12878.AS.MateRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.MateRankSum.chr20snippet.g.vcf";
-        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("f8bf55e16358b35449621f896b084b7a"));
+        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("08dc30527c98d78d09f8575efc3b84ae"));
        spec.disableShadowBCF();
        executeTest("testASMateRankSumAnnotation", spec);
    }
@ -584,7 +584,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
    public void testASInsertSizeRankSumAnnotation() {
        final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
                + privateTestDir + "NA12878.AS.InsertSizeRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.InsertSizeRankSum.chr20snippet.g.vcf";
-        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("8a27e5fbcb0d8f91684c2887167eb043"));
+        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("723522fe1d05f2d11efda008b65884d2"));
        spec.disableShadowBCF();
        executeTest("testASInsertSizeRankSumAnnotation", spec);
    }
@ -597,7 +597,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
    public void testAlleleSpecificAnnotations_oneSample() {
        final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
                + privateTestDir + "NA12878.AS.chr20snippet.g.vcf";
-        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("b6026b2a2d2da39f181a4905b2225dad"));
+        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("cc2fe42eafd9789be49b550a639a7a1b"));
        spec.disableShadowBCF();
        executeTest("testAlleleSpecificAnnotations_oneSample", spec);
    }
@ -616,7 +616,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
    public void testFractionInformativeReads() {
        final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -G AS_Standard -o %s --no_cmdline_in_header -A FractionInformativeReads --disableDithering -V "
                + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf";
-        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("794cfec86a8bee1f6955766b5a98b950"));
+        final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("16dbeec9d640e61453e902918427343a"));
        spec.disableShadowBCF();
        executeTest("testAlleleSpecificAnnotations", spec);
    }
--- a/public/gatk-root/pom.xml
+++ b/public/gatk-root/pom.xml
@ -196,6 +196,11 @@
                <artifactId>commons-math</artifactId>
                <version>2.2</version>
            </dependency>
+            <dependency>
+                <groupId>org.apache.commons</groupId>
+                <artifactId>commons-math3</artifactId>
+                <version>3.5</version>
+            </dependency>
            <dependency>
                <groupId>net.java.dev.jna</groupId>
                <artifactId>jna</artifactId>
--- a/public/gatk-utils/pom.xml
+++ b/public/gatk-utils/pom.xml
@ -66,6 +66,10 @@
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-math</artifactId>
        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-math3</artifactId>
+        </dependency>
        <dependency>
            <groupId>net.java.dev.jna</groupId>
            <artifactId>jna</artifactId>
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/MannWhitneyU.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/MannWhitneyU.java
@ -25,483 +25,534 @@

 package org.broadinstitute.gatk.utils;

-import cern.jet.math.Arithmetic;
-import cern.jet.random.Normal;
-import com.google.java.contract.Ensures;
-import com.google.java.contract.Requires;
-import org.apache.commons.math.MathException;
-import org.apache.commons.math.distribution.NormalDistribution;
-import org.apache.commons.math.distribution.NormalDistributionImpl;
-import org.broadinstitute.gatk.utils.collections.Pair;
-import org.broadinstitute.gatk.utils.exceptions.GATKException;
+import htsjdk.samtools.util.Histogram;
+import org.apache.commons.math3.distribution.NormalDistribution;

-import java.io.Serializable;
-import java.util.Comparator;
-import java.util.TreeSet;
+
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;

 /**
- * Created by IntelliJ IDEA.
- * User: chartl
+ * Imported with changes from Picard private.
+ *
+ * @author Tim Fennell
 */
 public class MannWhitneyU {

-    private static Normal STANDARD_NORMAL = new Normal(0.0,1.0,null);
-    private static NormalDistribution APACHE_NORMAL = new NormalDistributionImpl(0.0,1.0,1e-2);
-    private static double LNSQRT2PI = Math.log(Math.sqrt(2.0*Math.PI));
+    private static final class Rank implements Comparable<Rank> {
+        final double value;
+        float rank;
+        final int series;

-    private TreeSet<Pair<Number,USet>> observations;
-    private int sizeSet1;
-    private int sizeSet2;
-    private ExactMode exactMode;
-
-    public MannWhitneyU(ExactMode mode, boolean dither) {
-        if ( dither )
-            observations = new TreeSet<Pair<Number,USet>>(new DitheringComparator());
-        else
-            observations = new TreeSet<Pair<Number,USet>>(new NumberedPairComparator());
-        sizeSet1 = 0;
-        sizeSet2 = 0;
-        exactMode = mode;
-    }
-
-    public MannWhitneyU() {
-        this(ExactMode.POINT,true);
-    }
-
-    public MannWhitneyU(boolean dither) {
-        this(ExactMode.POINT,dither);
-    }
-
-    public MannWhitneyU(ExactMode mode) {
-        this(mode,true);
-    }
-
-    /**
-     * Add an observation into the observation tree
-     * @param n: the observation (a number)
-     * @param set: whether the observation comes from set 1 or set 2
-     */
-    public void add(Number n, USet set) {
-        observations.add(new Pair<Number,USet>(n,set));
-        if ( set == USet.SET1 ) {
-            ++sizeSet1;
-        } else {
-            ++sizeSet2;
-        }
-    }
-
-    public Pair<Long,Long> getR1R2() {
-        long u1 = calculateOneSidedU(observations,MannWhitneyU.USet.SET1);
-        long n1 = sizeSet1*(sizeSet1+1)/2;
-        long r1 = u1 + n1;
-        long n2 = sizeSet2*(sizeSet2+1)/2;
-        long u2 = n1*n2-u1;
-        long r2 = u2 + n2;
-
-        return new Pair<Long,Long>(r1,r2);
-    }
-
-    /**
-     * Runs the one-sided test under the hypothesis that the data in set "lessThanOther" stochastically
-     * dominates the other set
-     * @param lessThanOther - either Set1 or Set2
-     * @return - u-based z-approximation, and p-value associated with the test (p-value is exact for small n,m)
-     */
-    @Requires({"lessThanOther != null"})
-    @Ensures({"validateObservations(observations) || Double.isNaN(result.getFirst())","result != null", "! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
-    public Pair<Double,Double> runOneSidedTest(USet lessThanOther) {
-        long u = calculateOneSidedU(observations, lessThanOther);
-        int n = lessThanOther == USet.SET1 ? sizeSet1 : sizeSet2;
-        int m = lessThanOther == USet.SET1 ? sizeSet2 : sizeSet1;
-        if ( n == 0 || m == 0 ) {
-            // test is uninformative as one or both sets have no observations
-            return new Pair<Double,Double>(Double.NaN,Double.NaN);
+        private Rank(double value, float rank, int series) {
+            this.value = value;
+            this.rank = rank;
+            this.series = series;
        }

-        // the null hypothesis is that {N} is stochastically less than {M}, so U has counted
-        // occurrences of {M}s before {N}s. We would expect that this should be less than (n*m+1)/2 under
-        // the null hypothesis, so we want to integrate from K=0 to K=U for cumulative cases. Always.
-        return calculateP(n, m, u, false, exactMode);
-    }
-
-    /**
-     * Runs the standard two-sided test,
-     * returns the u-based z-approximate and p values.
-     * @return a pair holding the u and p-value.
-     */
-    @Ensures({"result != null", "! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
-    //@Requires({"validateObservations(observations)"})
-    public Pair<Double,Double> runTwoSidedTest() {
-        Pair<Long,USet> uPair = calculateTwoSidedU(observations);
-        long u = uPair.first;
-        int n = uPair.second == USet.SET1 ? sizeSet1 : sizeSet2;
-        int m = uPair.second == USet.SET1 ? sizeSet2 : sizeSet1;
-        if ( n == 0 || m == 0 ) {
-            // test is uninformative as one or both sets have no observations
-            return new Pair<Double,Double>(Double.NaN,Double.NaN);
-        }
-        return calculateP(n, m, u, true, exactMode);
-    }
-
-    /**
-     * Given a u statistic, calculate the p-value associated with it, dispatching to approximations where appropriate
-     * @param n - The number of entries in the stochastically smaller (dominant) set
-     * @param m - The number of entries in the stochastically larger (dominated) set
-     * @param u - the Mann-Whitney U value
-     * @param twoSided - is the test twosided
-     * @return the (possibly approximate) p-value associated with the MWU test, and the (possibly approximate) z-value associated with it
-     * todo -- there must be an approximation for small m and large n
-     */
-    @Requires({"m > 0","n > 0"})
-    @Ensures({"result != null", "! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
-    protected static Pair<Double,Double> calculateP(int n, int m, long u, boolean twoSided, ExactMode exactMode) {
-        Pair<Double,Double> zandP;
-        if ( n > 8 && m > 8 ) {
-            // large m and n - normal approx
-            zandP = calculatePNormalApproximation(n,m,u, twoSided);
-        } else if ( n > 5 && m > 7 ) {
-            // large m, small n - sum uniform approx
-            // todo -- find the appropriate regimes where this approximation is actually better enough to merit slowness
-            // pval = calculatePUniformApproximation(n,m,u);
-            zandP = calculatePNormalApproximation(n, m, u, twoSided);
-        } else if ( n > 8 || m > 8 ) {
-            zandP = calculatePFromTable(n, m, u, twoSided);
-        } else {
-            // small m and n - full approx
-            zandP = calculatePRecursively(n,m,u,twoSided,exactMode);
+        @Override
+        public int compareTo(Rank that) {
+            return (int) (this.value - that.value);
        }

-        return zandP;
-    }
-
-    public static Pair<Double,Double> calculatePFromTable(int n, int m, long u, boolean twoSided) {
-        // todo -- actually use a table for:
-        // todo      - n large, m small
-        return calculatePNormalApproximation(n,m,u, twoSided);
-    }
-
-    /**
-     * Uses a normal approximation to the U statistic in order to return a cdf p-value. See Mann, Whitney [1947]
-     * @param n - The number of entries in the stochastically smaller (dominant) set
-     * @param m - The number of entries in the stochastically larger (dominated) set
-     * @param u - the Mann-Whitney U value
-     * @param twoSided - whether the test should be two sided
-     * @return p-value associated with the normal approximation
-     */
-    @Requires({"m > 0","n > 0"})
-    @Ensures({"result != null", "! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
-    public static Pair<Double,Double> calculatePNormalApproximation(int n,int m,long u, boolean twoSided) {
-        double z = getZApprox(n,m,u);
-        if ( twoSided ) {
-            return new Pair<Double,Double>(z,2.0*(z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z)));
-        } else {
-            return new Pair<Double,Double>(z,STANDARD_NORMAL.cdf(z));
+        @Override
+        public String toString() {
+            return "Rank{" +
+                    "value=" + value +
+                    ", rank=" + rank +
+                    ", series=" + series +
+                    '}';
        }
    }

    /**
-     * Calculates the Z-score approximation of the u-statistic
-     * @param n - The number of entries in the stochastically smaller (dominant) set
-     * @param m - The number of entries in the stochastically larger (dominated) set
-     * @param u - the Mann-Whitney U value
-     * @return the asymptotic z-approximation corresponding to the MWU p-value for n < m
+     * The results of performing a rank sum test.
     */
-    @Requires({"m > 0","n > 0"})
-    @Ensures({"! Double.isNaN(result)", "! Double.isInfinite(result)"})
-    private static double getZApprox(int n, int m, long u) {
-        double mean = ( ((long)m)*n+1.0)/2;
-        double var = (((long) n)*m*(n+m+1.0))/12;
-        double z = ( u - mean )/Math.sqrt(var);
-        return z;
+    public static class Result {
+        private final double u;
+        private final double z;
+        private final double p;
+        private final double medianShift;
+
+        public Result(double u, double z, double p, double medianShift) {
+            this.u = u;
+            this.z = z;
+            this.p = p;
+            this.medianShift = medianShift;
+        }
+
+        public double getU() {
+            return u;
+        }
+
+        public double getZ() {
+            return z;
+        }
+
+        public double getP() {
+            return p;
+        }
+
+        public double getMedianShift() {
+            return medianShift;
+        }
    }

    /**
-     * Uses a sum-of-uniform-0-1 random variable approximation to the U statistic in order to return an approximate
-     * p-value. See Buckle, Kraft, van Eeden [1969] (approx) and Billingsly [1995] or Stephens, MA [1966, biometrika] (sum of uniform CDF)
-     * @param n - The number of entries in the stochastically smaller (dominant) set
-     * @param m - The number of entries in the stochastically larger (dominated) set
-     * @param u - mann-whitney u value
-     * @return p-value according to sum of uniform approx
-     * todo -- this is currently not called due to not having a good characterization of where it is significantly more accurate than the
-     * todo -- normal approxmation (e.g. enough to merit the runtime hit)
+     * The values of U1, U2 and the transformed number of ties needed for the calculation of sigma
+     * in the normal approximation.
     */
-    public static double calculatePUniformApproximation(int n, int m, long u) {
-        long R = u + (n*(n+1))/2;
-        double a = Math.sqrt(m*(n+m+1));
-        double b = (n/2.0)*(1-Math.sqrt((n+m+1)/m));
-        double z = b + ((double)R)/a;
-        if ( z < 0 ) { return 1.0; }
-        else if ( z > n ) { return 0.0; }
-        else {
-            if ( z > ((double) n) /2 ) {
-                return 1.0-1/(Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0);
-            } else {
-                return 1/(Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0);
+    public static class TestStatistic {
+        private final double u1;
+        private final double u2;
+        private final double trueU;
+        private final double numOfTiesTransformed;
+
+        public TestStatistic(double u1, double u2, double numOfTiesTransformed) {
+            this.u1 = u1;
+            this.u2 = u2;
+            this.numOfTiesTransformed = numOfTiesTransformed;
+            this.trueU = Double.NaN;
+        }
+
+        public TestStatistic(double trueU, double numOfTiesTransformed) {
+            this.trueU = trueU;
+            this.numOfTiesTransformed = numOfTiesTransformed;
+            this.u1 = Double.NaN;
+            this.u2 = Double.NaN;
+        }
+
+        public double getU1() {
+            return u1;
+        }
+
+        public double getU2() {
+            return u2;
+        }
+
+        public double getTies() {
+            return numOfTiesTransformed;
+        }
+
+        public double getTrueU() {
+            return trueU;
+        }
+    }
+
+    /**
+     * The ranked data in one list and a list of the number of ties.
+     */
+    public static class RankedData {
+        private final Rank[] rank;
+        private final ArrayList numOfTies;
+
+        public RankedData(Rank[] rank, ArrayList numOfTies) {
+            this.rank = rank;
+            this.numOfTies = numOfTies;
+        }
+
+        public Rank[] getRank() {
+            return rank;
+        }
+
+        public ArrayList getNumOfTies() {
+            return numOfTies;
+        }
+    }
+
+    /**
+     * Key for the map from Integer[] to set of all permutations of that array.
+     */
+    private static class Key {
+        final Integer[] listToPermute;
+
+        private Key(Integer[] listToPermute) {
+            this.listToPermute = listToPermute;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (o == null || getClass() != o.getClass()) return false;
+
+            Key that = (Key) o;
+            return (Arrays.deepEquals(this.listToPermute, that.listToPermute));
+        }
+
+        @Override
+        public int hashCode() {
+            int result = 17;
+            for (Integer i : listToPermute) {
+                result = 31 * result + listToPermute[i];
            }
+            return result;
        }
    }

+    // Constructs a normal distribution; actual values of mean and SD don't matter since it's
+    // just used to convert a z-score into a cumulative probability
+    private static final double NORMAL_MEAN = 100;
+    private static final double NORMAL_SD = 15;
+    private static final NormalDistribution NORMAL = new NormalDistribution(NORMAL_MEAN, NORMAL_SD);
+
    /**
-     * Helper function for the sum of n uniform random variables
-     * @param z - value at which to compute the (un-normalized) cdf
-     * @param m - a cutoff integer (defined by m <= z < m + 1)
-     * @param n - the number of uniform random variables
-     * @param k - holder variable for the recursion (alternatively, the index of the term in the sequence)
-     * @return the (un-normalized) cdf for the sum of n random variables
+     * A map of an Integer[] of the labels to the set of all possible permutations of those labels.
     */
-    private static double uniformSumHelper(double z, int m, int n, int k) {
-        if ( k > m ) { return 0; }
-        int coef = (k % 2 == 0) ? 1 : -1;
-        return coef*Arithmetic.binomial(n,k)*Math.pow(z-k,n) + uniformSumHelper(z,m,n,k+1);
+    private static Map<Key, Set<List<Integer>>> PERMUTATIONS = new ConcurrentHashMap<Key, Set<List<Integer>>>();
+
+    /**
+     * The minimum length for both data series in order to use a normal distribution
+     * to calculate Z and p. If both series are shorter than this value then a permutation test
+     * will be used.
+     */
+    private int minimumNormalN = 10;
+
+    /**
+     * If the exact p-value is 1 then the z-score is infinite, so we need a max cutoff instead.
+     */
+    private double maxZScore = 9.0;
+
+    /**
+     * Sets the minimum number of values in each data series to use the normal distribution approximation.
+     */
+    public void setMinimumSeriesLengthForNormalApproximation(final int n) {
+        this.minimumNormalN = n;
    }

    /**
-     * Calculates the U-statistic associated with a two-sided test (e.g. the RV from which one set is drawn
-     * stochastically dominates the RV from which the other set is drawn); two-sidedness is accounted for
-     * later on simply by multiplying the p-value by 2.
-     *
-     * Recall: If X stochastically dominates Y, the test is for occurrences of Y before X, so the lower value of u is chosen
-     * @param observed - the observed data
-     * @return the minimum of the U counts (set1 dominates 2, set 2 dominates 1)
+     * A variable that indicates if the test is one sided or two sided and if it's one sided
+     * which group is the dominator in the null hypothesis.
     */
-    @Requires({"observed != null", "observed.size() > 0"})
-    @Ensures({"result != null","result.first > 0"})
-    public static Pair<Long,USet> calculateTwoSidedU(TreeSet<Pair<Number,USet>> observed) {
-        int set1SeenSoFar = 0;
-        int set2SeenSoFar = 0;
-        long uSet1DomSet2 = 0;
-        long uSet2DomSet1 = 0;
-        USet previous = null;
-        for ( Pair<Number,USet> dataPoint : observed ) {
+    public enum TestType {
+        FIRST_DOMINATES,
+        SECOND_DOMINATES,
+        TWO_SIDED
+    }

-            if ( dataPoint.second == USet.SET1 ) {
-                ++set1SeenSoFar;
-            } else {
-                ++set2SeenSoFar;
-            }
+    public RankedData calculateRank(final double[] series1, final double[] series2) {
+        Arrays.sort(series1);
+        Arrays.sort(series2);

-            if ( previous != null ) {
-                if ( dataPoint.second == USet.SET1 ) {
-                    uSet2DomSet1 += set2SeenSoFar;
+        // Make a merged ranks array
+        final Rank[] ranks = new Rank[series1.length + series2.length];
+        {
+            int i = 0, j = 0, r = 0;
+            while (r < ranks.length) {
+                if (i >= series1.length) {
+                    ranks[r++] = new Rank(series2[j++], r, 2);
+                } else if (j >= series2.length) {
+                    ranks[r++] = new Rank(series1[i++], r, 1);
+                } else if (series1[i] <= series2[j]) {
+                    ranks[r++] = new Rank(series1[i++], r, 1);
                } else {
-                    uSet1DomSet2 += set1SeenSoFar;
+                    ranks[r++] = new Rank(series2[j++], r, 2);
                }
            }
-
-            previous = dataPoint.second;
        }

-        return uSet1DomSet2 < uSet2DomSet1 ? new Pair<Long,USet>(uSet1DomSet2,USet.SET1) : new Pair<Long,USet>(uSet2DomSet1,USet.SET2);
+        ArrayList<Integer> numOfTies = new ArrayList<>();
+
+        // Now sort out any tie bands
+        for (int i = 0; i < ranks.length; ) {
+            float rank = ranks[i].rank;
+            int count = 1;
+
+            for (int j = i + 1; j < ranks.length && ranks[j].value == ranks[i].value; ++j) {
+                rank += ranks[j].rank;
+                ++count;
+            }
+
+            if (count > 1) {
+                rank /= count;
+                for (int j = i; j < i + count; ++j) {
+                    ranks[j].rank = rank;
+                }
+                numOfTies.add(count);
+            }
+
+            // Skip forward the right number of items
+            i += count;
+        }
+
+        return new RankedData(ranks, numOfTies);
    }

    /**
-     * Calculates the U-statistic associated with the one-sided hypothesis that "dominator" stochastically dominates
-     * the other U-set. Note that if S1 dominates S2, we want to count the occurrences of points in S2 coming before points in S1.
-     * @param observed - the observed data points, tagged by each set
-     * @param dominator - the set that is hypothesized to be stochastically dominating
-     * @return the u-statistic associated with the hypothesis that dominator stochastically dominates the other set
+     * Rank both groups together and return a TestStatistic object that includes U1, U2 and number of ties for sigma
     */
-    @Requires({"observed != null","dominator != null","observed.size() > 0"})
-    @Ensures({"result >= 0"})
-    public static long calculateOneSidedU(TreeSet<Pair<Number,USet>> observed,USet dominator) {
-        long otherBeforeDominator = 0l;
-        int otherSeenSoFar = 0;
-        for ( Pair<Number,USet> dataPoint : observed ) {
-            if ( dataPoint.second != dominator ) {
-                ++otherSeenSoFar;
-            } else {
-                otherBeforeDominator += otherSeenSoFar;
+    public TestStatistic calculateU1andU2(final double[] series1, final double[] series2) {
+        RankedData ranked = calculateRank(series1, series2);
+        Rank[] ranks = ranked.getRank();
+        ArrayList<Integer> numOfTies = ranked.getNumOfTies();
+
+        //Calculate number of ties transformed for formula for Sigma to calculate Z-score
+        ArrayList<Integer> transformedTies = new ArrayList<>();
+        for (int count : numOfTies) {
+            //If every single datapoint is tied then we want to return a p-value of .5 and
+            //the formula for sigma that includes the number of ties breaks down. Setting
+            //the number of ties to 0 in this case gets the desired result.
+            if (count != ranks.length) {
+                transformedTies.add((count * count * count) - count);
            }
        }

-        return otherBeforeDominator;
+        double numOfTiesForSigma = 0.0;
+        for (int count : transformedTies) {
+            numOfTiesForSigma += count;
+        }
+
+        // Calculate R1 and R2 and U.
+        float r1 = 0, r2 = 0;
+        for (Rank rank : ranks) {
+            if (rank.series == 1) r1 += rank.rank;
+            else r2 += rank.rank;
+        }
+
+        double n1 = series1.length;
+        double n2 = series2.length;
+        double u1 = r1 - ((n1 * (n1 + 1)) / 2);
+        double u2 = r2 - ((n2 * (n2 + 1)) / 2);
+
+        TestStatistic result = new TestStatistic(u1, u2, numOfTiesForSigma);
+        return result;
    }

    /**
-     * The Mann-Whitney U statistic follows a recursive equation (that enumerates the proportion of possible
-     * binary strings of "n" zeros, and "m" ones, where a one precedes a zero "u" times). This accessor
-     * calls into that recursive calculation.
-     * @param n: number of set-one entries (hypothesis: set one is stochastically less than set two)
-     * @param m: number of set-two entries
-     * @param u: number of set-two entries that precede set-one entries (e.g. 0,1,0,1,0 -> 3 )
-     * @param twoSided: whether the test is two sided or not. The recursive formula is symmetric, multiply by two for two-sidedness.
-     * @param  mode: whether the mode is a point probability, or a cumulative distribution
-     * @return the probability under the hypothesis that all sequences are equally likely of finding a set-two entry preceding a set-one entry "u" times.
+     * Calculates the rank-sum test statisic U (sometimes W) from two sets of input data for a one-sided test
+     * with an int indicating which group is the dominator. Returns a test statistic object with trueU and number of
+     * ties for sigma.
     */
-    @Requires({"m > 0","n > 0","u >= 0"})
-    @Ensures({"result != null","! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
-    public static Pair<Double,Double> calculatePRecursively(int n, int m, long u, boolean twoSided, ExactMode mode) {
-        if ( m > 8 && n > 5 ) { throw new GATKException(String.format("Please use the appropriate (normal or sum of uniform) approximation. Values n: %d, m: %d",n,m)); }
-        double p = mode == ExactMode.POINT ? cpr(n,m,u) : cumulativeCPR(n,m,u);
-        //p *= twoSided ? 2.0 : 1.0;
+    public TestStatistic calculateOneSidedU(final double[] series1, final double[] series2, final TestType whichSeriesDominates) {
+        TestStatistic stat = calculateU1andU2(series1, series2);
+        TestStatistic result;
+        if (whichSeriesDominates == TestType.FIRST_DOMINATES) {
+            result = new TestStatistic(stat.getU1(), stat.getTies());
+        } else {
+            result = new TestStatistic(stat.getU2(), stat.getTies());
+        }
+        return result;
+    }
+
+    /**
+     * Calculates the two-sided rank-sum test statisic U (sometimes W) from two sets of input data.
+     * Returns a test statistic object with trueU and number of ties for sigma.
+     */
+    public TestStatistic calculateTwoSidedU(final double[] series1, final double[] series2) {
+        TestStatistic u1AndU2 = calculateU1andU2(series1, series2);
+        double u = Math.min(u1AndU2.getU1(), u1AndU2.getU2());
+        TestStatistic result = new TestStatistic(u, u1AndU2.getTies());
+        return result;
+    }
+
+    /**
+     * Calculates the Z score (i.e. standard deviations from the mean) of the rank sum
+     * test statistics given input data of lengths n1 and n2 respectively, as well as the number of ties, for normal
+     * approximation only.
+     */
+    public double calculateZ(final double u, final int n1, final int n2, final double nties, final TestType whichSide) {
+        double m = (n1 * n2) / 2d;
+
+        //Adds a continuity correction
+        double correction;
+        if (whichSide == TestType.TWO_SIDED) {
+            correction = (u - m) >= 0 ? .5 : -.5;
+        } else {
+            correction = whichSide == TestType.FIRST_DOMINATES ? -.5 : .5;
+        }
+
+        //If all the data is tied, the number of ties for sigma is set to 0. In order to get a p-value of .5 we need to
+        //remove the continuity correction.
+        if (nties == 0) {
+            correction = 0;
+        }
+
+        double sigma = Math.sqrt((n1 * n2 / 12d) * ((n1 + n2 + 1) - nties / ((n1 + n2) * (n1 + n2 - 1))));
+        return (u - m - correction) / sigma;
+    }
+
+    /**
+     * Finds or calculates the median value of a sorted array of double.
+     */
+    public double median(final double[] data) {
+        final int len = data.length;
+        final int mid = len / 2;
+        if (data.length % 2 == 0) {
+            return (data[mid] + data[mid - 1]) / 2d;
+        } else {
+            return data[mid];
+        }
+    }
+
+
+    /**
+     * Constructs a new rank sum test with the given data.
+     *
+     * @param series1   group 1 data
+     * @param series2   group 2 data
+     * @param whichSide indicator of two sided test, 0 for two sided, 1 for series1 as dominator, 2 for series2 as dominator
+     * @return Result including U statistic, Z score, p-value, and difference in medians.
+     */
+    public Result test(final double[] series1, final double[] series2, final TestType whichSide) {
+        final int n1 = series1.length;
+        final int n2 = series2.length;
+
+        //If one of the groups is empty we return NaN
+        if (n1 == 0 || n2 == 0) {
+            return new Result(Float.NaN, Float.NaN, Float.NaN, Float.NaN);
+        }
+
+        double u;
+        double nties;
+
+        if (whichSide == TestType.TWO_SIDED) {
+            TestStatistic result = calculateTwoSidedU(series1, series2);
+            u = result.getTrueU();
+            nties = result.getTies();
+        } else {
+            TestStatistic result = calculateOneSidedU(series1, series2, whichSide);
+            u = result.getTrueU();
+            nties = result.getTies();
+        }
+
        double z;
-        try {
+        double p;

-            if ( mode == ExactMode.CUMULATIVE ) {
-                z = APACHE_NORMAL.inverseCumulativeProbability(p);
+        if (n1 >= this.minimumNormalN || n2 >= this.minimumNormalN) {
+            z = calculateZ(u, n1, n2, nties, whichSide);
+            p = 2 * NORMAL.cumulativeProbability(NORMAL_MEAN + z * NORMAL_SD);
+            if (whichSide != TestType.TWO_SIDED) {
+                p = p / 2;
+            }
+        } else {
+            // TODO -- This exact test is only implemented for the one sided test, but we currently don't call the two sided version
+            Histogram<Double> distribution = permutationTest(series1, series2);
+            p = distribution.getCumulativeProbability(u);
+            if (p == 1) {
+                z = maxZScore;
+            } else if (p == 0) {
+                z = -1.0 * maxZScore;
            } else {
-                double sd = Math.sqrt((1.0+1.0/(1+n+m))*(n*m)*(1.0+n+m)/12); // biased variance empirically better fit to distribution then asymptotic variance
-                //System.out.printf("SD is %f and Max is %f and prob is %f%n",sd,1.0/Math.sqrt(sd*sd*2.0*Math.PI),p);
-                if ( p > 1.0/Math.sqrt(sd*sd*2.0*Math.PI) ) { // possible for p-value to be outside the range of the normal. Happens at the mean, so z is 0.
-                    z = 0.0;
-                } else {
-                    if ( u >= n*m/2 ) {
-                        z = Math.sqrt(-2.0*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
-                    } else {
-                        z = -Math.sqrt(-2.0*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
-                    }
+                z = NORMAL.inverseCumulativeProbability(p);
+            }
+        }
+
+        return new Result(u, z, p, Math.abs(median(series1) - median(series2)));
+    }
+
+    private void swap(Integer[] arr, int i, int j) {
+        int temp = arr[i];
+        arr[i] = arr[j];
+        arr[j] = temp;
+    }
+
+    /**
+     * Uses a method that generates permutations in lexicographic order. (https://en.wikipedia.org/wiki/Permutation#Generation_in_lexicographic_order)
+     * @param temp Sorted list of elements to be permuted.
+     * @param allPermutations Empty set that will hold all possible permutations.
+     */
+    private void calculatePermutations(Integer[] temp, Set<List<Integer>> allPermutations) {
+        allPermutations.add(new ArrayList<>(Arrays.asList(temp)));
+        while (true) {
+            int k = -1;
+            for (int i = temp.length - 2; i >= 0; i--) {
+                if (temp[i] < temp[i + 1]) {
+                    k = i;
+                    break;
                }
            }

-        } catch (MathException me) {
-            throw new GATKException("A math exception occurred in inverting the probability",me);
+            if (k == -1) {
+                break;
+            }
+
+            int l = -1;
+            for (int i = temp.length - 1; i >= k + 1; i--) {
+                if (temp[k] < temp[i]) {
+                    l = i;
+                    break;
+                }
+            }
+
+            swap(temp, k, l);
+
+            int end = temp.length - 1;
+            for (int begin = k + 1; begin < end; begin++) {
+                swap(temp, begin, end);
+                end--;
+            }
+            allPermutations.add(new ArrayList<>(Arrays.asList(temp)));
        }
-
-        return new Pair<Double,Double>(z,(twoSided ? 2.0*p : p));
    }

    /**
-     * Hook into CPR with sufficient warning (for testing purposes)
-     * calls into that recursive calculation.
-     * @param n: number of set-one entries (hypothesis: set one is stochastically less than set two)
-     * @param m: number of set-two entries
-     * @param u: number of set-two entries that precede set-one entries (e.g. 0,1,0,1,0 -> 3 )
-     * @return same as cpr
+     * Checks to see if the permutations have already been computed before creating them from scratch.
+     * @param listToPermute List of tags in numerical order to be permuted
+     * @param numOfPermutations The number of permutations this list will have (n1+n2 choose n1)
+     * @return Set of all possible permutations for the given list.
     */
-    protected static double calculatePRecursivelyDoNotCheckValuesEvenThoughItIsSlow(int n, int m, long u) {
-        return cpr(n,m,u);
+    Set<List<Integer>> getPermutations(final Integer[] listToPermute, int numOfPermutations) {
+        Key key = new Key(listToPermute);
+        Set<List<Integer>> permutations = PERMUTATIONS.get(key);
+        if (permutations == null) {
+            permutations = new HashSet<>(numOfPermutations);
+            calculatePermutations(listToPermute, permutations);
+            PERMUTATIONS.put(key, permutations);
+        }
+        return permutations;
    }

    /**
-     * For testing
+     * Creates histogram of test statistics from a permutation test.
     *
-     * @param n: number of set-one entries (hypothesis: set one is stochastically less than set two)
-     * @param m: number of set-two entries
-     * @param u: number of set-two entries that precede set-one entries (e.g. 0,1,0,1,0 -> 3 )
+     * @param series1 Data from group 1
+     * @param series2 Data from group 2
+     * @return Histogram with u calculated for every possible permutation of group tag.
     */
-    protected static long countSequences(int n, int m, long u) {
-        if ( u < 0 ) { return 0; }
-        if ( m == 0 || n == 0 ) { return u == 0 ? 1 : 0; }
+    public Histogram permutationTest(final double[] series1, final double[] series2) {
+        final Histogram<Double> histo = new Histogram<Double>();
+        final int n1 = series1.length;
+        final int n2 = series2.length;

-        return countSequences(n-1,m,u-m) + countSequences(n,m-1,u);
-    }
+        RankedData rankedGroups = calculateRank(series1, series2);
+        Rank[] ranks = rankedGroups.getRank();

-    /**
-     * : just a shorter name for calculatePRecursively. See Mann, Whitney, [1947]
-     * @param n: number of set-1 entries
-     * @param m: number of set-2 entries
-     * @param u: number of times a set-2 entry as preceded a set-1 entry
-     * @return recursive p-value
-     */
-    private static double cpr(int n, int m, long u) {
-        if ( u < 0 ) {
-            return 0.0;
-        }
-        if ( m == 0 || n == 0 ) {
-            // there are entries in set 1 or set 2, so no set-2 entry can precede a set-1 entry; thus u must be zero.
-            // note that this exists only for edification, as when we reach this point, the coefficient on this term is zero anyway
-            return ( u == 0 ) ? 1.0 : 0.0;
-        }
+        Integer[] firstPermutation = new Integer[n1 + n2];

-
-        return (((double)n)/(n+m))*cpr(n-1,m,u-m) + (((double)m)/(n+m))*cpr(n,m-1,u);
-    }
-
-    private static double cumulativeCPR(int n, int m, long u ) {
-        // from above:
-        // the null hypothesis is that {N} is stochastically less than {M}, so U has counted
-        // occurrences of {M}s before {N}s. We would expect that this should be less than (n*m+1)/2 under
-        // the null hypothesis, so we want to integrate from K=0 to K=U for cumulative cases. Always.
-        double p = 0.0;
-        // optimization using symmetry, use the least amount of sums possible
-        long uSym = ( u <= n*m/2 ) ? u : ((long)n)*m-u;
-        for ( long uu = 0; uu < uSym; uu++ ) {
-            p += cpr(n,m,uu);
-        }
-        // correct by 1.0-p if the optimization above was used (e.g. 1-right tail = left tail)
-        return (u <= n*m/2) ? p : 1.0-p;
-    }
-
-    /**
-     * hook into the data tree, for testing purposes only
-     * @return  observations
-     */
-    protected TreeSet<Pair<Number,USet>> getObservations() {
-        return observations;
-    }
-
-    /**
-     * hook into the set sizes, for testing purposes only
-     * @return size set 1, size set 2
-     */
-    protected Pair<Integer,Integer> getSetSizes() {
-        return new Pair<Integer,Integer>(sizeSet1,sizeSet2);
-    }
-
-    /**
-     * Validates that observations are in the correct format for a MWU test -- this is only called by the contracts API during testing
-     * @param tree - the collection of labeled observations
-     * @return true iff the tree set is valid (no INFs or NaNs, at least one data point in each set)
-     */
-    protected static boolean validateObservations(TreeSet<Pair<Number,USet>> tree) {
-        boolean seen1 = false;
-        boolean seen2 = false;
-        boolean seenInvalid = false;
-        for ( Pair<Number,USet> p : tree) {
-            if ( ! seen1 && p.getSecond() == USet.SET1 ) {
-                seen1 = true;
+        for (int i = 0; i < firstPermutation.length; i++) {
+            if (i < n1) {
+                firstPermutation[i] = 0;
+            } else {
+                firstPermutation[i] = 1;
            }
+        }

-            if ( ! seen2 && p.getSecond() == USet.SET2 ) {
-                seen2 = true;
-            }
-
-            if ( Double.isNaN(p.getFirst().doubleValue()) || Double.isInfinite(p.getFirst().doubleValue())) {
-                seenInvalid = true;
+        final int numOfPerms = (int) MathUtils.binomialCoefficient(n1 + n2, n2);
+        Set<List<Integer>> allPermutations = getPermutations(firstPermutation, numOfPerms);
+
+        double[] newSeries1 = new double[n1];
+        double[] newSeries2 = new double[n2];
+
+        //iterate over all permutations
+        for (List<Integer> currPerm : allPermutations) {
+            int series1End = 0;
+            int series2End = 0;
+            for (int i = 0; i < currPerm.size(); i++) {
+                int grouping = currPerm.get(i);
+                if (grouping == 0) {
+                    newSeries1[series1End] = ranks[i].rank;
+                    series1End++;
+                } else {
+                    newSeries2[series2End] = ranks[i].rank;
+                    series2End++;
+                }
            }
+            assert (series1End == n1);
+            assert (series2End == n2);

+            double newU = MathUtils.sum(newSeries1) - ((n1 * (n1 + 1)) / 2.0);
+            histo.increment(newU);
        }

-            return ! seenInvalid && seen1 && seen2;
+        return histo;
    }

-    /**
-     * A comparator class which uses dithering on tie-breaking to ensure that the internal treeset drops no values
-     * and to ensure that rank ties are broken at random.
-     */
-    private static class DitheringComparator implements Comparator<Pair<Number,USet>>, Serializable {
-
-        public DitheringComparator() {}
-
-        @Override
-        public boolean equals(Object other) { return false; }
-
-        @Override
-        public int compare(Pair<Number,USet> left, Pair<Number,USet> right) {
-            double comp = Double.compare(left.first.doubleValue(),right.first.doubleValue());
-            if ( comp > 0 ) { return 1; }
-            if ( comp < 0 ) { return -1; }
-            return Utils.getRandomGenerator().nextBoolean() ? -1 : 1;
-        }
-    }
-
-    /**
-     * A comparator that reaches into the pair and compares numbers without tie-braking.
-     */
-    private static class NumberedPairComparator implements Comparator<Pair<Number,USet>>, Serializable {
-
-        public NumberedPairComparator() {}
-
-        @Override
-        public boolean equals(Object other) { return false; }
-
-        @Override
-        public int compare(Pair<Number,USet> left, Pair<Number,USet> right ) {
-            return Double.compare(left.first.doubleValue(),right.first.doubleValue());
-        }
-    }
-
-    public enum USet { SET1, SET2 }
-    public enum ExactMode { POINT, CUMULATIVE }
-
 }
--- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/MWUnitTest.java
+++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/MWUnitTest.java
@ -25,13 +25,13 @@

 package org.broadinstitute.gatk.utils;

-import org.broadinstitute.gatk.utils.BaseTest;
-import org.broadinstitute.gatk.utils.collections.Pair;
-
 import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 import org.testng.Assert;

+import java.util.Arrays;
+
 /**
 * Created by IntelliJ IDEA.
 * User: Ghost
@ -40,92 +40,75 @@ import org.testng.Assert;
 * To change this template use File | Settings | File Templates.
 */
 public class MWUnitTest extends BaseTest {
+    private static double DELTA_PRECISION = 0.00001;
+
    @BeforeClass
    public void init() { }

-    @Test
-    private void testMWU() {
-        logger.warn("Testing MWU");
-        MannWhitneyU mwu = new MannWhitneyU();
-        mwu.add(0, MannWhitneyU.USet.SET1);
-        mwu.add(1,MannWhitneyU.USet.SET2);
-        mwu.add(2,MannWhitneyU.USet.SET2);
-        mwu.add(3,MannWhitneyU.USet.SET2);
-        mwu.add(4,MannWhitneyU.USet.SET2);
-        mwu.add(5,MannWhitneyU.USet.SET2);
-        mwu.add(6,MannWhitneyU.USet.SET1);
-        mwu.add(7,MannWhitneyU.USet.SET1);
-        mwu.add(8,MannWhitneyU.USet.SET1);
-        mwu.add(9,MannWhitneyU.USet.SET1);
-        mwu.add(10,MannWhitneyU.USet.SET1);
-        mwu.add(11,MannWhitneyU.USet.SET2);
-        Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(), MannWhitneyU.USet.SET1),25L);
-        Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(),MannWhitneyU.USet.SET2),11L);
+    private static final MannWhitneyU rst = new MannWhitneyU();

-        MannWhitneyU mwu2 = new MannWhitneyU();
-        MannWhitneyU mwuNoDither = new MannWhitneyU(false);
-        for ( int dp : new int[]{2,4,5,6,8} ) {
-            mwu2.add(dp,MannWhitneyU.USet.SET1);
-            mwuNoDither.add(dp,MannWhitneyU.USet.SET1);
-        }
+    @Test(dataProvider = "rankSumTestData")
+    public void testSimpleU(String name, double[] series1, double[] series2, double U) {
+        MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.TWO_SIDED);
+        System.out.println("==========================================================");
+        System.out.println("Series 1: " + Arrays.toString(series1));
+        System.out.println("Series 2: " + Arrays.toString(series2));
+        System.out.println("       U: " + test.getU());
+        System.out.println("       Z: " + test.getZ());
+        System.out.println("2-side p: " + test.getP());
+        Assert.assertEquals(test.getU(), U, name);
+    }

-        for ( int dp : new int[]{1,3,7,9,10,11,12,13} ) {
-            mwu2.add(dp,MannWhitneyU.USet.SET2);
-            mwuNoDither.add(dp,MannWhitneyU.USet.SET2);
-        }
+    @Test(dataProvider = "oneSidedPTestData")
+    public void testOnesidedP(String name, double[] series1, double[] series2, double P) {
+        MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES);
+        System.out.println("==========================================================");
+        System.out.println("Series 1: " + Arrays.toString(series1));
+        System.out.println("Series 2: " + Arrays.toString(series2));
+        System.out.println("       U: " + test.getU());
+        System.out.println("       Z: " + test.getZ());
+        System.out.println("1-side p: " + test.getP());
+        Assert.assertEquals(test.getP(), P, DELTA_PRECISION, name);
+    }

-        MannWhitneyU.ExactMode pm = MannWhitneyU.ExactMode.POINT;
-        MannWhitneyU.ExactMode cm = MannWhitneyU.ExactMode.CUMULATIVE;
-
-        // tests using the hypothesis that set 2 dominates set 1 (U value = 10)
-        Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET1),10L);
-        Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET2),30L);
-        Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwuNoDither.getObservations(),MannWhitneyU.USet.SET1),10L);
-        Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwuNoDither.getObservations(),MannWhitneyU.USet.SET2),30L);
-
-        Pair<Integer,Integer> sizes = mwu2.getSetSizes();
-
-        Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(sizes.first,sizes.second,10L),0.4180519701814064,1e-14);
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.first,sizes.second,10L,false,pm).second,0.021756021756021756,1e-14);
-        Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.first,sizes.second,10L,false).second,0.06214143703127617,1e-14);
-        logger.warn("Testing two-sided");
-        Assert.assertEquals((double)mwu2.runTwoSidedTest().second,2*0.021756021756021756,1e-8);
-
-        // tests using the hypothesis that set 1 dominates set 2 (U value = 30) -- empirical should be identical, normall approx close, uniform way off
-        Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.second,sizes.first,30L,true).second,2.0*0.08216463976903321,1e-14);
-        Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(sizes.second,sizes.first,30L),0.0023473625009559074,1e-14);
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,30L,false,pm).second,0.021756021756021756,1e-14); // note -- exactly same value as above
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,cm).second,1.0-0.08547008547008,1e-14); // r does a correction, subtracting 1 from U
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,cm).second,0.08547008547008,1e-14); // r does a correction, subtracting 1 from U
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,cm).first,-1.36918910442,1e-2); // apache inversion set to be good only to 1e-2
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,cm).first,1.36918910442,1e-2); // apache inversion set to be good only to 1e-2
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,pm).first,1.2558754796642067,1e-8); // PDF should be similar
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,pm).first,-1.2558754796642067,1e-8); // PDF should be similar
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(4,5,10L,false,pm).second,0.0952381,1e-5);
-        Assert.assertEquals(MannWhitneyU.calculatePRecursively(4,5,10L,false,pm).first,0.0,1e-14);
-
-        logger.warn("Set 1");
-        Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET1).second,0.021756021756021756,1e-8);
-        logger.warn("Set 2");
-        Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET2).second,0.021756021756021756,1e-8);
-
-        MannWhitneyU mwu3 = new MannWhitneyU();
-        for ( int dp : new int[]{0,2,4} ) {
-            mwu3.add(dp,MannWhitneyU.USet.SET1);
-        }
-        for ( int dp : new int[]{1,5,6,7,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34} ) {
-            mwu3.add(dp,MannWhitneyU.USet.SET2);
-        }
-        long u = MannWhitneyU.calculateOneSidedU(mwu3.getObservations(),MannWhitneyU.USet.SET1);
-        //logger.warn(String.format("U is: %d",u));
-        Pair<Integer,Integer> nums = mwu3.getSetSizes();
-        //logger.warn(String.format("Corrected p is: %.4e",MannWhitneyU.calculatePRecursivelyDoNotCheckValuesEvenThoughItIsSlow(nums.first,nums.second,u)));
-        //logger.warn(String.format("Counted sequences: %d",MannWhitneyU.countSequences(nums.first, nums.second, u)));
-        //logger.warn(String.format("Possible sequences: %d", (long) Arithmetic.binomial(nums.first+nums.second,nums.first)));
-        //logger.warn(String.format("Ratio: %.4e",MannWhitneyU.countSequences(nums.first,nums.second,u)/Arithmetic.binomial(nums.first+nums.second,nums.first)));
-        Assert.assertEquals(MannWhitneyU.calculatePRecursivelyDoNotCheckValuesEvenThoughItIsSlow(nums.first, nums.second, u), 3.665689149560116E-4, 1e-14);
-        Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(nums.first,nums.second,u,false).second,0.0032240865760884696,1e-14);
-        Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(nums.first,nums.second,u),0.0026195003025784036,1e-14);
+    @DataProvider(name="rankSumTestData")
+    public Object[][] dataProvider() {
+        return new Object[][] {
+                new Object[] {"test1", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,21}, 10d},
+                new Object[] {"test2", new double[] {20,20,20,20,21}, new double[] {20,20,20,20,21}, 12.5d},
+                new Object[] {"test3", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21}, 0.5d},
+                new Object[] {"test4", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21,21,21,22,23,27}, 0.5d},
+                new Object[] {"test5", new double[] {13,14,15,15,16,18,20,22,25,24,25,26,27,28,22,23,19,30,28,22,17},
+                        new double[] {16,20,20,21,21,21,21,22,23,27,26,28,29,32,31,22,21,19,16,24,29},
+                        180.5d},
+                new Object[] {"test6", new double[] {13,14,15,15,16,18,13,14,15,15,16,18,13,14,15,15,16,18,13,14,15,15,16,18},
+                        new double[] {21,22,23,27,26,28,29,21,22,23,27,26,28,29,21,22,23,27,26,28,29,21,22,23,27,26,28,29},
+                        0d},
+                new Object[] {"test7", new double[] {11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20},
+                        new double[] {12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21},
+                        145d},
+                new Object[] {"test7", new double[] {11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20},
+                        new double[] {12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21},
+                        162d},
+                new Object[] {"test8", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,20,20,20,20,20,20}, 25d},
+        };
+    }

+    @DataProvider(name="oneSidedPTestData")
+    public Object[][] oneSidedDataProvider() {
+        return new Object[][] {
+                new Object[] {"test0", new double[] {0,0}, new double[] {1,1}, .16666666},
+                new Object[] {"test1", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,21}, .5},
+                new Object[] {"test2", new double[] {20,20,20,20,21}, new double[] {20,20,20,20,21}, .77778},
+                new Object[] {"test3", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21}, 0.0079365},
+                new Object[] {"test4", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21,21,21,22,23,27}, 0.001469192},
+                new Object[] {"test5", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,20,20,20,20,20,20}, .5},
+                new Object[] {"test6", new double[] {1,2,3,4,5}, new double[] {6,7,8,9,10}, .00396},
+                new Object[] {"test7", new double[] {6,7,8,9,10}, new double[] {1,2,3,4,5}, 1},
+                new Object[] {"test8", new double[] {16,20,20,21,21,21,21,22,23,27,16,20,20,21,21,21,21,22,23,27},
+                    new double[] {22,23,27,16,20,20,21,21,21,21,22,23,27,60,60}, .08303102},
+                new Object[] {"test8", new double[] {16,20,20,21,21,21,21,21,20},
+                        new double[] {22,23,27,16,20,20,20,20,21}, .395763},
+        };
    }
 }