The UG calculations are now driven by an independent engine.

This completely separates the genotyper walker from other walkers. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2758 348d0f76-0448-11de-a6fe-93d51630548a
2010-02-01 20:57:31 +00:00 · 2010-02-01 20:57:31 +00:00 · 506d39f751
parent d8e75cf631
commit 506d39f751
7 changed files with 251 additions and 264 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalculationArguments.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalculationArguments.java
@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.genotyper;
-
-import java.util.Set;
-import java.util.HashSet;
-
-/**
- * Class for maintaining the arguments that need to be passed to UnifiedGenotyper.runGenotyper()
- * (so that they only need to be computed one time)
- */
-public class UGCalculationArguments {
-    // should we annotate dbsnp?
-    protected boolean annotateDbsnp = false;
-    // should we annotate hapmap2?
-    protected boolean annotateHapmap2 = false;
-    // should we annotate hapmap3?
-    protected boolean annotateHapmap3 = false;
-
-    // the unified argument collection
-    protected UnifiedArgumentCollection UAC = null;
-
-    // the model used for calculating genotypes
-    protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
-
-    // samples in input
-    protected Set<String> samples = new HashSet<String>();
-}
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@ -26,19 +26,13 @@
 package org.broadinstitute.sting.gatk.walkers.genotyper;

 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.contexts.*;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
-import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
 import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.pileup.*;
 import org.broadinstitute.sting.utils.cmdLine.*;
 import org.broadinstitute.sting.utils.genotype.*;
-import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
-import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
 import org.broadinstitute.sting.utils.genotype.vcf.*;

 import java.util.*;
@ -65,7 +59,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
    public PrintStream beagleWriter = null;

    // the calculation arguments
-    private UGCalculationArguments UG_args = null;
+    private UnifiedGenotyperEngine UG_engine = null;

    // Enable deletions in the pileup
    public boolean includeReadsWithDeletionAtLoci() { return true; }
@ -88,172 +82,13 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
        double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / nBasesCallable; }
    }

-
-    /**
-     * Creates the argument calculation object for the UnifiedGenotyper.
-     *
-     * @param toolkit the GATK Engine
-     * @param UAC     the UnifiedArgumentCollection
-     * @return UG calculation arguments object
-     **/
-    public static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
-        return getUnifiedCalculationArguments(toolkit, UAC, null);
-    }
-
-    /**
-     * Creates the argument calculation object for the UnifiedGenotyper.
-     *
-     * @param toolkit      the GATK Engine
-     * @param UAC          the UnifiedArgumentCollection
-     * @param writer       the genotype writer
-     * @return UG calculation arguments object
-     *
-     **/
-    private static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, GenotypeWriter writer) {
-        UGCalculationArguments UG_args = new UGCalculationArguments();
-        UG_args.UAC = UAC;
-
-        // deal with input errors
-        if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
-            throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
-        }
-        if ( UAC.POOLSIZE < 1 && UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ) {
-            throw new IllegalArgumentException("Attempting to use the POOLED model with a pool size less than 1. Please set the pool size to an appropriate value.");
-        }
-        if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
-            // the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
-            throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
-        }
-
-        // get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
-        if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
-            // if we're supposed to assume a single sample, do so
-            if ( UAC.ASSUME_SINGLE_SAMPLE != null )
-                UG_args.samples.add(UAC.ASSUME_SINGLE_SAMPLE);
-            else
-                UG_args.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
-        }
-
-        // in pooled mode we need to check that the format is acceptable
-        if ( UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED && writer != null ) {
-            // only multi-sample calls use Variations
-            if ( !writer.supportsMultiSample() )
-                throw new IllegalArgumentException("The POOLED model is not compatible with the specified format; try using VCF instead");
-
-            // when using VCF with multiple threads, we need to turn down the validation stringency so that writing temporary files will work
-            if ( toolkit.getArguments().numberOfThreads > 1 && writer instanceof VCFGenotypeWriter )
-                ((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
-        }
-
-        // check to see whether a dbsnp rod was included
-        List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
-        for ( ReferenceOrderedDataSource source : dataSources ) {
-            ReferenceOrderedData rod = source.getReferenceOrderedData();
-            if ( rod.getType().equals(rodDbSNP.class) ) {
-                UG_args.annotateDbsnp = true;
-            }
-            if ( rod.getName().equals("hapmap2") ) {
-                UG_args.annotateHapmap2 = true;
-            }
-            if ( rod.getName().equals("hapmap3") ) {
-                UG_args.annotateHapmap3 = true;
-            }
-        }
-
-        return UG_args;
-    }
-
-    /**
-     * Compute at a given locus.
-     *
-     * @param tracker    the meta data tracker
-     * @param refContext the reference base
-     * @param rawContext contextual information around the locus
-     * @param UG_args    the calculation argument collection
-     * @return the VariantCallContext object
-     */
-    public static VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, UGCalculationArguments UG_args) {
-
-        char ref = Character.toUpperCase(refContext.getBase());
-        if ( !BaseUtils.isRegularBase(ref) )
-            return null;
-
-        // don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
-        if ( rawContext.hasExceededMaxPileup() )
-            return null;
-
-        ReadBackedPileup rawPileup = rawContext.getBasePileup();
-        
-        // filter the context based on min base and mapping qualities
-        ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UG_args.UAC.MIN_BASE_QUALTY_SCORE, UG_args.UAC.MIN_MAPPING_QUALTY_SCORE);
-
-        // filter the context based on mapping quality and mismatch rate
-        pileup = filterPileup(pileup, refContext, UG_args.UAC);
-
-        // don't call when there is no coverage
-        if ( pileup.size() == 0 )
-            return null;
-
-        // are there too many deletions in the pileup?
-        if ( isValidDeletionFraction(UG_args.UAC.MAX_DELETION_FRACTION) &&
-             (double)pileup.getNumberOfDeletions() / (double)pileup.size() > UG_args.UAC.MAX_DELETION_FRACTION )
-            return null;
-
-        // stratify the AlignmentContext and cut by sample
-        // Note that for testing purposes, we may want to throw multi-samples at pooled mode
-        Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UG_args.UAC.ASSUME_SINGLE_SAMPLE, (UG_args.UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
-        if ( stratifiedContexts == null )
-            return null;
-
-        DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UG_args.UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
-        VariantCallContext call = UG_args.gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
-
-        // annotate the call, if possible
-        if ( call != null && call.variation != null && call.variation instanceof ArbitraryFieldsBacked ) {
-            // first off, we want to use the *unfiltered* context for the annotations
-            stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
-
-            Map<String, String> annotations;
-            if ( UG_args.UAC.ALL_ANNOTATIONS )
-                annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
-            else
-                annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
-            ((ArbitraryFieldsBacked)call.variation).setFields(annotations);
-        }
-
-        return call;
-    }
-
-    // filter based on maximum mismatches and bad mates
-    private static ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext, UnifiedArgumentCollection UAC) {
-
-        ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
-        for ( PileupElement p : pileup ) {
-            if  ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
-                  AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
-                filteredPileup.add(p);
-        }
-        return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
-    }
-
-    private static boolean isValidDeletionFraction(double d) {
-        return ( d >= 0.0 && d <= 1.0 );
-    }
-
-    // ------------------------------------------------------------------------------------------------
-    //
-    // The following methods are walker-specific; don't use them unless you are a traversal.
-    //   If you are a walker, stick to the static methods defined above.
-    //
-    // ------------------------------------------------------------------------------------------------
-
    /**
     * Initialize the samples, output, and genotype calculation model
     *
     **/
    public void initialize() {

-        UG_args = getUnifiedCalculationArguments(getToolkit(), UAC, writer);
+        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, writer, verboseWriter, beagleWriter);

        // initialize the writers
        if ( verboseWriter != null ) {
@ -267,16 +102,16 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
        }
        if ( beagleWriter != null ) {
            beagleWriter.print("marker alleleA alleleB");
-            for ( String sample : UG_args.samples )
+            for ( String sample : UG_engine.samples )
                beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
            beagleWriter.println();
        }

        // initialize the header
-        GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_args.samples, getHeaderInfo(UG_args));
+        GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_engine.samples, getHeaderInfo());
    }

-    private Set<VCFHeaderLine> getHeaderInfo(UGCalculationArguments UG_args) {
+    private Set<VCFHeaderLine> getHeaderInfo() {
        Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();

        // this is only applicable to VCF
@ -295,11 +130,11 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen

        // annotation (INFO) fields from UnifiedGenotyper
        headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
-        if ( UG_args.annotateDbsnp )
+        if ( UG_engine.annotateDbsnp )
            headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP Membership"));
-        if ( UG_args.annotateHapmap2 )
+        if ( UG_engine.annotateHapmap2 )
            headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap2 Membership"));
-        if ( UG_args.annotateHapmap3 )
+        if ( UG_engine.annotateHapmap3 )
            headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap3 Membership"));
        if ( !UAC.NO_SLOD )
            headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
@ -331,24 +166,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
     * @return the VariantCallContext object
     */
    public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
-
-        // initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
-        if ( UG_args.gcm.get() == null ) {
-            GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
-            if ( writer != null ) {
-                if ( writer instanceof VCFGenotypeWriter )
-                    format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
-                else if ( writer instanceof GLFGenotypeWriter )
-                    format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
-                else if ( writer instanceof GeliGenotypeWriter )
-                    format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
-                else
-                    throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
-            }
-            UG_args.gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(UG_args.samples, logger, UAC, format, verboseWriter, beagleWriter));
-        }
-
-        return runGenotyper(tracker, refContext, rawContext, UG_args);
+        return UG_engine.runGenotyper(tracker, refContext, rawContext);
    }

    public UGStatistics reduceInit() { return new UGStatistics(); }
@ -381,7 +199,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
        }

        // if we have a single-sample call (single sample from PointEstimate model returns no VariationCall data)
-        if ( value.variation == null || (!writer.supportsMultiSample() && UG_args.samples.size() <= 1) ) {
+        if ( value.variation == null || (!writer.supportsMultiSample() && UG_engine.samples.size() <= 1) ) {
            writer.addGenotypeCall(value.genotypes.get(0));
        }

--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
+import org.broadinstitute.sting.gatk.contexts.*;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
+import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
+import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
+import org.broadinstitute.sting.utils.*;
+import org.broadinstitute.sting.utils.pileup.*;
+import org.broadinstitute.sting.utils.genotype.*;
+import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
+import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
+import org.broadinstitute.sting.utils.genotype.vcf.*;
+import org.apache.log4j.Logger;
+
+import java.util.*;
+import java.io.PrintStream;
+
+
+public class UnifiedGenotyperEngine {
+
+    // should we annotate dbsnp?
+    protected boolean annotateDbsnp = false;
+    // should we annotate hapmap2?
+    protected boolean annotateHapmap2 = false;
+    // should we annotate hapmap3?
+    protected boolean annotateHapmap3 = false;
+
+    // the unified argument collection
+    protected UnifiedArgumentCollection UAC = null;
+
+    // the model used for calculating genotypes
+    protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
+
+    // the various loggers and writers
+    protected Logger logger = null;
+    protected GenotypeWriter genotypeWriter = null;
+    protected PrintStream verboseWriter = null;
+    protected PrintStream beagleWriter = null;
+
+    // samples in input
+    protected Set<String> samples = new HashSet<String>();
+
+
+    public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
+        initialize(toolkit, UAC, null, null, null, null);
+    }
+
+    public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter) {
+        initialize(toolkit, UAC, logger, genotypeWriter, verboseWriter, beagleWriter);
+
+    }
+
+    private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter) {
+        this.UAC = UAC;
+        this.logger = logger;
+        this.genotypeWriter = genotypeWriter;
+        this.verboseWriter = verboseWriter;
+        this.beagleWriter = beagleWriter;
+
+        // deal with input errors
+        if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
+            throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
+        }
+        if ( UAC.POOLSIZE < 1 && UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ) {
+            throw new IllegalArgumentException("Attempting to use the POOLED model with a pool size less than 1. Please set the pool size to an appropriate value.");
+        }
+        if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
+            // the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
+            throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
+        }
+
+        // get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
+        if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
+            // if we're supposed to assume a single sample, do so
+            if ( UAC.ASSUME_SINGLE_SAMPLE != null )
+                this.samples.add(UAC.ASSUME_SINGLE_SAMPLE);
+            else
+                this.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
+        }
+
+        // in pooled mode we need to check that the format is acceptable
+        if ( UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED && genotypeWriter != null ) {
+            // only multi-sample calls use Variations
+            if ( !genotypeWriter.supportsMultiSample() )
+                throw new IllegalArgumentException("The POOLED model is not compatible with the specified format; try using VCF instead");
+
+            // when using VCF with multiple threads, we need to turn down the validation stringency so that writing temporary files will work
+            if ( toolkit.getArguments().numberOfThreads > 1 && genotypeWriter instanceof VCFGenotypeWriter )
+                ((VCFGenotypeWriter)genotypeWriter).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
+        }
+
+        // check to see whether a dbsnp rod was included
+        List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
+        for ( ReferenceOrderedDataSource source : dataSources ) {
+            ReferenceOrderedData rod = source.getReferenceOrderedData();
+            if ( rod.getType().equals(rodDbSNP.class) ) {
+                this.annotateDbsnp = true;
+            }
+            if ( rod.getName().equals("hapmap2") ) {
+                this.annotateHapmap2 = true;
+            }
+            if ( rod.getName().equals("hapmap3") ) {
+                this.annotateHapmap3 = true;
+            }
+        }
+    }
+
+    /**
+     * Compute at a given locus.
+     *
+     * @param tracker    the meta data tracker
+     * @param refContext the reference base
+     * @param rawContext contextual information around the locus
+     * @return the VariantCallContext object
+     */
+    public VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
+
+        // initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
+        if ( gcm.get() == null ) {
+            GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
+            if ( genotypeWriter != null ) {
+                if ( genotypeWriter instanceof VCFGenotypeWriter )
+                    format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
+                else if ( genotypeWriter instanceof GLFGenotypeWriter)
+                    format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
+                else if ( genotypeWriter instanceof GeliGenotypeWriter)
+                    format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
+                else
+                    throw new StingException("Unsupported genotype format: " + genotypeWriter.getClass().getName());
+            }
+            gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
+        }
+
+        char ref = Character.toUpperCase(refContext.getBase());
+        if ( !BaseUtils.isRegularBase(ref) )
+            return null;
+
+        // don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
+        if ( rawContext.hasExceededMaxPileup() )
+            return null;
+
+        ReadBackedPileup rawPileup = rawContext.getBasePileup();
+
+        // filter the context based on min base and mapping qualities
+        ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
+
+        // filter the context based on mapping quality and mismatch rate
+        pileup = filterPileup(pileup, refContext);
+
+        // don't call when there is no coverage
+        if ( pileup.size() == 0 )
+            return null;
+
+        // are there too many deletions in the pileup?
+        if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
+             (double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
+            return null;
+
+        // stratify the AlignmentContext and cut by sample
+        // Note that for testing purposes, we may want to throw multi-samples at pooled mode
+        Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
+        if ( stratifiedContexts == null )
+            return null;
+
+        DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
+        VariantCallContext call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
+
+        // annotate the call, if possible
+        if ( call != null && call.variation != null && call.variation instanceof ArbitraryFieldsBacked ) {
+            // first off, we want to use the *unfiltered* context for the annotations
+            stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
+
+            Map<String, String> annotations;
+            if ( UAC.ALL_ANNOTATIONS )
+                annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
+            else
+                annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
+            ((ArbitraryFieldsBacked)call.variation).setFields(annotations);
+        }
+
+        return call;
+    }
+
+    // filter based on maximum mismatches and bad mates
+    private ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext) {
+
+        ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
+        for ( PileupElement p : pileup ) {
+            if  ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
+                  AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
+                filteredPileup.add(p);
+        }
+        return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
+    }
+
+    private static boolean isValidDeletionFraction(double d) {
+        return ( d >= 0.0 && d <= 1.0 );
+    }
+}
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BaseTransitionTableCalculatorJavaWalker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BaseTransitionTableCalculatorJavaWalker.java
@ -49,7 +49,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
    @Argument(fullName="forcePreviousReadBasesToMatchRef", doc="Forces previous read bases to match the reference", required = false)
    boolean readBasesMustMatchRef = false;

-    private UGCalculationArguments ug;
+    private UnifiedGenotyperEngine ug;
    // private ReferenceContextWindow refWindow;
    // private Set<BaseTransitionTable> conditionalTables;
    private List<Boolean> usePreviousBases;
@ -62,7 +62,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
        UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
        uac.baseModel = BaseMismatchModel.THREE_STATE;
        uac.ALL_BASES = true;
-        ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
+        ug = new UnifiedGenotyperEngine(getToolkit(), uac);
        // refWindow = new ReferenceContextWindow(nPreviousBases);
        usePreviousBases = new ArrayList<Boolean>();
        previousBaseLoci = new ArrayList<GenomeLoc>();
@ -359,7 +359,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
    public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
        if ( !BaseUtils.isRegularBase(ref.getBase()) )
            return false;
-        VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
+        VariantCallContext calls = ug.runGenotyper(tracker,ref,context);
        if ( calls == null || calls.genotypes == null)
            return false;
        return  ( calls.genotypes.size() > 0 && !calls.genotypes.get(0).isVariant(ref.getBase()) && calls.genotypes.get(0).getNegLog10PError() > confidentRefThreshold );
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java
@ -12,7 +12,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.genotype.*;

-import java.util.*;

 /**
 * Walker to calculate the number of mismatches, their base counts, and their quality sums at confidence ref sites" 
@ -39,13 +38,13 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
    @Argument(fullName="skip", doc = "Only display every skip eligable sites.  Defaults to all sites", required = false)
    int skip = 1;

-    private UGCalculationArguments ug;
+    private UnifiedGenotyperEngine ug;

    public void initialize() {
        UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
        uac.baseModel = BaseMismatchModel.THREE_STATE;
        uac.ALL_BASES = true;
-        ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
+        ug = new UnifiedGenotyperEngine(getToolkit(), uac);

        // print the header
        out.printf("loc ref genotype genotypeQ depth nMM qSumMM A C G T%n");
@ -54,7 +53,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
    public String map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
        String result = null;

-        ReadBackedPileup pileup = context.getPileup();
+        ReadBackedPileup pileup = context.getBasePileup();
        if ( locusIsUsable(tracker, ref, pileup, context) ) {
            Genotype g = getGenotype(tracker, ref, context);
            if ( g != null && g.isPointGenotype() )
@ -163,7 +162,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
    }

    private Genotype getGenotype( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
-        VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context, ug);
+        VariantCallContext calls = ug.runGenotyper(tracker,ref,context);
        if ( calls == null || calls.variation == null || calls.genotypes == null )
            return null;
        else {
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java
@ -5,10 +5,6 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.*;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.utils.genotype.Genotype;
-import org.broadinstitute.sting.utils.genotype.VariationCall;
-import org.broadinstitute.sting.utils.genotype.GenotypeCall;
-import org.broadinstitute.sting.utils.Pair;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -16,8 +12,6 @@ import org.broadinstitute.sting.playground.utils.NamedTable;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMReadGroupRecord;

-import java.util.List;
-
 import cern.jet.stat.Probability;

 /**
@ -34,13 +28,13 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
    @Argument(fullName="limit", shortName="lim", doc="The pValue limit for which a read group will be deemed to be a contaminant", required=false)
    private Double LIMIT = 1e-9;

-    private UGCalculationArguments ug;
+    private UnifiedGenotyperEngine ug;
    private NamedTable altTable;

    public void initialize() {
        UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
        uac.CONFIDENCE_THRESHOLD = 50;
-        ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
+        ug = new UnifiedGenotyperEngine(getToolkit(), uac);

        altTable = new NamedTable();
    }
@ -58,7 +52,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
        int altCount = 0;
        int totalCount = 0;

-        ReadBackedPileup pileup = context.getPileup();
+        ReadBackedPileup pileup = context.getBasePileup();
        int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());

        for (byte base : pileup.getBases() ) {
@ -73,7 +67,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
        double altBalance = ((double) altCount)/((double) totalCount);

        if (altBalance > 0.70) {
-            VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker, ref, context, ug);
+            VariantCallContext ugResult = ug.runGenotyper(tracker, ref, context);

            if (ugResult != null && ugResult.genotypes != null && ugResult.genotypes.size() > 0) {
                return ugResult.genotypes.get(0).isHet();
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java
@ -26,14 +26,14 @@ import java.util.HashMap;
 public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Integer> {

    HashMap<String,Long> counts = new HashMap<String,Long>();
-    private UGCalculationArguments ug;
+    private UnifiedGenotyperEngine ug;
    private NamedTable altTable;

    public void initialize() {
        UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
        uac.CONFIDENCE_THRESHOLD = 50;
        uac.ALL_BASES = true;
-        ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
+        ug = new UnifiedGenotyperEngine(getToolkit(), uac);

        altTable = new NamedTable();
    }
@ -46,7 +46,7 @@ public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Int
        char nextBase = Character.toUpperCase(contextBases[contextBases.length - 1]);

        if (contextBases.length == 3 && refBase != 'N' && pileup.getBases() != null && pileup.getSecondaryBases() != null) {
-            VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
+            VariantCallContext ugResult = ug.runGenotyper(tracker,ref,context);
            if (ugResult != null && ugResult.variation != null) {
                Genotype res = ugResult.genotypes.get(0);
                String call = res.getBases();