Merge branch 'master' of ssh://chartl@ni.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

2012-01-26 12:38:24 -05:00 · 2012-01-26 12:38:24 -05:00 · 9d4b84f6bd
parent 8f7d9bff0a 9a63a9ae3c
commit 9d4b84f6bd
17 changed files with 450 additions and 351 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@ -92,7 +92,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                // Call the walkers isActive function for this locus and add them to the list to be integrated later
                if( initialIntervals.overlaps(location) ) {
-                    final boolean isActive = walker.isActive( tracker, refContext, locus );
+                    final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
                    isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
                }
@ -109,7 +109,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                if( !locusView.hasNext() ) {
                    // Call the walkers isActive function for this locus and add them to the list to be integrated later
                    if( initialIntervals.overlaps(location) ) {
-                        final boolean isActive = walker.isActive( tracker, refContext, locus );
+                        final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
                        isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
                    }
@ -128,7 +128,16 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
            // add these blocks of work to the work queue
            final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList );
            logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
-            workQueue.addAll( activeRegions );
+            if( walker.activeRegionOutStream == null ) { 
                workQueue.addAll( activeRegions ); 
            } else { // Just want to output the active regions to a file, not actually process them
                for( final ActiveRegion activeRegion : activeRegions ) {
                    if( activeRegion.isActive ) {
                        walker.activeRegionOutStream.println( activeRegion.getLocation() );
                    }
                }
            }
            // Since we're sufficiently past this point (or this contig!) in the workQueue, we can unload those regions and process them
            if( !workQueue.isEmpty() ) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@ -1,6 +1,11 @@
 package org.broadinstitute.sting.gatk.walkers;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
 import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.IntervalBinding;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
@ -14,8 +19,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
 import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
 import org.broadinstitute.sting.utils.interval.IntervalSetRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.List;
@ -32,6 +39,31 @@ import java.util.List;
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
 public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
    @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
    public PrintStream activeRegionOutStream = null;
    @Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false)
    protected List<IntervalBinding<Feature>> activeRegionBindings = null;
    public GenomeLocSortedSet presetActiveRegions = null;
    /**
     * Builds {@code presetActiveRegions} from the interval files given through the
     * {@code activeRegionIn} argument.
     *
     * If no bindings were supplied this method returns immediately and
     * {@code presetActiveRegions} keeps its initial {@code null} value. Otherwise the intervals
     * of every binding are unioned together and the result is sorted and merged with
     * {@link IntervalMergingRule#ALL}.
     */
    @Override
    public void initialize() {
        if ( activeRegionBindings == null ) {
            return;
        }

        List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
        // Use the element type the field is declared with; iterating with the raw IntervalBinding
        // type turns the getIntervals() call below into an unchecked conversion.
        for ( final IntervalBinding<Feature> intervalBinding : activeRegionBindings ) {
            final List<GenomeLoc> intervals = intervalBinding.getIntervals(this.getToolkit());
            if ( intervals.isEmpty() ) {
                // An empty file is only reported; it still takes part in the union below.
                logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
            }
            allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, IntervalSetRule.UNION);
        }

        presetActiveRegions = IntervalUtils.sortAndMergeIntervals(this.getToolkit().getGenomeLocParser(), allIntervals, IntervalMergingRule.ALL);
    }
    // Do we actually want to operate on the context?
    public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
        return true;    // We are keeping all the reads
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDB;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
 import org.broadinstitute.sting.utils.MendelianViolation;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@ -23,7 +24,7 @@ import java.util.*;
 * Time: 12:24 PM
 * To change this template use File | Settings | File Templates.
 */
-public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
+public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
    private MendelianViolation mendelianViolation = null;
    private String motherId;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.samples.SampleDB;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
 import org.broadinstitute.sting.utils.BaseUtils;
@ -84,7 +83,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
    public RodBinding<VariantContext> getVariantRodBinding() {
        // hand back the variants binding held by the variantCollection argument collection
        return variantCollection.variants;
    }
    /**
     * The INFO field will be annotated with information on the most biologically-significant effect
@ -163,6 +161,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
    protected Boolean LIST = false;
    /**
     * By default, the dbSNP ID is added only when the ID field in the variant VCF is empty.
     */
    @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated")
    protected Boolean ALWAYS_APPEND_DBSNP_ID = false;
    public boolean alwaysAppendDbsnpId() {
        // exposes the value of the alwaysAppendDbsnpId command-line argument
        return ALWAYS_APPEND_DBSNP_ID;
    }
    @Hidden
    @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
    protected boolean indelsOnly = false;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
@ -195,11 +195,20 @@ public class VariantAnnotatorEngine {
    private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map<String, Object> infoAnnotations) {
        for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
            if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
-                String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
+                final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
                // put the DB key into the INFO field
                infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null);
-                // annotate dbsnp id if available and not already there
+                
-                if ( rsID != null && vc.emptyID() )
+                // add the ID if appropriate
-                    vc = new VariantContextBuilder(vc).id(rsID).make();
+                if ( rsID != null ) {
                    if ( vc.emptyID() ) {
                        vc = new VariantContextBuilder(vc).id(rsID).make();
                    } else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
                        final String newRsID = vc.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID;
                        vc = new VariantContextBuilder(vc).id(newRsID).make();
                    }
                }
            } else {
                boolean overlapsComp = false;
                for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
@ -8,9 +8,9 @@ import java.util.List;
 /**
  * Contract for walkers that expose their ROD bindings and annotation options
  * through a common set of accessors.
  *
  * Interface methods are implicitly {@code public abstract}, so the modifiers are omitted.
  */
 public interface AnnotatorCompatibleWalker {
    // accessors for the various bindings in use

    /** @return the binding for the variant input */
    RodBinding<VariantContext> getVariantRodBinding();

    /** @return the binding for the SnpEff input */
    RodBinding<VariantContext> getSnpEffRodBinding();

    /** @return the binding for the dbSNP input */
    RodBinding<VariantContext> getDbsnpRodBinding();

    /** @return the bindings for the comp inputs */
    List<RodBinding<VariantContext>> getCompRodBindings();

    /** @return the bindings for the resource inputs */
    List<RodBinding<VariantContext>> getResourceRodBindings();

    /** @return whether the dbSNP ID should be appended even when the variant already has an ID */
    boolean alwaysAppendDbsnpId();
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@ -39,7 +39,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
    private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
    /**
     * Creates the model by passing every argument, unchanged and in the same order, to the
     * superclass constructor; no further state is set up here.
     *
     * @param UAC           forwarded to the superclass
     * @param N             forwarded to the superclass
     * @param logger        forwarded to the superclass
     * @param verboseWriter forwarded to the superclass
     */
    protected ExactAFCalculationModel(
            UnifiedArgumentCollection UAC,
            int N,
            Logger logger,
            PrintStream verboseWriter) {
        super(UAC, N, logger, verboseWriter);
    }
@ -166,7 +165,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        final int numChr = 2*numSamples;
        // queue of AC conformations to process
-        final Queue<ExactACset> ACqueue = new LinkedList<ExactACset>();
+        final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
        // mapping of ExactACset indexes to the objects
        final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
@ -177,11 +176,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        ACqueue.add(zeroSet);
        indexesToACset.put(zeroSet.ACcounts, zeroSet);
-	// optimization: create the temporary storage for computing L(j,k) just once
+	    // optimization: create the temporary storage for computing L(j,k) just once
-	final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
+	    final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
-	final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
+	    final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
-	for ( int i = 0; i < maxPossibleDependencies; i++ )
+	    for ( int i = 0; i < maxPossibleDependencies; i++ )
-	    tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;
+	        tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;
        // keep processing while we have AC conformations that need to be calculated
        double maxLog10L = Double.NEGATIVE_INFINITY;
@ -195,16 +194,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        }
    }
    /**
     * Immutable pairing of an allele-count vector with the PL index that goes with it.
     * The array reference is stored as given; no copy is made.
     */
    private static final class DependentSet {
        /** allele counts of the dependent conformation */
        public final int[] ACcounts;

        /** PL index associated with {@link #ACcounts} */
        public final int PLindex;

        /**
         * @param ACcounts allele counts to hold (kept by reference)
         * @param PLindex  PL index to hold
         */
        public DependentSet(final int[] ACcounts, final int PLindex) {
            this.PLindex = PLindex;
            this.ACcounts = ACcounts;
        }
    }
    private static double calculateAlleleCountConformation(final ExactACset set,
                                                           final ArrayList<double[]> genotypeLikelihoods,
                                                           final double maxLog10L,
                                                           final int numChr,
                                                           final boolean preserveData,
-                                                           final Queue<ExactACset> ACqueue,
+                                                           final LinkedList<ExactACset> ACqueue,
                                                           final HashMap<ExactACcounts, ExactACset> indexesToACset,
                                                           final double[][] log10AlleleFrequencyPriors,
                                                           final AlleleFrequencyCalculationResult result,
-							   final double[][] tempLog10ConformationLikelihoods) {
+                                                           final double[][] tempLog10ConformationLikelihoods) {
        //if ( DEBUG )
        //    System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
@ -215,7 +224,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        // clean up memory
        if ( !preserveData ) {
            for ( ExactACcounts index : set.dependentACsetsToDelete ) {
-                indexesToACset.put(index, null);
+                indexesToACset.remove(index);
                //if ( DEBUG )
                //    System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
            }
@ -230,7 +239,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
            // no reason to keep this data around because nothing depends on it
            if ( !preserveData )
-                indexesToACset.put(set.ACcounts, null);
+                indexesToACset.remove(set.ACcounts);
            return log10LofK;
        }
@ -240,7 +249,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
            return log10LofK;
        ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing
        final int numAltAlleles = set.ACcounts.getCounts().length;
        // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
@ -251,30 +259,40 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        for ( int allele = 0; allele < numAltAlleles; allele++ ) {
            final int[] ACcountsClone = set.ACcounts.getCounts().clone();
            ACcountsClone[allele]++;
-            lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
+            updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
        }
        // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
        if ( ACwiggle > 1 ) {
            final ArrayList<DependentSet> differentAlleles = new ArrayList<DependentSet>(numAltAlleles * numAltAlleles);
            final ArrayList<DependentSet> sameAlleles = new ArrayList<DependentSet>(numAltAlleles);
            for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
                for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
                    final int[] ACcountsClone = set.ACcounts.getCounts().clone();
                    ACcountsClone[allele_i]++;
                    ACcountsClone[allele_j]++;
-                    lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset);
+
                    if ( allele_i == allele_j )
                        sameAlleles.add(new DependentSet(ACcountsClone, ++PLindex));
                    else
                        differentAlleles.add(new DependentSet(ACcountsClone, ++PLindex));
                }
            }
            // IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
            for ( DependentSet dependent : differentAlleles )
                updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset);
            for ( DependentSet dependent : sameAlleles )
                updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset);
        }
-        // if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
+        // determine which is the last dependent set in the queue (not necessarily the last one added above) so we can know when it is safe to clean up this column
-        // over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
+        if ( !preserveData ) {
-        if ( !preserveData && lastSet == null ) {
+            final ExactACset lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
-            //if ( DEBUG )
+            if ( lastSet != null )
-            //    System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
+                lastSet.dependentACsetsToDelete.add(set.ACcounts);
            lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
        }
        if ( lastSet != null )
            lastSet.dependentACsetsToDelete.add(set.ACcounts);
        return log10LofK;
    }
@ -282,34 +300,36 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
    // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
    // also adds it as a dependency to the given callingSetIndex.
    // returns the ExactACset if that set was not already in the queue and null otherwise.
-    private static ExactACset updateACset(final int[] ACcounts,
+    private static void updateACset(final int[] ACcounts,
-                                          final int numChr,
+                                    final int numChr,
-                                          final ExactACset callingSet,
+                                    final ExactACset callingSet,
-                                          final int PLsetIndex,
+                                    final int PLsetIndex,
-                                          final Queue<ExactACset> ACqueue,
+                                    final Queue<ExactACset> ACqueue,
-                                          final HashMap<ExactACcounts, ExactACset> indexesToACset) {
+                                    final HashMap<ExactACcounts, ExactACset> indexesToACset) {
        final ExactACcounts index = new ExactACcounts(ACcounts);
        boolean wasInQueue = true;
        if ( !indexesToACset.containsKey(index) ) {
            ExactACset set = new ExactACset(numChr/2 +1, index);
            indexesToACset.put(index, set);
            ACqueue.add(set);
            wasInQueue = false;
        }
        // add the given dependency to the set
        //if ( DEBUG )
        //    System.out.println(" *** adding dependency from " + index + " to " + callingSet.ACcounts);
        final ExactACset set = indexesToACset.get(index);
        set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex);
        return wasInQueue ? null : set;
    }
-    private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final Queue<ExactACset> ACqueue) {
+    private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final LinkedList<ExactACset> ACqueue) {
-        ExactACset set = null;
+        Iterator<ExactACset> reverseIterator = ACqueue.descendingIterator();
-        for ( ExactACset queued : ACqueue ) {
+        while ( reverseIterator.hasNext() ) {
-            if ( queued.dependentACsetsToDelete.contains(callingSetIndex) )
+            final ExactACset queued = reverseIterator.next();
-                set = queued;
+            if ( queued.ACsetIndexToPLIndex.containsKey(callingSetIndex) )
                return queued;
        }
-        return set;
+
        // shouldn't get here
        throw new ReviewedStingException("Error: no sets in the queue currently hold " + callingSetIndex + " as a dependent!");
    }
    private static void computeLofK(final ExactACset set,
@ -317,7 +337,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                                    final HashMap<ExactACcounts, ExactACset> indexesToACset,
                                    final double[][] log10AlleleFrequencyPriors,
                                    final AlleleFrequencyCalculationResult result,
-				    final double[][] tempLog10ConformationLikelihoods) {
+                                    final double[][] tempLog10ConformationLikelihoods) {
        set.log10Likelihoods[0] = 0.0; // the zero case
        final int totalK = set.getACsum();
@ -329,40 +349,40 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        }
        // k > 0 for at least one k
        else {
-	    // deal with the non-AA possible conformations
+	        // deal with the non-AA possible conformations
-	    int conformationIndex = 1;
+            int conformationIndex = 1;
-	    for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
+            for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
-		//if ( DEBUG )
+		        //if ( DEBUG )
-		//    System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
+		        //    System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
-		ExactACset dependent = indexesToACset.get(mapping.getKey());
+                ExactACset dependent = indexesToACset.get(mapping.getKey());
-		for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
+                for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
-		    if ( totalK <= 2*j ) { // skip impossible conformations
+                    if ( totalK <= 2*j ) { // skip impossible conformations
-			final double[] gl = genotypeLikelihoods.get(j);
+                        final double[] gl = genotypeLikelihoods.get(j);
-			tempLog10ConformationLikelihoods[j][conformationIndex] =
+                        tempLog10ConformationLikelihoods[j][conformationIndex] =
-			    determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
+                                determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
                    } else {
-			tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
+                        tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
-		    }
+                    }
                }
-		conformationIndex++;
+                conformationIndex++;
-	    }
+            }
-	    // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
+	        // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
            final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
-	    for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
+            for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
-		if ( totalK < 2*j-1 ) {
+                if ( totalK < 2*j-1 ) {
-		    final double[] gl = genotypeLikelihoods.get(j);
+                    final double[] gl = genotypeLikelihoods.get(j);
-		    tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
+                    tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
-		} else {
+                } else {
-		    tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
+                    tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
-		}
+                }
-		final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
+                final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
                final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths);
                set.log10Likelihoods[j] = log10Max - logDenominator;
            }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java
@ -1,114 +0,0 @@
 /*
 * Copyright (c) 2010 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 import org.broadinstitute.sting.commandline.ArgumentCollection;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.gatk.DownsampleType;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.util.HashSet;
 import java.util.Set;
 /**
 * Uses the UG engine to determine per-sample genotype likelihoods and emits them as a VCF (using PLs).
 * Absolutely not supported or recommended for public use.
 * Run this as you would the UnifiedGenotyper, except that you must additionally pass in a VCF bound to
 * the name 'allele' so we know which alternate allele to use at each site.
 */
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.READS)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
 public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer> implements TreeReducible<Integer> {
    @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();

    // control the output
    @Output(doc="File to which variants should be written",required=true)
    protected VCFWriter writer = null;

    // the calculation arguments; created in initialize()
    private UnifiedGenotyperEngine UG_engine = null;

    // enable deletions in the pileup
    public boolean includeReadsWithDeletionAtLoci() { return true; }

    // enable extended events for indels (i.e. for every GL model except SNP)
    public boolean generateExtendedEvents() { return UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP; }

    /**
     * Builds the genotyping engine for the samples found in the SAM header and writes the VCF header.
     */
    public void initialize() {
        // get all of the unique sample names
        Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);

        // initialize the header
        Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
        headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
        writer.writeHeader(new VCFHeader(headerInfo, samples)) ;
    }

    /**
     * Asks the engine for the likelihoods at this locus.
     *
     * @return the engine's result wrapped in a VariantCallContext, or null when the engine returns null
     */
    public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
        VariantContext call = UG_engine.calculateLikelihoods(tracker, refContext, rawContext);
        return call == null ? null : new VariantCallContext(call, true);
    }

    public Integer reduceInit() { return 0; }

    /** Combines two partial counts by summing them. */
    public Integer treeReduce(Integer lhs, Integer rhs) {
        return lhs + rhs;
    }

    /**
     * Writes a non-null map result to the output and counts it.
     *
     * @param value the map result; null values are skipped and leave the count unchanged
     * @param sum   the number of records written so far
     * @return the updated count
     * @throws IllegalArgumentException if the writer rejects the record; the original exception is kept as the cause
     */
    public Integer reduce(VariantCallContext value, Integer sum) {
        if ( value == null )
            return sum;

        try {
            writer.add(value);
        } catch (IllegalArgumentException e) {
            // same message as before, but chain the original exception so its stack trace is not lost
            throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name", e);
        }

        return sum + 1;
    }

    /**
     * Logs the final reduce value.  Note that, despite the wording of the message, reduce() only
     * counts loci for which map() produced a non-null result.
     */
    public void onTraversalDone(Integer sum) {
        logger.info(String.format("Visited bases: %d", sum));
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
@ -1,152 +0,0 @@
 /*
 * Copyright (c) 2010, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 import org.broadinstitute.sting.commandline.ArgumentCollection;
 import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.*;
 import java.util.*;
 /**
 * Uses the UG engine to call variants based off of VCFs annotated with GLs (or PLs).
 * Absolutely not supported or recommended for public use.
 * Run this as you would the UnifiedGenotyper, except that instead of '-I reads' it expects any number
 * of GL/PL-annotated VCFs bound to a name starting with 'variant'.
 */
 public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
    @ArgumentCollection
    private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();

    @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
    public List<RodBinding<VariantContext>> variants;

    // control the output
    @Output(doc="File to which variants should be written",required=true)
    protected VCFWriter writer = null;

    // the calculation arguments; created in initialize()
    private UnifiedGenotyperEngine UG_engine = null;

    // variant track names
    private Set<String> trackNames = new HashSet<String>();

    /**
     * Collects the names of the bound variant tracks, builds the genotyping engine for the samples
     * declared in those tracks' VCF headers and writes the output VCF header.
     */
    public void initialize() {
        for ( RodBinding<VariantContext> rb : variants )
            trackNames.add(rb.getName());
        Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), trackNames);

        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);

        Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
        headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"));
        headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
        headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
        // the low-quality filter line is only declared when the emit threshold is below the call threshold
        if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
            headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));

        // initialize the header
        writer.writeHeader(new VCFHeader(headerInfo, samples));
    }

    /**
     * Merges the input records at this location and asks the engine to genotype the merged record.
     *
     * @return the engine's call, or null when there is no tracker or no record at this location
     */
    public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null )
            return null;

        List<VariantContext> VCs = tracker.getValues(variants, context.getLocation());

        VariantContext mergedVC = mergeVCsWithGLs(VCs);
        if ( mergedVC == null )
            return null;

        return UG_engine.calculateGenotypes(tracker, ref, context, mergedVC);
    }

    public Integer reduceInit() { return 0; }

    /**
     * Recomputes the chromosome counts for a non-null call, writes it and counts it.
     *
     * @param value the map result; null values are skipped and leave the count unchanged
     * @param sum   the number of records written so far
     * @return the updated count
     * @throws IllegalArgumentException if building or writing the record fails; the original exception is kept as the cause
     */
    public Integer reduce(VariantCallContext value, Integer sum) {
        if ( value == null )
            return sum;

        try {
            VariantContextBuilder builder = new VariantContextBuilder(value);
            VariantContextUtils.calculateChromosomeCounts(builder, true);
            writer.add(builder.make());
        } catch (IllegalArgumentException e) {
            // same message as before, but chain the original exception so its stack trace is not lost
            throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name", e);
        }

        return sum + 1;
    }

    /** Logs the final reduce value, i.e. the number of records written by reduce(). */
    public void onTraversalDone(Integer result) {
        logger.info(String.format("Visited sites: %d", result));
    }

    /**
     * Combines the records seen at one site into a single record.  The first variant record is used
     * as the template and receives the likelihood-bearing genotypes of all the records.
     *
     * @param VCs the records at the site
     * @return the merged record, or null when VCs is empty
     * @throws UserException if none of the records is variant
     */
    private static VariantContext mergeVCsWithGLs(List<VariantContext> VCs) {
        // we can't use the VCUtils classes because our VCs can all be no-calls
        if ( VCs.size() == 0 )
            return null;

        VariantContext variantVC = null;
        GenotypesContext genotypes = GenotypesContext.create();
        for ( VariantContext vc : VCs ) {
            // remember only the first variant record; genotypes are gathered from every record
            if ( variantVC == null && vc.isVariant() )
                variantVC = vc;
            genotypes.addAll(getGenotypesWithGLs(vc.getGenotypes()));
        }

        if ( variantVC == null ) {
            VariantContext vc = VCs.get(0);
            throw new UserException("There is no ALT allele in any of the VCF records passed in at " + vc.getChr() + ":" + vc.getStart());
        }

        return new VariantContextBuilder(variantVC).source("VCwithGLs").genotypes(genotypes).make();
    }

    /**
     * Filters a genotype collection down to the genotypes that carry a non-null likelihoods vector.
     */
    private static GenotypesContext getGenotypesWithGLs(GenotypesContext genotypes) {
        GenotypesContext genotypesWithGLs = GenotypesContext.create(genotypes.size());
        for ( final Genotype g : genotypes ) {
            if ( g.hasLikelihoods() && g.getLikelihoods().getAsVector() != null )
                genotypesWithGLs.add(g);
        }
        return genotypesWithGLs;
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@ -126,10 +126,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
    @ArgumentCollection
    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
    // Rod-binding accessors: only the dbSNP binding is backed by an argument here; the variant and
    // snpEff bindings are null and the comp/resource binding lists are empty.
    // NOTE(review): presumably these satisfy an annotator-facing interface declared outside this hunk - confirm.
    public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
    public RodBinding<VariantContext> getVariantRodBinding() { return null; }
    public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
    public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
    public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
    // always false for this walker
    public boolean alwaysAppendDbsnpId() { return false; }
    /**
     * A raw, unfiltered, highly specific callset in VCF format.
@ -205,6 +205,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
     *
     **/
    public void initialize() {
        // warn the user for misusing EMIT_ALL_SITES
        if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES &&
                UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY &&
                UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP )
            logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode");
        // get all of the unique sample names
        Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -54,8 +54,9 @@ public class UnifiedGenotyperEngine {
        EMIT_VARIANTS_ONLY,
        /** produces calls at variant sites and confident reference sites */
        EMIT_ALL_CONFIDENT_SITES,
-        /** produces calls at any callable site regardless of confidence; this argument is intended for point
+        /** produces calls at any callable site regardless of confidence; this argument is intended only for point
-         * mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */
+         * mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by
         * no means produce a comprehensive set of indels in DISCOVERY mode */
        EMIT_ALL_SITES
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
@ -0,0 +1,233 @@
 /*
 * Copyright (c) 2011, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
 import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
 import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
 import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.*;
 import java.util.*;
/**
 * VariantEval module that summarizes multi-allelic sites: counts of SNPs and indels and how many
 * of them are multi-allelic, a pairwise Ti/Tv ratio for multi-allelic SNPs, how many multi-allelic
 * SNPs are partially or completely known in the comp track, and a histogram of the AF values
 * found in the evaluated records.
 */
@Analysis(description = "Evaluation summary for multi-allelic variants")
 public class MultiallelicSummary extends VariantEvaluator { // implements StandardEval {
    final protected static Logger logger = Logger.getLogger(MultiallelicSummary.class);

    public enum Type {
        SNP, INDEL
    }

    // basic counts on various rates found
    @DataPoint(description = "Number of processed loci")
    public long nProcessedLoci = 0;

    @DataPoint(description = "Number of SNPs")
    public int nSNPs = 0;
    @DataPoint(description = "Number of multi-allelic SNPs")
    public int nMultiSNPs = 0;
    @DataPoint(description = "% processed sites that are multi-allelic SNPs", format = "%.5f")
    public double processedMultiSnpRatio = 0;
    @DataPoint(description = "% SNP sites that are multi-allelic", format = "%.3f")
    public double variantMultiSnpRatio = 0;

    @DataPoint(description = "Number of Indels")
    public int nIndels = 0;
    @DataPoint(description = "Number of multi-allelic Indels")
    public int nMultiIndels = 0;
    @DataPoint(description = "% processed sites that are multi-allelic Indels", format = "%.5f")
    public double processedMultiIndelRatio = 0;
    @DataPoint(description = "% Indel sites that are multi-allelic", format = "%.3f")
    public double variantMultiIndelRatio = 0;

    @DataPoint(description = "Number of Transitions")
    public int nTi = 0;
    @DataPoint(description = "Number of Transversions")
    public int nTv = 0;
    @DataPoint(description = "Overall TiTv ratio", format = "%.2f")
    public double TiTvRatio = 0;

    @DataPoint(description = "Multi-allelic SNPs partially known")
    public int knownSNPsPartial = 0;
    @DataPoint(description = "Multi-allelic SNPs completely known")
    public int knownSNPsComplete = 0;
    @DataPoint(description = "Multi-allelic SNP Novelty Rate")
    public String SNPNoveltyRate = "NA";

    @DataPoint(description = "Multi-allelic Indels partially known")
    public int knownIndelsPartial = 0;
    @DataPoint(description = "Multi-allelic Indels completely known")
    public int knownIndelsComplete = 0;
    @DataPoint(description = "Multi-allelic Indel Novelty Rate")
    public String indelNoveltyRate = "NA";

    @DataPoint(description="Histogram of allele frequencies")
    AFHistogram AFhistogram = new AFHistogram();

    /*
     * AF histogram table object: a single row ("pairwise_AF") with one column per 0.01-wide
     * allele-frequency bin, labelled "0.00" through "1.00".
     */
    static class AFHistogram implements TableType {
        private Object[] colKeys, rowKeys = {"pairwise_AF"};
        private int[] AFhistogram;

        private static final double AFincrement = 0.01;
        // rounded rather than truncated so the bin count cannot come out one short through floating-point error
        private static final int numBins = (int)Math.round(1.00 / AFincrement);

        public AFHistogram() {
            colKeys = initColKeys();
            AFhistogram = new int[colKeys.length];
        }

        public Object[] getColumnKeys() {
            return colKeys;
        }

        public Object[] getRowKeys() {
            return rowKeys;
        }

        /** Returns the count in the given column; the table has a single row, so row is ignored. */
        public Object getCell(int row, int col) {
            return AFhistogram[col];
        }

        /**
         * Builds the numBins + 1 column labels.  An integer counter is used so the number of
         * labels does not depend on accumulated floating-point error.
         */
        private static Object[] initColKeys() {
            ArrayList<String> keyList = new ArrayList<String>(numBins + 1);
            for ( int bin = 0; bin <= numBins; bin++ ) {
                keyList.add(String.format("%.2f", bin * AFincrement));
            }
            return keyList.toArray();
        }

        public String getName() { return "AFHistTable"; }

        /**
         * Adds every allele frequency listed in the record's AF attribute to the histogram.
         * Records whose AF attribute is missing or is not a List are ignored.
         *
         * @param vc the record to read the AF attribute from
         */
        public void update(VariantContext vc) {
            final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
            if ( !(obj instanceof List) )
                return;

            // elements are read through their string form so non-String values cannot cause a ClassCastException
            for ( final Object value : (List<?>)obj ) {
                final double AF = Double.valueOf(String.valueOf(value));
                // round to the nearest bin: a plain (int) cast truncates, and e.g. 100 * 0.29 is
                // 28.999999999999996 in floating point, which would be counted in bin 28
                final int bin = (int)Math.round(numBins * MathUtils.round(AF, 2));
                AFhistogram[bin]++;
            }
        }
    }

    public void initialize(VariantEvalWalker walker) {}

    @Override public boolean enabled() { return true; }

    public int getComparisonOrder() {
        return 2;
    }

    /** Accumulates the number of processed loci: the skipped bases plus this locus when a reference context is present. */
    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
    }

    /**
     * Updates the SNP/indel counters, the multi-allelic statistics and the AF histogram for one
     * evaluated record.  Null records and records monomorphic in the samples are ignored.
     *
     * @return always null; this evaluator does not report interesting sites
     * @throws UserException.BadInput if the record is neither a SNP nor an indel
     */
    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( eval == null || eval.isMonomorphicInSamples() )
            return null;

        // update counts
        switch ( eval.getType() ) {
            case SNP:
                nSNPs++;
                if ( !eval.isBiallelic() ) {
                    nMultiSNPs++;
                    calculatePairwiseTiTv(eval);
                    calculateSNPPairwiseNovelty(eval, comp);
                }
                break;
            case INDEL:
                nIndels++;
                if ( !eval.isBiallelic() ) {
                    nMultiIndels++;
                    calculateIndelPairwiseNovelty(eval, comp);
                }
                break;
            default:
                throw new UserException.BadInput("Unexpected variant context type: " + eval);
        }
        AFhistogram.update(eval);

        return null; // we don't capture any interesting sites
    }

    /** Classifies each alternate allele against the reference allele as a transition or a transversion. */
    private void calculatePairwiseTiTv(VariantContext vc) {
        for ( Allele alt : vc.getAlternateAlleles() ) {
            if ( VariantContextUtils.isTransition(vc.getReference(), alt) )
                nTi++;
            else
                nTv++;
        }
    }

    /**
     * Counts the site as completely known when every alternate allele of eval is also an alternate
     * allele of comp, as partially known when only some are; does nothing without a comp record.
     */
    private void calculateSNPPairwiseNovelty(VariantContext eval, VariantContext comp) {
        if ( comp == null )
            return;

        int knownAlleles = 0;
        for ( Allele alt : eval.getAlternateAlleles() ) {
            if ( comp.getAlternateAlleles().contains(alt) )
                knownAlleles++;
        }

        if ( knownAlleles == eval.getAlternateAlleles().size() )
            knownSNPsComplete++;
        else if ( knownAlleles > 0 )
            knownSNPsPartial++;
    }

    // TODO: not implemented yet, so knownIndelsPartial and knownIndelsComplete are never incremented
    private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) {
    }

    /**
     * Formats the fraction of sites that are novel.
     *
     * @param all   total number of sites
     * @param known number of those sites that are known
     * @return "NA" when all is zero, otherwise (all - known) / all with two decimals
     */
    private final String noveltyRate(final int all, final int known) {
        if ( all == 0 )
            return "NA";
        final int novel = all - known;
        final double rate = (novel / (1.0 * all));
        return String.format("%.2f", rate);
    }

    /**
     * Derives the summary ratios from the raw counters.  The ratios are floating-point divisions,
     * so a zero denominator yields NaN or Infinity rather than an exception.
     */
    public void finalizeEvaluation() {
        processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
        variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
        processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;
        variantMultiIndelRatio = (double)nMultiIndels / (double)nIndels;

        TiTvRatio = (double)nTi / (double)nTv;

        SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
        // the indel rate must be relative to the multi-allelic indel count, not the SNP count
        indelNoveltyRate = noveltyRate(nMultiIndels, knownIndelsPartial + knownIndelsComplete);
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
@ -120,6 +120,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
    @Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
    public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
    @Hidden
    @Argument(shortName="multipleAllelesMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required=false)
    public VariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
    /**
     * Used when taking the union of variants that contain genotypes.  A complete priority list MUST be provided.
     */
@ -236,13 +240,24 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
            return 0;
        List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
-        Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
+
-        // iterate over the types so that it's deterministic
+        if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
-        for ( VariantContext.Type type : VariantContext.Type.values() ) {
+            Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
-            if ( VCsByType.containsKey(type) )
+            // iterate over the types so that it's deterministic
-                mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
+            for (VariantContext.Type type : VariantContext.Type.values()) {
-                        priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
+                if (VCsByType.containsKey(type))
-                        SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
+                    mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
                            priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
                            SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
            }
        }
        else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
            mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs,
                    priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
                    SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
        }
        else {
            logger.warn("Ignoring all records at site " + ref.getLocus());
        }
        for ( VariantContext mergedVC : mergedVCs ) {
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
@ -24,6 +24,7 @@
 package org.broadinstitute.sting.utils.codecs.vcf;
 import org.broad.tribble.TribbleException;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import java.util.Arrays;
@ -149,7 +150,11 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
            count = Integer.valueOf(numberStr);
        }
-        type = VCFHeaderLineType.valueOf(mapping.get("Type"));
+        try {
            type = VCFHeaderLineType.valueOf(mapping.get("Type"));
        } catch (Exception e) {
            throw new TribbleException(mapping.get("Type") + " is not a valid type in the VCF specification (note that types are case-sensitive)");
        }
        if (type == VCFHeaderLineType.Flag && !allowFlagValues())
            throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@ -29,6 +29,7 @@ import org.apache.commons.jexl2.Expression;
 import org.apache.commons.jexl2.JexlEngine;
 import org.apache.log4j.Logger;
 import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
 import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@ -471,6 +472,18 @@ public class VariantContextUtils {
        KEEP_UNCONDITIONAL
    }
    /**
     * Strategy for records at the same site whose alleles are of different types
     * (for example, SNP vs. indel).
     */
    @Hidden
    public enum MultipleAllelesMergeType {
        /**
         * Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record.
         */
        BY_TYPE,
        /**
         * Merge all allele types at the same start position into the same VCF record.
         */
        MIX_TYPES
    }
    /**
     * Merges VariantContexts into a single hybrid.  Takes genotypes for common samples in priority order, if provided.
     * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
@ -1060,6 +1073,14 @@ public class VariantContextUtils {
        return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
    }
    /**
     * Tells whether substituting the first base of ref by the first base of alt is a transition.
     * Only the first base of each allele is examined.
     */
    public static boolean isTransition(Allele ref, Allele alt) {
        final byte refBase = ref.getBases()[0];
        final byte altBase = alt.getBases()[0];
        final BaseUtils.BaseSubstitutionType substitution = BaseUtils.SNPSubstitutionType(refBase, altBase);
        return substitution == BaseUtils.BaseSubstitutionType.TRANSITION;
    }
    /**
     * Tells whether substituting the first base of ref by the first base of alt is a transversion.
     * Only the first base of each allele is examined.
     */
    public static boolean isTransversion(Allele ref, Allele alt) {
        final byte refBase = ref.getBases()[0];
        final byte altBase = alt.getBases()[0];
        final BaseUtils.BaseSubstitutionType substitution = BaseUtils.SNPSubstitutionType(refBase, altBase);
        return substitution == BaseUtils.BaseSubstitutionType.TRANSVERSION;
    }
    /**
     * create a genome location, given a variant context
     * @param genomeLocParser parser
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@ -110,6 +110,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
        executeTest("getting DB tag with dbSNP", spec);
    }
    /** Runs the annotator with --alwaysAppendDbsnpId on a VCF that already carries IDs and checks the output MD5. */
    @Test
    public void testMultipleIdsWithDbsnp() {
        final String command = baseTestString()
                + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129
                + " -G Standard --variant " + validationDataLocation + "vcfexample3withIDs.vcf"
                + " -L " + validationDataLocation + "vcfexample3withIDs.vcf";
        final WalkerTestSpec spec = new WalkerTestSpec(command, 1, Arrays.asList("cd7e3d43b8f5579c461b3e588a295fa8"));
        executeTest("adding multiple IDs with dbSNP", spec);
    }
    @Test
    public void testDBTagWithHapMap() {
        WalkerTestSpec spec = new WalkerTestSpec(
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
                baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
                        "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
-                Arrays.asList("877de5b0cc61dc54636062df6399b978"));
+                Arrays.asList("1d1956fd7b0f0d30935674b2f5019860"));
        executeTest("test MultiSample Phase1 indels with complicated records", spec4);
    }