Merge branch 'master' of ssh://chartl@ni.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

2012-01-26 12:38:24 -05:00 · 2012-01-26 12:38:24 -05:00 · 9d4b84f6bd
parent 8f7d9bff0a 9a63a9ae3c
commit 9d4b84f6bd
17 changed files with 450 additions and 351 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@ -92,7 +92,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio

                // Call the walker's isActive function for this locus and add the result to the list to be integrated later
                if( initialIntervals.overlaps(location) ) {
-                    final boolean isActive = walker.isActive( tracker, refContext, locus );
+                    final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
                    isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
                }
                
@ -109,7 +109,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                if( !locusView.hasNext() ) {
                    // Call the walker's isActive function for this locus and add the result to the list to be integrated later
                    if( initialIntervals.overlaps(location) ) {
-                        final boolean isActive = walker.isActive( tracker, refContext, locus );
+                        final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
                        isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
                    }

@ -128,7 +128,16 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
            // add these blocks of work to the work queue
            final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList );
            logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
-            workQueue.addAll( activeRegions );
+            if( walker.activeRegionOutStream == null ) { 
+                workQueue.addAll( activeRegions ); 
+            } else { // Just want to output the active regions to a file, not actually process them
+                for( final ActiveRegion activeRegion : activeRegions ) {
+                    if( activeRegion.isActive ) {
+                        walker.activeRegionOutStream.println( activeRegion.getLocation() );
+                    }
+                }
+            }
+            

            // Since we're sufficiently past this point (or this contig!) in the workQueue, we can unload those regions and process them
            if( !workQueue.isEmpty() ) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@ -1,6 +1,11 @@
 package org.broadinstitute.sting.gatk.walkers;

 import net.sf.picard.reference.IndexedFastaSequenceFile;
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.IntervalBinding;
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
@ -14,8 +19,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
 import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
+import org.broadinstitute.sting.utils.interval.IntervalSetRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;

+import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.List;

@ -32,6 +39,31 @@ import java.util.List;
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
 public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {

+    @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
+    public PrintStream activeRegionOutStream = null;
+
+    @Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false)
+    protected List<IntervalBinding<Feature>> activeRegionBindings = null;
+
+    public GenomeLocSortedSet presetActiveRegions = null;
+
+    @Override
+    public void initialize() {
+        if( activeRegionBindings == null ) { return; }
+        List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
+        for ( IntervalBinding intervalBinding : activeRegionBindings ) {
+            List<GenomeLoc> intervals = intervalBinding.getIntervals(this.getToolkit());
+
+            if ( intervals.isEmpty() ) {
+                logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
+            }
+
+            allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, IntervalSetRule.UNION);
+        }
+
+        presetActiveRegions = IntervalUtils.sortAndMergeIntervals(this.getToolkit().getGenomeLocParser(), allIntervals, IntervalMergingRule.ALL);
+    }
+
    // Do we actually want to operate on the context?
    public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
        return true;    // We are keeping all the reads
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDB;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
 import org.broadinstitute.sting.utils.MendelianViolation;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@ -23,7 +24,7 @@ import java.util.*;
 * Time: 12:24 PM
 * To change this template use File | Settings | File Templates.
 */
-public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
+public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {

    private MendelianViolation mendelianViolation = null;
    private String motherId;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.samples.SampleDB;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
 import org.broadinstitute.sting.utils.BaseUtils;
@ -84,7 +83,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann

    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
-    public RodBinding<VariantContext> getVariantRodBinding() { return variantCollection.variants; }

    /**
     * The INFO field will be annotated with information on the most biologically-significant effect
@ -163,6 +161,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
    protected Boolean LIST = false;

+    /**
+     * By default, the dbSNP ID is added only when the ID field in the variant VCF is empty.
+     */
+    @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated")
+    protected Boolean ALWAYS_APPEND_DBSNP_ID = false;
+    public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; }
+
    @Hidden
    @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
    protected boolean indelsOnly = false;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
@ -195,11 +195,20 @@ public class VariantAnnotatorEngine {
    private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map<String, Object> infoAnnotations) {
        for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
            if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
-                String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
+                final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
+                
+                // put the DB key into the INFO field
                infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null);
-                // annotate dbsnp id if available and not already there
-                if ( rsID != null && vc.emptyID() )
-                    vc = new VariantContextBuilder(vc).id(rsID).make();
+                
+                // add the ID if appropriate
+                if ( rsID != null ) {
+                    if ( vc.emptyID() ) {
+                        vc = new VariantContextBuilder(vc).id(rsID).make();
+                    } else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
+                        final String newRsID = vc.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID;
+                        vc = new VariantContextBuilder(vc).id(newRsID).make();
+                    }
+                }
            } else {
                boolean overlapsComp = false;
                for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
@ -8,9 +8,9 @@ import java.util.List;
 public interface AnnotatorCompatibleWalker {

    // getter methods for various used bindings
-    public abstract RodBinding<VariantContext> getVariantRodBinding();
    public abstract RodBinding<VariantContext> getSnpEffRodBinding();
    public abstract RodBinding<VariantContext> getDbsnpRodBinding();
    public abstract List<RodBinding<VariantContext>> getCompRodBindings();
    public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
+    public abstract boolean alwaysAppendDbsnpId();
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@ -39,7 +39,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {

    private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6

-
    protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
        super(UAC, N, logger, verboseWriter);
    }
@ -166,7 +165,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        final int numChr = 2*numSamples;

        // queue of AC conformations to process
-        final Queue<ExactACset> ACqueue = new LinkedList<ExactACset>();
+        final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();

        // mapping of ExactACset indexes to the objects
        final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
@ -177,11 +176,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        ACqueue.add(zeroSet);
        indexesToACset.put(zeroSet.ACcounts, zeroSet);

-	// optimization: create the temporary storage for computing L(j,k) just once
-	final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
-	final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
-	for ( int i = 0; i < maxPossibleDependencies; i++ )
-	    tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;
+	    // optimization: create the temporary storage for computing L(j,k) just once
+	    final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
+	    final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
+	    for ( int i = 0; i < maxPossibleDependencies; i++ )
+	        tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;

        // keep processing while we have AC conformations that need to be calculated
        double maxLog10L = Double.NEGATIVE_INFINITY;
@ -195,16 +194,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        }
    }

+    private static final class DependentSet {
+        public final int[] ACcounts;
+        public final int PLindex;
+        
+        public DependentSet(final int[] ACcounts, final int PLindex) {
+            this.ACcounts = ACcounts;
+            this.PLindex = PLindex;
+        }
+    }
+
    private static double calculateAlleleCountConformation(final ExactACset set,
                                                           final ArrayList<double[]> genotypeLikelihoods,
                                                           final double maxLog10L,
                                                           final int numChr,
                                                           final boolean preserveData,
-                                                           final Queue<ExactACset> ACqueue,
+                                                           final LinkedList<ExactACset> ACqueue,
                                                           final HashMap<ExactACcounts, ExactACset> indexesToACset,
                                                           final double[][] log10AlleleFrequencyPriors,
                                                           final AlleleFrequencyCalculationResult result,
-							   final double[][] tempLog10ConformationLikelihoods) {
+                                                           final double[][] tempLog10ConformationLikelihoods) {

        //if ( DEBUG )
        //    System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
@ -215,7 +224,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        // clean up memory
        if ( !preserveData ) {
            for ( ExactACcounts index : set.dependentACsetsToDelete ) {
-                indexesToACset.put(index, null);
+                indexesToACset.remove(index);
                //if ( DEBUG )
                //    System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
            }
@ -230,7 +239,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {

            // no reason to keep this data around because nothing depends on it
            if ( !preserveData )
-                indexesToACset.put(set.ACcounts, null);
+                indexesToACset.remove(set.ACcounts);

            return log10LofK;
        }
@ -240,7 +249,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
            return log10LofK;

-        ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing
        final int numAltAlleles = set.ACcounts.getCounts().length;

        // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
@ -251,30 +259,40 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        for ( int allele = 0; allele < numAltAlleles; allele++ ) {
            final int[] ACcountsClone = set.ACcounts.getCounts().clone();
            ACcountsClone[allele]++;
-            lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
+            updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
        }

        // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
        if ( ACwiggle > 1 ) {
+            final ArrayList<DependentSet> differentAlleles = new ArrayList<DependentSet>(numAltAlleles * numAltAlleles);
+            final ArrayList<DependentSet> sameAlleles = new ArrayList<DependentSet>(numAltAlleles);
+
            for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
                for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
                    final int[] ACcountsClone = set.ACcounts.getCounts().clone();
                    ACcountsClone[allele_i]++;
                    ACcountsClone[allele_j]++;
-                    lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset);
+
+                    if ( allele_i == allele_j )
+                        sameAlleles.add(new DependentSet(ACcountsClone, ++PLindex));
+                    else
+                        differentAlleles.add(new DependentSet(ACcountsClone, ++PLindex));
                }
            }
+
+            // IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
+            for ( DependentSet dependent : differentAlleles )
+                updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset);
+            for ( DependentSet dependent : sameAlleles )
+                updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset);
        }

-        // if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
-        // over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
-        if ( !preserveData && lastSet == null ) {
-            //if ( DEBUG )
-            //    System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
-            lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
+        // determine which is the last dependent set in the queue (not necessarily the last one added above) so we can know when it is safe to clean up this column
+        if ( !preserveData ) {
+            final ExactACset lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
+            if ( lastSet != null )
+                lastSet.dependentACsetsToDelete.add(set.ACcounts);
        }
-        if ( lastSet != null )
-            lastSet.dependentACsetsToDelete.add(set.ACcounts);

        return log10LofK;
    }
@ -282,34 +300,36 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
    // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
    // also adds it as a dependency to the given callingSetIndex.
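-    // returns the ExactACset if that set was not already in the queue and null otherwise.
+    // returns nothing; the (possibly newly created) ExactACset remains reachable through indexesToACset.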
-    private static ExactACset updateACset(final int[] ACcounts,
-                                          final int numChr,
-                                          final ExactACset callingSet,
-                                          final int PLsetIndex,
-                                          final Queue<ExactACset> ACqueue,
-                                          final HashMap<ExactACcounts, ExactACset> indexesToACset) {
+    private static void updateACset(final int[] ACcounts,
+                                    final int numChr,
+                                    final ExactACset callingSet,
+                                    final int PLsetIndex,
+                                    final Queue<ExactACset> ACqueue,
+                                    final HashMap<ExactACcounts, ExactACset> indexesToACset) {
        final ExactACcounts index = new ExactACcounts(ACcounts);
-        boolean wasInQueue = true;
        if ( !indexesToACset.containsKey(index) ) {
            ExactACset set = new ExactACset(numChr/2 +1, index);
            indexesToACset.put(index, set);
            ACqueue.add(set);
-            wasInQueue = false;
        }

        // add the given dependency to the set
+        //if ( DEBUG )
+        //    System.out.println(" *** adding dependency from " + index + " to " + callingSet.ACcounts);
        final ExactACset set = indexesToACset.get(index);
        set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex);
-        return wasInQueue ? null : set;
    }

-    private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final Queue<ExactACset> ACqueue) {
-        ExactACset set = null;
-        for ( ExactACset queued : ACqueue ) {
-            if ( queued.dependentACsetsToDelete.contains(callingSetIndex) )
-                set = queued;
+    private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final LinkedList<ExactACset> ACqueue) {
+        Iterator<ExactACset> reverseIterator = ACqueue.descendingIterator();
+        while ( reverseIterator.hasNext() ) {
+            final ExactACset queued = reverseIterator.next();
+            if ( queued.ACsetIndexToPLIndex.containsKey(callingSetIndex) )
+                return queued;
        }
-        return set;
+
+        // shouldn't get here
+        throw new ReviewedStingException("Error: no sets in the queue currently hold " + callingSetIndex + " as a dependent!");
    }

    private static void computeLofK(final ExactACset set,
@ -317,7 +337,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                                    final HashMap<ExactACcounts, ExactACset> indexesToACset,
                                    final double[][] log10AlleleFrequencyPriors,
                                    final AlleleFrequencyCalculationResult result,
-				    final double[][] tempLog10ConformationLikelihoods) {
+                                    final double[][] tempLog10ConformationLikelihoods) {

        set.log10Likelihoods[0] = 0.0; // the zero case
        final int totalK = set.getACsum();
@ -329,40 +349,40 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        }
        // k > 0 for at least one k
        else {
-	    // deal with the non-AA possible conformations
-	    int conformationIndex = 1;
-	    for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
-		//if ( DEBUG )
-		//    System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
+	        // deal with the non-AA possible conformations
+            int conformationIndex = 1;
+            for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
+		        //if ( DEBUG )
+		        //    System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());

-		ExactACset dependent = indexesToACset.get(mapping.getKey());
+                ExactACset dependent = indexesToACset.get(mapping.getKey());

-		for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
+                for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {

-		    if ( totalK <= 2*j ) { // skip impossible conformations
-			final double[] gl = genotypeLikelihoods.get(j);
-			tempLog10ConformationLikelihoods[j][conformationIndex] =
-			    determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
+                    if ( totalK <= 2*j ) { // skip impossible conformations
+                        final double[] gl = genotypeLikelihoods.get(j);
+                        tempLog10ConformationLikelihoods[j][conformationIndex] =
+                                determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
                    } else {
-			tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
-		    }
+                        tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
+                    }
                }

-		conformationIndex++;
-	    }
+                conformationIndex++;
+            }

-	    // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
+	        // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
            final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
-	    for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
+            for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {

-		if ( totalK < 2*j-1 ) {
-		    final double[] gl = genotypeLikelihoods.get(j);
-		    tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
-		} else {
-		    tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
-		}
+                if ( totalK < 2*j-1 ) {
+                    final double[] gl = genotypeLikelihoods.get(j);
+                    tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
+                } else {
+                    tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
+                }

-		final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
+                final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
                final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths);
                set.log10Likelihoods[j] = log10Max - logDenominator;
            }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java
@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.genotyper;
-
-import org.broadinstitute.sting.commandline.ArgumentCollection;
-import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.gatk.DownsampleType;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.*;
-import org.broadinstitute.sting.utils.SampleUtils;
-import org.broadinstitute.sting.utils.baq.BAQ;
-import org.broadinstitute.sting.utils.codecs.vcf.*;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-
-import java.util.HashSet;
-import java.util.Set;
-
-
-/**
- * Uses the UG engine to determine per-sample genotype likelihoods and emits them as a VCF (using PLs).
- * Absolutely not supported or recommended for public use.
- * Run this as you would the UnifiedGenotyper, except that you must additionally pass in a VCF bound to
- * the name 'allele' so we know which alternate allele to use at each site.
- */
-@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
-@Reference(window=@Window(start=-200,stop=200))
-@By(DataSource.READS)
-@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
-public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer> implements TreeReducible<Integer> {
-
-    @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
-
-    // control the output
-    @Output(doc="File to which variants should be written",required=true)
-    protected VCFWriter writer = null;
-
-    // the calculation arguments
-    private UnifiedGenotyperEngine UG_engine = null;
-
-    // enable deletions in the pileup
-    public boolean includeReadsWithDeletionAtLoci() { return true; }
-
-    // enable extended events for indels
-    public boolean generateExtendedEvents() { return UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP; }
-
-    public void initialize() {
-        // get all of the unique sample names
-        Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
-
-        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);
-
-        // initialize the header
-        Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
-        headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
-        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
-        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
-        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
-
-        writer.writeHeader(new VCFHeader(headerInfo, samples)) ;
-    }
-
-    public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
-        VariantContext call = UG_engine.calculateLikelihoods(tracker, refContext, rawContext);
-        return call == null ? null : new VariantCallContext(call, true);
-    }
-
-    public Integer reduceInit() { return 0; }
-
-    public Integer treeReduce(Integer lhs, Integer rhs) {
-        return lhs + rhs;
-    }
-
-    public Integer reduce(VariantCallContext value, Integer sum) {
-        if ( value == null )
-            return sum;
-
-        try {
-            writer.add(value);
-        } catch (IllegalArgumentException e) {
-            throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
-        }
-
-        return sum + 1;
-    }
-
-    public void onTraversalDone(Integer sum) {
-        logger.info(String.format("Visited bases: %d", sum));
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2010, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.genotyper;
-
-import org.broadinstitute.sting.commandline.ArgumentCollection;
-import org.broadinstitute.sting.commandline.Input;
-import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.commandline.RodBinding;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.RodWalker;
-import org.broadinstitute.sting.utils.SampleUtils;
-import org.broadinstitute.sting.utils.codecs.vcf.*;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.variantcontext.*;
-
-import java.util.*;
-
-/**
- * Uses the UG engine to call variants based off of VCFs annotated with GLs (or PLs).
- * Absolutely not supported or recommended for public use.
- * Run this as you would the UnifiedGenotyper, except that instead of '-I reads' it expects any number
- * of GL/PL-annotated VCFs bound to a name starting with 'variant'.
- */
-public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
-
-    @ArgumentCollection
-    private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
-
-    @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
-    public List<RodBinding<VariantContext>> variants;
-
-    // control the output
-    @Output(doc="File to which variants should be written",required=true)
-    protected VCFWriter writer = null;
-
-    // the calculation arguments
-    private UnifiedGenotyperEngine UG_engine = null;
-
-    // variant track names
-    private Set<String> trackNames = new HashSet<String>();
-
-    public void initialize() {
-
-        for ( RodBinding<VariantContext> rb : variants )
-            trackNames.add(rb.getName());
-        Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), trackNames);
-
-        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);
-
-        Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
-        headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"));
-        headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
-        headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
-        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
-        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
-        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
-        headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
-        if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
-            headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
-
-        // initialize the header
-        writer.writeHeader(new VCFHeader(headerInfo, samples));
-    }
-
-    public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if ( tracker == null )
-            return null;
-
-        List<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
-
-        VariantContext mergedVC = mergeVCsWithGLs(VCs);
-        if ( mergedVC == null )
-            return null;
-
-        return UG_engine.calculateGenotypes(tracker, ref, context, mergedVC);
-    }
-
-    public Integer reduceInit() { return 0; }
-
-    public Integer reduce(VariantCallContext value, Integer sum) {
-        if ( value == null )
-            return sum;
-
-        try {
-            VariantContextBuilder builder = new VariantContextBuilder(value);
-            VariantContextUtils.calculateChromosomeCounts(builder, true);
-            writer.add(builder.make());
-        } catch (IllegalArgumentException e) {
-            throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
-        }
-
-        return sum + 1;
-    }
-
-    public void onTraversalDone(Integer result) {
-        logger.info(String.format("Visited sites: %d", result));
-    }
-
-    private static VariantContext mergeVCsWithGLs(List<VariantContext> VCs) {
-        // we can't use the VCUtils classes because our VCs can all be no-calls
-        if ( VCs.size() == 0 )
-            return null;
-
-        VariantContext variantVC = null;
-        GenotypesContext genotypes = GenotypesContext.create();
-        for ( VariantContext vc : VCs ) {
-            if ( variantVC == null && vc.isVariant() )
-                variantVC = vc;
-            genotypes.addAll(getGenotypesWithGLs(vc.getGenotypes()));
-        }
-
-        if ( variantVC == null ) {
-            VariantContext vc = VCs.get(0);
-            throw new UserException("There is no ALT allele in any of the VCF records passed in at " + vc.getChr() + ":" + vc.getStart());
-        }
-
-        return new VariantContextBuilder(variantVC).source("VCwithGLs").genotypes(genotypes).make();
-    }
-
-    private static GenotypesContext getGenotypesWithGLs(GenotypesContext genotypes) {
-        GenotypesContext genotypesWithGLs = GenotypesContext.create(genotypes.size());
-        for ( final Genotype g : genotypes ) {
-            if ( g.hasLikelihoods() && g.getLikelihoods().getAsVector() != null )
-                genotypesWithGLs.add(g);
-        }
-        return genotypesWithGLs;
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@ -126,10 +126,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
    @ArgumentCollection
    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
    public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
-    public RodBinding<VariantContext> getVariantRodBinding() { return null; }
    public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
    public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
    public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
+    public boolean alwaysAppendDbsnpId() { return false; }

    /**
     * A raw, unfiltered, highly specific callset in VCF format.
@ -205,6 +205,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
     *
     **/
    public void initialize() {
+        // warn the user for misusing EMIT_ALL_SITES
+        if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES &&
+                UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY &&
+                UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP )
+            logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode");
+        
        // get all of the unique sample names
        Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -54,8 +54,9 @@ public class UnifiedGenotyperEngine {
        EMIT_VARIANTS_ONLY,
        /** produces calls at variant sites and confident reference sites */
        EMIT_ALL_CONFIDENT_SITES,
-        /** produces calls at any callable site regardless of confidence; this argument is intended for point
-         * mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */
+        /** produces calls at any callable site regardless of confidence; this argument is intended only for point
+         * mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by
+         * no means produce a comprehensive set of indels in DISCOVERY mode */
        EMIT_ALL_SITES
    }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
+
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
+import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
+import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
+import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.variantcontext.*;
+
+import java.util.*;
+
+@Analysis(description = "Evaluation summary for multi-allelic variants")
+public class MultiallelicSummary extends VariantEvaluator { // implements StandardEval {
+    final protected static Logger logger = Logger.getLogger(MultiallelicSummary.class);
+
+    public enum Type {
+        SNP, INDEL
+    }
+
+    // basic counts on various rates found
+    @DataPoint(description = "Number of processed loci")
+    public long nProcessedLoci = 0;
+
+    @DataPoint(description = "Number of SNPs")
+    public int nSNPs = 0;
+    @DataPoint(description = "Number of multi-allelic SNPs")
+    public int nMultiSNPs = 0;
+    @DataPoint(description = "% processed sites that are multi-allelic SNPs", format = "%.5f")
+    public double processedMultiSnpRatio = 0;
+    @DataPoint(description = "% SNP sites that are multi-allelic", format = "%.3f")
+    public double variantMultiSnpRatio = 0;
+
+    @DataPoint(description = "Number of Indels")
+    public int nIndels = 0;
+    @DataPoint(description = "Number of multi-allelic Indels")
+    public int nMultiIndels = 0;
+    @DataPoint(description = "% processed sites that are multi-allelic Indels", format = "%.5f")
+    public double processedMultiIndelRatio = 0;
+    @DataPoint(description = "% Indel sites that are multi-allelic", format = "%.3f")
+    public double variantMultiIndelRatio = 0;
+
+    @DataPoint(description = "Number of Transitions")
+    public int nTi = 0;
+    @DataPoint(description = "Number of Transversions")
+    public int nTv = 0;
+    @DataPoint(description = "Overall TiTv ratio", format = "%.2f")
+    public double TiTvRatio = 0;
+
+    @DataPoint(description = "Multi-allelic SNPs partially known")
+    public int knownSNPsPartial = 0;
+    @DataPoint(description = "Multi-allelic SNPs completely known")
+    public int knownSNPsComplete = 0;
+    @DataPoint(description = "Multi-allelic SNP Novelty Rate")
+    public String SNPNoveltyRate = "NA";
+
+    @DataPoint(description = "Multi-allelic Indels partially known")
+    public int knownIndelsPartial = 0;
+    @DataPoint(description = "Multi-allelic Indels completely known")
+    public int knownIndelsComplete = 0;
+    @DataPoint(description = "Multi-allelic Indel Novelty Rate")
+    public String indelNoveltyRate = "NA";
+
+    @DataPoint(description="Histogram of allele frequencies")
+    AFHistogram AFhistogram = new AFHistogram();
+
+    /*
+     * AF histogram table object
+     */
+    /**
+     * Single-row table ("pairwise_AF") that counts allele-frequency values in bins of width
+     * {@code AFincrement}. Column keys are the bin labels "0.00" through "1.00".
+     *
+     * Only AF attributes stored as a List are counted (see {@link #update(VariantContext)});
+     * a scalar AF attribute is ignored.
+     */
+    static class AFHistogram implements TableType {
+        private static final double AFincrement = 0.01;
+        private static final int numBins = (int)(1.00 / AFincrement);
+
+        private final Object[] colKeys;
+        private final Object[] rowKeys = {"pairwise_AF"};
+        private final int[] AFhistogram;
+
+        public AFHistogram() {
+            colKeys = initColKeys();
+            AFhistogram = new int[colKeys.length];
+        }
+
+        /** @return the bin labels, one per histogram column */
+        public Object[] getColumnKeys() {
+            return colKeys;
+        }
+
+        /** @return the single row key of this table */
+        public Object[] getRowKeys() {
+            return rowKeys;
+        }
+
+        /**
+         * @param row ignored; the table has a single row
+         * @param col bin index
+         * @return the count accumulated in bin {@code col}
+         */
+        public Object getCell(int row, int col) {
+            return AFhistogram[col];
+        }
+
+        /**
+         * Builds the numBins + 1 column labels.
+         * The labels are derived from an integer bin index rather than by repeatedly adding
+         * AFincrement to a double, so the number of columns does not depend on accumulated
+         * floating-point error.
+         */
+        private static Object[] initColKeys() {
+            final List<String> keyList = new ArrayList<String>(numBins + 1);
+            for ( int bin = 0; bin <= numBins; bin++ ) {
+                keyList.add(String.format("%.2f", (double)bin / numBins));
+            }
+            return keyList.toArray();
+        }
+
+        public String getName() { return "AFHistTable"; }
+
+        /**
+         * Adds every allele frequency listed in the AF attribute of {@code vc} to the histogram.
+         * Does nothing when the attribute is absent or is not a List.
+         *
+         * @param vc the variant whose AF attribute is read
+         * @throws NumberFormatException if an entry cannot be parsed as a double
+         * @throws ArrayIndexOutOfBoundsException if a frequency falls outside the binned range
+         */
+        public void update(VariantContext vc) {
+            final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
+            if ( !(obj instanceof List) )
+                return;
+
+            for ( final Object value : (List<?>)obj ) {
+                final double AF = Double.parseDouble(String.valueOf(value));
+                // round to the nearest bin: a plain (int) cast truncates products such as
+                // 100 * 0.29 = 28.999999999999996 into the bin below the intended one
+                final int bin = (int)Math.round(numBins * MathUtils.round(AF, 2));
+                AFhistogram[bin]++;
+            }
+        }
+    }
+
+    /** Empty: this evaluator performs no setup with the walker. */
+    public void initialize(VariantEvalWalker walker) {}
+
+    /** Always reports this evaluator as enabled. */
+    @Override public boolean enabled() { return true; }
+
+    /**
+     * Returns 2. Presumably this selects the two-context callback (update2, eval plus comp);
+     * TODO confirm against VariantEvaluator.
+     */
+    public int getComparisonOrder() {
+        return 2;
+    }
+
+    /**
+     * Accumulates the number of processed loci: the bases skipped according to the alignment
+     * context, plus one for the current locus when a reference context is present.
+     */
+    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        final int currentLocus = ( ref != null ) ? 1 : 0;
+        nProcessedLoci += context.getSkippedBases() + currentLocus;
+    }
+
+    /**
+     * Updates the SNP/indel counters and the AF histogram for one evaluated site.
+     *
+     * Sites where eval is null or monomorphic in the samples are ignored. For multi-allelic
+     * SNPs the per-allele Ti/Tv counts and SNP novelty counters are also updated; for
+     * multi-allelic indels the (currently empty) indel novelty hook is invoked.
+     *
+     * @param eval the variant being evaluated; may be null
+     * @param comp the comparison variant, passed through to the novelty calculations
+     * @return always null (no sites are reported as interesting)
+     * @throws UserException.BadInput if eval is neither a SNP nor an indel
+     */
+    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( eval == null || eval.isMonomorphicInSamples() )
+            return null;
+
+        // update counts
+        // NOTE: the case labels below resolve against the type returned by eval.getType(),
+        // not against the Type enum declared in this class, which has the same constant names.
+        switch ( eval.getType() ) {
+            case SNP:
+                nSNPs++;
+                if ( !eval.isBiallelic() ) {
+                    nMultiSNPs++;
+                    calculatePairwiseTiTv(eval);
+                    calculateSNPPairwiseNovelty(eval, comp);
+                }
+                break;
+            case INDEL:
+                nIndels++;
+                if ( !eval.isBiallelic() ) {
+                    nMultiIndels++;
+                    calculateIndelPairwiseNovelty(eval, comp);
+                }
+                break;
+            default:
+                throw new UserException.BadInput("Unexpected variant context type: " + eval);
+        }
+        // reached for every SNP or indel site, bi-allelic or not
+        AFhistogram.update(eval);
+        
+        return null; // we don't capture any interesting sites
+    }
+
+    /**
+     * Classifies each alternate allele of {@code vc} against the reference allele and
+     * increments nTi for a transition, nTv for anything else.
+     */
+    private void calculatePairwiseTiTv(VariantContext vc) {
+        final Allele refAllele = vc.getReference();
+        for ( final Allele altAllele : vc.getAlternateAlleles() ) {
+            final boolean transition = VariantContextUtils.isTransition(refAllele, altAllele);
+            if ( !transition )
+                nTv++;
+            else
+                nTi++;
+        }
+    }
+
+    /**
+     * Counts how many alternate alleles of {@code eval} also appear among the alternate
+     * alleles of {@code comp}. If all of them do, knownSNPsComplete is incremented; if only
+     * some do, knownSNPsPartial is incremented. Nothing is counted when comp is null.
+     */
+    private void calculateSNPPairwiseNovelty(VariantContext eval, VariantContext comp) {
+        if ( comp != null ) {
+            int matched = 0;
+            for ( final Allele evalAlt : eval.getAlternateAlleles() ) {
+                if ( comp.getAlternateAlleles().contains(evalAlt) )
+                    matched++;
+            }
+
+            final boolean everyAlleleKnown = ( matched == eval.getAlternateAlleles().size() );
+            if ( everyAlleleKnown )
+                knownSNPsComplete++;
+            else if ( matched > 0 )
+                knownSNPsPartial++;
+        }
+    }
+
+    /**
+     * Placeholder: the body is empty, so this call never changes knownIndelsPartial or
+     * knownIndelsComplete.
+     */
+    private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) {
+    }
+
+    /**
+     * Formats the novel fraction, (all - known) / all, with two decimals.
+     *
+     * @param all   total number of sites
+     * @param known number of those sites that are known
+     * @return "NA" when {@code all} is zero, otherwise the formatted rate
+     */
+    private final String noveltyRate(final int all, final int known) {
+        if ( all == 0 )
+            return "NA";
+
+        final double novelFraction = (all - known) / (1.0 * all);
+        return String.format("%.2f", novelFraction);
+    }
+
+    /**
+     * Derives the summary ratios and novelty rates from the accumulated counters.
+     *
+     * The ratios are plain double divisions, so a zero denominator produces NaN or Infinity
+     * rather than an exception. The novelty rates are "NA" when the corresponding
+     * multi-allelic count is zero.
+     */
+    public void finalizeEvaluation() {
+        processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
+        variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
+        processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;
+        variantMultiIndelRatio = (double)nMultiIndels / (double)nIndels;
+
+        TiTvRatio = (double)nTi / (double)nTv;
+
+        SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
+        // the indel rate is taken over the multi-allelic indel count, not the SNP count
+        indelNoveltyRate = noveltyRate(nMultiIndels, knownIndelsPartial + knownIndelsComplete);
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
@ -120,6 +120,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
    @Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
    public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;

+    @Hidden
+    @Argument(shortName="multipleAllelesMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required=false)
+    public VariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
+
    /**
     * Used when taking the union of variants that contain genotypes.  A complete priority list MUST be provided.
     */
@ -236,13 +240,24 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
            return 0;

        List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
-        Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
-        // iterate over the types so that it's deterministic
-        for ( VariantContext.Type type : VariantContext.Type.values() ) {
-            if ( VCsByType.containsKey(type) )
-                mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
-                        priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
-                        SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
+
+        if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
+            Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
+            // iterate over the types so that it's deterministic
+            for (VariantContext.Type type : VariantContext.Type.values()) {
+                if (VCsByType.containsKey(type))
+                    mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
+                            priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
+                            SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
+            }
+        }
+        else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
+            mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs,
+                    priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
+                    SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
+        }
+        else {
+            logger.warn("Ignoring all records at site " + ref.getLocus());
        }

        for ( VariantContext mergedVC : mergedVCs ) {
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java
@ -24,6 +24,7 @@

 package org.broadinstitute.sting.utils.codecs.vcf;

+import org.broad.tribble.TribbleException;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

 import java.util.Arrays;
@ -149,7 +150,11 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
            count = Integer.valueOf(numberStr);

        }
-        type = VCFHeaderLineType.valueOf(mapping.get("Type"));
+        try {
+            type = VCFHeaderLineType.valueOf(mapping.get("Type"));
+        } catch (Exception e) {
+            throw new TribbleException(mapping.get("Type") + " is not a valid type in the VCF specification (note that types are case-sensitive)");
+        }
        if (type == VCFHeaderLineType.Flag && !allowFlagValues())
            throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");

--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@ -29,6 +29,7 @@ import org.apache.commons.jexl2.Expression;
 import org.apache.commons.jexl2.JexlEngine;
 import org.apache.log4j.Logger;
 import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
+import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@ -471,6 +472,18 @@ public class VariantContextUtils {
        KEEP_UNCONDITIONAL
    }

+    /**
+     * Strategy for merging records that start at the same position but carry alleles of
+     * different types.
+     */
+    @Hidden
+    public enum MultipleAllelesMergeType {
+        /**
+         * Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record.
+         */
+        BY_TYPE,
+        /**
+         * Merge all allele types at the same start position into the same VCF record.
+         */
+        MIX_TYPES
+    }
+
    /**
     * Merges VariantContexts into a single hybrid.  Takes genotypes for common samples in priority order, if provided.
     * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
@ -1060,6 +1073,14 @@ public class VariantContextUtils {
        return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
    }

+    /**
+     * Tests whether the substitution from the first base of {@code ref} to the first base of
+     * {@code alt} is a transition. Only the first base of each allele is examined.
+     */
+    public static boolean isTransition(Allele ref, Allele alt) {
+        final BaseUtils.BaseSubstitutionType substitution =
+                BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]);
+        return BaseUtils.BaseSubstitutionType.TRANSITION == substitution;
+    }
+
+    /**
+     * Tests whether the substitution from the first base of {@code ref} to the first base of
+     * {@code alt} is a transversion. Only the first base of each allele is examined.
+     */
+    public static boolean isTransversion(Allele ref, Allele alt) {
+        final BaseUtils.BaseSubstitutionType substitution =
+                BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]);
+        return BaseUtils.BaseSubstitutionType.TRANSVERSION == substitution;
+    }
+
    /**
     * create a genome location, given a variant context
     * @param genomeLocParser parser
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@ -110,6 +110,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
        executeTest("getting DB tag with dbSNP", spec);
    }

+    /**
+     * Runs the annotator with --alwaysAppendDbsnpId over a VCF that already carries IDs and
+     * checks the output against the expected MD5.
+     */
+    @Test
+    public void testMultipleIdsWithDbsnp() {
+        final String vcfWithIds = validationDataLocation + "vcfexample3withIDs.vcf";
+        final String command = baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129
+                + " -G Standard --variant " + vcfWithIds + " -L " + vcfWithIds;
+        final WalkerTestSpec spec = new WalkerTestSpec(command, 1,
+                Arrays.asList("cd7e3d43b8f5579c461b3e588a295fa8"));
+        executeTest("adding multiple IDs with dbSNP", spec);
+    }
+
    @Test
    public void testDBTagWithHapMap() {
        WalkerTestSpec spec = new WalkerTestSpec(
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
                baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
                        "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
-                Arrays.asList("877de5b0cc61dc54636062df6399b978"));
+                Arrays.asList("1d1956fd7b0f0d30935674b2f5019860"));
        executeTest("test MultiSample Phase1 indels with complicated records", spec4);
    }