Sending latest bug fixes to Reduce Reads to the main repository

2011-09-21 17:43:11 -04:00 · 2011-09-21 17:43:11 -04:00 · 96768c8a18
parent 70335b2b0a 1b47dcb1b5
commit 96768c8a18
14 changed files with 166 additions and 91 deletions
--- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
    private String RECAL_FILE = "output.recal_data.csv";
    @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
    private String OUTPUT_DIR = "analyzeCovariates/";
-    @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
+    @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
    private String PATH_TO_RSCRIPT = "Rscript";
    @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
    private String PATH_TO_RESOURCES = "public/R/";
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
    protected boolean indelsOnly = false;

+    @Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
+    public String familyStr = null;
+
+    @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
+    public double minGenotypeQualityP = 0.0;
+
    private VariantAnnotatorEngine engine;

    private Collection<VariantContext> indelBufferContext;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
    private double[] TS_TRANCHES = new double[] {100.0, 99.9, 99.0, 90.0};
    @Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
    private String[] IGNORE_INPUT_FILTERS = null;
-    @Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required=false)
+    @Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required=false)
    private String PATH_TO_RSCRIPT = "Rscript";
    @Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
    private String RSCRIPT_FILE = null;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@ -452,7 +452,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
                        throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e);   }
            }
            else
-                mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD));
+                mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
        }
        else if (!FAMILY_STRUCTURE.isEmpty()) {
            mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
--- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java
@ -1,11 +1,13 @@
 package org.broadinstitute.sting.utils;

+import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.datasources.sample.Sample;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;

+import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import java.util.regex.Matcher;
@ -32,6 +34,9 @@ public class MendelianViolation {

    private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");

+    static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
+    static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
+

    public String getSampleMom() {
        return sampleMom;
@ -168,4 +173,41 @@ public class MendelianViolation {
        return true;
    }

+    /**
+     * Computes a log-scale ratio comparing the trio genotype combinations that violate Mendelian
+     * inheritance against the combinations that are consistent with it.
+     *
+     * All 27 (mom, dad, child) genotype combinations are enumerated by adding one entry from each
+     * sample's likelihood vector.  Only indices 0-2 of each vector are read, so every sample is
+     * treated as having exactly three genotype states.  The sums are stored at the flat index
+     * 9 * mom + 3 * dad + child and then split into the 12 entries listed in mvOffsets and the
+     * 15 entries listed in nonMVOffsets.
+     *
+     * NOTE(review): the per-sample values are added together and then handed to
+     * MathUtils.log10sumLog10, so they are presumably log10-scaled likelihoods -- confirm against
+     * getLikelihoods().getAsVector().
+     * NOTE(review): nothing here checks that the three samples have genotypes with likelihoods at
+     * this site; presumably callers guarantee that -- verify.
+     *
+     * @param vc the variant context queried for the genotypes of sampleMom, sampleDad and sampleChild
+     * @return MathUtils.log10sumLog10 of the mvOffsets entries minus MathUtils.log10sumLog10 of the
+     *         nonMVOffsets entries
+     */
+    public double violationLikelihoodRatio(VariantContext vc) {
+        double[] logLikAssignments = new double[27]; // one slot per (mom, dad, child) combination: 3 * 3 * 3
+        // the matrix to set up is
+        // MOM   DAD    CHILD
+        //                    |-  AA
+        //   AA     AA    |    AB
+        //                    |-   BB
+        //                    |- AA
+        //  AA     AB     |   AB
+        //                    |- BB
+        // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
+        double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
+        double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
+        double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
+        int offset = 0;
+        for ( int oMom = 0; oMom < 3; oMom++ ) {
+            for ( int oDad = 0; oDad < 3; oDad++ ) {
+                for ( int oChild = 0; oChild < 3; oChild ++ ) {
+                    logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild]; // offset == 9*oMom + 3*oDad + oChild at this point
+                }
+            }
+        }
+        double[] mvLiks = new double[12];
+        double[] nonMVLiks = new double[15];
+        for ( int i = 0; i < 12; i ++ ) {
+            mvLiks[i] = logLikAssignments[mvOffsets[i]]; // gather the combinations indexed by mvOffsets
+        }
+
+        for ( int i = 0; i < 15; i++) {
+            nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]]; // gather the remaining combinations
+        }
+
+        return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java
+++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java
@ -53,7 +53,7 @@ public class RScriptExecutor {

    public static class RScriptArgumentCollection {
        @Advanced
-        @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
+        @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
        public String PATH_TO_RSCRIPT = "Rscript";

        @Advanced
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@ -240,22 +240,34 @@ public class Utils {
        return ret.toString();
    }

-    //public static String join(String separator, Collection<String> strings) {
-    //    return join( separator, strings.toArray(new String[0]) );
-    //}
-
-    public static <T> String join(String separator, Collection<T> objects) {
-        if(objects.isEmpty()) {
+    /**
+     * Joins the string forms of the elements of a collection, placing separator between consecutive elements.
+     *
+     * The result has the form elt1.toString() sep elt2.toString() ... sep eltN.toString(); nothing other
+     * than sep is inserted between the elements.  Returns "" if the collection is empty.  If the collection
+     * contains a single element elt, returns elt.toString() directly without going through a StringBuilder.
+     *
+     * toString() is invoked directly on every element, so a null element results in a NullPointerException.
+     *
+     * @param separator the string to use to separate objects
+     * @param objects a collection of objects.  the element order is defined by the iterator over objects
+     * @param <T> the type of the objects
+     * @return the joined string, or "" for an empty collection
+     */
+    public static <T> String join(final String separator, final Collection<T> objects) {
+        if (objects.isEmpty()) { // fast path for empty collection
            return "";
-        }
-        Iterator<T> iter = objects.iterator();
-        final StringBuilder ret = new StringBuilder(iter.next().toString());
-        while(iter.hasNext()) {
-            ret.append(separator);
-            ret.append(iter.next().toString());
-        }
+        } else {
+            final Iterator<T> iter = objects.iterator();
+            final T first = iter.next();

-        return ret.toString();
+            if ( ! iter.hasNext() ) // fast path for singleton collections
+                return first.toString();
+            else { // full path for 2+ collection that actually need a join
+                final StringBuilder ret = new StringBuilder(first.toString());
+                while(iter.hasNext()) {
+                    ret.append(separator); // separator goes before every element after the first
+                    ret.append(iter.next().toString());
+                }
+                return ret.toString();
+            }
+        }
    }

    public static String dupString(char c, int nCopies) {
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequence;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMUtils;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@ -131,19 +132,18 @@ public class BAQ {
    private final static double EM = 0.33333333333;
    private final static double EI = 0.25;

-    private double[][][] EPSILONS = new double[256][256][64];
+    private double[][][] EPSILONS = new double[256][256][SAMUtils.MAX_PHRED_SCORE+1];

    private void initializeCachedData() {
        for ( int i = 0; i < 256; i++ )
            for ( int j = 0; j < 256; j++ )
-                for ( int q = 0; q < 64; q++ ) {
-                    double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
+                for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
                    EPSILONS[i][j][q] = 1.0;
                }

        for ( char b1 : "ACGTacgt".toCharArray() ) {
            for ( char b2 : "ACGTacgt".toCharArray() ) {
-                for ( int q = 0; q < 64; q++ ) {
+                for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
                    double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
                    double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 1 - qual : qual * EM;
                    EPSILONS[(byte)b1][(byte)b2][q] = e;
@ -152,7 +152,7 @@ public class BAQ {
        }
    }

-    private double calcEpsilon( byte ref, byte read, byte qualB ) {
+    protected double calcEpsilon( byte ref, byte read, byte qualB ) { // visibility widened from private; NOTE(review): presumably so BAQUnitTest can call it directly -- confirm
        return EPSILONS[ref][read][qualB]; // plain table lookup: all three bytes are used as array indices, so a negative byte or a qualB past the last dimension throws ArrayIndexOutOfBoundsException
    }

--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec;
 import org.broad.tribble.NameAwareCodec;
 import org.broad.tribble.TribbleException;
 import org.broad.tribble.readers.LineReader;
+import org.broad.tribble.util.BlockCompressedInputStream;
 import org.broad.tribble.util.ParsingUtils;
 import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -215,7 +216,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
            int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);

            // if we don't have a header, or we have a header with no genotyping data, check that we have eight columns.  Otherwise check that we have nine (normal columns + genotyping data)
-            if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
+            if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
                 (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
                throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
                        " tokens, and saw " + nParts + " )", lineNo);
@ -345,6 +346,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
            generateException("The VCF specification requires a valid info field");

        if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
+            if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 )
+                generateException("The VCF specification does not allow for whitespace in the INFO field");
+
            int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR);
            for (int i = 0; i < infoValueSplitSize; i++) {
                String key;
@ -587,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
    public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
        try {
            return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
-                    isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
+                    isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) ||
+                    isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
        } catch ( FileNotFoundException e ) {
            return false;
        } catch ( IOException e ) {
@ -598,12 +603,17 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
    private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) { // reports whether the first bytes of stream equal MAGIC_HEADER_LINE; the stream is always closed before returning
        try {
            byte[] buff = new byte[MAGIC_HEADER_LINE.length()]; // sized by the number of chars in the header, zero-filled by default
-            stream.read(buff, 0, MAGIC_HEADER_LINE.length());
-            String firstLine = new String(buff);
-            stream.close();
-            return firstLine.startsWith(MAGIC_HEADER_LINE);
+            int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length()); // NOTE(review): nread is never inspected; a short read leaves the tail of buff unfilled, which is only caught indirectly by the comparison below
+            boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes()); // NOTE(review): getBytes() uses the platform default charset; Arrays.equals also requires equal lengths, so this can only match when the header encodes to one byte per char
+            return eq;
+//            String firstLine = new String(buff);
+//            return firstLine.startsWith(MAGIC_HEADER_LINE);
        } catch ( IOException e ) {
            return false; // an unreadable stream is reported as "not a VCF"
+        } catch ( RuntimeException e ) {
+            return false; // any unchecked failure while probing is likewise reported as "not a VCF"
+        } finally {
+            try { stream.close(); } catch ( IOException e ) {} // best-effort close; a failure to close is deliberately ignored
        }
    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@ -316,19 +316,22 @@ public class VariantContextUtils {
        return pruneVariantContext(vc, null);
    }

-    public static VariantContext pruneVariantContext(VariantContext vc, Collection<String> keysToPreserve ) {
-        MutableVariantContext mvc = new MutableVariantContext(vc);
+    public static VariantContext pruneVariantContext(final VariantContext vc, final Collection<String> keysToPreserve ) {
+        final MutableVariantContext mvc = new MutableVariantContext(vc);

        if ( keysToPreserve == null || keysToPreserve.size() == 0 )
            mvc.clearAttributes();
        else {
-            Map<String, Object> d = mvc.getAttributes();
+            final Map<String, Object> d = mvc.getAttributes();
            mvc.clearAttributes();
            for ( String key : keysToPreserve )
                if ( d.containsKey(key) )
                    mvc.putAttribute(key, d.get(key));
        }

+        // this must be done as the ID is stored in the attributes field
+        if ( vc.hasID() ) mvc.setID(vc.getID());
+
        Collection<Genotype> gs = mvc.getGenotypes().values();
        mvc.clearGenotypes();
        for ( Genotype g : gs ) {
@ -443,34 +446,6 @@ public class VariantContextUtils {
        throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next()));
    }

-
-    public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
-        return simpleMerge(genomeLocParser, unsortedVCs, null, FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GenotypeMergeType.UNSORTED, false, false, refBase);
-    }
-
-
-    /**
-     * Merges VariantContexts into a single hybrid.  Takes genotypes for common samples in priority order, if provided.
-     * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
-     * the sample name
-     *
-     * @param genomeLocParser           loc parser
-     * @param unsortedVCs               collection of unsorted VCs
-     * @param priorityListOfVCs         priority list detailing the order in which we should grab the VCs
-     * @param filteredRecordMergeType   merge type for filtered records
-     * @param genotypeMergeOptions      merge option for genotypes
-     * @param annotateOrigin            should we annotate the set it came from?
-     * @param printMessages             should we print messages?
-     * @param inputRefBase              the ref base
-     * @return new VariantContext
-     */
-    public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
-                                             FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
-                                             boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
-
-        return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false);
-    }
-
    /**
     * Merges VariantContexts into a single hybrid.  Takes genotypes for common samples in priority order, if provided.
     * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
@ -486,12 +461,18 @@ public class VariantContextUtils {
     * @param setKey                    the key name of the set
     * @param filteredAreUncalled       are filtered records uncalled?
     * @param mergeInfoWithMaxAC        should we merge in info from the VC with maximum allele count?
-     * @return new VariantContext
+     * @return new VariantContext       representing the merge of unsortedVCs
     */
-    public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
-                                             FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
-                                             boolean annotateOrigin, boolean printMessages, String setKey,
-                                             boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
+    public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
+                                             final Collection<VariantContext> unsortedVCs,
+                                             final List<String> priorityListOfVCs,
+                                             final FilteredRecordMergeType filteredRecordMergeType,
+                                             final GenotypeMergeType genotypeMergeOptions,
+                                             final boolean annotateOrigin,
+                                             final boolean printMessages,
+                                             final String setKey,
+                                             final boolean filteredAreUncalled,
+                                             final boolean mergeInfoWithMaxAC ) {
        if ( unsortedVCs == null || unsortedVCs.size() == 0 )
            return null;

@ -514,26 +495,28 @@ public class VariantContextUtils {
            return null;

        // establish the baseline info from the first VC
-        VariantContext first = VCs.get(0);
-        String name = first.getSource();
-        GenomeLoc loc = getLocation(genomeLocParser,first);
+        final VariantContext first = VCs.get(0);
+        final String name = first.getSource();
+        final Allele refAllele = determineReferenceAllele(VCs);

-        Set<Allele> alleles = new TreeSet<Allele>();
-        Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
-        double negLog10PError = -1;
-        Set<String> filters = new TreeSet<String>();
-        Map<String, Object> attributes = new TreeMap<String, Object>();
-        Set<String> inconsistentAttributes = new HashSet<String>();
-        String rsID = null;
+        final Set<Allele> alleles = new TreeSet<Allele>();
+        final Set<String> filters = new TreeSet<String>();
+        final Map<String, Object> attributes = new TreeMap<String, Object>();
+        final Set<String> inconsistentAttributes = new HashSet<String>();
+        final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
+        final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
+
+        GenomeLoc loc = getLocation(genomeLocParser,first);
        int depth = 0;
        int maxAC = -1;
-        Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
+        final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
+        double negLog10PError = -1;
        VariantContext vcWithMaxAC = null;
+        Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();

        // counting the number of filtered and variant VCs
-        int nFiltered = 0, nVariant = 0;
+        int nFiltered = 0;

-        Allele refAllele = determineReferenceAllele(VCs);
        boolean remapped = false;

        // cycle through and add info from the other VCs, making sure the loc/reference matches
@ -546,7 +529,7 @@ public class VariantContextUtils {
                loc = getLocation(genomeLocParser,vc); // get the longest location

            nFiltered += vc.isFiltered() ? 1 : 0;
-            nVariant += vc.isVariant() ? 1 : 0;
+            if ( vc.isVariant() ) variantSources.add(vc.getSource());

            AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
            remapped = remapped || alleleMapping.needsRemapping();
@ -566,8 +549,7 @@ public class VariantContextUtils {
            //
            if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
                depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
-            if (rsID == null && vc.hasID())
-                rsID = vc.getID();
+            if ( vc.hasID() && ! vc.getID().equals(VCFConstants.EMPTY_ID_FIELD) ) rsIDs.add(vc.getID());
            if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
                String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
                // lets see if the string contains a , separator
@ -627,17 +609,16 @@ public class VariantContextUtils {
        if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
            filters.clear();

-        // we care about where the call came from
-        if ( annotateOrigin ) {
+        if ( annotateOrigin ) { // we care about where the call came from
            String setValue;
-            if ( nFiltered == 0 && nVariant == priorityListOfVCs.size() )                   // nothing was unfiltered
+            if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
                setValue = "Intersection";
            else if ( nFiltered == VCs.size() )     // everything was filtered out
                setValue = "FilteredInAll";
-            else if ( nVariant == 0 )               // everyone was reference
+            else if ( variantSources.isEmpty() )               // everyone was reference
                setValue = "ReferenceInAll";
-            else {                                  // we are filtered in some subset
-                List<String> s = new ArrayList<String>();
+            else {
+                LinkedHashSet<String> s = new LinkedHashSet<String>();
                for ( VariantContext vc : VCs )
                    if ( vc.isVariant() )
                        s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() );
@ -652,8 +633,10 @@ public class VariantContextUtils {

        if ( depth > 0 )
            attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
-        if ( rsID != null )
-            attributes.put(VariantContext.ID_KEY, rsID);
+
+        if ( ! rsIDs.isEmpty() ) {
+            attributes.put(VariantContext.ID_KEY, Utils.join(",", rsIDs));
+        }

        VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? attributesWithMaxAC : attributes) );
        // Trim the padded bases of all alleles if necessary
--- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java
@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest {
    private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
    private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
    private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
+    private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");

    private FeatureManager manager;
    private GenomeLocParser genomeLocParser;
@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest {
        new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
        new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
        new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ);
+        new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP);
        new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
        return FMTest.getTests(FMTest.class);
    }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
@ -96,8 +96,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {

    @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); }

-    @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); }
-    @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); }
+    @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "c6adeda751cb2a08690dd9202356629f"); }
+    @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3a08fd5ee18993dfc8882156ccf5d2e9"); }

    @Test public void threeWayWithRefs() {
        WalkerTestSpec spec = new WalkerTestSpec(
@ -131,4 +131,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
    @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
    @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
    @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
+
+    @Test
+    public void combineDBSNPDuplicateSites() { // runs CombineVariants over 1:902000-903000 with b37dbSNP132 as the only input, bound as v1
+         WalkerTestSpec spec = new WalkerTestSpec(
+                 "-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
+                 1,
+                 Arrays.asList("")); // NOTE(review): the expected value is an empty string, whereas the other tests in this file pass 32-character hex digests -- looks like a placeholder; confirm
+         executeTest("combineDBSNPDuplicateSites:", spec);
+    }
 }
--- a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java
@ -172,6 +172,17 @@ public class BAQUnitTest extends BaseTest {
        }
    }

+    @Test(enabled = true)
+    public void testBAQQualRange() { // checks that calcEpsilon can be queried for every quality from 0 through SAMUtils.MAX_PHRED_SCORE
+        BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false);         // matches current samtools parameters
+        final byte ref = (byte)'A';
+        final byte alt = (byte)'A'; // same base as ref, so only the matching-base entries of the table are exercised
+
+        for ( int i = 0; i <= SAMUtils.MAX_PHRED_SCORE; i++ ) // upper bound is inclusive
+            Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range"); // an undersized table would surface here as an ArrayIndexOutOfBoundsException rather than a failed assertion
+    }
+
+
    public void testBAQ(BAQTest test, boolean lookupWithFasta) {
        BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false);         // matches current samtools parameters

--- a/public/testdata/HiSeq.10000.bgzip.vcf.gz
+++ b/public/testdata/HiSeq.10000.bgzip.vcf.gz