Bunch of oneoff stuff that I don't want to lose. Also:

VCFRecord - "." dbsnp-ID entries now taken into account (thought these were represented as null; but I guess not) VCFGenotypeRecord - added a replaceFormat option; since intersecting Broad/BC call sets required genotype formats also be intersected (no changing on-the-fly) VCFCombine - altered doc to instruct user to give complete priority list (was throwing exception if not) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2760 348d0f76-0448-11de-a6fe-93d51630548a
2010-02-01 21:35:10 +00:00 · 2010-02-01 21:35:10 +00:00 · 2c4f709f6f
parent 421282cfa3
commit 2c4f709f6f
9 changed files with 401 additions and 28 deletions
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java
@ -0,0 +1,55 @@
+package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
+
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
+import org.broadinstitute.sting.gatk.refdata.RODRecordList;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
+import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotation;
+import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD;
+import org.broadinstitute.sting.utils.genotype.Variation;
+import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
+import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
+ *
+ * @Author chartl
+ * @Date Feb 1, 2010
+ */
+public class ThousandGenomesAnnotator implements VariantAnnotation {
+
+    public String getKeyName() {
+        return "1KG";
+    }
+
+    public VCFInfoHeaderLine getDescription() {
+        return new VCFInfoHeaderLine(getKeyName(),
+                1,VCFInfoHeaderLine.INFO_TYPE.String,"Is this site seen in Pilot1 or Pilot2 of 1KG?");
+    }
+
+    public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, Variation variation) {
+        if ( tracker == null ) {
+            return null;
+        }
+
+        RODRecordList pilot1 = tracker.getTrackData("pilot1",null);
+        RODRecordList pilot2 = tracker.getTrackData("pilot2",null);
+
+        if ( pilot1 == null && pilot2 == null) {
+            return "0";
+        } else {
+            if ( pilot1 != null && ! ( (HapmapVCFROD) pilot1.getRecords().get(0)).getRecord().isFiltered() ) {
+                return "1";
+            } else if ( pilot2 != null && ! ( (HapmapVCFROD) pilot2.getRecords().get(0)).getRecord().isFiltered() ) {
+                return "1";
+            } else {
+                return "0";
+            }
+        }
+    }
+}
+
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/LocusConcordanceInfo.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/LocusConcordanceInfo.java
@ -19,7 +19,7 @@ import java.util.Set;
 class LocusConcordanceInfo {

    public enum ConcordanceType {
-        TRUTH_SET,VARIANT_SET,BOTH_SETS
+        TRUTH_SET,TRUTH_SET_VARIANT_FILTERED,VARIANT_SET,BOTH_SETS
    }

    private ConcordanceType concordanceType;
@ -70,8 +70,12 @@ class LocusConcordanceInfo {
        return false;
    }

+    public boolean isVariantFiltered() {
+        return this.concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED;
+    }
+
    public GenomeLoc getLoc() {
-        if ( concordanceType == ConcordanceType.TRUTH_SET || concordanceType == ConcordanceType.BOTH_SETS) {
+        if ( concordanceType == ConcordanceType.TRUTH_SET || concordanceType == ConcordanceType.BOTH_SETS || concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED) {
            return truthVCFRecord.getLocation();
        } else {
            return variantVCFRecord.getLocation();
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java
@ -19,14 +19,21 @@ class MultiSampleConcordanceSet {
    private long truthOnlyVariantSites;
    private long variantOnlySites;
    private long overlappingSites;
+    private long truthSitesFilteredOut;
    private int genotypeQuality;

+    private int truePositiveLoci;
+    private int trueNegativeLoci;
+    private int falsePositiveLoci;
+    private int falseNegativeLoci;
+
    public MultiSampleConcordanceSet(int minDepth, boolean assumeRef, int genotypeQuality) {
        concordanceSet = new HashSet<VCFConcordanceCalculator>();
        truthOnlySites = 0l;
        truthOnlyVariantSites = 0l;
        variantOnlySites = 0l;
        overlappingSites = 0l;
+        truthSitesFilteredOut = 0l;
        minimumDepthForTest = minDepth;
        treatTruthOnlyAsFalseNegative = assumeRef;
        this.genotypeQuality = genotypeQuality;
@ -59,7 +66,12 @@ class MultiSampleConcordanceSet {
                    }
                }
            }
-        } else {
+        } else if ( info.isVariantFiltered() ) {
+            for ( VCFConcordanceCalculator concordance : concordanceSet ) {
+                concordance.updateFilteredLocus(info);
+                truthSitesFilteredOut++;
+            }
+        } else{
            variantOnlySites++;
        }
    }
@ -83,4 +95,8 @@ class MultiSampleConcordanceSet {
    public long numberOfOverlappingSites() {
        return overlappingSites;
    }
+
+    public long numberOfFilteredTrueSites() {
+        return truthSitesFilteredOut;
+    }
 }
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java
@ -29,7 +29,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
    @Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
            "DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
    @Argument(fullName="genotypeConfidence", shortName="GC", doc="The quality score for genotypes below which to count genotyping as a no-call", required=false)
-    int genotypeQuality = 0;
+    int genotypeQuality = Integer.MIN_VALUE;
    @Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false )
    boolean ignoreKnownSites = false;
    @Argument(fullName="missingLocusAsConfidentRef", shortName="assumeRef", doc="Assume a missing locus in the variant VCF is a confident ref call with sufficient depth"+
@ -45,9 +45,16 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
    }

    public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
-        if ( tracker == null || ( ignoreKnownSites && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) ) {
+        if ( tracker == null ) {
            return null;
        }
+
+        if ( ignoreKnownSites ) { // ignoreKnownSites && tracker.lookup("variants",null) != null && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) )
+            if ( tracker.lookup("variants",null) != null && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) {
+                //logger.info("Not novel: "+( (RodVCF) tracker.lookup("variants",null)).getID());
+                return null;
+            }
+        }
        ReferenceOrderedDatum truthData = tracker.lookup("truth", null);
        ReferenceOrderedDatum variantData = tracker.lookup("variants",null);
        LocusConcordanceInfo concordance;
@ -77,7 +84,7 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
            } else if ( truth_filter ) {
                concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref);
            } else if ( call_filter ) {
-                concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,( (RodVCF) truthData).getRecord(),null,ref);
+                concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,( (RodVCF) truthData).getRecord(), null ,ref);
            } else {
                concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,( (RodVCF) truthData).getRecord(),( (RodVCF) variantData).getRecord(),ref);
            }
@ -102,12 +109,12 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
    }

    public void onTraversalDone(MultiSampleConcordanceSet cSet) {
-        out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Correct_But_Low_Genotype_Qual","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call");
+        out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Correct_But_Low_Genotype_Qual","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call","False_Negatives_Due_To_Filtration");
        for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
            out.print(String.format("%s%n",sample));
        }
        logger.info("Overlapping="+cSet.numberOfOverlappingSites()+"\tTruthOnly="+cSet.numberOfTruthOnlySites()+"\tTruthOnlyVariantSites="+
-                    cSet.numberOfTruthOnlyVariantSites()+"\tVariantOnly="+cSet.numberOfVariantOnlySites());
+                    cSet.numberOfTruthOnlyVariantSites()+"\tVariantOnly="+cSet.numberOfVariantOnlySites()+"\tTruthSitesFilteredOut="+cSet.numberOfFilteredTrueSites());
    }

 }
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java
@ -23,6 +23,7 @@ class VCFConcordanceCalculator {
    private Set<GenomeLoc> falsePositiveLoci;
    private Set<GenomeLoc> falseNegativeLoci;
    private Set<GenomeLoc> falseNegativeLociDueToNoCall;
+    private Set<GenomeLoc> falseNegativeLociDueToFilters;
    private Set<GenomeLoc> hetsCalledHoms;
    private Set<GenomeLoc> homsCalledHets;
    private Set<GenomeLoc> nonConfidentGenotypeCalls;
@ -37,6 +38,7 @@ class VCFConcordanceCalculator {
        falseNegativeLoci = new HashSet<GenomeLoc>();
        falseNegativeLociDueToNoCall = new HashSet<GenomeLoc>();
        falsePositiveLoci = new HashSet<GenomeLoc>();
+        falseNegativeLociDueToFilters = new HashSet<GenomeLoc>();
        hetsCalledHoms = new HashSet<GenomeLoc>();
        homsCalledHets = new HashSet<GenomeLoc>();
        nonConfidentGenotypeCalls = new HashSet<GenomeLoc>();
@ -61,19 +63,25 @@ class VCFConcordanceCalculator {
        }
    }

+    public void updateFilteredLocus(LocusConcordanceInfo info) {
+
+        if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase()) ) {
+            falseNegativeLociDueToFilters.add(info.getLoc());
+        }
+    }
+
+
    public String toString() {
-        return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),
+        return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(),
                concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),nonConfidentGenotypeCalls.size(),
                homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(),
-                falseNegativeLociDueToNoCall.size());
+                falseNegativeLociDueToNoCall.size(),falseNegativeLociDueToFilters.size());
    }

    private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
        if ( minimumDepthForUpdate > 0 && call.getReadCount() < minimumDepthForUpdate ) {
            ignoredDueToDepth.add(loc);
-            return; // do not update; just go away
-        }
-        if ( truth.isNoCall() ) {
+        } else if ( truth.isNoCall() ) {
            chipNoCalls.add(loc);
        } else if ( truth.isVariant(( char) ref) ) {
            if ( call.isNoCall() ) {
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java
@ -3,40 +3,316 @@ package org.broadinstitute.sting.oneoffprojects.walkers.vcftools;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.RodVCF;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
-import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
+import org.broadinstitute.sting.utils.ListUtils;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.cmdLine.Argument;
+import org.broadinstitute.sting.utils.genotype.vcf.*;
+
 import java.lang.Long;
+import java.util.*;

 /**
- * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
+ * This walker intersects the samples for two VCF files and returns a set of calls determined
+ * by command-line arguments. Intersect indicates that calls should be returned only where
+ * the same locus appears in the two VCF files, while Union indicates that all calls should be
+ * returned. Priority indicates which file to prefer when setting genotypes; if the "GQ" flag
+ * is omitted, genotypes are merged according to highest genotype quality.
 *
 * @Author chartl
 * @Date Jan 29, 2010
 */
-public abstract class SimpleVCFIntersectWalker extends RodWalker<VCFRecordPair,Long>{
+public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
+    @Argument(fullName="locusUnion", shortName="union", doc="Will union loci rather than intersecting", required=false)
+    boolean union = false;
+    @Argument(fullName="mergeByQual", shortName="GQ", doc="Will merge genotypes according to quality rather than priority", required=false)
+    boolean useQuality = false;
+
+    private VCFRecordMerger merger; // no cost of repeated instantiation
+    private VCFWriter writer;
+    private boolean headerNotWritten;

    public void initialize() {
-
+        merger = new VCFRecordMerger();
+        writer = new VCFWriter(out);
+        headerNotWritten = true;
    }

    public Long reduceInit() {
        return 0l;
    }

-    public VCFRecordPair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        return null;
+    public VCFRecordMerger map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( tracker == null ) {
+            return null;
+        }
+
+        RodVCF priorityCall = ( RodVCF ) tracker.lookup("priority",null);
+        RodVCF otherCall = ( RodVCF ) tracker.lookup("other",null);
+
+        if ( priorityCall == null && otherCall == null ) {
+            return null;
+        }
+
+        if ( priorityCall == null ) {
+
+            if ( ! union ) {
+                return null;
+            } else if ( merger.hasBeenInstantiated() ) {
+                merger.update(null,otherCall.getRecord());
+            } else {
+                merger.hold(otherCall.getRecord());
+            }
+
+        } else if ( otherCall == null ) {
+
+            if ( ! union ) {
+                return null;
+            } else if ( merger.hasBeenInstantiated() ) {
+                merger.update(priorityCall.getRecord(),null);
+            } else {
+                merger.hold(priorityCall.getRecord());
+            }
+
+        } else {
+
+            if ( merger.hasBeenInstantiated() ) {
+                merger.update(priorityCall.getRecord(), otherCall.getRecord());
+            } else {
+                merger = instantiateMerger(priorityCall.getRecord(),otherCall.getRecord());
+            }
+
+        }
+
+        return merger;
    }

-    public Long reduce(VCFRecordPair records, long prevReduce) {
-        return 0l;
+    public Long reduce(VCFRecordMerger records, Long prevReduce) {
+        if ( records == null ) {
+            return prevReduce;
+        } else {
+            if ( records.hasBeenInstantiated() ) {
+                if ( headerNotWritten ) {
+                    writeHeader(records);
+                }
+                while ( records.hasHeldRecords() ) {
+                    writer.addRecord(records.getHeldRecord());
+                    prevReduce++;
+                }
+
+                writer.addRecord(records.getMergedRecord());
+                prevReduce++;
+            }
+        }
+
+        return prevReduce;
    }

-    public void onTraversalDone(long finalReduce) {
-        return;
+
+    public void onTraversalDone(Long variantsAdded) {
+        logger.info(variantsAdded);
    }
+
+    private VCFRecordMerger instantiateMerger(VCFRecord priority, VCFRecord other) {
+        List<String> samples = Arrays.asList(priority.getSampleNames());
+        samples.retainAll(Arrays.asList(other.getSampleNames()));
+
+        return new VCFRecordMerger(priority,other,useQuality,true,samples,merger.getAllHeldRecords());
+    }
+
+    private void writeHeader(VCFRecordMerger records) {
+        VCFHeader header = new VCFHeader(new HashSet<VCFHeaderLine>(),records.getSamples());
+        writer.writeHeader(header);
+        headerNotWritten = false;
+    }
+
 }

-class VCFRecordPair {
-    public VCFRecord rec1;
-    public VCFRecord rec2;
+class VCFRecordMerger {
+    private List<VCFRecord> holder;
+    private VCFGenotypeMerger genotypeMerger;
+    private VCFRecord record1;
+    private VCFRecord record2;
+    private boolean isMergeByQual;
+    private boolean record1Priority;
+    private Collection<String> samples;
+
+    public VCFRecordMerger(VCFRecord firstRecord, VCFRecord secondRecord, boolean useQual, boolean record1Priority, Collection<String> intersectedSamples, List<VCFRecord> heldRecords) {
+        this.record1 = firstRecord;
+        this.record2 = secondRecord;
+        this.isMergeByQual = useQual;
+        this.record1Priority = record1Priority;
+        this.samples = intersectedSamples;
+        instantiateGenotypeMerger();
+        holder = heldRecords;
+    }
+
+    public Set<String> getSamples() {
+        Set<String> sam = new HashSet<String>();
+        for ( String s : samples ) {
+            sam.add(s);
+        }
+        return sam;
+    }
+
+    public VCFRecordMerger() {
+        holder = new ArrayList<VCFRecord>();
+    }
+
+    public List<VCFRecord> getAllHeldRecords() {
+        return holder;
+    }
+
+    public void update(VCFRecord firstRecord, VCFRecord secondRecord) {
+        record1 = firstRecord;
+        record2 = secondRecord;
+    }
+
+    public void hold(VCFRecord record) {
+        holder.add(record);
+    }
+
+    public boolean hasHeldRecords() {
+        return holder.size() > 0;
+    }
+
+    public VCFRecord getHeldRecord() {
+        if ( samples == null ) {
+            throw new IllegalStateException("Held records are being accessed before intersection sample set have been created");
+        }
+
+        return downsample(holder.remove(0));
+    }
+
+    private void instantiateGenotypeMerger() {
+        if ( record1 != null && record2 != null ) {
+            ArrayList<String> keyset1 =  new ArrayList<String>(Arrays.asList(record1.getGenotypeFormatString().split(":")));
+            ArrayList<String> keyset2 =  new ArrayList<String>(Arrays.asList(record2.getGenotypeFormatString().split(":")));
+            keyset2.retainAll(keyset1);
+            genotypeMerger = new VCFGenotypeMerger(keyset2);
+        }
+    }
+
+    public VCFRecord getMergedRecord() {
+        if ( record2 == null ) {
+            return downsample(record1);
+        } else if ( record1 == null ) {
+            return downsample(record2);
+        } else {
+            HashMap<VCFHeader.HEADER_FIELDS,String> newFields = getFields(record1,record2);
+            List<VCFGenotypeRecord> newGenotypes = mergeGenotypes(record1,record2);
+            return new VCFRecord(newFields,genotypeMerger.getFormatString(),newGenotypes);
+        }
+    }
+
+    private VCFRecord downsample(VCFRecord record) {
+        HashMap<VCFHeader.HEADER_FIELDS,String> newFields = getFields(record,null);
+        List<VCFGenotypeRecord> newGenotypes = new ArrayList<VCFGenotypeRecord>();
+        for ( String s : samples ) {
+            newGenotypes.add(record.getGenotype(s) );
+        }
+
+        return new VCFRecord(newFields,record.getGenotypeFormatString(),newGenotypes);
+    }
+
+    private List<VCFGenotypeRecord> mergeGenotypes(VCFRecord first, VCFRecord second) {
+        List<VCFGenotypeRecord> genotypes = new ArrayList<VCFGenotypeRecord>();
+        for ( String s : samples ) {
+            if ( isMergeByQual ) {
+                genotypes.add(genotypeMerger.mergeByQual(first.getGenotype(s), second.getGenotype(s)));
+            } else if ( record1Priority ) {
+                genotypes.add(genotypeMerger.resetKeys(first.getGenotype(s)));
+            } else {
+                genotypes.add(genotypeMerger.resetKeys(second.getGenotype(s)));
+            }
+        }
+
+        return genotypes;
+    }
+
+    private HashMap<VCFHeader.HEADER_FIELDS,String> getFields(VCFRecord record1, VCFRecord record2) {
+        HashMap<VCFHeader.HEADER_FIELDS,String> fields = new HashMap<VCFHeader.HEADER_FIELDS,String>();
+        fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getLocation().getContig());
+        fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getLocation().getStart()));
+        fields.put(VCFHeader.HEADER_FIELDS.ID, record1.getID());
+        fields.put(VCFHeader.HEADER_FIELDS.ALT, listAsString(record1.getAlternateAlleleList()));
+        fields.put(VCFHeader.HEADER_FIELDS.REF, record1.getReference());
+        fields.put(VCFHeader.HEADER_FIELDS.QUAL, String.format("%f", record1.getQual()));
+        if ( record2 != null && record1.isFiltered() && record2.isFiltered() ) {
+            fields.put(VCFHeader.HEADER_FIELDS.FILTER,"BOTH_FILTERED");
+        } else if ( record1.isFiltered() || ( record2 != null && record2.isFiltered() ) ) {
+            fields.put(VCFHeader.HEADER_FIELDS.FILTER,"ONE_FILTERED");
+        }
+        return fields;
+    }
+
+    private String listAsString(List<String> lString) {
+        StringBuilder b = new StringBuilder();
+        b.append(lString.remove(0));
+        for ( String s : lString ) {
+            b.append(",");
+            b.append(s);
+        }
+
+        return b.toString();
+    }
+
+    public boolean hasBeenInstantiated() {
+        return genotypeMerger != null;
+    }
+
 }
+
+class VCFGenotypeMerger {
+    private List<String> intersectKeys;
+    private String intersectFormatString;
+
+    public VCFGenotypeMerger(List<String> keys) {
+        intersectKeys = keys;
+        intersectFormatString = keyJoin(keys);
+    }
+
+    public String getFormatString() {
+        return intersectFormatString;
+    }
+
+    public VCFGenotypeRecord mergeByQual(VCFGenotypeRecord record1, VCFGenotypeRecord record2) {
+        VCFGenotypeRecord toReturn;
+        if ( record2 == null || record2.isNoCall() || record2.isFiltered() || record1.getNegLog10PError() > record2.getNegLog10PError() ) {
+            toReturn = record1;
+        } else {
+            toReturn = record2;
+        }
+
+        resetKeys(toReturn);
+
+        return toReturn;
+    }
+
+    public VCFGenotypeRecord resetKeys(VCFGenotypeRecord r) {
+
+        HashMap<String,String> newFields = new HashMap<String,String>();
+
+        for ( String key : intersectKeys ) {
+            newFields.put(key,r.getFields().get(key));
+        }
+
+        r.replaceFields(newFields);
+
+        return r;
+    }
+
+
+    private String keyJoin(List<String> sList) {
+        StringBuffer format = new StringBuffer(sList.get(0));
+        for ( int i = 1; i < sList.size(); i ++ ) {
+            format.append(":");
+            format.append(sList.get(i));
+        }
+
+        return format.toString();
+    }
+
+
+}
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFCombine.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFCombine.java
@ -30,7 +30,7 @@ public class VCFCombine extends RodWalker<VCFRecord, VCFWriter> {
    @Argument(fullName="combination_type", shortName="type", doc="combination type; currently UNION and MERGE are supported", required=true)
    protected COMBINATION_TYPE COMBO_TYPE;

-    @Argument(fullName="rod_priority_list", shortName="priority", doc="For the UNION combination type: a comma-separated string describing the priority ordering for the rods as far as which record gets emitted; if not specified, then the first rod seen will be used", required=false)
+    @Argument(fullName="rod_priority_list", shortName="priority", doc="For the UNION combination type: a comma-separated string describing the priority ordering for the rods as far as which record gets emitted; a complete priority list MUST be provided", required=false)
    protected String PRIORITY_STRING = null;

    @Argument(fullName="annotateUnion", shortName="A", doc="For the UNION combination type: if provided, the output union VCF will contain venn information about where each call came from", required=false)
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java
@ -307,4 +307,11 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
        //result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
        return result;
    }
+
+    public void replaceFields(HashMap<String,String> newFields) {
+        mFields.clear();
+        for ( String s : newFields.keySet() ) {
+            mFields.put(s,newFields.get(s));    
+        }
+    }
 }
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
@ -329,7 +329,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
    }

    public boolean isInDBSNP() {
-        return ( mID != null || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) );
+        return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) );
    }

    public boolean isInHapmap() {