Validate VCF with sequence dictionary

2015-11-10 14:22:47 -05:00 · 2015-11-10 14:22:47 -05:00 · ccaddefa19
parent 29e86a0d01
commit ccaddefa19
18 changed files with 149 additions and 64 deletions
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/bqsr/BQSRIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/bqsr/BQSRIntegrationTest.java
@ -120,7 +120,7 @@ public class BQSRIntegrationTest extends WalkerTest {
                {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "0b5a8e259e997e4c7b5836d4c28e6f4d")},
                {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "281682124584ab384f23359934df0c3b")},
                {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0a92fdff5fd26227c29d34eda5a32f49")},
-                {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "90d8c24077e8ae9a0037a9aad5f09e31")},
+                {new BQSRTest(hg18Reference, privateTestDir + "originalQuals.1kg.chr1.1-1K.1RG.bam", "chr1:1-1,000", " -OQ", "90d8c24077e8ae9a0037a9aad5f09e31")},
                {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "c41ef02c640ef1fed4bfc03b9b33b616")},
                {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "b577cd1d529425f66db49620db09fdca")},
                {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "0b5a8e259e997e4c7b5836d4c28e6f4d")},
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java
@ -189,7 +189,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
    @Test
    public void testComplexEvents() {
        WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString("complexEvents.vcf", "ALL"),
+                baseTestString("complexEvents.vcf", "ALL", DEFAULT_REGION, b37KGReference),
                0,
                Arrays.asList(EMPTY_MD5)
        );
@ -197,6 +197,15 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
        executeTest("test validating complex events", spec);
    }

+    @Test(description = "Checks out of order header contigs")
+    public void testOutOfOrderHeaderContigsError() {
+
+        WalkerTestSpec spec = new WalkerTestSpec(
+                baseTestString("complexEvents-outOfOrder.vcf", "ALL", DEFAULT_REGION, b37KGReference),
+                0, UserException.LexicographicallySortedSequenceDictionary.class);
+        executeTest("test out of order header contigs error", spec);
+    }
+
    @Test(description = "Fixes '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164")
    public void testUnusedAlleleFix() {
        WalkerTestSpec spec = new WalkerTestSpec(
@ -255,4 +264,31 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
        // Set the log level back
        logger.setLevel(level);
    }
+
+    @Test(description = "Checks '''issue''' reported in issue https://github.com/broadinstitute/gsa-unstable/issues/964")
+    public void testWrongContigHeaderLengthError()  {
+
+        WalkerTestSpec spec = new WalkerTestSpec(
+                baseTestString("longAlleles-wrongLength.vcf", "ALL", "1", b37KGReference),
+                0, UserException.IncompatibleSequenceDictionaries.class);
+        executeTest("test wrong header contig length error", spec);
+    }
+
+    @Test
+    public void testAllowWrongContigHeaderLengthDictIncompat()  {
+
+        WalkerTestSpec spec = new WalkerTestSpec(
+                baseTestString("longAlleles-wrongLength.vcf", "ALL", "1", b37KGReference) + "  --reference_window_stop 208 -U ALLOW_SEQ_DICT_INCOMPATIBILITY ",
+                0, Arrays.asList(EMPTY_MD5));
+        executeTest("test to allow wrong header contig length, not checking dictionary incompatibility", spec);
+    }
+
+    @Test
+    public void testAllowWrongContigHeaderLength()  {
+
+        WalkerTestSpec spec = new WalkerTestSpec(
+                baseTestString("longAlleles-wrongLength.vcf", "ALL", "1", b37KGReference) + "  --reference_window_stop 208 -U ",
+                0, Arrays.asList(EMPTY_MD5));
+        executeTest("test to allow wrong header contig length, no compatibility checks", spec);
+    }
 }
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
@ -886,8 +886,8 @@ public class GenomeAnalysisEngine {
            // Compile a set of sequence names that exist in the BAM files.
            SAMSequenceDictionary readsDictionary = reads.getHeader().getSequenceDictionary();

-            if (readsDictionary.size() == 0) {
-                logger.info("Reads file is unmapped.  Skipping validation against reference.");
+            if (readsDictionary.isEmpty()) {
+                logger.info("Reads file is unmapped. Skipping validation against reference.");
                return;
            }

--- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java
+++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java
@ -39,18 +39,18 @@ public class CramIntegrationTest extends WalkerTest {
    @DataProvider(name="cramData")
    public Object[][] getCRAMData() {
        return new Object[][] {
-                {"PrintReads", "exampleBAM.bam", "", "cram", "95cce9111a377d8e84fd076f7f0744df"},
-                {"PrintReads", "exampleCRAM.cram", "", "cram", "95cce9111a377d8e84fd076f7f0744df"},
-                {"PrintReads", "exampleCRAM.cram", "", "bam", "7c716d5103c20e95a8115b3c2848bf0d"},
-                {"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "5cc9cac96f7f9819688d47a28ed97778"},
+                {"PrintReads", "exampleBAM.bam", "", "cram", "424c725c4ffe7215e358ecf5abd5e5e8"},
+                {"PrintReads", "exampleCRAM.cram", "", "cram", "424c725c4ffe7215e358ecf5abd5e5e8"},
+                {"PrintReads", "exampleCRAM.cram", "", "bam", "247805098718dd74b8a871796424d359"},
+                {"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "a5b26631cd89f86f6184bcac7bc9c9ca"},
                {"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"},
                {"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
                {"CountReads", "exampleCRAM.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"},
                {"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
-                {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
+                {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "24dbd14b60220461f47ec5517962cb7f"},
                {"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
                {"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
-                {"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
+                {"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "84bee5063d8fa0d07e7c3ff7e825ae3a"},
                {"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
                {"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
        };
--- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java
+++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java
@ -38,7 +38,7 @@ public class GATKKeyIntegrationTest extends WalkerTest {
    public static final String BASE_COMMAND = String.format("-T TestPrintReadsWalker -R %s -I %s -o %%s",
                                                            publicTestDir + "exampleFASTA.fasta",
                                                            publicTestDir + "exampleBAM.bam");
-    public static final String MD5_UPON_SUCCESSFUL_RUN = "69b6432aed2d0cebf02e5d414f654425";
+    public static final String MD5_UPON_SUCCESSFUL_RUN = "462656ec9632f8c21ee534d35093c3f8";


    private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class expectedException, String md5 ) {
--- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/BadReadGroupsIntegrationTest.java
+++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/BadReadGroupsIntegrationTest.java
@ -35,7 +35,7 @@ public class BadReadGroupsIntegrationTest extends WalkerTest {
    @Test
    public void testMissingReadGroup() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
-                "-T TestPrintReadsWalker -R " + b36KGReference + " -I " + privateTestDir + "missingReadGroup.bam -o /dev/null",
+                "-T TestPrintReadsWalker -R " + hg18Reference + " -I " + privateTestDir + "missingReadGroup.bam -o /dev/null",
                0,
                UserException.ReadMissingReadGroup.class);
        executeTest("test Missing Read Group", spec);
@ -44,7 +44,7 @@ public class BadReadGroupsIntegrationTest extends WalkerTest {
    @Test
    public void testUndefinedReadGroup() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
-                "-T TestPrintReadsWalker -R " + b36KGReference + " -I " + privateTestDir + "undefinedReadGroup.bam -o /dev/null",
+                "-T TestPrintReadsWalker -R " + hg18Reference + " -I " + privateTestDir + "undefinedReadGroup.bam -o /dev/null",
                0,
                UserException.ReadHasUndefinedReadGroup.class);
        executeTest("test Undefined Read Group", spec);
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java
@ -25,6 +25,7 @@

 package org.broadinstitute.gatk.utils;

+import java.math.BigInteger;
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.SAMSequenceRecord;
 import org.apache.log4j.Logger;
@ -79,7 +80,7 @@ public class SequenceDictionaryUtils {
        IDENTICAL,                      // the dictionaries are identical
        COMMON_SUBSET,                  // there exists a common subset of equivalent contigs
        NO_COMMON_CONTIGS,              // no overlap between dictionaries
-        UNEQUAL_COMMON_CONTIGS,         // common subset has contigs that have the same name but different lengths
+        UNEQUAL_COMMON_CONTIGS,         // common subset has contigs that have the same name but different lengths and/or MD5s
        NON_CANONICAL_HUMAN_ORDER,      // human reference detected but the order of the contigs is non-standard (lexicographic, for examine)
        OUT_OF_ORDER,                   // the two dictionaries overlap but the overlapping contigs occur in different
                                        // orders with respect to each other
@ -92,7 +93,7 @@ public class SequenceDictionaryUtils {
     * @param validationExclusion exclusions to validation
     * @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency
     */
-    private static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) {
+    private static boolean allowNonFatalIncompabilities(final ValidationExclusion.TYPE validationExclusion) {
        return ( validationExclusion == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY ||
                        validationExclusion == ValidationExclusion.TYPE.ALL );
    }
@ -133,15 +134,23 @@ public class SequenceDictionaryUtils {
                throw new UserException.IncompatibleSequenceDictionaries("No overlapping contigs found", name1, dict1, name2, dict2);

            case UNEQUAL_COMMON_CONTIGS: {
-                List<SAMSequenceRecord> x = findDisequalCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2);
-                SAMSequenceRecord elt1 = x.get(0);
-                SAMSequenceRecord elt2 = x.get(1);
+                final List<SAMSequenceRecord> x = findNotEqualCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2);
+                final SAMSequenceRecord elt1 = x.get(0);
+                final SAMSequenceRecord elt2 = x.get(1);
+
+                String msg = "Found contigs with the same name but different lengths";
+                String contig1  = "  contig  " + name1 + " is named " + elt1.getSequenceName()  + " with length " + Integer.toString(elt1.getSequenceLength());
+                if ( elt1.getMd5() != null )
+                    contig1 += " and MD5 " + elt1.getMd5();
+                String contig2  = "  contig  " + name2 + " is named " + elt2.getSequenceName()  + " with length " + Integer.toString(elt2.getSequenceLength());
+                if ( elt2.getMd5() != null )
+                    contig2 += " and MD5 " + elt2.getMd5();
+                if ( elt1.getMd5() != null ||  elt2.getMd5() != null )
+                    msg += " or MD5s:";
+                msg += "\n" + contig1 + "\n" + contig2;

                // todo -- replace with toString when SAMSequenceRecord has a nice toString routine
-                UserException ex = new UserException.IncompatibleSequenceDictionaries(String.format("Found contigs with the same name but different lengths:\n  contig %s = %s / %d\n  contig %s = %s / %d",
-                        name1, elt1.getSequenceName(), elt1.getSequenceLength(),
-                        name2, elt2.getSequenceName(), elt2.getSequenceLength()),
-                        name1, dict1, name2, dict2);
+                final UserException ex = new UserException.IncompatibleSequenceDictionaries(msg, name1, dict1, name2, dict2);

                if ( allowNonFatalIncompabilities(validationExclusion) )
                    logger.warn(ex.getMessage());
@ -226,7 +235,7 @@ public class SequenceDictionaryUtils {

        final Set<String> commonContigs = getCommonContigsByName(dict1, dict2);

-        if (commonContigs.size() == 0)
+        if (commonContigs.isEmpty())
            return SequenceDictionaryCompatibility.NO_COMMON_CONTIGS;
        else if ( ! commonContigsHaveSameLengths(commonContigs, dict1, dict2) )
            return SequenceDictionaryCompatibility.UNEQUAL_COMMON_CONTIGS;
@ -250,12 +259,12 @@ public class SequenceDictionaryUtils {
     * @param dict2
     * @return true if all of the common contigs are equivalent
     */
-    private static boolean commonContigsHaveSameLengths(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
-        return findDisequalCommonContigs(commonContigs, dict1, dict2) == null;
+    private static boolean commonContigsHaveSameLengths(final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) {
+        return findNotEqualCommonContigs(commonContigs, dict1, dict2) == null;
    }

    /**
-     * Returns a List(x,y) that contains two disequal sequence records among the common contigs in both dicts.  Returns
+     * Returns a List(x,y) that contains two sequence records that are not equal among the common contigs in both dicts.  Returns
     * null if all common contigs are equivalent
     *
     * @param commonContigs
@ -263,7 +272,7 @@ public class SequenceDictionaryUtils {
     * @param dict2
     * @return
     */
-    private static List<SAMSequenceRecord> findDisequalCommonContigs(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
+    private static List<SAMSequenceRecord> findNotEqualCommonContigs(final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) {
        for ( String name : commonContigs ) {
            SAMSequenceRecord elt1 = dict1.getSequence(name);
            SAMSequenceRecord elt2 = dict2.getSequence(name);
@ -275,32 +284,32 @@ public class SequenceDictionaryUtils {
    }

    /**
-     * Helper routine that returns two sequence records are equivalent, defined as having the same name and
-     * lengths, if both are non-zero
+     * Helper routine that determines if two sequence records are equivalent, defined as having the same name,
+     * lengths (if both are non-zero) and MD5 (if present)
     *
-     * @param me
-     * @param that
-     * @return
+     * @param record1  a SAMSequenceRecord
+     * @param record2  a SAMSequenceRecord
+     * @return true if the records are equivalent, false otherwise
     */
-    private static boolean sequenceRecordsAreEquivalent(final SAMSequenceRecord me, final SAMSequenceRecord that) {
-        if (me == that) return true;
-        if (that == null) return false;
+    private static boolean sequenceRecordsAreEquivalent(final SAMSequenceRecord record1, final SAMSequenceRecord record2) {
+        if ( record1 == record2 ) return true;
+        if ( record1 == null || record2 == null ) return false;

-        if (me.getSequenceLength() != 0 && that.getSequenceLength() != 0 && me.getSequenceLength() != that.getSequenceLength())
+        // compare length
+        if ( record1.getSequenceLength() != 0 && record2.getSequenceLength() != 0 && record1.getSequenceLength() != record2.getSequenceLength() )
            return false;

-            // todo -- reenable if we want to be really strict here
-//        if (me.getExtendedAttribute(SAMSequenceRecord.MD5_TAG) != null && that.getExtendedAttribute(SAMSequenceRecord.MD5_TAG) != null) {
-//            final BigInteger thisMd5 = new BigInteger((String)me.getExtendedAttribute(SAMSequenceRecord.MD5_TAG), 16);
-//            final BigInteger thatMd5 = new BigInteger((String)that.getExtendedAttribute(SAMSequenceRecord.MD5_TAG), 16);
-//            if (!thisMd5.equals(thatMd5)) {
-//                return false;
-//            }
-//        }
-//        else {
-        if (me.getSequenceName() != that.getSequenceName())
-            return false; // Compare using == since we intern() the Strings
-//        }
+        // compare name
+        if ( !record1.getSequenceName().equals(record2.getSequenceName() ))
+            return false;
+
+         // compare MD5
+         if ( record1.getMd5() != null && record2.getMd5() != null ){
+            final BigInteger firstMd5 = new BigInteger(record1.getMd5(), 16);
+            final BigInteger secondMd5 = new BigInteger(record2.getMd5(), 16);
+            if ( !firstMd5.equals(secondMd5) )
+                return false;
+       }

        return true;
    }
@ -313,13 +322,13 @@ public class SequenceDictionaryUtils {
     * @param dict
     * @return
     */
-    private static boolean nonCanonicalHumanContigOrder(SAMSequenceDictionary dict) {
+    private static boolean nonCanonicalHumanContigOrder(final SAMSequenceDictionary dict) {
        if ( ! ENABLE_LEXICOGRAPHIC_REQUIREMENT_FOR_HUMAN ) // if we don't want to enable this test, just return false
            return false;

        SAMSequenceRecord chr1 = null, chr2 = null, chr10 = null;

-        for ( SAMSequenceRecord elt : dict.getSequences() ) {
+        for ( final SAMSequenceRecord elt : dict.getSequences() ) {
            if ( isHumanSeqRecord(elt, CHR1_HG18, CHR1_HG19 ) ) chr1 = elt;
            if ( isHumanSeqRecord(elt, CHR2_HG18, CHR2_HG19 ) ) chr2 = elt;
            if ( isHumanSeqRecord(elt, CHR10_HG18, CHR10_HG19 ) ) chr10 = elt;
@ -355,7 +364,7 @@ public class SequenceDictionaryUtils {
     * @param dict2 second SAMSequenceDictionary
     * @return true if the common contigs occur in the same relative order in both dict1 and dict2, otherwise false
     */
-    private static boolean commonContigsAreInSameRelativeOrder(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
+    private static boolean commonContigsAreInSameRelativeOrder(final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) {
        List<SAMSequenceRecord> list1 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict1));
        List<SAMSequenceRecord> list2 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict2));

@ -376,8 +385,8 @@ public class SequenceDictionaryUtils {
     * @param dict
     * @return
     */
-    private static List<SAMSequenceRecord> getSequencesOfName(Set<String> commonContigs, SAMSequenceDictionary dict) {
-        List<SAMSequenceRecord> l = new ArrayList<SAMSequenceRecord>(commonContigs.size());
+    private static List<SAMSequenceRecord> getSequencesOfName(final Set<String> commonContigs, final SAMSequenceDictionary dict) {
+        final List<SAMSequenceRecord> l = new ArrayList<SAMSequenceRecord>(commonContigs.size());
        for ( String name : commonContigs ) {
            l.add(dict.getSequence(name) );
        }
@ -401,7 +410,7 @@ public class SequenceDictionaryUtils {
     * @param unsorted
     * @return
     */
-    private static List<SAMSequenceRecord> sortSequenceListByIndex(List<SAMSequenceRecord> unsorted) {
+    private static List<SAMSequenceRecord> sortSequenceListByIndex(final List<SAMSequenceRecord> unsorted) {
        Collections.sort(unsorted, new CompareSequenceRecordsByIndex());
        return unsorted;
    }
@ -418,8 +427,8 @@ public class SequenceDictionaryUtils {
     */
    private static boolean commonContigsAreAtSameIndices( final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2 ) {
        for ( String commonContig : commonContigs ) {
-            SAMSequenceRecord dict1Record = dict1.getSequence(commonContig);
-            SAMSequenceRecord dict2Record = dict2.getSequence(commonContig);
+            final SAMSequenceRecord dict1Record = dict1.getSequence(commonContig);
+            final SAMSequenceRecord dict2Record = dict2.getSequence(commonContig);

            // Each common contig must have the same index in both dictionaries
            if ( dict1Record.getSequenceIndex() != dict2Record.getSequenceIndex() ) {
@ -489,13 +498,13 @@ public class SequenceDictionaryUtils {
     * @return
     */
    public static Set<String> getCommonContigsByName(SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
-        Set<String> intersectingSequenceNames = getContigNames(dict1);
+        final Set<String> intersectingSequenceNames = getContigNames(dict1);
        intersectingSequenceNames.retainAll(getContigNames(dict2));
        return intersectingSequenceNames;
    }

    public static Set<String> getContigNames(SAMSequenceDictionary dict) {
-        Set<String> contigNames = new HashSet<String>(Utils.optimumHashSize(dict.size()));
+        final Set<String> contigNames = new HashSet<String>(Utils.optimumHashSize(dict.size()));
        for (SAMSequenceRecord dictionaryEntry : dict.getSequences())
            contigNames.add(dictionaryEntry.getSequenceName());
        return contigNames;
@ -515,7 +524,7 @@ public class SequenceDictionaryUtils {
            throw new IllegalArgumentException("Sequence dictionary must be non-null");
        }

-        StringBuilder s = new StringBuilder("[ ");
+        final StringBuilder s = new StringBuilder("[ ");

        for ( SAMSequenceRecord dictionaryEntry : dict.getSequences() ) {
            s.append(dictionaryEntry.getSequenceName());
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/ValidationExclusion.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/ValidationExclusion.java
@ -40,7 +40,7 @@ public class ValidationExclusion {
        ALLOW_UNINDEXED_BAM,        // allow bam files that do not have an index; we'll traverse them using monolithic shard
        ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set
        NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file
-        ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities
+        ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompatibilities
        LENIENT_VCF_PROCESSING,         // allow non-standard values for standard VCF header lines.  Don't worry about size differences between header and values, etc.
        @EnumerationArgumentDefault // set the ALL value to the default value, so if they specify just -U, we get the ALL
        ALL                         // do not check for all of the above conditions, DEFAULT
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/IndexDictionaryUtils.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/IndexDictionaryUtils.java
@ -102,7 +102,7 @@ public class IndexDictionaryUtils {
                                                       final SAMSequenceDictionary referenceDict,
                                                       final ValidationExclusion.TYPE validationExclusionType ) {
        // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
-        if (trackDict == null || trackDict.size() == 0)
+        if (trackDict == null || trackDict.isEmpty())
            logger.warn("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        else {
            Set<String> trackSequences = new TreeSet<String>();
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrack.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrack.java
@ -93,7 +93,7 @@ public class RMDTrack {
     * @param dict the sam sequence dictionary
     * @param codec the feature codec we use to decode this type
     */
-    public RMDTrack(Class type, String name, File file, AbstractFeatureReader reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
+    public RMDTrack(final Class type, final String name, final File file, final AbstractFeatureReader reader, final SAMSequenceDictionary dict, final GenomeLocParser genomeLocParser, final FeatureCodec codec) {
        this.type = type;
        this.name = name;
        this.file = file;
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrackBuilder.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrackBuilder.java
@ -26,6 +26,9 @@
 package org.broadinstitute.gatk.utils.refdata.tracks;

 import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.variant.vcf.VCFContigHeaderLine;
+import htsjdk.variant.vcf.VCFHeader;
 import org.apache.log4j.Logger;
 import htsjdk.tribble.AbstractFeatureReader;
 import htsjdk.tribble.FeatureCodec;
@ -34,6 +37,7 @@ import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.index.Index;
 import htsjdk.tribble.index.IndexFactory;
 import htsjdk.tribble.util.LittleEndianOutputStream;
+import org.broadinstitute.gatk.utils.SequenceDictionaryUtils;
 import org.broadinstitute.gatk.utils.commandline.ArgumentTypeDescriptor;
 import org.broadinstitute.gatk.utils.commandline.Tags;
 import org.broadinstitute.gatk.utils.ValidationExclusion;
@ -49,6 +53,8 @@ import org.broadinstitute.gatk.utils.instrumentation.Sizeof;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;


@ -131,7 +137,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
     *
     * @return an instance of the track
     */
-    public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) {
+    public RMDTrack createInstanceOfTrack(final RMDTriplet fileDescriptor) {
        String name = fileDescriptor.getName();
        File inputFile = new File(fileDescriptor.getFile());

@ -146,9 +152,43 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
        else
            pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
        if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
+
+        validateVariantAgainstSequenceDictionary(name, descriptor.getName(), pair.first, pair.second);
+
        return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name, inputFile));
    }

+    /**
+     * Validate the VCF dictionary against the sequence dictionary.
+     *
+     * @param name      the name of this specific track
+     * @param descriptorName  the name of the feature
+     * @param reader    the feature reader to use as the underlying data source
+     * @param dict      the sam sequence dictionary
+     */
+    private void validateVariantAgainstSequenceDictionary(final String name, final String descriptorName, final AbstractFeatureReader reader, final SAMSequenceDictionary dict ) throws UserException {
+        // only process if the variant is a VCF
+        if ( name.equals("variant") && descriptorName.equals("VCF") ){
+            if ( reader != null && dict != null && reader.getHeader() != null ){
+                final List<VCFContigHeaderLine> contigs = ((VCFHeader) reader.getHeader()).getContigLines();
+                if (contigs != null) {
+                    // make the VCF dictionary from the contig header fields
+                    final List<SAMSequenceRecord> vcfContigRecords = new ArrayList<SAMSequenceRecord>();
+                    for (final VCFContigHeaderLine contig : contigs)
+                        vcfContigRecords.add(contig.getSAMSequenceRecord());
+
+                    // have VCF contig fields so can make a dictionary and compare it to the sequence dictionary
+                    if (!vcfContigRecords.isEmpty()) {
+                        final SAMSequenceDictionary vcfDictionary = new SAMSequenceDictionary(vcfContigRecords);
+                        final SAMSequenceDictionary sequenceDictionary = new SAMSequenceDictionary(dict.getSequences());
+
+                        SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, name, vcfDictionary, "sequence", sequenceDictionary, false, null);
+                    }
+                }
+            }
+        }
+    }
+
    /**
     * Convenience method simplifying track creation.  Assume unnamed track based on a file rather than a stream.
     * @param codecClass Type of Tribble codec class to build.
@ -228,7 +268,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
                sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);

                // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
-                if (sequenceDictionary.size() == 0 && dict != null) {
+                if (sequenceDictionary.isEmpty() && dict != null) {
                    validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);

                    if ( ! disableAutoIndexCreation ) {
--- a/public/gatk-utils/src/test/resources/exampleBAM.bam
+++ b/public/gatk-utils/src/test/resources/exampleBAM.bam
--- a/public/gatk-utils/src/test/resources/exampleBAM.bam.bai
+++ b/public/gatk-utils/src/test/resources/exampleBAM.bam.bai
--- a/public/gatk-utils/src/test/resources/exampleCRAM-nobai-nocrai.cram
+++ b/public/gatk-utils/src/test/resources/exampleCRAM-nobai-nocrai.cram
--- a/public/gatk-utils/src/test/resources/exampleCRAM-nobai-withcrai.cram
+++ b/public/gatk-utils/src/test/resources/exampleCRAM-nobai-withcrai.cram
--- a/public/gatk-utils/src/test/resources/exampleCRAM-nobai-withcrai.cram.crai
+++ b/public/gatk-utils/src/test/resources/exampleCRAM-nobai-withcrai.cram.crai
--- a/public/gatk-utils/src/test/resources/exampleCRAM.cram
+++ b/public/gatk-utils/src/test/resources/exampleCRAM.cram
--- a/public/gatk-utils/src/test/resources/exampleCRAM.cram.crai
+++ b/public/gatk-utils/src/test/resources/exampleCRAM.cram.crai