diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 070898654..ba25ac957 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -48,6 +48,7 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; +import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; @@ -254,13 +255,16 @@ public class GenomeAnalysisEngine { // Prepare the data for traversal. initializeDataSources(); - // initialize sampleDB - initializeSampleDB(); - // initialize and validate the interval list initializeIntervals(); validateSuppliedIntervals(); + // check to make sure that all sequence dictionaries are compatible with the reference's sequence dictionary + validateDataSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), rodDataSources); + + // initialize sampleDB + initializeSampleDB(); + // our microscheduler, which is in charge of running everything MicroScheduler microScheduler = createMicroscheduler(); threadEfficiencyMonitor = microScheduler.getThreadEfficiencyMonitor(); @@ -753,9 +757,8 @@ public class GenomeAnalysisEngine { * @param reads Reads data source. * @param reference Reference data source. * @param rods a collection of the reference ordered data tracks - * @param manager manager */ - private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection rods, RMDTrackBuilder manager) { + private void validateDataSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection rods) { if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null ) return; @@ -772,11 +775,12 @@ public class GenomeAnalysisEngine { } // compare the reads to the reference - SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary); + SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, + "reference", referenceDictionary, true, intervals); } for (ReferenceOrderedDataSource rod : rods) - manager.validateTrackSequenceDictionary(rod.getName(),rod.getSequenceDictionary(),referenceDictionary); + IndexDictionaryUtils.validateTrackSequenceDictionary(rod.getName(), rod.getSequenceDictionary(), referenceDictionary, getArguments().unsafe); } /** @@ -858,9 +862,6 @@ public class GenomeAnalysisEngine { genomeLocParser, flashbackData())); - // validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match. - validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder); - return dataSources; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java index 17f9ef561..e0b5dd4cb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java @@ -101,7 +101,7 @@ public class IndexDictionaryUtils { Set trackSequences = new TreeSet(); for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences()) trackSequences.add(dictionaryEntry.getSequenceName()); - SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict); + SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict, false, null); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java b/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java index 2c897407a..5e834d273 100644 --- a/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java @@ -53,40 +53,55 @@ public class SequenceDictionaryUtils { // // for detecting lexicographically sorted human references // - - private static boolean ENABLE_LEXICOGRAPHIC_REQUIREMENT_FOR_HUMAN = true; + private static final boolean ENABLE_LEXICOGRAPHIC_REQUIREMENT_FOR_HUMAN = true; // hg18 - private static SAMSequenceRecord CHR1_HG18 = new SAMSequenceRecord("chr1", 247249719); - private static SAMSequenceRecord CHR2_HG18 = new SAMSequenceRecord("chr2", 242951149); - private static SAMSequenceRecord CHR10_HG18 = new SAMSequenceRecord("chr10", 135374737); + protected static final SAMSequenceRecord CHR1_HG18 = new SAMSequenceRecord("chr1", 247249719); + protected static final SAMSequenceRecord CHR2_HG18 = new SAMSequenceRecord("chr2", 242951149); + protected static final SAMSequenceRecord CHR10_HG18 = new SAMSequenceRecord("chr10", 135374737); // hg19 - private static SAMSequenceRecord CHR1_HG19 = new SAMSequenceRecord("chr1", 249250621); - private static SAMSequenceRecord CHR2_HG19 = new SAMSequenceRecord("chr2", 243199373); - private static SAMSequenceRecord CHR10_HG19 = new SAMSequenceRecord("chr10", 135534747); + protected static final SAMSequenceRecord CHR1_HG19 = new SAMSequenceRecord("chr1", 249250621); + protected static final SAMSequenceRecord CHR2_HG19 = new SAMSequenceRecord("chr2", 243199373); + protected static final SAMSequenceRecord CHR10_HG19 = new SAMSequenceRecord("chr10", 135534747); + + // b36 + protected static final SAMSequenceRecord CHR1_B36 = new SAMSequenceRecord("1", 247249719); + protected static final SAMSequenceRecord CHR2_B36 = new SAMSequenceRecord("2", 242951149); + protected static final SAMSequenceRecord CHR10_B36 = new SAMSequenceRecord("10", 135374737); + + // b37 + protected static final SAMSequenceRecord CHR1_B37 = new SAMSequenceRecord("1", 249250621); + protected static final SAMSequenceRecord CHR2_B37 = new SAMSequenceRecord("2", 243199373); + protected static final SAMSequenceRecord CHR10_B37 = new SAMSequenceRecord("10", 135534747); + public enum SequenceDictionaryCompatibility { IDENTICAL, // the dictionaries are identical COMMON_SUBSET, // there exists a common subset of equivalent contigs NO_COMMON_CONTIGS, // no overlap between dictionaries - UNEQUAL_COMMON_CONTIGS, // common subset has contigs that have the same name but aren't equivalent + UNEQUAL_COMMON_CONTIGS, // common subset has contigs that have the same name but different lengths NON_CANONICAL_HUMAN_ORDER, // human reference detected but the order of the contigs is non-standard (lexicographic, for examine) - OUT_OF_ORDER // the two dictionaries overlap but the contigs occur out of order w.r.t each other + OUT_OF_ORDER, // the two dictionaries overlap but the overlapping contigs occur in different + // orders with respect to each other + DIFFERENT_INDICES // the two dictionaries overlap and the overlapping contigs occur in the same + // order with respect to each other, but one or more of them have different + // indices in the two dictionaries. Eg., { chrM, chr1, chr2 } vs. { chr1, chr2 } } /** * @param validationExclusion exclusions to validation * @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency */ - public static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) { + private static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) { return ( validationExclusion == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY || validationExclusion == ValidationExclusion.TYPE.ALL ); } /** - * Testings for compatbility between dict1 and dict2. If the dictionaries are incompatible, then UserExceptions are - * thrown with detailed error messages. If the engine is in permissive mode, then logger.warnings of generated instead + * Tests for compatibility between two sequence dictionaries. If the dictionaries are incompatible, then + * UserExceptions are thrown with detailed error messages. If the engine is in permissive mode, then + * logger warnings are generated instead. * * @param logger for warnings * @param validationExclusion exclusions to validation @@ -94,9 +109,22 @@ public class SequenceDictionaryUtils { * @param dict1 the sequence dictionary dict1 * @param name2 name associated with dict2 * @param dict2 the sequence dictionary dict2 + * @param isReadsToReferenceComparison true if one of the dictionaries comes from a reads data source (eg., a BAM), + * and the other from a reference data source + * @param intervals the user-specified genomic intervals: only required when isReadsToReferenceComparison is true, + * otherwise can be null */ - public static void validateDictionaries(Logger logger, ValidationExclusion.TYPE validationExclusion, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) { - SequenceDictionaryCompatibility type = compareDictionaries(dict1, dict2); + public static void validateDictionaries( final Logger logger, + final ValidationExclusion.TYPE validationExclusion, + final String name1, + final SAMSequenceDictionary dict1, + final String name2, + final SAMSequenceDictionary dict2, + final boolean isReadsToReferenceComparison, + final GenomeLocSortedSet intervals ) { + + final SequenceDictionaryCompatibility type = compareDictionaries(dict1, dict2); + switch ( type ) { case IDENTICAL: return; @@ -134,15 +162,48 @@ public class SequenceDictionaryUtils { logger.warn(ex.getMessage()); else throw ex; + break; } case OUT_OF_ORDER: { - UserException ex = new UserException.IncompatibleSequenceDictionaries("Order of contigs differences, which is unsafe", name1, dict1, name2, dict2); + UserException ex = new UserException.IncompatibleSequenceDictionaries("Relative ordering of overlapping contigs differs, which is unsafe", name1, dict1, name2, dict2); if ( allowNonFatalIncompabilities(validationExclusion) ) logger.warn(ex.getMessage()); else throw ex; - } break; + break; + } + + case DIFFERENT_INDICES: { + // This is currently only known to be problematic when the index mismatch is between a bam and the + // reference AND when the user's intervals actually include one or more of the contigs that are + // indexed differently from the reference. In this case, the engine will fail to correctly serve + // up the reads from those contigs, so throw an exception unless unsafe operations are enabled. + if ( isReadsToReferenceComparison && intervals != null ) { + + final Set misindexedContigs = findMisindexedContigsInIntervals(intervals, dict1, dict2); + + if ( ! misindexedContigs.isEmpty() ) { + final String msg = String.format("The following contigs included in the intervals to process have " + + "different indices in the sequence dictionaries for the reads vs. " + + "the reference: %s. As a result, the GATK engine will not correctly " + + "process reads from these contigs. You should either fix the sequence " + + "dictionaries for your reads so that these contigs have the same indices " + + "as in the sequence dictionary for your reference, or exclude these contigs " + + "from your intervals. This error can be disabled via -U %s, " + + "however this is not recommended as the GATK engine will not behave correctly.", + misindexedContigs, ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY); + final UserException ex = new UserException.IncompatibleSequenceDictionaries(msg, name1, dict1, name2, dict2); + + if ( allowNonFatalIncompabilities(validationExclusion) ) + logger.warn(ex.getMessage()); + else + throw ex; + } + } + break; + } + default: throw new ReviewedStingException("Unexpected SequenceDictionaryComparison type: " + type); } @@ -151,32 +212,33 @@ public class SequenceDictionaryUtils { /** * Workhorse routine that takes two dictionaries and returns their compatibility. * - * @param dict1 - * @param dict2 - * @return + * @param dict1 first sequence dictionary + * @param dict2 second sequence dictionary + * @return A SequenceDictionaryCompatibility enum value describing the compatibility of the two dictionaries */ - public static SequenceDictionaryCompatibility compareDictionaries(SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { - // If there's no overlap between reads and reference, data will be bogus. Throw an exception. - if ( nonCanonicalHumanContigOrder( dict1 ) || nonCanonicalHumanContigOrder(dict2) ) + public static SequenceDictionaryCompatibility compareDictionaries( final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) { + if ( nonCanonicalHumanContigOrder(dict1) || nonCanonicalHumanContigOrder(dict2) ) return SequenceDictionaryCompatibility.NON_CANONICAL_HUMAN_ORDER; - Set commonContigs = getCommonContigsByName(dict1, dict2); + final Set commonContigs = getCommonContigsByName(dict1, dict2); if (commonContigs.size() == 0) return SequenceDictionaryCompatibility.NO_COMMON_CONTIGS; - else if ( ! commonContigsAreEquivalent( commonContigs, dict1, dict2 ) ) + else if ( ! commonContigsHaveSameLengths(commonContigs, dict1, dict2) ) return SequenceDictionaryCompatibility.UNEQUAL_COMMON_CONTIGS; - else if ( ! commonContigsAreInOrder( commonContigs, dict1, dict2 ) ) + else if ( ! commonContigsAreInSameRelativeOrder(commonContigs, dict1, dict2) ) return SequenceDictionaryCompatibility.OUT_OF_ORDER; else if ( commonContigs.size() == dict1.size() && commonContigs.size() == dict2.size() ) return SequenceDictionaryCompatibility.IDENTICAL; + else if ( ! commonContigsAreAtSameIndices(commonContigs, dict1, dict2) ) + return SequenceDictionaryCompatibility.DIFFERENT_INDICES; else { return SequenceDictionaryCompatibility.COMMON_SUBSET; } } /** - * Utility function that tests whether the commonContigs in both dicts are equivalent. Equivalece means + * Utility function that tests whether the commonContigs in both dicts are equivalent. Equivalence means * that the seq records have the same length, if both are non-zero. * * @param commonContigs @@ -184,7 +246,7 @@ public class SequenceDictionaryUtils { * @param dict2 * @return true if all of the common contigs are equivalent */ - private static boolean commonContigsAreEquivalent(Set commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { + private static boolean commonContigsHaveSameLengths(Set commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { return findDisequalCommonContigs(commonContigs, dict1, dict2) == null; } @@ -201,7 +263,7 @@ public class SequenceDictionaryUtils { for ( String name : commonContigs ) { SAMSequenceRecord elt1 = dict1.getSequence(name); SAMSequenceRecord elt2 = dict2.getSequence(name); - if ( ! SequenceRecordsAreEquivalent(elt1, elt2) ) + if ( ! sequenceRecordsAreEquivalent(elt1, elt2) ) return Arrays.asList(elt1,elt2); } @@ -216,12 +278,10 @@ public class SequenceDictionaryUtils { * @param that * @return */ - private static boolean SequenceRecordsAreEquivalent(final SAMSequenceRecord me, final SAMSequenceRecord that) { + private static boolean sequenceRecordsAreEquivalent(final SAMSequenceRecord me, final SAMSequenceRecord that) { if (me == that) return true; if (that == null) return false; - // I don't care if the indices are difference - //if (me.getSequenceIndex() != that.getSequenceIndex()) return false; if (me.getSequenceLength() != 0 && that.getSequenceLength() != 0 && me.getSequenceLength() != that.getSequenceLength()) return false; @@ -280,18 +340,18 @@ public class SequenceDictionaryUtils { return elt.getSequenceLength() == rec1.getSequenceLength() || elt.getSequenceLength() == rec2.getSequenceLength(); } - /** - * Returns true if the common contigs in dict1 and dict2 are in the same order. This is accomplished by getting the - * common contigs in both dictionaries, sorting these according to their indices, and the walking through - * the sorted list to ensure that each ordered contig is equivalent + * Returns true if the common contigs in dict1 and dict2 are in the same relative order, without regard to + * absolute index position. This is accomplished by getting the common contigs in both dictionaries, sorting + * these according to their indices, and then walking through the sorted list to ensure that each ordered contig + * is equivalent * - * @param commonContigs - * @param dict1 - * @param dict2 - * @return + * @param commonContigs names of the contigs common to both dictionaries + * @param dict1 first SAMSequenceDictionary + * @param dict2 second SAMSequenceDictionary + * @return true if the common contigs occur in the same relative order in both dict1 and dict2, otherwise false */ - public static boolean commonContigsAreInOrder(Set commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { + private static boolean commonContigsAreInSameRelativeOrder(Set commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { List list1 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict1)); List list2 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict2)); @@ -321,10 +381,6 @@ public class SequenceDictionaryUtils { return l; } - // -------------------------------------------------------------------------------------------------------------- - // Utilities for comparing the order of sequence records - // -------------------------------------------------------------------------------------------------------------- - /** * Compares sequence records by their order */ @@ -346,6 +402,81 @@ public class SequenceDictionaryUtils { return unsorted; } + /** + * Checks whether the common contigs in the given sequence dictionaries occur at the same indices + * in both dictionaries + * + * @param commonContigs Set of names of the contigs that occur in both dictionaries + * @param dict1 first sequence dictionary + * @param dict2 second sequence dictionary + * @return true if the contigs common to dict1 and dict2 occur at the same indices in both dictionaries, + * otherwise false + */ + private static boolean commonContigsAreAtSameIndices( final Set commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2 ) { + for ( String commonContig : commonContigs ) { + SAMSequenceRecord dict1Record = dict1.getSequence(commonContig); + SAMSequenceRecord dict2Record = dict2.getSequence(commonContig); + + // Each common contig must have the same index in both dictionaries + if ( dict1Record.getSequenceIndex() != dict2Record.getSequenceIndex() ) { + return false; + } + } + + return true; + } + + /** + * Gets the set of names of the contigs found in both sequence dictionaries that have different indices + * in the two dictionaries. + * + * @param commonContigs Set of names of the contigs common to both dictionaries + * @param dict1 first sequence dictionary + * @param dict2 second sequence dictionary + * @return a Set containing the names of the common contigs indexed differently in dict1 vs. dict2, + * or an empty Set if there are no such contigs + */ + private static Set getDifferentlyIndexedCommonContigs( final Set commonContigs, + final SAMSequenceDictionary dict1, + final SAMSequenceDictionary dict2 ) { + + final Set differentlyIndexedCommonContigs = new LinkedHashSet(Utils.optimumHashSize(commonContigs.size())); + + for ( String commonContig : commonContigs ) { + if ( dict1.getSequence(commonContig).getSequenceIndex() != dict2.getSequence(commonContig).getSequenceIndex() ) { + differentlyIndexedCommonContigs.add(commonContig); + } + } + + return differentlyIndexedCommonContigs; + } + + /** + * Finds the names of any contigs indexed differently in the two sequence dictionaries that also + * occur in the provided set of intervals. + * + * @param intervals GenomeLocSortedSet containing the intervals to check + * @param dict1 first sequence dictionary + * @param dict2 second sequence dictionary + * @return a Set of the names of the contigs indexed differently in dict1 vs dict2 that also + * occur in the provided intervals, or an empty Set if there are no such contigs + */ + private static Set findMisindexedContigsInIntervals( final GenomeLocSortedSet intervals, + final SAMSequenceDictionary dict1, + final SAMSequenceDictionary dict2 ) { + + final Set differentlyIndexedCommonContigs = getDifferentlyIndexedCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2); + final Set misindexedContigsInIntervals = new LinkedHashSet(Utils.optimumHashSize(differentlyIndexedCommonContigs.size())); + + // We know differentlyIndexedCommonContigs is a HashSet, so this loop is O(intervals) + for ( GenomeLoc interval : intervals ) { + if ( differentlyIndexedCommonContigs.contains(interval.getContig()) ) { + misindexedContigsInIntervals.add(interval.getContig()); + } + } + + return misindexedContigsInIntervals; + } /** * Returns the set of contig names found in both dicts. @@ -360,9 +491,37 @@ public class SequenceDictionaryUtils { } public static Set getContigNames(SAMSequenceDictionary dict) { - Set contigNames = new HashSet((int)(dict.size() / 0.75f) + 1, 0.75f); + Set contigNames = new HashSet(Utils.optimumHashSize(dict.size())); for (SAMSequenceRecord dictionaryEntry : dict.getSequences()) contigNames.add(dictionaryEntry.getSequenceName()); return contigNames; } + + /** + * Returns a compact String representation of the sequence dictionary it's passed + * + * The format of the returned String is: + * [ contig1Name(length: contig1Length) contig2Name(length: contig2Length) ... ] + * + * @param dict a non-null SAMSequenceDictionary + * @return A String containing all of the contig names and lengths from the sequence dictionary it's passed + */ + public static String getDictionaryAsString( final SAMSequenceDictionary dict ) { + if ( dict == null ) { + throw new IllegalArgumentException("Sequence dictionary must be non-null"); + } + + StringBuilder s = new StringBuilder("[ "); + + for ( SAMSequenceRecord dictionaryEntry : dict.getSequences() ) { + s.append(dictionaryEntry.getSequenceName()); + s.append("(length:"); + s.append(dictionaryEntry.getSequenceLength()); + s.append(") "); + } + + s.append("]"); + + return s.toString(); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java index 853d79c90..88086314a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java @@ -64,22 +64,18 @@ public class DictionaryConsistencyIntegrationTest extends WalkerTest { } - @Test public void failBAM1() { executeTest("b36bam-v-b37", testBAM(b37KGReference, b36BAM)); } - @Test public void failBAM2() { executeTest("b36bam-v-hg18", testBAM(hg18Reference, b36BAM)); } - @Test public void failBAM3() { executeTest("b36bam-v-hg19", testBAM(hg19Reference, b36BAM)); } - @Test public void failBAM4() { executeTest("b36bam-v-lexhg18", testBAM(lexHG18, b36BAM, UserException.LexicographicallySortedSequenceDictionary.class)); } + @Test public void failBAM1() { executeTest("b36bam-v-b37", testBAM(b37KGReference, b36BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); } + @Test public void failBAM2() { executeTest("b36bam-v-hg18", testBAM(hg18Reference, b36BAM, "chr1", UserException.IncompatibleSequenceDictionaries.class)); } + @Test public void failBAM3() { executeTest("b36bam-v-hg19", testBAM(hg19Reference, b36BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); } + @Test public void failBAM4() { executeTest("b36bam-v-lexhg18", testBAM(lexHG18, b36BAM, "chr1", UserException.LexicographicallySortedSequenceDictionary.class)); } - @Test public void failBAM5() { executeTest("hg18bam-v-b36", testBAM(b36KGReference, hg18BAM)); } - @Test public void failBAM6() { executeTest("hg18bam-v-b37", testBAM(b37KGReference, hg18BAM)); } - @Test public void failBAM7() { executeTest("hg18bam-v-hg19", testBAM(hg19Reference, hg18BAM)); } - @Test public void failBAM8() { executeTest("hg18bam-v-lexhg18", testBAM(lexHG18, hg18BAM, UserException.LexicographicallySortedSequenceDictionary.class)); } + @Test public void failBAM5() { executeTest("hg18bam-v-b36", testBAM(b36KGReference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); } + @Test public void failBAM6() { executeTest("hg18bam-v-b37", testBAM(b37KGReference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); } + @Test public void failBAM7() { executeTest("hg18bam-v-hg19", testBAM(hg19Reference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); } + @Test public void failBAM8() { executeTest("hg18bam-v-lexhg18", testBAM(lexHG18, hg18BAM, "chr1", UserException.LexicographicallySortedSequenceDictionary.class)); } - private WalkerTest.WalkerTestSpec testBAM(String ref, String bam) { - return testBAM(ref, bam, UserException.IncompatibleSequenceDictionaries.class); - } - - private WalkerTest.WalkerTestSpec testBAM(String ref, String bam, Class c) { - return new WalkerTest.WalkerTestSpec("-T UnifiedGenotyper -I " + bam + " -R " + ref + " -L 1:10,000,000-11,000,000 -o %s", + private WalkerTest.WalkerTestSpec testBAM(String ref, String bam, String contig, Class c) { + return new WalkerTest.WalkerTestSpec("-T UnifiedGenotyper -I " + bam + " -R " + ref + " -L " + contig + ":10,000,000-11,000,000 -o %s", 1, c); } diff --git a/public/java/test/org/broadinstitute/sting/utils/SequenceDictionaryUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/SequenceDictionaryUtilsUnitTest.java new file mode 100644 index 000000000..a95b5b6ce --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/SequenceDictionaryUtilsUnitTest.java @@ -0,0 +1,241 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; +import org.testng.Assert; + +import static org.broadinstitute.sting.utils.SequenceDictionaryUtils.*; +import static org.broadinstitute.sting.utils.SequenceDictionaryUtils.SequenceDictionaryCompatibility.*; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class SequenceDictionaryUtilsUnitTest extends BaseTest { + + private static Logger logger = Logger.getLogger(SequenceDictionaryUtilsUnitTest.class); + + + @DataProvider( name = "SequenceDictionaryDataProvider" ) + public Object[][] generateSequenceDictionaryTestData() { + final SAMSequenceRecord CHRM_HG19 = new SAMSequenceRecord("chrM", 16571); + final SAMSequenceRecord CHR_NONSTANDARD1 = new SAMSequenceRecord("NonStandard1", 8675309); + final SAMSequenceRecord CHR_NONSTANDARD2 = new SAMSequenceRecord("NonStandard2", 8675308); + + final Class NO_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class; + final Class UNEQUAL_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class; + final Class NON_CANONICAL_HUMAN_ORDER_EXCEPTION = UserException.LexicographicallySortedSequenceDictionary.class; + final Class OUT_OF_ORDER_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class; + final Class DIFFERENT_INDICES_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class; + + final List hg19Sequences = Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR10_HG19); + final GenomeLocParser hg19GenomeLocParser = new GenomeLocParser(new SAMSequenceDictionary(hg19Sequences)); + final List hg19AllContigsIntervals = Arrays.asList(hg19GenomeLocParser.createGenomeLoc("chrM", 0, 1), + hg19GenomeLocParser.createGenomeLoc("chr1", 0, 1), + hg19GenomeLocParser.createGenomeLoc("chr2", 0, 1), + hg19GenomeLocParser.createGenomeLoc("chr10", 0, 1)); + final List hg19PartialContigsIntervals = Arrays.asList(hg19GenomeLocParser.createGenomeLoc("chrM", 0, 1), + hg19GenomeLocParser.createGenomeLoc("chr1", 0, 1)); + final GenomeLocSortedSet hg19AllContigsIntervalSet = new GenomeLocSortedSet(hg19GenomeLocParser, hg19AllContigsIntervals); + final GenomeLocSortedSet hg19PartialContigsIntervalSet = new GenomeLocSortedSet(hg19GenomeLocParser, hg19PartialContigsIntervals); + + return new Object[][] { + // Identical dictionaries: + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19), null, IDENTICAL, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, IDENTICAL, null, false, null }, + { Arrays.asList(CHR1_B37), Arrays.asList(CHR1_B37), null, IDENTICAL, null, false, null }, + { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, IDENTICAL, null, false, null }, + + // Dictionaries with a common subset: + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHRM_HG19), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD1), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null }, + + // Dictionaries with no common contigs: + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null }, + + // Dictionaries with unequal common contigs: + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_B36), Arrays.asList(CHR1_B37), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), Arrays.asList(CHR1_B36, CHR2_B36, CHR10_B36), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18, CHR_NONSTANDARD2), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null }, + + // One or both dictionaries in non-canonical human order: + { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37), Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36), Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null }, + + // Dictionaries with a common subset, but different relative ordering within that subset: + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHRM_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR2_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHRM_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null }, + { Arrays.asList(CHR1_B37, CHR2_B37), Arrays.asList(CHR2_B37, CHR1_B37), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null }, + + + // Dictionaries with a common subset in the same relative order, but with different indices. + // This will only throw an exception during validation if isReadsToReferenceComparison is true, + // and there are intervals overlapping the misindexed contigs: + + // These have isReadsToReferenceComparison == true and overlapping intervals, so we expect an exception: + { Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet }, + + // These have isReadsToReferenceComparison == true but no overlapping intervals, so we don't expect an exception: + { Arrays.asList(CHR2_HG19, CHR10_HG19), Arrays.asList(CHR10_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet }, + + // These have isReadsToReferenceComparison == false, so we don't expect an exception: + { Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), null, DIFFERENT_INDICES, null, false, hg19AllContigsIntervalSet }, + { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19), null, DIFFERENT_INDICES, null, false, hg19AllContigsIntervalSet }, + + + // Tests for validation exclusions. Note that errors resulting from NO_COMMON_CONTIGs cannot be suppressed + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), ValidationExclusion.TYPE.ALL, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null }, + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, UNEQUAL_COMMON_CONTIGS, null, false, null }, + { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), ValidationExclusion.TYPE.ALL, UNEQUAL_COMMON_CONTIGS, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, NON_CANONICAL_HUMAN_ORDER, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), ValidationExclusion.TYPE.ALL, NON_CANONICAL_HUMAN_ORDER, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, OUT_OF_ORDER, null, false, null }, + { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), ValidationExclusion.TYPE.ALL, OUT_OF_ORDER, null, false, null }, + { Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, DIFFERENT_INDICES, null, true, hg19AllContigsIntervalSet }, + { Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), ValidationExclusion.TYPE.ALL, DIFFERENT_INDICES, null, true, hg19AllContigsIntervalSet } + }; + } + + @Test( dataProvider = "SequenceDictionaryDataProvider" ) + public void testSequenceDictionaryValidation( final List firstDictionaryContigs, + final List secondDictionaryContigs, + final ValidationExclusion.TYPE validationExclusions, + final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility, + final Class expectedExceptionUponValidation, + final boolean isReadsToReferenceComparison, + final GenomeLocSortedSet intervals ) { + + final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs); + final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs); + final String testDescription = String.format("First dictionary: %s Second dictionary: %s Validation exclusions: %s", + SequenceDictionaryUtils.getDictionaryAsString(firstDictionary), + SequenceDictionaryUtils.getDictionaryAsString(secondDictionary), + validationExclusions); + + Exception exceptionThrown = null; + try { + SequenceDictionaryUtils.validateDictionaries(logger, + validationExclusions, + "firstDictionary", + firstDictionary, + "secondDictionary", + secondDictionary, + isReadsToReferenceComparison, + intervals); + } + catch ( Exception e ) { + exceptionThrown = e; + } + + if ( expectedExceptionUponValidation != null ) { + Assert.assertTrue(exceptionThrown != null && expectedExceptionUponValidation.isInstance(exceptionThrown), + String.format("Expected exception %s but saw %s instead. %s", + expectedExceptionUponValidation.getSimpleName(), + exceptionThrown == null ? "no exception" : exceptionThrown.getClass().getSimpleName(), + testDescription)); + } + else { + Assert.assertTrue(exceptionThrown == null, + String.format("Expected no exception but saw exception %s instead. %s", + exceptionThrown != null ? exceptionThrown.getClass().getSimpleName() : "none", + testDescription)); + } + } + + @Test( dataProvider = "SequenceDictionaryDataProvider" ) + public void testSequenceDictionaryComparison( final List firstDictionaryContigs, + final List secondDictionaryContigs, + final ValidationExclusion.TYPE validationExclusions, + final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility, + final Class expectedExceptionUponValidation, + final boolean isReadsToReferenceComparison, + final GenomeLocSortedSet intervals ) { + + final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs); + final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs); + final String testDescription = String.format("First dictionary: %s Second dictionary: %s", + SequenceDictionaryUtils.getDictionaryAsString(firstDictionary), + SequenceDictionaryUtils.getDictionaryAsString(secondDictionary)); + + final SequenceDictionaryUtils.SequenceDictionaryCompatibility reportedCompatibility = + SequenceDictionaryUtils.compareDictionaries(firstDictionary, secondDictionary); + + Assert.assertTrue(reportedCompatibility == dictionaryCompatibility, + String.format("Dictionary comparison should have returned %s but instead returned %s. %s", + dictionaryCompatibility, reportedCompatibility, testDescription)); + } + + private SAMSequenceDictionary createSequenceDictionary( final List contigs ) { + final List clonedContigs = new ArrayList(contigs.size()); + + // Clone the individual SAMSequenceRecords to avoid contig-index issues with shared objects + // across multiple dictionaries in tests + for ( SAMSequenceRecord contig : contigs ) { + clonedContigs.add(contig.clone()); + } + + return new SAMSequenceDictionary(clonedContigs); + } +}