Sequence dictionary validation: detect problematic contig indexing differences

The GATK engine does not behave correctly when contigs are indexed
differently in the reads sequence dictionaries vs. the reference
sequence dictionary, and the inconsistently-indexed contigs are included
in the user's intervals. For example, given the dictionaries:

Reference dictionary = { chrM, chr1, chr2, ... }
BAM dictionary       = { chr1, chr2, ... }

and the interval "-L chr1", the engine would fail to correctly retrieve
the reads from chr1, since chr1 has a different index in the two dictionaries.

With this patch, we throw an exception if there are contig index differences
between the dictionaries for reads and reference, AND the user's intervals
include at least one of the mismatching contigs.

The user can disable this exception via -U ALLOW_SEQ_DICT_INCOMPATIBILITY

In all other cases, dictionary validation behaves as before.

I also added comprehensive unit tests for the (previously-untested)
SequenceDictionaryUtils class.

GSA-768 #resolve
This commit is contained in:
David Roazen 2013-02-21 15:31:16 -05:00
parent f62dd84869
commit 3645ea9bb6
5 changed files with 468 additions and 71 deletions

View File

@ -48,6 +48,7 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
@ -254,13 +255,16 @@ public class GenomeAnalysisEngine {
// Prepare the data for traversal.
initializeDataSources();
// initialize sampleDB
initializeSampleDB();
// initialize and validate the interval list
initializeIntervals();
validateSuppliedIntervals();
// check to make sure that all sequence dictionaries are compatible with the reference's sequence dictionary
validateDataSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), rodDataSources);
// initialize sampleDB
initializeSampleDB();
// our microscheduler, which is in charge of running everything
MicroScheduler microScheduler = createMicroscheduler();
threadEfficiencyMonitor = microScheduler.getThreadEfficiencyMonitor();
@ -753,9 +757,8 @@ public class GenomeAnalysisEngine {
* @param reads Reads data source.
* @param reference Reference data source.
* @param rods a collection of the reference ordered data tracks
* @param manager manager
*/
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) {
private void validateDataSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null )
return;
@ -772,11 +775,12 @@ public class GenomeAnalysisEngine {
}
// compare the reads to the reference
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary);
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary,
"reference", referenceDictionary, true, intervals);
}
for (ReferenceOrderedDataSource rod : rods)
manager.validateTrackSequenceDictionary(rod.getName(),rod.getSequenceDictionary(),referenceDictionary);
IndexDictionaryUtils.validateTrackSequenceDictionary(rod.getName(), rod.getSequenceDictionary(), referenceDictionary, getArguments().unsafe);
}
/**
@ -858,9 +862,6 @@ public class GenomeAnalysisEngine {
genomeLocParser,
flashbackData()));
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
return dataSources;
}

View File

@ -101,7 +101,7 @@ public class IndexDictionaryUtils {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict, false, null);
}
}
}

View File

@ -53,40 +53,55 @@ public class SequenceDictionaryUtils {
//
// for detecting lexicographically sorted human references
//
private static boolean ENABLE_LEXICOGRAPHIC_REQUIREMENT_FOR_HUMAN = true;
private static final boolean ENABLE_LEXICOGRAPHIC_REQUIREMENT_FOR_HUMAN = true;
// hg18
private static SAMSequenceRecord CHR1_HG18 = new SAMSequenceRecord("chr1", 247249719);
private static SAMSequenceRecord CHR2_HG18 = new SAMSequenceRecord("chr2", 242951149);
private static SAMSequenceRecord CHR10_HG18 = new SAMSequenceRecord("chr10", 135374737);
protected static final SAMSequenceRecord CHR1_HG18 = new SAMSequenceRecord("chr1", 247249719);
protected static final SAMSequenceRecord CHR2_HG18 = new SAMSequenceRecord("chr2", 242951149);
protected static final SAMSequenceRecord CHR10_HG18 = new SAMSequenceRecord("chr10", 135374737);
// hg19
private static SAMSequenceRecord CHR1_HG19 = new SAMSequenceRecord("chr1", 249250621);
private static SAMSequenceRecord CHR2_HG19 = new SAMSequenceRecord("chr2", 243199373);
private static SAMSequenceRecord CHR10_HG19 = new SAMSequenceRecord("chr10", 135534747);
protected static final SAMSequenceRecord CHR1_HG19 = new SAMSequenceRecord("chr1", 249250621);
protected static final SAMSequenceRecord CHR2_HG19 = new SAMSequenceRecord("chr2", 243199373);
protected static final SAMSequenceRecord CHR10_HG19 = new SAMSequenceRecord("chr10", 135534747);
// b36
protected static final SAMSequenceRecord CHR1_B36 = new SAMSequenceRecord("1", 247249719);
protected static final SAMSequenceRecord CHR2_B36 = new SAMSequenceRecord("2", 242951149);
protected static final SAMSequenceRecord CHR10_B36 = new SAMSequenceRecord("10", 135374737);
// b37
protected static final SAMSequenceRecord CHR1_B37 = new SAMSequenceRecord("1", 249250621);
protected static final SAMSequenceRecord CHR2_B37 = new SAMSequenceRecord("2", 243199373);
protected static final SAMSequenceRecord CHR10_B37 = new SAMSequenceRecord("10", 135534747);
public enum SequenceDictionaryCompatibility {
IDENTICAL, // the dictionaries are identical
COMMON_SUBSET, // there exists a common subset of equivalent contigs
NO_COMMON_CONTIGS, // no overlap between dictionaries
UNEQUAL_COMMON_CONTIGS, // common subset has contigs that have the same name but aren't equivalent
UNEQUAL_COMMON_CONTIGS, // common subset has contigs that have the same name but different lengths
NON_CANONICAL_HUMAN_ORDER, // human reference detected but the order of the contigs is non-standard (lexicographic, for examine)
OUT_OF_ORDER // the two dictionaries overlap but the contigs occur out of order w.r.t each other
OUT_OF_ORDER, // the two dictionaries overlap but the overlapping contigs occur in different
// orders with respect to each other
DIFFERENT_INDICES // the two dictionaries overlap and the overlapping contigs occur in the same
// order with respect to each other, but one or more of them have different
// indices in the two dictionaries. Eg., { chrM, chr1, chr2 } vs. { chr1, chr2 }
}
/**
* @param validationExclusion exclusions to validation
* @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency
*/
public static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) {
private static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) {
return ( validationExclusion == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY ||
validationExclusion == ValidationExclusion.TYPE.ALL );
}
/**
* Testings for compatbility between dict1 and dict2. If the dictionaries are incompatible, then UserExceptions are
* thrown with detailed error messages. If the engine is in permissive mode, then logger.warnings of generated instead
* Tests for compatibility between two sequence dictionaries. If the dictionaries are incompatible, then
* UserExceptions are thrown with detailed error messages. If the engine is in permissive mode, then
* logger warnings are generated instead.
*
* @param logger for warnings
* @param validationExclusion exclusions to validation
@ -94,9 +109,22 @@ public class SequenceDictionaryUtils {
* @param dict1 the sequence dictionary dict1
* @param name2 name associated with dict2
* @param dict2 the sequence dictionary dict2
* @param isReadsToReferenceComparison true if one of the dictionaries comes from a reads data source (eg., a BAM),
* and the other from a reference data source
* @param intervals the user-specified genomic intervals: only required when isReadsToReferenceComparison is true,
* otherwise can be null
*/
public static void validateDictionaries(Logger logger, ValidationExclusion.TYPE validationExclusion, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
SequenceDictionaryCompatibility type = compareDictionaries(dict1, dict2);
public static void validateDictionaries( final Logger logger,
final ValidationExclusion.TYPE validationExclusion,
final String name1,
final SAMSequenceDictionary dict1,
final String name2,
final SAMSequenceDictionary dict2,
final boolean isReadsToReferenceComparison,
final GenomeLocSortedSet intervals ) {
final SequenceDictionaryCompatibility type = compareDictionaries(dict1, dict2);
switch ( type ) {
case IDENTICAL:
return;
@ -134,15 +162,48 @@ public class SequenceDictionaryUtils {
logger.warn(ex.getMessage());
else
throw ex;
break;
}
case OUT_OF_ORDER: {
UserException ex = new UserException.IncompatibleSequenceDictionaries("Order of contigs differences, which is unsafe", name1, dict1, name2, dict2);
UserException ex = new UserException.IncompatibleSequenceDictionaries("Relative ordering of overlapping contigs differs, which is unsafe", name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage());
else
throw ex;
} break;
break;
}
case DIFFERENT_INDICES: {
// This is currently only known to be problematic when the index mismatch is between a bam and the
// reference AND when the user's intervals actually include one or more of the contigs that are
// indexed differently from the reference. In this case, the engine will fail to correctly serve
// up the reads from those contigs, so throw an exception unless unsafe operations are enabled.
if ( isReadsToReferenceComparison && intervals != null ) {
final Set<String> misindexedContigs = findMisindexedContigsInIntervals(intervals, dict1, dict2);
if ( ! misindexedContigs.isEmpty() ) {
final String msg = String.format("The following contigs included in the intervals to process have " +
"different indices in the sequence dictionaries for the reads vs. " +
"the reference: %s. As a result, the GATK engine will not correctly " +
"process reads from these contigs. You should either fix the sequence " +
"dictionaries for your reads so that these contigs have the same indices " +
"as in the sequence dictionary for your reference, or exclude these contigs " +
"from your intervals. This error can be disabled via -U %s, " +
"however this is not recommended as the GATK engine will not behave correctly.",
misindexedContigs, ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY);
final UserException ex = new UserException.IncompatibleSequenceDictionaries(msg, name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage());
else
throw ex;
}
}
break;
}
default:
throw new ReviewedStingException("Unexpected SequenceDictionaryComparison type: " + type);
}
@ -151,32 +212,33 @@ public class SequenceDictionaryUtils {
/**
* Workhorse routine that takes two dictionaries and returns their compatibility.
*
* @param dict1
* @param dict2
* @return
* @param dict1 first sequence dictionary
* @param dict2 second sequence dictionary
* @return A SequenceDictionaryCompatibility enum value describing the compatibility of the two dictionaries
*/
public static SequenceDictionaryCompatibility compareDictionaries(SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
// If there's no overlap between reads and reference, data will be bogus. Throw an exception.
if ( nonCanonicalHumanContigOrder( dict1 ) || nonCanonicalHumanContigOrder(dict2) )
public static SequenceDictionaryCompatibility compareDictionaries( final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) {
if ( nonCanonicalHumanContigOrder(dict1) || nonCanonicalHumanContigOrder(dict2) )
return SequenceDictionaryCompatibility.NON_CANONICAL_HUMAN_ORDER;
Set<String> commonContigs = getCommonContigsByName(dict1, dict2);
final Set<String> commonContigs = getCommonContigsByName(dict1, dict2);
if (commonContigs.size() == 0)
return SequenceDictionaryCompatibility.NO_COMMON_CONTIGS;
else if ( ! commonContigsAreEquivalent( commonContigs, dict1, dict2 ) )
else if ( ! commonContigsHaveSameLengths(commonContigs, dict1, dict2) )
return SequenceDictionaryCompatibility.UNEQUAL_COMMON_CONTIGS;
else if ( ! commonContigsAreInOrder( commonContigs, dict1, dict2 ) )
else if ( ! commonContigsAreInSameRelativeOrder(commonContigs, dict1, dict2) )
return SequenceDictionaryCompatibility.OUT_OF_ORDER;
else if ( commonContigs.size() == dict1.size() && commonContigs.size() == dict2.size() )
return SequenceDictionaryCompatibility.IDENTICAL;
else if ( ! commonContigsAreAtSameIndices(commonContigs, dict1, dict2) )
return SequenceDictionaryCompatibility.DIFFERENT_INDICES;
else {
return SequenceDictionaryCompatibility.COMMON_SUBSET;
}
}
/**
* Utility function that tests whether the commonContigs in both dicts are equivalent. Equivalece means
* Utility function that tests whether the commonContigs in both dicts are equivalent. Equivalence means
* that the seq records have the same length, if both are non-zero.
*
* @param commonContigs
@ -184,7 +246,7 @@ public class SequenceDictionaryUtils {
* @param dict2
* @return true if all of the common contigs are equivalent
*/
private static boolean commonContigsAreEquivalent(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
private static boolean commonContigsHaveSameLengths(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
return findDisequalCommonContigs(commonContigs, dict1, dict2) == null;
}
@ -201,7 +263,7 @@ public class SequenceDictionaryUtils {
for ( String name : commonContigs ) {
SAMSequenceRecord elt1 = dict1.getSequence(name);
SAMSequenceRecord elt2 = dict2.getSequence(name);
if ( ! SequenceRecordsAreEquivalent(elt1, elt2) )
if ( ! sequenceRecordsAreEquivalent(elt1, elt2) )
return Arrays.asList(elt1,elt2);
}
@ -216,12 +278,10 @@ public class SequenceDictionaryUtils {
* @param that
* @return
*/
private static boolean SequenceRecordsAreEquivalent(final SAMSequenceRecord me, final SAMSequenceRecord that) {
private static boolean sequenceRecordsAreEquivalent(final SAMSequenceRecord me, final SAMSequenceRecord that) {
if (me == that) return true;
if (that == null) return false;
// I don't care if the indices are difference
//if (me.getSequenceIndex() != that.getSequenceIndex()) return false;
if (me.getSequenceLength() != 0 && that.getSequenceLength() != 0 && me.getSequenceLength() != that.getSequenceLength())
return false;
@ -280,18 +340,18 @@ public class SequenceDictionaryUtils {
return elt.getSequenceLength() == rec1.getSequenceLength() || elt.getSequenceLength() == rec2.getSequenceLength();
}
/**
* Returns true if the common contigs in dict1 and dict2 are in the same order. This is accomplished by getting the
* common contigs in both dictionaries, sorting these according to their indices, and the walking through
* the sorted list to ensure that each ordered contig is equivalent
* Returns true if the common contigs in dict1 and dict2 are in the same relative order, without regard to
* absolute index position. This is accomplished by getting the common contigs in both dictionaries, sorting
* these according to their indices, and then walking through the sorted list to ensure that each ordered contig
* is equivalent
*
* @param commonContigs
* @param dict1
* @param dict2
* @return
* @param commonContigs names of the contigs common to both dictionaries
* @param dict1 first SAMSequenceDictionary
* @param dict2 second SAMSequenceDictionary
* @return true if the common contigs occur in the same relative order in both dict1 and dict2, otherwise false
*/
public static boolean commonContigsAreInOrder(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
private static boolean commonContigsAreInSameRelativeOrder(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
List<SAMSequenceRecord> list1 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict1));
List<SAMSequenceRecord> list2 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict2));
@ -321,10 +381,6 @@ public class SequenceDictionaryUtils {
return l;
}
// --------------------------------------------------------------------------------------------------------------
// Utilities for comparing the order of sequence records
// --------------------------------------------------------------------------------------------------------------
/**
* Compares sequence records by their order
*/
@ -346,6 +402,81 @@ public class SequenceDictionaryUtils {
return unsorted;
}
/**
* Checks whether the common contigs in the given sequence dictionaries occur at the same indices
* in both dictionaries
*
* @param commonContigs Set of names of the contigs that occur in both dictionaries
* @param dict1 first sequence dictionary
* @param dict2 second sequence dictionary
* @return true if the contigs common to dict1 and dict2 occur at the same indices in both dictionaries,
* otherwise false
*/
private static boolean commonContigsAreAtSameIndices( final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2 ) {
for ( String commonContig : commonContigs ) {
SAMSequenceRecord dict1Record = dict1.getSequence(commonContig);
SAMSequenceRecord dict2Record = dict2.getSequence(commonContig);
// Each common contig must have the same index in both dictionaries
if ( dict1Record.getSequenceIndex() != dict2Record.getSequenceIndex() ) {
return false;
}
}
return true;
}
/**
* Gets the set of names of the contigs found in both sequence dictionaries that have different indices
* in the two dictionaries.
*
* @param commonContigs Set of names of the contigs common to both dictionaries
* @param dict1 first sequence dictionary
* @param dict2 second sequence dictionary
* @return a Set containing the names of the common contigs indexed differently in dict1 vs. dict2,
* or an empty Set if there are no such contigs
*/
private static Set<String> getDifferentlyIndexedCommonContigs( final Set<String> commonContigs,
final SAMSequenceDictionary dict1,
final SAMSequenceDictionary dict2 ) {
final Set<String> differentlyIndexedCommonContigs = new LinkedHashSet<String>(Utils.optimumHashSize(commonContigs.size()));
for ( String commonContig : commonContigs ) {
if ( dict1.getSequence(commonContig).getSequenceIndex() != dict2.getSequence(commonContig).getSequenceIndex() ) {
differentlyIndexedCommonContigs.add(commonContig);
}
}
return differentlyIndexedCommonContigs;
}
/**
* Finds the names of any contigs indexed differently in the two sequence dictionaries that also
* occur in the provided set of intervals.
*
* @param intervals GenomeLocSortedSet containing the intervals to check
* @param dict1 first sequence dictionary
* @param dict2 second sequence dictionary
* @return a Set of the names of the contigs indexed differently in dict1 vs dict2 that also
* occur in the provided intervals, or an empty Set if there are no such contigs
*/
private static Set<String> findMisindexedContigsInIntervals( final GenomeLocSortedSet intervals,
final SAMSequenceDictionary dict1,
final SAMSequenceDictionary dict2 ) {
final Set<String> differentlyIndexedCommonContigs = getDifferentlyIndexedCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2);
final Set<String> misindexedContigsInIntervals = new LinkedHashSet<String>(Utils.optimumHashSize(differentlyIndexedCommonContigs.size()));
// We know differentlyIndexedCommonContigs is a HashSet, so this loop is O(intervals)
for ( GenomeLoc interval : intervals ) {
if ( differentlyIndexedCommonContigs.contains(interval.getContig()) ) {
misindexedContigsInIntervals.add(interval.getContig());
}
}
return misindexedContigsInIntervals;
}
/**
* Returns the set of contig names found in both dicts.
@ -360,9 +491,37 @@ public class SequenceDictionaryUtils {
}
public static Set<String> getContigNames(SAMSequenceDictionary dict) {
Set<String> contigNames = new HashSet<String>((int)(dict.size() / 0.75f) + 1, 0.75f);
Set<String> contigNames = new HashSet<String>(Utils.optimumHashSize(dict.size()));
for (SAMSequenceRecord dictionaryEntry : dict.getSequences())
contigNames.add(dictionaryEntry.getSequenceName());
return contigNames;
}
/**
* Returns a compact String representation of the sequence dictionary it's passed
*
* The format of the returned String is:
* [ contig1Name(length: contig1Length) contig2Name(length: contig2Length) ... ]
*
* @param dict a non-null SAMSequenceDictionary
* @return A String containing all of the contig names and lengths from the sequence dictionary it's passed
*/
public static String getDictionaryAsString( final SAMSequenceDictionary dict ) {
if ( dict == null ) {
throw new IllegalArgumentException("Sequence dictionary must be non-null");
}
StringBuilder s = new StringBuilder("[ ");
for ( SAMSequenceRecord dictionaryEntry : dict.getSequences() ) {
s.append(dictionaryEntry.getSequenceName());
s.append("(length:");
s.append(dictionaryEntry.getSequenceLength());
s.append(") ");
}
s.append("]");
return s.toString();
}
}

View File

@ -64,22 +64,18 @@ public class DictionaryConsistencyIntegrationTest extends WalkerTest {
}
@Test public void failBAM1() { executeTest("b36bam-v-b37", testBAM(b37KGReference, b36BAM)); }
@Test public void failBAM2() { executeTest("b36bam-v-hg18", testBAM(hg18Reference, b36BAM)); }
@Test public void failBAM3() { executeTest("b36bam-v-hg19", testBAM(hg19Reference, b36BAM)); }
@Test public void failBAM4() { executeTest("b36bam-v-lexhg18", testBAM(lexHG18, b36BAM, UserException.LexicographicallySortedSequenceDictionary.class)); }
@Test public void failBAM1() { executeTest("b36bam-v-b37", testBAM(b37KGReference, b36BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM2() { executeTest("b36bam-v-hg18", testBAM(hg18Reference, b36BAM, "chr1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM3() { executeTest("b36bam-v-hg19", testBAM(hg19Reference, b36BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM4() { executeTest("b36bam-v-lexhg18", testBAM(lexHG18, b36BAM, "chr1", UserException.LexicographicallySortedSequenceDictionary.class)); }
@Test public void failBAM5() { executeTest("hg18bam-v-b36", testBAM(b36KGReference, hg18BAM)); }
@Test public void failBAM6() { executeTest("hg18bam-v-b37", testBAM(b37KGReference, hg18BAM)); }
@Test public void failBAM7() { executeTest("hg18bam-v-hg19", testBAM(hg19Reference, hg18BAM)); }
@Test public void failBAM8() { executeTest("hg18bam-v-lexhg18", testBAM(lexHG18, hg18BAM, UserException.LexicographicallySortedSequenceDictionary.class)); }
@Test public void failBAM5() { executeTest("hg18bam-v-b36", testBAM(b36KGReference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM6() { executeTest("hg18bam-v-b37", testBAM(b37KGReference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM7() { executeTest("hg18bam-v-hg19", testBAM(hg19Reference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM8() { executeTest("hg18bam-v-lexhg18", testBAM(lexHG18, hg18BAM, "chr1", UserException.LexicographicallySortedSequenceDictionary.class)); }
private WalkerTest.WalkerTestSpec testBAM(String ref, String bam) {
return testBAM(ref, bam, UserException.IncompatibleSequenceDictionaries.class);
}
private WalkerTest.WalkerTestSpec testBAM(String ref, String bam, Class c) {
return new WalkerTest.WalkerTestSpec("-T UnifiedGenotyper -I " + bam + " -R " + ref + " -L 1:10,000,000-11,000,000 -o %s",
private WalkerTest.WalkerTestSpec testBAM(String ref, String bam, String contig, Class c) {
return new WalkerTest.WalkerTestSpec("-T UnifiedGenotyper -I " + bam + " -R " + ref + " -L " + contig + ":10,000,000-11,000,000 -o %s",
1, c);
}

View File

@ -0,0 +1,241 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import static org.broadinstitute.sting.utils.SequenceDictionaryUtils.*;
import static org.broadinstitute.sting.utils.SequenceDictionaryUtils.SequenceDictionaryCompatibility.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class SequenceDictionaryUtilsUnitTest extends BaseTest {
private static Logger logger = Logger.getLogger(SequenceDictionaryUtilsUnitTest.class);
@DataProvider( name = "SequenceDictionaryDataProvider" )
public Object[][] generateSequenceDictionaryTestData() {
final SAMSequenceRecord CHRM_HG19 = new SAMSequenceRecord("chrM", 16571);
final SAMSequenceRecord CHR_NONSTANDARD1 = new SAMSequenceRecord("NonStandard1", 8675309);
final SAMSequenceRecord CHR_NONSTANDARD2 = new SAMSequenceRecord("NonStandard2", 8675308);
final Class NO_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
final Class UNEQUAL_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
final Class NON_CANONICAL_HUMAN_ORDER_EXCEPTION = UserException.LexicographicallySortedSequenceDictionary.class;
final Class OUT_OF_ORDER_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
final Class DIFFERENT_INDICES_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
final List<SAMSequenceRecord> hg19Sequences = Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR10_HG19);
final GenomeLocParser hg19GenomeLocParser = new GenomeLocParser(new SAMSequenceDictionary(hg19Sequences));
final List<GenomeLoc> hg19AllContigsIntervals = Arrays.asList(hg19GenomeLocParser.createGenomeLoc("chrM", 0, 1),
hg19GenomeLocParser.createGenomeLoc("chr1", 0, 1),
hg19GenomeLocParser.createGenomeLoc("chr2", 0, 1),
hg19GenomeLocParser.createGenomeLoc("chr10", 0, 1));
final List<GenomeLoc> hg19PartialContigsIntervals = Arrays.asList(hg19GenomeLocParser.createGenomeLoc("chrM", 0, 1),
hg19GenomeLocParser.createGenomeLoc("chr1", 0, 1));
final GenomeLocSortedSet hg19AllContigsIntervalSet = new GenomeLocSortedSet(hg19GenomeLocParser, hg19AllContigsIntervals);
final GenomeLocSortedSet hg19PartialContigsIntervalSet = new GenomeLocSortedSet(hg19GenomeLocParser, hg19PartialContigsIntervals);
return new Object[][] {
// Identical dictionaries:
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19), null, IDENTICAL, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, IDENTICAL, null, false, null },
{ Arrays.asList(CHR1_B37), Arrays.asList(CHR1_B37), null, IDENTICAL, null, false, null },
{ Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, IDENTICAL, null, false, null },
// Dictionaries with a common subset:
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHRM_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD1), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null },
// Dictionaries with no common contigs:
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
// Dictionaries with unequal common contigs:
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B36), Arrays.asList(CHR1_B37), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), Arrays.asList(CHR1_B36, CHR2_B36, CHR10_B36), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18, CHR_NONSTANDARD2), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
// One or both dictionaries in non-canonical human order:
{ Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37), Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36), Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
// Dictionaries with a common subset, but different relative ordering within that subset:
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHRM_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR2_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHRM_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B37, CHR2_B37), Arrays.asList(CHR2_B37, CHR1_B37), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
// Dictionaries with a common subset in the same relative order, but with different indices.
// This will only throw an exception during validation if isReadsToReferenceComparison is true,
// and there are intervals overlapping the misindexed contigs:
// These have isReadsToReferenceComparison == true and overlapping intervals, so we expect an exception:
{ Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
// These have isReadsToReferenceComparison == true but no overlapping intervals, so we don't expect an exception:
{ Arrays.asList(CHR2_HG19, CHR10_HG19), Arrays.asList(CHR10_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet },
// These have isReadsToReferenceComparison == false, so we don't expect an exception:
{ Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), null, DIFFERENT_INDICES, null, false, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19), null, DIFFERENT_INDICES, null, false, hg19AllContigsIntervalSet },
// Tests for validation exclusions. Note that errors resulting from NO_COMMON_CONTIGs cannot be suppressed
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), ValidationExclusion.TYPE.ALL, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, UNEQUAL_COMMON_CONTIGS, null, false, null },
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), ValidationExclusion.TYPE.ALL, UNEQUAL_COMMON_CONTIGS, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, NON_CANONICAL_HUMAN_ORDER, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), ValidationExclusion.TYPE.ALL, NON_CANONICAL_HUMAN_ORDER, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, OUT_OF_ORDER, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), ValidationExclusion.TYPE.ALL, OUT_OF_ORDER, null, false, null },
{ Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, DIFFERENT_INDICES, null, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), ValidationExclusion.TYPE.ALL, DIFFERENT_INDICES, null, true, hg19AllContigsIntervalSet }
};
}
@Test( dataProvider = "SequenceDictionaryDataProvider" )
public void testSequenceDictionaryValidation( final List<SAMSequenceRecord> firstDictionaryContigs,
final List<SAMSequenceRecord> secondDictionaryContigs,
final ValidationExclusion.TYPE validationExclusions,
final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility,
final Class expectedExceptionUponValidation,
final boolean isReadsToReferenceComparison,
final GenomeLocSortedSet intervals ) {
final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs);
final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs);
final String testDescription = String.format("First dictionary: %s Second dictionary: %s Validation exclusions: %s",
SequenceDictionaryUtils.getDictionaryAsString(firstDictionary),
SequenceDictionaryUtils.getDictionaryAsString(secondDictionary),
validationExclusions);
Exception exceptionThrown = null;
try {
SequenceDictionaryUtils.validateDictionaries(logger,
validationExclusions,
"firstDictionary",
firstDictionary,
"secondDictionary",
secondDictionary,
isReadsToReferenceComparison,
intervals);
}
catch ( Exception e ) {
exceptionThrown = e;
}
if ( expectedExceptionUponValidation != null ) {
Assert.assertTrue(exceptionThrown != null && expectedExceptionUponValidation.isInstance(exceptionThrown),
String.format("Expected exception %s but saw %s instead. %s",
expectedExceptionUponValidation.getSimpleName(),
exceptionThrown == null ? "no exception" : exceptionThrown.getClass().getSimpleName(),
testDescription));
}
else {
Assert.assertTrue(exceptionThrown == null,
String.format("Expected no exception but saw exception %s instead. %s",
exceptionThrown != null ? exceptionThrown.getClass().getSimpleName() : "none",
testDescription));
}
}
@Test( dataProvider = "SequenceDictionaryDataProvider" )
public void testSequenceDictionaryComparison( final List<SAMSequenceRecord> firstDictionaryContigs,
final List<SAMSequenceRecord> secondDictionaryContigs,
final ValidationExclusion.TYPE validationExclusions,
final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility,
final Class expectedExceptionUponValidation,
final boolean isReadsToReferenceComparison,
final GenomeLocSortedSet intervals ) {
final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs);
final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs);
final String testDescription = String.format("First dictionary: %s Second dictionary: %s",
SequenceDictionaryUtils.getDictionaryAsString(firstDictionary),
SequenceDictionaryUtils.getDictionaryAsString(secondDictionary));
final SequenceDictionaryUtils.SequenceDictionaryCompatibility reportedCompatibility =
SequenceDictionaryUtils.compareDictionaries(firstDictionary, secondDictionary);
Assert.assertTrue(reportedCompatibility == dictionaryCompatibility,
String.format("Dictionary comparison should have returned %s but instead returned %s. %s",
dictionaryCompatibility, reportedCompatibility, testDescription));
}
private SAMSequenceDictionary createSequenceDictionary( final List<SAMSequenceRecord> contigs ) {
final List<SAMSequenceRecord> clonedContigs = new ArrayList<SAMSequenceRecord>(contigs.size());
// Clone the individual SAMSequenceRecords to avoid contig-index issues with shared objects
// across multiple dictionaries in tests
for ( SAMSequenceRecord contig : contigs ) {
clonedContigs.add(contig.clone());
}
return new SAMSequenceDictionary(clonedContigs);
}
}