Merge pull request #55 from broadinstitute/dr_fix_sequence_dictionary_validation_GSA-768

Sequence dictionary validation: detect problematic contig indexing differences
This commit is contained in:
depristo 2013-02-25 12:39:56 -08:00
commit ed5aff3702
5 changed files with 468 additions and 71 deletions

View File

@ -48,6 +48,7 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
@ -254,13 +255,16 @@ public class GenomeAnalysisEngine {
// Prepare the data for traversal.
initializeDataSources();
// initialize sampleDB
initializeSampleDB();
// initialize and validate the interval list
initializeIntervals();
validateSuppliedIntervals();
// check to make sure that all sequence dictionaries are compatible with the reference's sequence dictionary
validateDataSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), rodDataSources);
// initialize sampleDB
initializeSampleDB();
// our microscheduler, which is in charge of running everything
MicroScheduler microScheduler = createMicroscheduler();
threadEfficiencyMonitor = microScheduler.getThreadEfficiencyMonitor();
@ -753,9 +757,8 @@ public class GenomeAnalysisEngine {
* @param reads Reads data source.
* @param reference Reference data source.
* @param rods a collection of the reference ordered data tracks
* @param manager manager
*/
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) {
private void validateDataSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null )
return;
@ -772,11 +775,12 @@ public class GenomeAnalysisEngine {
}
// compare the reads to the reference
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary);
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary,
"reference", referenceDictionary, true, intervals);
}
for (ReferenceOrderedDataSource rod : rods)
manager.validateTrackSequenceDictionary(rod.getName(),rod.getSequenceDictionary(),referenceDictionary);
IndexDictionaryUtils.validateTrackSequenceDictionary(rod.getName(), rod.getSequenceDictionary(), referenceDictionary, getArguments().unsafe);
}
/**
@ -858,9 +862,6 @@ public class GenomeAnalysisEngine {
genomeLocParser,
flashbackData()));
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
return dataSources;
}

View File

@ -101,7 +101,7 @@ public class IndexDictionaryUtils {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict, false, null);
}
}
}

View File

@ -53,40 +53,55 @@ public class SequenceDictionaryUtils {
//
// for detecting lexicographically sorted human references
//
private static boolean ENABLE_LEXICOGRAPHIC_REQUIREMENT_FOR_HUMAN = true;
private static final boolean ENABLE_LEXICOGRAPHIC_REQUIREMENT_FOR_HUMAN = true;
// hg18
private static SAMSequenceRecord CHR1_HG18 = new SAMSequenceRecord("chr1", 247249719);
private static SAMSequenceRecord CHR2_HG18 = new SAMSequenceRecord("chr2", 242951149);
private static SAMSequenceRecord CHR10_HG18 = new SAMSequenceRecord("chr10", 135374737);
protected static final SAMSequenceRecord CHR1_HG18 = new SAMSequenceRecord("chr1", 247249719);
protected static final SAMSequenceRecord CHR2_HG18 = new SAMSequenceRecord("chr2", 242951149);
protected static final SAMSequenceRecord CHR10_HG18 = new SAMSequenceRecord("chr10", 135374737);
// hg19
private static SAMSequenceRecord CHR1_HG19 = new SAMSequenceRecord("chr1", 249250621);
private static SAMSequenceRecord CHR2_HG19 = new SAMSequenceRecord("chr2", 243199373);
private static SAMSequenceRecord CHR10_HG19 = new SAMSequenceRecord("chr10", 135534747);
protected static final SAMSequenceRecord CHR1_HG19 = new SAMSequenceRecord("chr1", 249250621);
protected static final SAMSequenceRecord CHR2_HG19 = new SAMSequenceRecord("chr2", 243199373);
protected static final SAMSequenceRecord CHR10_HG19 = new SAMSequenceRecord("chr10", 135534747);
// b36
protected static final SAMSequenceRecord CHR1_B36 = new SAMSequenceRecord("1", 247249719);
protected static final SAMSequenceRecord CHR2_B36 = new SAMSequenceRecord("2", 242951149);
protected static final SAMSequenceRecord CHR10_B36 = new SAMSequenceRecord("10", 135374737);
// b37
protected static final SAMSequenceRecord CHR1_B37 = new SAMSequenceRecord("1", 249250621);
protected static final SAMSequenceRecord CHR2_B37 = new SAMSequenceRecord("2", 243199373);
protected static final SAMSequenceRecord CHR10_B37 = new SAMSequenceRecord("10", 135534747);
public enum SequenceDictionaryCompatibility {
IDENTICAL, // the dictionaries are identical
COMMON_SUBSET, // there exists a common subset of equivalent contigs
NO_COMMON_CONTIGS, // no overlap between dictionaries
UNEQUAL_COMMON_CONTIGS, // common subset has contigs that have the same name but aren't equivalent
UNEQUAL_COMMON_CONTIGS, // common subset has contigs that have the same name but different lengths
NON_CANONICAL_HUMAN_ORDER, // human reference detected but the order of the contigs is non-standard (lexicographic, for example)
OUT_OF_ORDER // the two dictionaries overlap but the contigs occur out of order w.r.t each other
OUT_OF_ORDER, // the two dictionaries overlap but the overlapping contigs occur in different
// orders with respect to each other
DIFFERENT_INDICES // the two dictionaries overlap and the overlapping contigs occur in the same
// order with respect to each other, but one or more of them have different
// indices in the two dictionaries. Eg., { chrM, chr1, chr2 } vs. { chr1, chr2 }
}
/**
* @param validationExclusion exclusions to validation
* @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency
*/
public static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) {
private static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) {
return ( validationExclusion == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY ||
validationExclusion == ValidationExclusion.TYPE.ALL );
}
/**
* Testings for compatbility between dict1 and dict2. If the dictionaries are incompatible, then UserExceptions are
* thrown with detailed error messages. If the engine is in permissive mode, then logger.warnings of generated instead
* Tests for compatibility between two sequence dictionaries. If the dictionaries are incompatible, then
* UserExceptions are thrown with detailed error messages. If the engine is in permissive mode, then
* logger warnings are generated instead.
*
* @param logger for warnings
* @param validationExclusion exclusions to validation
@ -94,9 +109,22 @@ public class SequenceDictionaryUtils {
* @param dict1 the sequence dictionary dict1
* @param name2 name associated with dict2
* @param dict2 the sequence dictionary dict2
* @param isReadsToReferenceComparison true if one of the dictionaries comes from a reads data source (eg., a BAM),
* and the other from a reference data source
* @param intervals the user-specified genomic intervals: only required when isReadsToReferenceComparison is true,
* otherwise can be null
*/
public static void validateDictionaries(Logger logger, ValidationExclusion.TYPE validationExclusion, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
SequenceDictionaryCompatibility type = compareDictionaries(dict1, dict2);
public static void validateDictionaries( final Logger logger,
final ValidationExclusion.TYPE validationExclusion,
final String name1,
final SAMSequenceDictionary dict1,
final String name2,
final SAMSequenceDictionary dict2,
final boolean isReadsToReferenceComparison,
final GenomeLocSortedSet intervals ) {
final SequenceDictionaryCompatibility type = compareDictionaries(dict1, dict2);
switch ( type ) {
case IDENTICAL:
return;
@ -134,15 +162,48 @@ public class SequenceDictionaryUtils {
logger.warn(ex.getMessage());
else
throw ex;
break;
}
case OUT_OF_ORDER: {
UserException ex = new UserException.IncompatibleSequenceDictionaries("Order of contigs differences, which is unsafe", name1, dict1, name2, dict2);
UserException ex = new UserException.IncompatibleSequenceDictionaries("Relative ordering of overlapping contigs differs, which is unsafe", name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage());
else
throw ex;
} break;
break;
}
case DIFFERENT_INDICES: {
// This is currently only known to be problematic when the index mismatch is between a bam and the
// reference AND when the user's intervals actually include one or more of the contigs that are
// indexed differently from the reference. In this case, the engine will fail to correctly serve
// up the reads from those contigs, so throw an exception unless unsafe operations are enabled.
if ( isReadsToReferenceComparison && intervals != null ) {
final Set<String> misindexedContigs = findMisindexedContigsInIntervals(intervals, dict1, dict2);
if ( ! misindexedContigs.isEmpty() ) {
final String msg = String.format("The following contigs included in the intervals to process have " +
"different indices in the sequence dictionaries for the reads vs. " +
"the reference: %s. As a result, the GATK engine will not correctly " +
"process reads from these contigs. You should either fix the sequence " +
"dictionaries for your reads so that these contigs have the same indices " +
"as in the sequence dictionary for your reference, or exclude these contigs " +
"from your intervals. This error can be disabled via -U %s, " +
"however this is not recommended as the GATK engine will not behave correctly.",
misindexedContigs, ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY);
final UserException ex = new UserException.IncompatibleSequenceDictionaries(msg, name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage());
else
throw ex;
}
}
break;
}
default:
throw new ReviewedStingException("Unexpected SequenceDictionaryComparison type: " + type);
}
@ -151,32 +212,33 @@ public class SequenceDictionaryUtils {
/**
* Workhorse routine that takes two dictionaries and returns their compatibility.
*
* @param dict1
* @param dict2
* @return
* @param dict1 first sequence dictionary
* @param dict2 second sequence dictionary
* @return A SequenceDictionaryCompatibility enum value describing the compatibility of the two dictionaries
*/
public static SequenceDictionaryCompatibility compareDictionaries(SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
// If there's no overlap between reads and reference, data will be bogus. Throw an exception.
if ( nonCanonicalHumanContigOrder( dict1 ) || nonCanonicalHumanContigOrder(dict2) )
public static SequenceDictionaryCompatibility compareDictionaries( final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) {
if ( nonCanonicalHumanContigOrder(dict1) || nonCanonicalHumanContigOrder(dict2) )
return SequenceDictionaryCompatibility.NON_CANONICAL_HUMAN_ORDER;
Set<String> commonContigs = getCommonContigsByName(dict1, dict2);
final Set<String> commonContigs = getCommonContigsByName(dict1, dict2);
if (commonContigs.size() == 0)
return SequenceDictionaryCompatibility.NO_COMMON_CONTIGS;
else if ( ! commonContigsAreEquivalent( commonContigs, dict1, dict2 ) )
else if ( ! commonContigsHaveSameLengths(commonContigs, dict1, dict2) )
return SequenceDictionaryCompatibility.UNEQUAL_COMMON_CONTIGS;
else if ( ! commonContigsAreInOrder( commonContigs, dict1, dict2 ) )
else if ( ! commonContigsAreInSameRelativeOrder(commonContigs, dict1, dict2) )
return SequenceDictionaryCompatibility.OUT_OF_ORDER;
else if ( commonContigs.size() == dict1.size() && commonContigs.size() == dict2.size() )
return SequenceDictionaryCompatibility.IDENTICAL;
else if ( ! commonContigsAreAtSameIndices(commonContigs, dict1, dict2) )
return SequenceDictionaryCompatibility.DIFFERENT_INDICES;
else {
return SequenceDictionaryCompatibility.COMMON_SUBSET;
}
}
/**
* Utility function that tests whether the commonContigs in both dicts are equivalent. Equivalece means
* Utility function that tests whether the commonContigs in both dicts are equivalent. Equivalence means
* that the seq records have the same length, if both are non-zero.
*
* @param commonContigs
@ -184,7 +246,7 @@ public class SequenceDictionaryUtils {
* @param dict2
* @return true if all of the common contigs are equivalent
*/
private static boolean commonContigsAreEquivalent(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
private static boolean commonContigsHaveSameLengths(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
return findDisequalCommonContigs(commonContigs, dict1, dict2) == null;
}
@ -201,7 +263,7 @@ public class SequenceDictionaryUtils {
for ( String name : commonContigs ) {
SAMSequenceRecord elt1 = dict1.getSequence(name);
SAMSequenceRecord elt2 = dict2.getSequence(name);
if ( ! SequenceRecordsAreEquivalent(elt1, elt2) )
if ( ! sequenceRecordsAreEquivalent(elt1, elt2) )
return Arrays.asList(elt1,elt2);
}
@ -216,12 +278,10 @@ public class SequenceDictionaryUtils {
* @param that
* @return
*/
private static boolean SequenceRecordsAreEquivalent(final SAMSequenceRecord me, final SAMSequenceRecord that) {
private static boolean sequenceRecordsAreEquivalent(final SAMSequenceRecord me, final SAMSequenceRecord that) {
if (me == that) return true;
if (that == null) return false;
// I don't care if the indices are difference
//if (me.getSequenceIndex() != that.getSequenceIndex()) return false;
if (me.getSequenceLength() != 0 && that.getSequenceLength() != 0 && me.getSequenceLength() != that.getSequenceLength())
return false;
@ -280,18 +340,18 @@ public class SequenceDictionaryUtils {
return elt.getSequenceLength() == rec1.getSequenceLength() || elt.getSequenceLength() == rec2.getSequenceLength();
}
/**
* Returns true if the common contigs in dict1 and dict2 are in the same order. This is accomplished by getting the
* common contigs in both dictionaries, sorting these according to their indices, and the walking through
* the sorted list to ensure that each ordered contig is equivalent
* Returns true if the common contigs in dict1 and dict2 are in the same relative order, without regard to
* absolute index position. This is accomplished by getting the common contigs in both dictionaries, sorting
* these according to their indices, and then walking through the sorted list to ensure that each ordered contig
* is equivalent
*
* @param commonContigs
* @param dict1
* @param dict2
* @return
* @param commonContigs names of the contigs common to both dictionaries
* @param dict1 first SAMSequenceDictionary
* @param dict2 second SAMSequenceDictionary
* @return true if the common contigs occur in the same relative order in both dict1 and dict2, otherwise false
*/
public static boolean commonContigsAreInOrder(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
private static boolean commonContigsAreInSameRelativeOrder(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
List<SAMSequenceRecord> list1 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict1));
List<SAMSequenceRecord> list2 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict2));
@ -321,10 +381,6 @@ public class SequenceDictionaryUtils {
return l;
}
// --------------------------------------------------------------------------------------------------------------
// Utilities for comparing the order of sequence records
// --------------------------------------------------------------------------------------------------------------
/**
* Compares sequence records by their order
*/
@ -346,6 +402,81 @@ public class SequenceDictionaryUtils {
return unsorted;
}
/**
 * Determines whether every contig shared by the two sequence dictionaries is assigned
 * the same index in each of them.
 *
 * @param commonContigs Set of names of the contigs that occur in both dictionaries
 * @param dict1 first sequence dictionary
 * @param dict2 second sequence dictionary
 * @return true if each common contig has an identical index in both dictionaries,
 *         otherwise false
 */
private static boolean commonContigsAreAtSameIndices( final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2 ) {
    for ( final String contigName : commonContigs ) {
        final int indexInFirstDict = dict1.getSequence(contigName).getSequenceIndex();
        final int indexInSecondDict = dict2.getSequence(contigName).getSequenceIndex();

        // A single index mismatch is enough to disqualify the dictionaries
        if ( indexInFirstDict != indexInSecondDict ) {
            return false;
        }
    }
    return true;
}
/**
 * Gets the set of names of the contigs found in both sequence dictionaries that have
 * different indices in the two dictionaries.
 *
 * @param commonContigs Set of names of the contigs common to both dictionaries
 * @param dict1 first sequence dictionary
 * @param dict2 second sequence dictionary
 * @return a Set containing the names of the common contigs indexed differently in
 *         dict1 vs. dict2, or an empty Set if there are no such contigs
 */
private static Set<String> getDifferentlyIndexedCommonContigs( final Set<String> commonContigs,
                                                               final SAMSequenceDictionary dict1,
                                                               final SAMSequenceDictionary dict2 ) {
    // LinkedHashSet preserves discovery order for stable, reproducible error messages
    final Set<String> mismatchedContigs = new LinkedHashSet<String>(Utils.optimumHashSize(commonContigs.size()));

    for ( final String contigName : commonContigs ) {
        final int firstIndex = dict1.getSequence(contigName).getSequenceIndex();
        final int secondIndex = dict2.getSequence(contigName).getSequenceIndex();

        if ( firstIndex != secondIndex ) {
            mismatchedContigs.add(contigName);
        }
    }

    return mismatchedContigs;
}
/**
 * Finds the names of any contigs indexed differently in the two sequence dictionaries
 * that also occur in the provided set of intervals.
 *
 * @param intervals GenomeLocSortedSet containing the intervals to check
 * @param dict1 first sequence dictionary
 * @param dict2 second sequence dictionary
 * @return a Set of the names of the contigs indexed differently in dict1 vs dict2 that
 *         also occur in the provided intervals, or an empty Set if there are no such contigs
 */
private static Set<String> findMisindexedContigsInIntervals( final GenomeLocSortedSet intervals,
                                                             final SAMSequenceDictionary dict1,
                                                             final SAMSequenceDictionary dict2 ) {
    final Set<String> differentlyIndexedContigs = getDifferentlyIndexedCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2);
    final Set<String> misindexedContigsInIntervals = new LinkedHashSet<String>(Utils.optimumHashSize(differentlyIndexedContigs.size()));

    // Membership checks against the hash-based Set are constant-time, so this
    // pass is linear in the number of intervals
    for ( final GenomeLoc interval : intervals ) {
        final String intervalContig = interval.getContig();
        if ( differentlyIndexedContigs.contains(intervalContig) ) {
            misindexedContigsInIntervals.add(intervalContig);
        }
    }

    return misindexedContigsInIntervals;
}
/**
* Returns the set of contig names found in both dicts.
@ -360,9 +491,37 @@ public class SequenceDictionaryUtils {
}
public static Set<String> getContigNames(SAMSequenceDictionary dict) {
Set<String> contigNames = new HashSet<String>((int)(dict.size() / 0.75f) + 1, 0.75f);
Set<String> contigNames = new HashSet<String>(Utils.optimumHashSize(dict.size()));
for (SAMSequenceRecord dictionaryEntry : dict.getSequences())
contigNames.add(dictionaryEntry.getSequenceName());
return contigNames;
}
/**
 * Returns a compact String representation of the sequence dictionary it's passed
 *
 * The format of the returned String is:
 * [ contig1Name(length: contig1Length) contig2Name(length: contig2Length) ... ]
 *
 * @param dict a non-null SAMSequenceDictionary
 * @return A String containing all of the contig names and lengths from the sequence dictionary it's passed
 * @throws IllegalArgumentException if dict is null
 */
public static String getDictionaryAsString( final SAMSequenceDictionary dict ) {
    if ( dict == null ) {
        throw new IllegalArgumentException("Sequence dictionary must be non-null");
    }

    final StringBuilder builder = new StringBuilder("[ ");

    for ( final SAMSequenceRecord sequenceRecord : dict.getSequences() ) {
        builder.append(sequenceRecord.getSequenceName())
               .append("(length:")
               .append(sequenceRecord.getSequenceLength())
               .append(") ");
    }

    return builder.append("]").toString();
}
}

View File

@ -64,22 +64,18 @@ public class DictionaryConsistencyIntegrationTest extends WalkerTest {
}
@Test public void failBAM1() { executeTest("b36bam-v-b37", testBAM(b37KGReference, b36BAM)); }
@Test public void failBAM2() { executeTest("b36bam-v-hg18", testBAM(hg18Reference, b36BAM)); }
@Test public void failBAM3() { executeTest("b36bam-v-hg19", testBAM(hg19Reference, b36BAM)); }
@Test public void failBAM4() { executeTest("b36bam-v-lexhg18", testBAM(lexHG18, b36BAM, UserException.LexicographicallySortedSequenceDictionary.class)); }
@Test public void failBAM1() { executeTest("b36bam-v-b37", testBAM(b37KGReference, b36BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM2() { executeTest("b36bam-v-hg18", testBAM(hg18Reference, b36BAM, "chr1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM3() { executeTest("b36bam-v-hg19", testBAM(hg19Reference, b36BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM4() { executeTest("b36bam-v-lexhg18", testBAM(lexHG18, b36BAM, "chr1", UserException.LexicographicallySortedSequenceDictionary.class)); }
@Test public void failBAM5() { executeTest("hg18bam-v-b36", testBAM(b36KGReference, hg18BAM)); }
@Test public void failBAM6() { executeTest("hg18bam-v-b37", testBAM(b37KGReference, hg18BAM)); }
@Test public void failBAM7() { executeTest("hg18bam-v-hg19", testBAM(hg19Reference, hg18BAM)); }
@Test public void failBAM8() { executeTest("hg18bam-v-lexhg18", testBAM(lexHG18, hg18BAM, UserException.LexicographicallySortedSequenceDictionary.class)); }
@Test public void failBAM5() { executeTest("hg18bam-v-b36", testBAM(b36KGReference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM6() { executeTest("hg18bam-v-b37", testBAM(b37KGReference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM7() { executeTest("hg18bam-v-hg19", testBAM(hg19Reference, hg18BAM, "1", UserException.IncompatibleSequenceDictionaries.class)); }
@Test public void failBAM8() { executeTest("hg18bam-v-lexhg18", testBAM(lexHG18, hg18BAM, "chr1", UserException.LexicographicallySortedSequenceDictionary.class)); }
private WalkerTest.WalkerTestSpec testBAM(String ref, String bam) {
return testBAM(ref, bam, UserException.IncompatibleSequenceDictionaries.class);
}
private WalkerTest.WalkerTestSpec testBAM(String ref, String bam, Class c) {
return new WalkerTest.WalkerTestSpec("-T UnifiedGenotyper -I " + bam + " -R " + ref + " -L 1:10,000,000-11,000,000 -o %s",
private WalkerTest.WalkerTestSpec testBAM(String ref, String bam, String contig, Class c) {
return new WalkerTest.WalkerTestSpec("-T UnifiedGenotyper -I " + bam + " -R " + ref + " -L " + contig + ":10,000,000-11,000,000 -o %s",
1, c);
}

View File

@ -0,0 +1,241 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import static org.broadinstitute.sting.utils.SequenceDictionaryUtils.*;
import static org.broadinstitute.sting.utils.SequenceDictionaryUtils.SequenceDictionaryCompatibility.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class SequenceDictionaryUtilsUnitTest extends BaseTest {
private static Logger logger = Logger.getLogger(SequenceDictionaryUtilsUnitTest.class);
@DataProvider( name = "SequenceDictionaryDataProvider" )
public Object[][] generateSequenceDictionaryTestData() {
final SAMSequenceRecord CHRM_HG19 = new SAMSequenceRecord("chrM", 16571);
final SAMSequenceRecord CHR_NONSTANDARD1 = new SAMSequenceRecord("NonStandard1", 8675309);
final SAMSequenceRecord CHR_NONSTANDARD2 = new SAMSequenceRecord("NonStandard2", 8675308);
final Class NO_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
final Class UNEQUAL_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
final Class NON_CANONICAL_HUMAN_ORDER_EXCEPTION = UserException.LexicographicallySortedSequenceDictionary.class;
final Class OUT_OF_ORDER_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
final Class DIFFERENT_INDICES_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
final List<SAMSequenceRecord> hg19Sequences = Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR10_HG19);
final GenomeLocParser hg19GenomeLocParser = new GenomeLocParser(new SAMSequenceDictionary(hg19Sequences));
final List<GenomeLoc> hg19AllContigsIntervals = Arrays.asList(hg19GenomeLocParser.createGenomeLoc("chrM", 0, 1),
hg19GenomeLocParser.createGenomeLoc("chr1", 0, 1),
hg19GenomeLocParser.createGenomeLoc("chr2", 0, 1),
hg19GenomeLocParser.createGenomeLoc("chr10", 0, 1));
final List<GenomeLoc> hg19PartialContigsIntervals = Arrays.asList(hg19GenomeLocParser.createGenomeLoc("chrM", 0, 1),
hg19GenomeLocParser.createGenomeLoc("chr1", 0, 1));
final GenomeLocSortedSet hg19AllContigsIntervalSet = new GenomeLocSortedSet(hg19GenomeLocParser, hg19AllContigsIntervals);
final GenomeLocSortedSet hg19PartialContigsIntervalSet = new GenomeLocSortedSet(hg19GenomeLocParser, hg19PartialContigsIntervals);
return new Object[][] {
// Identical dictionaries:
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19), null, IDENTICAL, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, IDENTICAL, null, false, null },
{ Arrays.asList(CHR1_B37), Arrays.asList(CHR1_B37), null, IDENTICAL, null, false, null },
{ Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, IDENTICAL, null, false, null },
// Dictionaries with a common subset:
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHRM_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD1), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD2), null, COMMON_SUBSET, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, COMMON_SUBSET, null, false, null },
// Dictionaries with no common contigs:
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), null, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
// Dictionaries with unequal common contigs:
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B36), Arrays.asList(CHR1_B37), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), Arrays.asList(CHR1_B36, CHR2_B36, CHR10_B36), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18, CHR_NONSTANDARD2), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18), null, UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, null },
// One or both dictionaries in non-canonical human order:
{ Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37), Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36), Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36), null, NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, null },
// Dictionaries with a common subset, but different relative ordering within that subset:
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHRM_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR2_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHRM_HG19, CHR1_HG19), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
{ Arrays.asList(CHR1_B37, CHR2_B37), Arrays.asList(CHR2_B37, CHR1_B37), null, OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, null },
// Dictionaries with a common subset in the same relative order, but with different indices.
// This will only throw an exception during validation if isReadsToReferenceComparison is true,
// and there are intervals overlapping the misindexed contigs:
// These have isReadsToReferenceComparison == true and overlapping intervals, so we expect an exception:
{ Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), null, DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, hg19AllContigsIntervalSet },
// These have isReadsToReferenceComparison == true but no overlapping intervals, so we don't expect an exception:
{ Arrays.asList(CHR2_HG19, CHR10_HG19), Arrays.asList(CHR10_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), null, DIFFERENT_INDICES, null, true, hg19PartialContigsIntervalSet },
// These have isReadsToReferenceComparison == false, so we don't expect an exception:
{ Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), null, DIFFERENT_INDICES, null, false, hg19AllContigsIntervalSet },
{ Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19), null, DIFFERENT_INDICES, null, false, hg19AllContigsIntervalSet },
// Tests for validation exclusions. Note that errors resulting from NO_COMMON_CONTIGs cannot be suppressed
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), ValidationExclusion.TYPE.ALL, NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, null },
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, UNEQUAL_COMMON_CONTIGS, null, false, null },
{ Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), ValidationExclusion.TYPE.ALL, UNEQUAL_COMMON_CONTIGS, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, NON_CANONICAL_HUMAN_ORDER, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), ValidationExclusion.TYPE.ALL, NON_CANONICAL_HUMAN_ORDER, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, OUT_OF_ORDER, null, false, null },
{ Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), ValidationExclusion.TYPE.ALL, OUT_OF_ORDER, null, false, null },
{ Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY, DIFFERENT_INDICES, null, true, hg19AllContigsIntervalSet },
{ Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), ValidationExclusion.TYPE.ALL, DIFFERENT_INDICES, null, true, hg19AllContigsIntervalSet }
};
}
@Test( dataProvider = "SequenceDictionaryDataProvider" )
public void testSequenceDictionaryValidation( final List<SAMSequenceRecord> firstDictionaryContigs,
                                              final List<SAMSequenceRecord> secondDictionaryContigs,
                                              final ValidationExclusion.TYPE validationExclusions,
                                              final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility,
                                              final Class expectedExceptionUponValidation,
                                              final boolean isReadsToReferenceComparison,
                                              final GenomeLocSortedSet intervals ) {
    // Build fresh dictionaries per case so contig records shared between test cases
    // cannot carry index state from one dictionary into another
    final SAMSequenceDictionary dict1 = createSequenceDictionary(firstDictionaryContigs);
    final SAMSequenceDictionary dict2 = createSequenceDictionary(secondDictionaryContigs);

    // Human-readable description of this case, appended to every assertion message
    final String caseDescription = String.format("First dictionary: %s Second dictionary: %s Validation exclusions: %s",
                                                 SequenceDictionaryUtils.getDictionaryAsString(dict1),
                                                 SequenceDictionaryUtils.getDictionaryAsString(dict2),
                                                 validationExclusions);

    // Run validation and capture any exception it throws; whether one is expected
    // depends on the data-provider row
    Exception caught = null;
    try {
        SequenceDictionaryUtils.validateDictionaries(logger,
                                                     validationExclusions,
                                                     "firstDictionary",
                                                     dict1,
                                                     "secondDictionary",
                                                     dict2,
                                                     isReadsToReferenceComparison,
                                                     intervals);
    }
    catch ( Exception e ) {
        caught = e;
    }

    if ( expectedExceptionUponValidation == null ) {
        // This case should validate cleanly
        Assert.assertTrue(caught == null,
                          String.format("Expected no exception but saw exception %s instead. %s",
                                        caught != null ? caught.getClass().getSimpleName() : "none",
                                        caseDescription));
    }
    else {
        // Validation must fail with (a subtype of) the expected exception class
        final boolean sawExpectedException = caught != null && expectedExceptionUponValidation.isInstance(caught);
        Assert.assertTrue(sawExpectedException,
                          String.format("Expected exception %s but saw %s instead. %s",
                                        expectedExceptionUponValidation.getSimpleName(),
                                        caught == null ? "no exception" : caught.getClass().getSimpleName(),
                                        caseDescription));
    }
}
@Test( dataProvider = "SequenceDictionaryDataProvider" )
public void testSequenceDictionaryComparison( final List<SAMSequenceRecord> firstDictionaryContigs,
                                              final List<SAMSequenceRecord> secondDictionaryContigs,
                                              final ValidationExclusion.TYPE validationExclusions,
                                              final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility,
                                              final Class expectedExceptionUponValidation,
                                              final boolean isReadsToReferenceComparison,
                                              final GenomeLocSortedSet intervals ) {
    // Build per-case dictionaries from cloned contig records (see createSequenceDictionary())
    final SAMSequenceDictionary dict1 = createSequenceDictionary(firstDictionaryContigs);
    final SAMSequenceDictionary dict2 = createSequenceDictionary(secondDictionaryContigs);

    // Description appended to the assertion message to identify the failing case
    final String caseDescription = String.format("First dictionary: %s Second dictionary: %s",
                                                 SequenceDictionaryUtils.getDictionaryAsString(dict1),
                                                 SequenceDictionaryUtils.getDictionaryAsString(dict2));

    // compareDictionaries() only classifies the relationship; it must report the
    // compatibility category this data-provider row declares
    final SequenceDictionaryUtils.SequenceDictionaryCompatibility observedCompatibility =
        SequenceDictionaryUtils.compareDictionaries(dict1, dict2);

    Assert.assertTrue(observedCompatibility == dictionaryCompatibility,
                      String.format("Dictionary comparison should have returned %s but instead returned %s. %s",
                                    dictionaryCompatibility, observedCompatibility, caseDescription));
}
/**
 * Builds a SAMSequenceDictionary containing clones of the given contig records.
 *
 * Each record is cloned before insertion because the test data providers reuse the
 * same SAMSequenceRecord constants across many dictionaries; sharing the actual
 * instances would let one dictionary's contig-index assignments bleed into another.
 *
 * @param contigs contig records to include, in order; not modified by this method
 * @return a new dictionary over defensive copies of {@code contigs}
 */
private SAMSequenceDictionary createSequenceDictionary( final List<SAMSequenceRecord> contigs ) {
    final List<SAMSequenceRecord> dictionaryContigs = new ArrayList<SAMSequenceRecord>(contigs.size());
    for ( final SAMSequenceRecord originalContig : contigs ) {
        dictionaryContigs.add(originalContig.clone());
    }
    return new SAMSequenceDictionary(dictionaryContigs);
}
}