From ccaddefa199fde2191451f4da3e004e29d321c89 Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Tue, 10 Nov 2015 14:22:47 -0500 Subject: [PATCH] Validate VCF with sequence dictionary --- .../walkers/bqsr/BQSRIntegrationTest.java | 2 +- .../ValidateVariantsIntegrationTest.java | 38 ++++++- .../gatk/engine/GenomeAnalysisEngine.java | 4 +- .../engine/arguments/CramIntegrationTest.java | 12 +-- .../engine/crypt/GATKKeyIntegrationTest.java | 2 +- .../filters/BadReadGroupsIntegrationTest.java | 4 +- .../gatk/utils/SequenceDictionaryUtils.java | 101 ++++++++++-------- .../gatk/utils/ValidationExclusion.java | 2 +- .../refdata/tracks/IndexDictionaryUtils.java | 2 +- .../gatk/utils/refdata/tracks/RMDTrack.java | 2 +- .../utils/refdata/tracks/RMDTrackBuilder.java | 44 +++++++- .../src/test/resources/exampleBAM.bam | Bin 3635 -> 3609 bytes .../src/test/resources/exampleBAM.bam.bai | Bin 232 -> 232 bytes .../resources/exampleCRAM-nobai-nocrai.cram | Bin 5281 -> 13353 bytes .../resources/exampleCRAM-nobai-withcrai.cram | Bin 5281 -> 13353 bytes .../exampleCRAM-nobai-withcrai.cram.crai | Bin 44 -> 47 bytes .../src/test/resources/exampleCRAM.cram | Bin 5281 -> 5281 bytes .../src/test/resources/exampleCRAM.cram.crai | Bin 0 -> 44 bytes 18 files changed, 149 insertions(+), 64 deletions(-) create mode 100644 public/gatk-utils/src/test/resources/exampleCRAM.cram.crai diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/bqsr/BQSRIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/bqsr/BQSRIntegrationTest.java index a0382df91..de2d7ae30 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/bqsr/BQSRIntegrationTest.java @@ -120,7 +120,7 @@ public class BQSRIntegrationTest extends WalkerTest { {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "0b5a8e259e997e4c7b5836d4c28e6f4d")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "281682124584ab384f23359934df0c3b")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0a92fdff5fd26227c29d34eda5a32f49")}, - {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "90d8c24077e8ae9a0037a9aad5f09e31")}, + {new BQSRTest(hg18Reference, privateTestDir + "originalQuals.1kg.chr1.1-1K.1RG.bam", "chr1:1-1,000", " -OQ", "90d8c24077e8ae9a0037a9aad5f09e31")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "c41ef02c640ef1fed4bfc03b9b33b616")}, {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "b577cd1d529425f66db49620db09fdca")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "0b5a8e259e997e4c7b5836d4c28e6f4d")}, diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java index f815d8542..7d2f2d0c6 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -189,7 +189,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { @Test public void testComplexEvents() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString("complexEvents.vcf", "ALL"), + baseTestString("complexEvents.vcf", "ALL", DEFAULT_REGION, b37KGReference), 0, Arrays.asList(EMPTY_MD5) ); @@ -197,6 +197,15 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { executeTest("test validating complex events", spec); } + @Test(description = "Checks out of order header contigs") + public void testOutOfOrderHeaderContigsError() { + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("complexEvents-outOfOrder.vcf", "ALL", DEFAULT_REGION, b37KGReference), + 0, UserException.LexicographicallySortedSequenceDictionary.class); + executeTest("test out of order header contigs error", spec); + } + @Test(description = "Fixes '''bug''' reported in story https://www.pivotaltracker.com/story/show/68725164") public void testUnusedAlleleFix() { WalkerTestSpec spec = new WalkerTestSpec( @@ -255,4 +264,31 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { // Set the log level back logger.setLevel(level); } + + @Test(description = "Checks '''issue''' reported in issue https://github.com/broadinstitute/gsa-unstable/issues/964") + public void testWrongContigHeaderLengthError() { + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("longAlleles-wrongLength.vcf", "ALL", "1", b37KGReference), + 0, UserException.IncompatibleSequenceDictionaries.class); + executeTest("test wrong header contig length error", spec); + } + + @Test + public void testAllowWrongContigHeaderLengthDictIncompat() { + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("longAlleles-wrongLength.vcf", "ALL", "1", b37KGReference) + " --reference_window_stop 208 -U ALLOW_SEQ_DICT_INCOMPATIBILITY ", + 0, Arrays.asList(EMPTY_MD5)); + executeTest("test to allow wrong header contig length, not checking dictionary incompatibility", spec); + } + + @Test + public void testAllowWrongContigHeaderLength() { + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("longAlleles-wrongLength.vcf", "ALL", "1", b37KGReference) + " --reference_window_stop 208 -U ", + 0, Arrays.asList(EMPTY_MD5)); + executeTest("test to allow wrong header contig length, no compatibility checks", spec); + } } diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java index 86f020394..be2bf610a 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java @@ -886,8 +886,8 @@ public class GenomeAnalysisEngine { // Compile a set of sequence names that exist in the BAM files. SAMSequenceDictionary readsDictionary = reads.getHeader().getSequenceDictionary(); - if (readsDictionary.size() == 0) { - logger.info("Reads file is unmapped. Skipping validation against reference."); + if (readsDictionary.isEmpty()) { + logger.info("Reads file is unmapped. Skipping validation against reference."); return; } diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java index f1e832d2c..b8f4fffc6 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java @@ -39,18 +39,18 @@ public class CramIntegrationTest extends WalkerTest { @DataProvider(name="cramData") public Object[][] getCRAMData() { return new Object[][] { - {"PrintReads", "exampleBAM.bam", "", "cram", "95cce9111a377d8e84fd076f7f0744df"}, - {"PrintReads", "exampleCRAM.cram", "", "cram", "95cce9111a377d8e84fd076f7f0744df"}, - {"PrintReads", "exampleCRAM.cram", "", "bam", "7c716d5103c20e95a8115b3c2848bf0d"}, - {"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "5cc9cac96f7f9819688d47a28ed97778"}, + {"PrintReads", "exampleBAM.bam", "", "cram", "424c725c4ffe7215e358ecf5abd5e5e8"}, + {"PrintReads", "exampleCRAM.cram", "", "cram", "424c725c4ffe7215e358ecf5abd5e5e8"}, + {"PrintReads", "exampleCRAM.cram", "", "bam", "247805098718dd74b8a871796424d359"}, + {"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "a5b26631cd89f86f6184bcac7bc9c9ca"}, {"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"}, {"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, {"CountReads", "exampleCRAM.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"}, {"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, - {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"}, + {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "24dbd14b60220461f47ec5517962cb7f"}, {"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"}, {"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"}, - {"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"}, + {"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "84bee5063d8fa0d07e7c3ff7e825ae3a"}, {"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"}, {"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"}, }; diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java index 9e3184f12..2d2ce2bd3 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java @@ -38,7 +38,7 @@ public class GATKKeyIntegrationTest extends WalkerTest { public static final String BASE_COMMAND = String.format("-T TestPrintReadsWalker -R %s -I %s -o %%s", publicTestDir + "exampleFASTA.fasta", publicTestDir + "exampleBAM.bam"); - public static final String MD5_UPON_SUCCESSFUL_RUN = "69b6432aed2d0cebf02e5d414f654425"; + public static final String MD5_UPON_SUCCESSFUL_RUN = "462656ec9632f8c21ee534d35093c3f8"; private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class expectedException, String md5 ) { diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/BadReadGroupsIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/BadReadGroupsIntegrationTest.java index 870277d1d..9f21233aa 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/BadReadGroupsIntegrationTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/BadReadGroupsIntegrationTest.java @@ -35,7 +35,7 @@ public class BadReadGroupsIntegrationTest extends WalkerTest { @Test public void testMissingReadGroup() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T TestPrintReadsWalker -R " + b36KGReference + " -I " + privateTestDir + "missingReadGroup.bam -o /dev/null", + "-T TestPrintReadsWalker -R " + hg18Reference + " -I " + privateTestDir + "missingReadGroup.bam -o /dev/null", 0, UserException.ReadMissingReadGroup.class); executeTest("test Missing Read Group", spec); @@ -44,7 +44,7 @@ public class BadReadGroupsIntegrationTest extends WalkerTest { @Test public void testUndefinedReadGroup() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T TestPrintReadsWalker -R " + b36KGReference + " -I " + privateTestDir + "undefinedReadGroup.bam -o /dev/null", + "-T TestPrintReadsWalker -R " + hg18Reference + " -I " + privateTestDir + "undefinedReadGroup.bam -o /dev/null", 0, UserException.ReadHasUndefinedReadGroup.class); executeTest("test Undefined Read Group", spec); diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java index 160abd38d..71d87409f 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequenceDictionaryUtils.java @@ -25,6 +25,7 @@ package org.broadinstitute.gatk.utils; +import java.math.BigInteger; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; @@ -79,7 +80,7 @@ public class SequenceDictionaryUtils { IDENTICAL, // the dictionaries are identical COMMON_SUBSET, // there exists a common subset of equivalent contigs NO_COMMON_CONTIGS, // no overlap between dictionaries - UNEQUAL_COMMON_CONTIGS, // common subset has contigs that have the same name but different lengths + UNEQUAL_COMMON_CONTIGS, // common subset has contigs that have the same name but different lengths and/or MD5s NON_CANONICAL_HUMAN_ORDER, // human reference detected but the order of the contigs is non-standard (lexicographic, for examine) OUT_OF_ORDER, // the two dictionaries overlap but the overlapping contigs occur in different // orders with respect to each other @@ -92,7 +93,7 @@ public class SequenceDictionaryUtils { * @param validationExclusion exclusions to validation * @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency */ - private static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) { + private static boolean allowNonFatalIncompabilities(final ValidationExclusion.TYPE validationExclusion) { return ( validationExclusion == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY || validationExclusion == ValidationExclusion.TYPE.ALL ); } @@ -133,15 +134,23 @@ public class SequenceDictionaryUtils { throw new UserException.IncompatibleSequenceDictionaries("No overlapping contigs found", name1, dict1, name2, dict2); case UNEQUAL_COMMON_CONTIGS: { - List x = findDisequalCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2); - SAMSequenceRecord elt1 = x.get(0); - SAMSequenceRecord elt2 = x.get(1); + final List x = findNotEqualCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2); + final SAMSequenceRecord elt1 = x.get(0); + final SAMSequenceRecord elt2 = x.get(1); + + String msg = "Found contigs with the same name but different lengths"; + String contig1 = " contig " + name1 + " is named " + elt1.getSequenceName() + " with length " + Integer.toString(elt1.getSequenceLength()); + if ( elt1.getMd5() != null ) + contig1 += " and MD5 " + elt1.getMd5(); + String contig2 = " contig " + name2 + " is named " + elt2.getSequenceName() + " with length " + Integer.toString(elt2.getSequenceLength()); + if ( elt2.getMd5() != null ) + contig2 += " and MD5 " + elt2.getMd5(); + if ( elt1.getMd5() != null || elt2.getMd5() != null ) + msg += " or MD5s:"; + msg += "\n" + contig1 + "\n" + contig2; // todo -- replace with toString when SAMSequenceRecord has a nice toString routine - UserException ex = new UserException.IncompatibleSequenceDictionaries(String.format("Found contigs with the same name but different lengths:\n contig %s = %s / %d\n contig %s = %s / %d", - name1, elt1.getSequenceName(), elt1.getSequenceLength(), - name2, elt2.getSequenceName(), elt2.getSequenceLength()), - name1, dict1, name2, dict2); + final UserException ex = new UserException.IncompatibleSequenceDictionaries(msg, name1, dict1, name2, dict2); if ( allowNonFatalIncompabilities(validationExclusion) ) logger.warn(ex.getMessage()); @@ -226,7 +235,7 @@ public class SequenceDictionaryUtils { final Set commonContigs = getCommonContigsByName(dict1, dict2); - if (commonContigs.size() == 0) + if (commonContigs.isEmpty()) return SequenceDictionaryCompatibility.NO_COMMON_CONTIGS; else if ( ! commonContigsHaveSameLengths(commonContigs, dict1, dict2) ) return SequenceDictionaryCompatibility.UNEQUAL_COMMON_CONTIGS; @@ -250,12 +259,12 @@ public class SequenceDictionaryUtils { * @param dict2 * @return true if all of the common contigs are equivalent */ - private static boolean commonContigsHaveSameLengths(Set commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { - return findDisequalCommonContigs(commonContigs, dict1, dict2) == null; + private static boolean commonContigsHaveSameLengths(final Set commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) { + return findNotEqualCommonContigs(commonContigs, dict1, dict2) == null; } /** - * Returns a List(x,y) that contains two disequal sequence records among the common contigs in both dicts. Returns + * Returns a List(x,y) that contains two sequence records that are not equal among the common contigs in both dicts. Returns * null if all common contigs are equivalent * * @param commonContigs @@ -263,7 +272,7 @@ public class SequenceDictionaryUtils { * @param dict2 * @return */ - private static List findDisequalCommonContigs(Set commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { + private static List findNotEqualCommonContigs(final Set commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) { for ( String name : commonContigs ) { SAMSequenceRecord elt1 = dict1.getSequence(name); SAMSequenceRecord elt2 = dict2.getSequence(name); @@ -275,32 +284,32 @@ public class SequenceDictionaryUtils { } /** - * Helper routine that returns two sequence records are equivalent, defined as having the same name and - * lengths, if both are non-zero + * Helper routine that determines if two sequence records are equivalent, defined as having the same name, + * lengths (if both are non-zero) and MD5 (if present) * - * @param me - * @param that - * @return + * @param record1 a SAMSequenceRecord + * @param record2 a SAMSequenceRecord + * @return true if the records are equivalent, false otherwise */ - private static boolean sequenceRecordsAreEquivalent(final SAMSequenceRecord me, final SAMSequenceRecord that) { - if (me == that) return true; - if (that == null) return false; + private static boolean sequenceRecordsAreEquivalent(final SAMSequenceRecord record1, final SAMSequenceRecord record2) { + if ( record1 == record2 ) return true; + if ( record1 == null || record2 == null ) return false; - if (me.getSequenceLength() != 0 && that.getSequenceLength() != 0 && me.getSequenceLength() != that.getSequenceLength()) + // compare length + if ( record1.getSequenceLength() != 0 && record2.getSequenceLength() != 0 && record1.getSequenceLength() != record2.getSequenceLength() ) return false; - // todo -- reenable if we want to be really strict here -// if (me.getExtendedAttribute(SAMSequenceRecord.MD5_TAG) != null && that.getExtendedAttribute(SAMSequenceRecord.MD5_TAG) != null) { -// final BigInteger thisMd5 = new BigInteger((String)me.getExtendedAttribute(SAMSequenceRecord.MD5_TAG), 16); -// final BigInteger thatMd5 = new BigInteger((String)that.getExtendedAttribute(SAMSequenceRecord.MD5_TAG), 16); -// if (!thisMd5.equals(thatMd5)) { -// return false; -// } -// } -// else { - if (me.getSequenceName() != that.getSequenceName()) - return false; // Compare using == since we intern() the Strings -// } + // compare name + if ( !record1.getSequenceName().equals(record2.getSequenceName() )) + return false; + + // compare MD5 + if ( record1.getMd5() != null && record2.getMd5() != null ){ + final BigInteger firstMd5 = new BigInteger(record1.getMd5(), 16); + final BigInteger secondMd5 = new BigInteger(record2.getMd5(), 16); + if ( !firstMd5.equals(secondMd5) ) + return false; + } return true; } @@ -313,13 +322,13 @@ public class SequenceDictionaryUtils { * @param dict * @return */ - private static boolean nonCanonicalHumanContigOrder(SAMSequenceDictionary dict) { + private static boolean nonCanonicalHumanContigOrder(final SAMSequenceDictionary dict) { if ( ! ENABLE_LEXICOGRAPHIC_REQUIREMENT_FOR_HUMAN ) // if we don't want to enable this test, just return false return false; SAMSequenceRecord chr1 = null, chr2 = null, chr10 = null; - for ( SAMSequenceRecord elt : dict.getSequences() ) { + for ( final SAMSequenceRecord elt : dict.getSequences() ) { if ( isHumanSeqRecord(elt, CHR1_HG18, CHR1_HG19 ) ) chr1 = elt; if ( isHumanSeqRecord(elt, CHR2_HG18, CHR2_HG19 ) ) chr2 = elt; if ( isHumanSeqRecord(elt, CHR10_HG18, CHR10_HG19 ) ) chr10 = elt; @@ -355,7 +364,7 @@ public class SequenceDictionaryUtils { * @param dict2 second SAMSequenceDictionary * @return true if the common contigs occur in the same relative order in both dict1 and dict2, otherwise false */ - private static boolean commonContigsAreInSameRelativeOrder(Set commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { + private static boolean commonContigsAreInSameRelativeOrder(final Set commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2) { List list1 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict1)); List list2 = sortSequenceListByIndex(getSequencesOfName(commonContigs, dict2)); @@ -376,8 +385,8 @@ public class SequenceDictionaryUtils { * @param dict * @return */ - private static List getSequencesOfName(Set commonContigs, SAMSequenceDictionary dict) { - List l = new ArrayList(commonContigs.size()); + private static List getSequencesOfName(final Set commonContigs, final SAMSequenceDictionary dict) { + final List l = new ArrayList(commonContigs.size()); for ( String name : commonContigs ) { l.add(dict.getSequence(name) ); } @@ -401,7 +410,7 @@ public class SequenceDictionaryUtils { * @param unsorted * @return */ - private static List sortSequenceListByIndex(List unsorted) { + private static List sortSequenceListByIndex(final List unsorted) { Collections.sort(unsorted, new CompareSequenceRecordsByIndex()); return unsorted; } @@ -418,8 +427,8 @@ public class SequenceDictionaryUtils { */ private static boolean commonContigsAreAtSameIndices( final Set commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2 ) { for ( String commonContig : commonContigs ) { - SAMSequenceRecord dict1Record = dict1.getSequence(commonContig); - SAMSequenceRecord dict2Record = dict2.getSequence(commonContig); + final SAMSequenceRecord dict1Record = dict1.getSequence(commonContig); + final SAMSequenceRecord dict2Record = dict2.getSequence(commonContig); // Each common contig must have the same index in both dictionaries if ( dict1Record.getSequenceIndex() != dict2Record.getSequenceIndex() ) { @@ -489,13 +498,13 @@ public class SequenceDictionaryUtils { * @return */ public static Set getCommonContigsByName(SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) { - Set intersectingSequenceNames = getContigNames(dict1); + final Set intersectingSequenceNames = getContigNames(dict1); intersectingSequenceNames.retainAll(getContigNames(dict2)); return intersectingSequenceNames; } public static Set getContigNames(SAMSequenceDictionary dict) { - Set contigNames = new HashSet(Utils.optimumHashSize(dict.size())); + final Set contigNames = new HashSet(Utils.optimumHashSize(dict.size())); for (SAMSequenceRecord dictionaryEntry : dict.getSequences()) contigNames.add(dictionaryEntry.getSequenceName()); return contigNames; @@ -515,7 +524,7 @@ public class SequenceDictionaryUtils { throw new IllegalArgumentException("Sequence dictionary must be non-null"); } - StringBuilder s = new StringBuilder("[ "); + final StringBuilder s = new StringBuilder("[ "); for ( SAMSequenceRecord dictionaryEntry : dict.getSequences() ) { s.append(dictionaryEntry.getSequenceName()); diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/ValidationExclusion.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/ValidationExclusion.java index 08d84b084..c09ebe4cc 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/ValidationExclusion.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/ValidationExclusion.java @@ -40,7 +40,7 @@ public class ValidationExclusion { ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file - ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities + ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompatibilities LENIENT_VCF_PROCESSING, // allow non-standard values for standard VCF header lines. Don't worry about size differences between header and values, etc. @EnumerationArgumentDefault // set the ALL value to the default value, so if they specify just -U, we get the ALL ALL // do not check for all of the above conditions, DEFAULT diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/IndexDictionaryUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/IndexDictionaryUtils.java index 42cf87433..30bd8ec6f 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/IndexDictionaryUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/IndexDictionaryUtils.java @@ -102,7 +102,7 @@ public class IndexDictionaryUtils { final SAMSequenceDictionary referenceDict, final ValidationExclusion.TYPE validationExclusionType ) { // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation - if (trackDict == null || trackDict.size() == 0) + if (trackDict == null || trackDict.isEmpty()) logger.warn("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation"); else { Set trackSequences = new TreeSet(); diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrack.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrack.java index 7863b022d..76f2046af 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrack.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrack.java @@ -93,7 +93,7 @@ public class RMDTrack { * @param dict the sam sequence dictionary * @param codec the feature codec we use to decode this type */ - public RMDTrack(Class type, String name, File file, AbstractFeatureReader reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) { + public RMDTrack(final Class type, final String name, final File file, final AbstractFeatureReader reader, final SAMSequenceDictionary dict, final GenomeLocParser genomeLocParser, final FeatureCodec codec) { this.type = type; this.name = name; this.file = file; diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrackBuilder.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrackBuilder.java index 34be252a7..0b84c05a2 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrackBuilder.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/refdata/tracks/RMDTrackBuilder.java @@ -26,6 +26,9 @@ package org.broadinstitute.gatk.utils.refdata.tracks; import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.variant.vcf.VCFContigHeaderLine; +import htsjdk.variant.vcf.VCFHeader; import org.apache.log4j.Logger; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.FeatureCodec; @@ -34,6 +37,7 @@ import htsjdk.tribble.TribbleException; import htsjdk.tribble.index.Index; import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.util.LittleEndianOutputStream; +import org.broadinstitute.gatk.utils.SequenceDictionaryUtils; import org.broadinstitute.gatk.utils.commandline.ArgumentTypeDescriptor; import org.broadinstitute.gatk.utils.commandline.Tags; import org.broadinstitute.gatk.utils.ValidationExclusion; @@ -49,6 +53,8 @@ import org.broadinstitute.gatk.utils.instrumentation.Sizeof; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Map; @@ -131,7 +137,7 @@ public class RMDTrackBuilder { // extends PluginManager { * * @return an instance of the track */ - public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) { + public RMDTrack createInstanceOfTrack(final RMDTriplet fileDescriptor) { String name = fileDescriptor.getName(); File inputFile = new File(fileDescriptor.getFile()); @@ -146,9 +152,43 @@ public class RMDTrackBuilder { // extends PluginManager { else pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType()); if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); + + validateVariantAgainstSequenceDictionary(name, descriptor.getName(), pair.first, pair.second); + return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name, inputFile)); } + /** + * Validate the VCF dictionary against the sequence dictionary. + * + * @param name the name of this specific track + * @param descriptorName the name of the feature + * @param reader the feature reader to use as the underlying data source + * @param dict the sam sequence dictionary + */ + private void validateVariantAgainstSequenceDictionary(final String name, final String descriptorName, final AbstractFeatureReader reader, final SAMSequenceDictionary dict ) throws UserException { + // only process if the variant is a VCF + if ( name.equals("variant") && descriptorName.equals("VCF") ){ + if ( reader != null && dict != null && reader.getHeader() != null ){ + final List contigs = ((VCFHeader) reader.getHeader()).getContigLines(); + if (contigs != null) { + // make the VCF dictionary from the contig header fields + final List vcfContigRecords = new ArrayList(); + for (final VCFContigHeaderLine contig : contigs) + vcfContigRecords.add(contig.getSAMSequenceRecord()); + + // have VCF contig fields so can make a dictionary and compare it to the sequence dictionary + if (!vcfContigRecords.isEmpty()) { + final SAMSequenceDictionary vcfDictionary = new SAMSequenceDictionary(vcfContigRecords); + final SAMSequenceDictionary sequenceDictionary = new SAMSequenceDictionary(dict.getSequences()); + + SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, name, vcfDictionary, "sequence", sequenceDictionary, false, null); + } + } + } + } + } + /** * Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream. * @param codecClass Type of Tribble codec class to build. @@ -228,7 +268,7 @@ public class RMDTrackBuilder { // extends PluginManager { sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index); // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match - if (sequenceDictionary.size() == 0 && dict != null) { + if (sequenceDictionary.isEmpty() && dict != null) { validateAndUpdateIndexSequenceDictionary(inputFile, index, dict); if ( ! disableAutoIndexCreation ) { diff --git a/public/gatk-utils/src/test/resources/exampleBAM.bam b/public/gatk-utils/src/test/resources/exampleBAM.bam index 319dd1a72de427c1d7a3b343368c124735b41542..d1c9d8001b163dc8e4862da75484bd833350ce2a 100644 GIT binary patch literal 3609 zcmZXXXEYm(qle8V_TJP8UW(dcuOO5nX3;2GN^Gqesv>sKqE_utd#~Cnb}2<#)ZUvH zrRM$LbMA+G?tQ-gKK?#D=i!b6k`Vki_YVj(Aw&cTgpJ8#vGWqNgIrH&MofW0_}32f zd&aB$9~$`E4l~7CB5K8&3Fm$Ii`xRjRH59BK5{j*+hR-wRO+QWp^rcH+b+D*1#hFgx6y+ry@TrYaYsU%Y>*0t| zq`Q2!u?AnmulDm@gV~PvGZiZ9b`+_(6`mb0K{op~H47OtUi(x(CmiZOH?bpjNA@&J{!Wl0u%d~Ppt}5G&hdisD<%_P0F8clIp=udMS%osOCNKGe6iK|*C`dG7#>Zg?V>mD* zI2AJ>)P&P*WsP5VqbGS5pMc_#ScB1u?g&M1P4)Gyu-3m;y|@`(*3U)W6&-aRkAQ98 zl@8_JeTx%smkRi^lURN8SCaEoIPtPx1gEwi=3HAJ0r|BNfE+{Gu4i>MIzp3)owQ0& zS^x+m(51HLcXT%pvJqwwFJ&6S6K~D_po^FeU^W17E2C1wu*{K1Nw?b)(b#7TCWNg&^Z>2K(8CUGT6~}o8Fn&wL{uO$b$42!>Cm1x(+vyu| z$y?F!)|FBGVFh=A!hWQaNv&8dQ_aP*toHXGA8c_AT9dy#l5mYIhG^uAKExf2L_LAba)1(TNs&$E>*o#ItgAGa}cvuH8y^iMkQGlTLJFnlr^E}RA|pjgNRW3U#BDSgJtuM+Hzy_Y;Wb@EC&XGB={RK%~( zlc#R1)%uffmNF(c+1w?X91S`1NqxC|WaM4=i5qc1|11&3^jkS z^U~*cLFda3Ax%MFO+OR8ihSj6$Rq4dtZmVNb3|_{Jl3U1Z}H8TG1lR5l(BVa4>A$z zTpvyYE^bn;uav!{ph#T$L*&^ELIh`%^Wcv^usBi34ULaOy0ie1+bKjq?q})19E>KRscDp@7&3DLUEwpATh!eJX0No?_`z+%G=& z>$of&knWo*3x~{6|I18H--Sy?5cTx+fX|RbBh-cm_C;{(%xI zR+s=?$P*>%!K?F@(Ldc!`c-I=L^bZq)#@5=&O=ydeYwHAK<$J^|umL3cl{K^(|?t^n+c!AGmh4GM(HJ>rU%$ zvlSQr(tnGem(9rp6GaDU(1(t+pFj{=3xO(GZ9 z4i;n=@3BYhMhnqvOLhm9^Q2oVK^vy$>yKg^$`RiQr$`&Q>a5}^tRaD#+D;`1mt@(Q zFVvO)a)p?N9qZ^~Gxn(h_Zck6^TDkkreYJQ%*+VRIRB$lwAVnfZrSbi>QWW38hd%S zKz1+kYzsTjl%1^+C~16)8^!qL7m3<;^4VX_L=TVAEX&D~8=9-P?2=w2Tim}nJ9p;P zt!zVz9Rinv&xTn$0j_H8aF203jgb*PE$zY|Ehj?z%@Cg87NImJ`;_3Oa&#f#EuO9dRQz?Q-&nYW&_L|p(?k~F`5T73q zS%If}ICq@Yb}lvmvqOaaLVP_~Jp#OwY37L6i=qhtFVwtCjVI3{q!s~TD?&*YvCVRv zEQsLtsxGfoquEely?8m|3d%;--~f$8_=AGjvb6`1l44Rza+hrJVggyy6{`2;!~ZgP0_YS=Yc;RNSZw!iZWJ;#C2Ik%tLt*`l@EEFG=6 zin3h#?0x$Q*?rm1;eA6|p@KwyKTcW$gU72mIWbIyMD|7IHov4qG>~4&8&&y%6g^6Z zPogD%nDsOpeae}wM4oaNHRny}5>Rz2VGcR5+7=e(Gd&gCy2J~#By#1t39$JI>J~S-3{pUVRA2JRN`Rs= zaQ2mARxwB~a{Yag=0&Rd1Y?onIm5a}0qeSR??5K~Fk0aB*I9ynsQ>A8^&-diUlz}j zT{ZgyK=alkdHHRQ@>BScCzo&yh|y^o^P#Fo#I z&oBJ{38uu7_>)hW*)ri%Cq&_ZgTC_0(dBZ%$qxuND1{(I`Q^ZX);CZVBB5)O((x(;kTDm~HmsHxNP-ow{laY2&iQ!#M(ciz|BdY0TCrY*JN@O_pUWPum zPwU33xnK8*Gg?5KWFtci=ApMxfGYUnE=kI!_!#)5arWC&oB7q|{@aIL++5dwvx&Mv zN-9=r-aT$b*QGGeLe@Ji2f~oIiLl-9K|8ZxY4pP@kM|SPB3|G?OP*LdA>jynY!BCn z{j{fZ%&C;fK0b8zF3534UE#z#bwuKV%sY5qZbw5$NdyCAqS4E*WSYgb(Q%L&A*N4&11=MpZ&~M*TK0^`}-y4 z+tmCh2KD@>iK0QuulMoI9x%)trknYp-J?1Vr)F?|@!tTZpj{<;KXd(V|EXA$Y1syK zorl)I0*ie8UMrot&RV^{TKdE&V2v? literal 3635 zcmV-34$Sc%iwFb&00000{{{d;LjnL64y~EnkL1{0$E%^b?$y=j_}D(aoNM~QFVkJs z_bwE)b}X$o%xs2j1%(K-^vv|YN^{%pSs)?-*;v5}5-;$G5~M_m@_uJ|i9lJgC@%MN6{ya&1Z|yU`-e|m)rQP3ocRJY~ zci;ZP^x5-g=iSpUOdmdfe(~soXJ;?ZdvBfocK7t%>BB#|m~`*LiDR>M|H#1|HawG%dhM_ zJ^Rq?^6A+Nc_)>3F6CI>Ig^uvG5mclci|?=`cm^N?2Yf=|9l*O=}SAGo7|Zk?7qJ{ z9-rJ9Pwwm<%-`-{Y%>*-uCm4UT&W~dA4o-$xlI%2anG_ z?Qb}Ni+=cNUv#&==`9ljncfTWd5L>#&;?X#B(1*BU=-%(frGE*tOQ zwZ_B7-TB&o!~W+VfBf;)<;ExD$?m~)GTq;u9!z$pljF(JXU2_xZ8jQM=hr{Vyb^3) zky$AyDq@r?tYFhZb19WpDzGsNm0&$Z2ej8g%I)_AMvfwq(NVk22EA4b^Y%uo)smfd ztF=knEoa#8cAFjS-GBSbh``I5`NSZ9)NN)PS>3D+re*1f)v zLDv}x6;f}+gg}0XS4M2CEmVBx3el{Y?URUy%FJbNcRuvDArbqkx)5+dscaE4MT49tTg~}2o7^x~x zs|q2i$QH^WNMn+vq~n38pbmtqR+@){Q0q{Dbnr2R_u7+nEJOGz5Vk%`!$ET%yn|gK z#sY+tR4XI?@(S_v3h|pOAVy_W5sb@90b-0|UB)OX$_hmkB?ZaTG(sp5N(V|<@P!>V zWPJs=!6q|&nzh?H5TW3Zq#h0+W2MkxpDQ5%mP?{MA0Tz;XypVB50*y!=^W>4zuRbt z#^-O8o4aILMky5+6XA!7UV1o9L&QY?E8`h9AQ^79@-&8jtG~pHa~rhW=42j zZN9cV;s1fA;-sV+OWR@y6XkovMgikKC&YC_yEXJiw3bWQ%ozrr6k>x>26aU$pbsHr zJlirfH&DcGzIlwk&5wFmgZTOXPV>~ z6P?Fer%|D{w$iG^TS*dOC`P2qEZSM`v>h2Di5z>>4o7XE>Do5xA>V2BM{SBNs3~}Z zj{3xwosQ%i{cfwc&TlNWIqav#|E{+aq;vIypL=?oUn@)h2;5L!vS!5R|Hb zBuh%7BSPaCC!_?-Sm{U!NTXS0l%~X(>PaE_+zsOhI*wF6q^V}GDRTm6zLHFMoKZy? z<`A7FrN)d8tQ)!vn!`s3V&>y9gT)B?VAq9ch=8QXrR}mi$sUZ0Ow2V$=TE=hP z(C`q}{_1vbV+aopq46It!nDA7RTd;g8O~{rpw)wZWdXt=T~-8@NnTL}Le$Cu&4|}U zRY?n8;75ZMP#_ZpI%NB=L+$baiDNSJ;F^4RPK=vj_}Qw z^+^qX1Z#hJd%4-4p6rgN$H#jMLKP(lW~VAfD#I16ilTr@1HK$q%A_d37nIr;vr=IO zKyF^@eO^CqzEi{$nEFgDW?-`gZA`cnt4~rtdi6;SzqLH!4G@@pP0vtS6e(RMb#sJ% zNvI5o2hIgbDpVH%Q@;Q+X3UHSjUJSN*fgC!F!=~YCftOx{aQXyUP;G@%TQfgm(RmR z%lO8Bn3i$(Mpyhkto_ZcgeMTCM@PHU$-%^*!9byM6#-U+N(??cp<6nFqKdMu0)`Bj{|P3x?fBH8)NwIsTO*fJ-$#~3 zXPbi@mlzI@dS3nFc|K$-tkmyeb1;F|QDbSuS96^2{im6vZf4@oVePB8BJP@|e+)zM zcyF?B;Rq?Tr3+2-oK$%p6&MC5jq}{NftYFW!^d#-Y4PV*jMusNy;T@B0q;_hRDnNDNeMGQApi>_ zMp+`7B@nl15&2!q@>$zqG7y-#F3|81IMNUX3MtJ)Rv9$G`#^g5Z!p z!ZegFg}?=`zB1$V_tt>%-5WaoJ*=JH-ZMbZ)-(O?L7z&zFpzzX@c zXtVZad--&H^%~EN@mj~}DvVIrGrg5WX_jRb%!-;6&<&x$Mj44QDhpa69SwSe);jJD zaIe$t^qcFJ*XuWjew&ddS@--<)RrKmKJtZcCdhS{dk~|p*AbL3FH~gc82<|AEzP*- z8$bB{wT2kJd!wrU7p(oo?Xe0D4*JIAVA1 zrUXV01|>!iY;zibV|86_VbDy_pc#0aa*jP`IG`{Y(vBB`aii24(oV-1CN7w*L_IZ%St*7;b~u7Qy22>+lhx$d8SF0yNSwG>ZPpF^E znP)lX5TF0*@_>sN{pUYh#OSqxov*Gn{@`}NrLdBg_{z|7}q}GBj`@PzYHghc%v-GdH?$tRUrV1Xgf@$89xZ)+-Yl znm4o1P%lYXWhDXMO*2YSRnP=MAuTJKrZfW&r@_(TqAc8bIM$}R9vS39(3uOJ9j(o_ zFeY8keqpA(JDd20tUoPH_&^=Yh diff --git a/public/gatk-utils/src/test/resources/exampleBAM.bam.bai b/public/gatk-utils/src/test/resources/exampleBAM.bam.bai index 052ac614bdb6662a1721c7e3d6a7f497969ff061..33beef3d182d95421d3685cf10345c635ec1d8e6 100644 GIT binary patch literal 232 zcmZ>A^kigYU|?VaVoxCk1`wNpp&v};vp{Gss5p$jpA#bP4Hbv+h4~@kK2UKOUsDWf o&Ljo~CM5G282<8txQbvg1mOqO57P%z2XZ62+tBSsw-aV607z~Vy8r+H literal 232 zcmZ>A^kigYU|?VaVm~1U1`s<{5=el7F&~KXgo@iRLqu1zLufCkIE??B2O{nb6^HTt vfqW3~fr`WUFnhcvF)%Oz=-s|@~&%HeFec#XfUiVXBP<@DEPa(KL zAU%l|9T!CxEsR2#v}gc<+YcfZ0R%;SgLH#r0t0}pBN6N4Zn@LQ7)$oBjERk*W2qjN zA+fQ{(1;jX0==FLN<(KU+U!HI(R3UuCL#&PjG`w-#L#i!@iaqLOnd?@IEs#oV+BV= zgy6zy36X}ZgovnkTs$)bmq3qCz%l9Zu`Ff?Js$T@U?c+7n_v`1LtL?Lc*|hCNtiLs z*v!O)Zh<#53&tDMLUx8(gqoO}<3mGCOtDl?OUJWgq4hNQ6j&bzANjvgK^#02coK`0 zkJHId$<+cfyD%Wv3jhJGGuK%ePj{`~j+)ng|qkkl+b&w<*|>U>P)(Q^2|G z6oQ5z2!`!RFbaf0fZ~dbt{_10g!gI{SfGtz0MaU8pj8VvXfzvxp^yM(JmWhq|7i%_4!hBDDk%H>8$M?q~>*cJqQIP=F#NjY6a_ zMI@8-qJp3l#p&-zsPU@HZ~1!c&)|h zd6nxasN<)O3CJKC;52YbHL}G;e;vSW9_qcfVJ+pZr%2+0A&$TB# ztw$;=7Ce5Qc&ii%7DQod5h|n$_iOvV$tz>JS1|GF{5Y}?i8bi zb$dJJ-msaxUQ1E$tlfdSlu383FqC8oI^8zs%*C`=IF|3$Ig=K1zM`w|kF|*-+uRB} zyQe*l!#PjRvXoJx1f)*x%k27|019S1@P~@tIc`SrlFN0{fQo}|MVWqT9Md8V=KYm%uh<4j^kgh^bu|7rn^2tgHzQ$0trf=GP_? z!(k+~bLa3tg_?PkbgC`+jz+WK{AhrQK`Ut_@GFDA2dZ@;Ppjoz;5F>&3iZA+O}g+vBEa>m)X>dEWoLLuE0+mPoZyvy*;RyVpE{q<=OqbIz~z{-P+w%e$8s+^y;qRFVSqvH>P-J)S!{v#o6BGYv9O;^b9rV4$9y*G3NbCH%<+ zZoYoT{*uZ_#nfp!4r*ZCCnu%Q07LuixKt~W7X~wmEaAd;R zTu1UiM9q-j_SguU`jPJE4bi&Es*9#F^|PvnH6b}K&VcOOF&~MZ3F)=FVnS|XvgEp$ z0(NVLCU$-NiYYXwXl5dABKsCxTCUuZ9wb+#S^E`jjydsR?O6scAL7fhu`qVK!McQV zA<>h4CN2K<-GHo4v_(tw%>sPo;GM=K@~uYK#g`O8$au}Y!Ry<~5-!m@%M$%uQX=s~ zJ&|9Bul0Ph7_-{cWA*tD=IrdH-)4p9q$(Fu)Qj5Y>}_c439+jSpQ;@U2NJXMW(k}i zO)G%=2uQT|-rj4C1!|Yr8MegpjlqnfAGc2b`7Y<7dhPvWe4btoK8a2=Zm91MEDc2m z*OE40DS71Ya3p+(^kz1p5OtE;V)sbj(a4dz?(i*pq4aA z)&56)iT(b?Y)85DDMkWUNSf>3J*;&4rF4+AK*lCdK4UTX#2>wOUUM&8W^k*k-fv9^?wg z{p%iZ=gWi(%pO)2(|2~y0j!SgnRe%?!|&RvB>lw;-wyk-d0cOjQ4Nuw{b`X?BNPq|kDhMbD5)28k;sORpDzY!BSiyHDfDr}UrC1kS zdPNjbyQsK9u|gH4w$*s8TCG;JT1{=aa&gI>2ip6gzxRDV^~3#epLyngX8vb#=A1Ln zHi=Wl$SDN)Uyg3tALOXIrn9EgiweN>Su@ykQzC;Uq3nb?k%dKtTDEFVWOh+eX-;mT zrd-RJu1aOAQX;dLlm@et@xUPRV8^H;XC(wrW~-Hv(lTv{v{b9pmTC*LwPn&-MFmBf zWt!q#ZDCoarmRd`kd?nYc(U}pp#MuH(P_%cHEelkWP~;=J1j>RF)1P}ELf(Kh0CL&zY`FU?*oS&F`Vg&QM*B8Zh|D?lHkAyc3kpUtYt;I@@1M*t+Y5dTT_}N&DNLV z(95N=(wxjZK^}n`kKoWCEVUjXIEfykY5U!REJ&e9 ziiw#wPZ}2-7#tds84?r}5f~I47!vw>b5dYXxHMR*D$2*921ZN{36}o*N0MMkkfcao zF45!{O2{eyf)&ZqY5r;tgC$wIlE2z$$f(hfsK44SnNFNaNq*70oImcH|H}WE46G4g zS72ZFiUv=BwoM0jhP0ji>VI zF>xwb4L8xLR0@R>qoAri0fogE`ADQ8Vc`+63U$^JxF7i&hF2{%o-}sXRct)5@zSRG zn?7jza(}@7ok!Mn70>fzFzEmP5wD2XYrjLLrEhcsvdV ztxU!;8OwyRzC4x6p#cido<>k9RAmZHfcvg)?jAHM_JtrQDGGw1reW*U_!O!w8(@~n z;o3N{2?~yfhHI>f%A%4yL7^n3(6I+;q*M5{B9H%FL&0`bV-v=a96_a1F_%UIb+3oOe=T>`nweKam8pK`H(h%7<wcpNYIU$j6Z71~Y|BSvGvq;-`=}ha;%cnPvQqNq>&;PFdQJN&k|Ij;4ee8sn zwg(s9u9XxEGIN)J^@qSk%el$BO|k2UD2L8B)m7+#(sOF_u*AP{``eN^10x+y9{?Kp zrlrw{x@UF$#E)zM`f&y9YME`d%%xi9xk`t%2)l~_%V6OlV2yw+0(Mk{8L~Jb;7pJW zHv|CPm4(ziq!uBy4M1u;q;^1RF@e-I6jwtVpy}y^-jK-BvjP0cO3x+WPkI6TNeJ{J zy$#UY>Fo(U`qmL;I_q5sl&SZCKLG}M!UX6ZyMkN6BT#OIutHQ}LsV4QL$d?5q5?B6 z(CmswxL1s;0Cd>IL{b(NU4>0#5yB>G##j?JAt|oCj!5bZo7`ZNJ3$hF22BiTVp5<9 zb)XI_XyO8BvVkUBDm0C8u77(Ck5=d(aGM zNX<4J!8f@Ywh0Wt8n%f5Y_o-J4zSG$wz4V&v^?NOVQs?uJB94{fjxYUHS7@yG#XS7do;)! zd1FDgR-sj-1Ps{E1UjCMPv~?a*pDx*#SOo`B zC|Sty*u%{X`QWCmMcH#F)fF@B;a1S*EZFTN6OSxIE3q7lz^Ji z6T>dd09po9%FeKY4LqqpDimd8NMVCC16SAppGe_pcp5q3a@`P#_GbXPFN5&)W&2ve zy>oo|((`;!xwgyu1WnLQ#BfZf?96?1)EG z;&YKW7Y-pY>P5)V>;>q~42HAPnLPN;0vp^?IxEA%!V1FRKDZA_kO*x@A_J#d3-`1V8q-_g}Mz=e_Sir4)P3a_;?`T}t1O)zcfgZ*F$YNEV}yC%q}lX+WO zXL?S)wfmDd>k2zPx9&Xh-R!k*zdnAcZRQTEJG(D^o_1uxG~>W`{D-^lFB^WS-x$OE zh&pB6z@4@OHN1JZQhvRl`-jVK-#Yvl{H@zBp&zKqyi1DTl%MOE@c7x1lCH@~B0U_+azfsDcM)wiqv-Nq7}HX-VXT1s|2)8}w^0Yx&~k?H{hx z_dT7R^myjD`VhMfe$CL0ee&!{%E_3rth7{zy72tMOj>WYOT!%Dg`~!ipHrO9xpwo2 zrfEN2lO2zmFp0Y_zOiWj@&3$XJ$GKv-Ml_LJ#ETDI2=+Rek1+5YjN)S&-ozn!^^?8 zYi3%TZuNG5uT=^9L?2$e$2`{kn@9GV+?3$CZX3o)=Ha*2f=`k?^;7+P`&MmTWd1np zeOs?s^H=fHelED2vpZ{?{+BPGFLZhQq0h~l^L?8=_Ga~)Z-`g><-BZgOoo=f*Q8#l z`t@qW<4+b?$3~fLl-|B> zU%i~asEt*&QL=nG{jBMD+{e24XHqR&>IcUy=bSyT)T}w&E&A;0)_K3gMEkbhp5S&R zrR3U|YtQE7?OXXcwB7UMdDeu>iDwRfe(2Ky^Q*Tb4lZBR{+3%cG&lW+dpG6kHLj}v ziH=@7$tQH4MNf5kx0bv(SyylSRJ=Xo>h0v=puAH*bRM2yyx@7$W&f^P z`@5#fbN(eZ`)@3DzcEx#Z$C06=8%hcW$jl#r2p)AFmK-TM^m>v35~i@zAizzyxC@I z){iSTMh`OkI-D6R4mh(nHFO3J55U4x&&0Y3jh8!l7t*77H8oV<2&ait!)CNZTFeeN zzghewz3b@030}!HIf*B}c0KI?{kp3Ay5E13Q6UU>8vNVYFUn$@CffOJoV9xC$y0Az zE`3p7Y5qK&xLcK$(N;B1eIIV!IbZVfn}x}HZce*;=R)|=Y*WBa*?>5ts=h%v(L)gB z_>YtY6}JTU1&;?y=DMwkmNTCI8WQAs<&XPnhr@0|(wNWny3ahflf5PWEo{khOd*?~kwGCBEb&ZRPN6yB*_9nh;;%yV3kBF(s zETv4pIBlZTmXj&`bh-U68&q-MMJ_Fo)>$N-E5+H_t6pTejNI(KyQF_v!a)5O#gA?V zEcv*myLO7~!MC|5Lj9=(jRL!>U%Iblfw5!9H>Y?#L0sBbnqy#{dtJ_8*p}JPqZ3;t zbWKm&u9Edve0Kl(+O1cgD!d^&u$_@I`JwSapdbnEIs16|!!$XoqWWH)^6Riy8SSjd z>)i#to^=sQ>*wmGo2|;qYU=~pTS7*Z&(d|ZD{ilCes7`I(I0Z-o-CuOIxnqgNZR@F zDqX=xg8qZTj+pg}d-bkGyo)1qcWpm-t<(9?&p&45@#p5Pe*4ecd-l(6SG>9v{f01X z`1#mN{GSkwMnx;(211YjDcV2=f~sY$)w1?#nFW8lVsf1Y&@C9)B=ATOkPuG76cVCH zkauB70c2CvjbY&wqh-fx0JN<}OBa^;4>4H)QZVWTMoSc?D!Yu9Zh#wzz-%>XU=;~Q zf<(5_5{`8{NQ=8jXc;xWm(;cHGFqNvWgE%2lgwvyqlJf=u6*QOfIrOihNRVo#Lo z7>1}Sj9OePajmXZuc}oWL9H5$>@`~2@leZeuxW@o_O*<9N08LArX9}|^o-Ffr;Fwf zpAe=_v{3fsUGB&$m)LcDy8Xh5a?{k)uhN(_avMP#-SYTl6CQXKvT+3zJP#!ZLlD3K z)_1{w*UxT1@&|Z-^u9qzl;FWayzc^n#zV0sStSJ}KtMVw$-)prIv)#SFv=wuGzA#4 zfCi&B)@Z4~p68Q;3ji=$igB+vANMLqJvd(zP1%-tEa|4?o$=-s|@~&%HeFec#XfUiVXBP<@DEPa(KL zAU%l|9T!CxEsR2#v}gc<+YcfZ0R%;SgLH#r0t0}pBN6N4Zn@LQ7)$oBjERk*W2qjN zA+fQ{(1;jX0==FLN<(KU+U!HI(R3UuCL#&PjG`w-#L#i!@iaqLOnd?@IEs#oV+BV= zgy6zy36X}ZgovnkTs$)bmq3qCz%l9Zu`Ff?Js$T@U?c+7n_v`1LtL?Lc*|hCNtiLs z*v!O)Zh<#53&tDMLUx8(gqoO}<3mGCOtDl?OUJWgq4hNQ6j&bzANjvgK^#02coK`0 zkJHId$<+cfyD%Wv3jhJGGuK%ePj{`~j+)ng|qkkl+b&w<*|>U>P)(Q^2|G z6oQ5z2!`!RFbaf0fZ~dbt{_10g!gI{SfGtz0MaU8pj8VvXfzvxp^yM(JmWhq|7i%_4!hBDDk%H>8$M?q~>*cJqQIP=F#NjY6a_ zMI@8-qJp3l#p&-zsPU@HZ~1!c&)|h zd6nxasN<)O3CJKC;52YbHL}G;e;vSW9_qcfVJ+pZr%2+0A&$TB# ztw$;=7Ce5Qc&ii%7DQod5h|n$_iOvV$tz>JS1|GF{5Y}?i8bi zb$dJJ-msaxUQ1E$tlfdSlu383FqC8oI^8zs%*C`=IF|3$Ig=K1zM`w|kF|*-+uRB} zyQe*l!#PjRvXoJx1f)*x%k27|019S1@P~@tIc`SrlFN0{fQo}|MVWqT9Md8V=KYm%uh<4j^kgh^bu|7rn^2tgHzQ$0trf=GP_? z!(k+~bLa3tg_?PkbgC`+jz+WK{AhrQK`Ut_@GFDA2dZ@;Ppjoz;5F>&3iZA+O}g+vBEa>m)X>dEWoLLuE0+mPoZyvy*;RyVpE{q<=OqbIz~z{-P+w%e$8s+^y;qRFVSqvH>P-J)S!{v#o6BGYv9O;^b9rV4$9y*G3NbCH%<+ zZoYoT{*uZ_#nfp!4r*ZCCnu%Q07LuixKt~W7X~wmEaAd;R zTu1UiM9q-j_SguU`jPJE4bi&Es*9#F^|PvnH6b}K&VcOOF&~MZ3F)=FVnS|XvgEp$ z0(NVLCU$-NiYYXwXl5dABKsCxTCUuZ9wb+#S^E`jjydsR?O6scAL7fhu`qVK!McQV zA<>h4CN2K<-GHo4v_(tw%>sPo;GM=K@~uYK#g`O8$au}Y!Ry<~5-!m@%M$%uQX=s~ zJ&|9Bul0Ph7_-{cWA*tD=IrdH-)4p9q$(Fu)Qj5Y>}_c439+jSpQ;@U2NJXMW(k}i zO)G%=2uQT|-rj4C1!|Yr8MegpjlqnfAGc2b`7Y<7dhPvWe4btoK8a2=Zm91MEDc2m z*OE40DS71Ya3p+(^kz1p5OtE;V)sbj(a4dz?(i*pq4aA z)&56)iT(b?Y)85DDMkWUNSf>3J*;&4rF4+AK*lCdK4UTX#2>wOUUM&8W^k*k-fv9^?wg z{p%iZ=gWi(%pO)2(|2~y0j!SgnRe%?!|&RvB>lw;-wyk-d0cOjQ4Nuw{b`X?BNPq|kDhMbD5)28k;sORpDzY!BSiyHDfDr}UrC1kS zdPNjbyQsK9u|gH4w$*s8TCG;JT1{=aa&gI>2ip6gzxRDV^~3#epLyngX8vb#=A1Ln zHi=Wl$SDN)Uyg3tALOXIrn9EgiweN>Su@ykQzC;Uq3nb?k%dKtTDEFVWOh+eX-;mT zrd-RJu1aOAQX;dLlm@et@xUPRV8^H;XC(wrW~-Hv(lTv{v{b9pmTC*LwPn&-MFmBf zWt!q#ZDCoarmRd`kd?nYc(U}pp#MuH(P_%cHEelkWP~;=J1j>RF)1P}ELf(Kh0CL&zY`FU?*oS&F`Vg&QM*B8Zh|D?lHkAyc3kpUtYt;I@@1M*t+Y5dTT_}N&DNLV z(95N=(wxjZK^}n`kKoWCEVUjXIEfykY5U!REJ&e9 ziiw#wPZ}2-7#tds84?r}5f~I47!vw>b5dYXxHMR*D$2*921ZN{36}o*N0MMkkfcao zF45!{O2{eyf)&ZqY5r;tgC$wIlE2z$$f(hfsK44SnNFNaNq*70oImcH|H}WE46G4g zS72ZFiUv=BwoM0jhP0ji>VI zF>xwb4L8xLR0@R>qoAri0fogE`ADQ8Vc`+63U$^JxF7i&hF2{%o-}sXRct)5@zSRG zn?7jza(}@7ok!Mn70>fzFzEmP5wD2XYrjLLrEhcsvdV ztxU!;8OwyRzC4x6p#cido<>k9RAmZHfcvg)?jAHM_JtrQDGGw1reW*U_!O!w8(@~n z;o3N{2?~yfhHI>f%A%4yL7^n3(6I+;q*M5{B9H%FL&0`bV-v=a96_a1F_%UIb+3oOe=T>`nweKam8pK`H(h%7<wcpNYIU$j6Z71~Y|BSvGvq;-`=}ha;%cnPvQqNq>&;PFdQJN&k|Ij;4ee8sn zwg(s9u9XxEGIN)J^@qSk%el$BO|k2UD2L8B)m7+#(sOF_u*AP{``eN^10x+y9{?Kp zrlrw{x@UF$#E)zM`f&y9YME`d%%xi9xk`t%2)l~_%V6OlV2yw+0(Mk{8L~Jb;7pJW zHv|CPm4(ziq!uBy4M1u;q;^1RF@e-I6jwtVpy}y^-jK-BvjP0cO3x+WPkI6TNeJ{J zy$#UY>Fo(U`qmL;I_q5sl&SZCKLG}M!UX6ZyMkN6BT#OIutHQ}LsV4QL$d?5q5?B6 z(CmswxL1s;0Cd>IL{b(NU4>0#5yB>G##j?JAt|oCj!5bZo7`ZNJ3$hF22BiTVp5<9 zb)XI_XyO8BvVkUBDm0C8u77(Ck5=d(aGM zNX<4J!8f@Ywh0Wt8n%f5Y_o-J4zSG$wz4V&v^?NOVQs?uJB94{fjxYUHS7@yG#XS7do;)! zd1FDgR-sj-1Ps{E1UjCMPv~?a*pDx*#SOo`B zC|Sty*u%{X`QWCmMcH#F)fF@B;a1S*EZFTN6OSxIE3q7lz^Ji z6T>dd09po9%FeKY4LqqpDimd8NMVCC16SAppGe_pcp5q3a@`P#_GbXPFN5&)W&2ve zy>oo|((`;!xwgyu1WnLQ#BfZf?96?1)EG z;&YKW7Y-pY>P5)V>;>q~42HAPnLPN;0vp^?IxEA%!V1FRKDZA_kO*x@A_J#d3-`1V8q-_g}Mz=e_Sir4)P3a_;?`T}t1O)zcfgZ*F$YNEV}yC%q}lX+WO zXL?S)wfmDd>k2zPx9&Xh-R!k*zdnAcZRQTEJG(D^o_1uxG~>W`{D-^lFB^WS-x$OE zh&pB6z@4@OHN1JZQhvRl`-jVK-#Yvl{H@zBp&zKqyi1DTl%MOE@c7x1lCH@~B0U_+azfsDcM)wiqv-Nq7}HX-VXT1s|2)8}w^0Yx&~k?H{hx z_dT7R^myjD`VhMfe$CL0ee&!{%E_3rth7{zy72tMOj>WYOT!%Dg`~!ipHrO9xpwo2 zrfEN2lO2zmFp0Y_zOiWj@&3$XJ$GKv-Ml_LJ#ETDI2=+Rek1+5YjN)S&-ozn!^^?8 zYi3%TZuNG5uT=^9L?2$e$2`{kn@9GV+?3$CZX3o)=Ha*2f=`k?^;7+P`&MmTWd1np zeOs?s^H=fHelED2vpZ{?{+BPGFLZhQq0h~l^L?8=_Ga~)Z-`g><-BZgOoo=f*Q8#l z`t@qW<4+b?$3~fLl-|B> zU%i~asEt*&QL=nG{jBMD+{e24XHqR&>IcUy=bSyT)T}w&E&A;0)_K3gMEkbhp5S&R zrR3U|YtQE7?OXXcwB7UMdDeu>iDwRfe(2Ky^Q*Tb4lZBR{+3%cG&lW+dpG6kHLj}v ziH=@7$tQH4MNf5kx0bv(SyylSRJ=Xo>h0v=puAH*bRM2yyx@7$W&f^P z`@5#fbN(eZ`)@3DzcEx#Z$C06=8%hcW$jl#r2p)AFmK-TM^m>v35~i@zAizzyxC@I z){iSTMh`OkI-D6R4mh(nHFO3J55U4x&&0Y3jh8!l7t*77H8oV<2&ait!)CNZTFeeN zzghewz3b@030}!HIf*B}c0KI?{kp3Ay5E13Q6UU>8vNVYFUn$@CffOJoV9xC$y0Az zE`3p7Y5qK&xLcK$(N;B1eIIV!IbZVfn}x}HZce*;=R)|=Y*WBa*?>5ts=h%v(L)gB z_>YtY6}JTU1&;?y=DMwkmNTCI8WQAs<&XPnhr@0|(wNWny3ahflf5PWEo{khOd*?~kwGCBEb&ZRPN6yB*_9nh;;%yV3kBF(s zETv4pIBlZTmXj&`bh-U68&q-MMJ_Fo)>$N-E5+H_t6pTejNI(KyQF_v!a)5O#gA?V zEcv*myLO7~!MC|5Lj9=(jRL!>U%Iblfw5!9H>Y?#L0sBbnqy#{dtJ_8*p}JPqZ3;t zbWKm&u9Edve0Kl(+O1cgD!d^&u$_@I`JwSapdbnEIs16|!!$XoqWWH)^6Riy8SSjd z>)i#to^=sQ>*wmGo2|;qYU=~pTS7*Z&(d|ZD{ilCes7`I(I0Z-o-CuOIxnqgNZR@F zDqX=xg8qZTj+pg}d-bkGyo)1qcWpm-t<(9?&p&45@#p5Pe*4ecd-l(6SG>9v{f01X z`1#mN{GSkwMnx;(211YjDcV2=f~sY$)w1?#nFW8lVsf1Y&@C9)B=ATOkPuG76cVCH zkauB70c2CvjbY&wqh-fx0JN<}OBa^;4>4H)QZVWTMoSc?D!Yu9Zh#wzz-%>XU=;~Q zf<(5_5{`8{NQ=8jXc;xWm(;cHGFqNvWgE%2lgwvyqlJf=u6*QOfIrOihNRVo#Lo z7>1}Sj9OePajmXZuc}oWL9H5$>@`~2@leZeuxW@o_O*<9N08LArX9}|^o-Ffr;Fwf zpAe=_v{3fsUGB&$m)LcDy8Xh5a?{k)uhN(_avMP#-SYTl6CQXKvT+3zJP#!ZLlD3K z)_1{w*UxT1@&|Z-^u9qzl;FWayzc^n#zV0sStSJ}KtMVw$-)prIv)#SFv=wuGzA#4 zfCi&B)@Z4~p68Q;3ji=$igB+vANMLqJvd(zP1%-tEa|4?o$GXbJLV$Bjb(t4+#SRE$R*I delta 43 zcmZ3exlnV$eg(_aq-3*{G)ogpGc!ZuG-Gq)RC8lvOS2TyB;zErB(sh84+#SRG~W&s diff --git a/public/gatk-utils/src/test/resources/exampleCRAM.cram.crai b/public/gatk-utils/src/test/resources/exampleCRAM.cram.crai new file mode 100644 index 0000000000000000000000000000000000000000..3eee8e007285f3f9601e68600ad54e2b92376d70 GIT binary patch literal 44 ucmb2|=3syT;|C^21`oEFn41_sH8C+VeQLsJ)Z@$ByOUu`^@Jl*Kt%xhybNmq literal 0 HcmV?d00001