From 77a03c97097625c54b00055438dacdc25d1e162d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 10 Jan 2012 16:30:38 -0500 Subject: [PATCH 01/36] Patching special case in the adaptor clipping * if the adaptor boundary is more than MAXIMUM_ADAPTOR_SIZE bases away from the read, then let's not clip anything and consider the fragment to be undetermined for this read pair. * updated md5's accordingly --- .../sting/utils/sam/ReadUtils.java | 12 ++++--- .../VariantAnnotatorIntegrationTest.java | 6 ++-- .../CallableLociWalkerIntegrationTest.java | 2 +- .../DepthOfCoverageIntegrationTest.java | 32 +++++++++---------- .../UnifiedGenotyperIntegrationTest.java | 4 +-- .../RecalibrationWalkersIntegrationTest.java | 12 +++---- .../sting/utils/sam/ReadUtilsUnitTest.java | 21 ++++++++++++ 7 files changed, 57 insertions(+), 32 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 7fa2f6230..cc0b1ae67 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -186,17 +186,21 @@ public class ReadUtils { * @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another contig. */ public static Integer getAdaptorBoundary(final SAMRecord read) { + final int MAXIMUM_ADAPTOR_LENGTH = 8; final int insertSize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value) - if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another - return null; // chromosome or unmapped pairs - - int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read) + if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs + return null; + + Integer adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read) if (read.getReadNegativeStrandFlag()) adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header) else adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header) + if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) ) + adaptorBoundary = null; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor + return adaptorBoundary; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 0aec94663..174a46bdd 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("e70eb5f80c93e366dcbe3cf684c154e4")); + Arrays.asList("604328867fc9aaf3e71fa0f9ca2ba5c9")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("1e52761fdff73a5361b5eb0a6e5d9dad")); + Arrays.asList("bbde8c92d27ad2a7ec1ff2d095d459eb")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testExcludeAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("bb4eebfaffc230cb8a31e62e7b53a300")); + Arrays.asList("8ec9f79cab84f26d8250f00d99d18aac")); executeTest("test exclude annotations", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java index 02332b64e..c9e91e664 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java @@ -62,7 +62,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest { public void testCallableLociWalker3() { String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("4496551d4493857e5153d8172965e527", "b0667e31af9aec02eaf73ca73ec16937")); + Arrays.asList("b7d26a470ef906590249f2fa45fd6bdd", "da431d393f7c2b2b3e27556b86c1dbc7")); executeTest("formatBed lots of arguments", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java index f2f72978f..1c58346b4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java @@ -55,25 +55,25 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { spec.setOutputFileLocation(baseOutputFile); // now add the expected files that get generated - spec.addAuxFile("2f072fd8b41b5ac1108797f89376c797", baseOutputFile); - spec.addAuxFile("d17ac7cc0b58ba801d2b0727a363d615", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts")); - spec.addAuxFile("c05190c9e6239cdb1cd486edcbc23505", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions")); + spec.addAuxFile("0f9603eb1ca4a26828e82d8c8f4991f6", baseOutputFile); + spec.addAuxFile("51e6c09a307654f43811af35238fb179", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts")); + spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions")); spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics")); - spec.addAuxFile("c94a52b4e73a7995319e0b570c80d2f7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); - spec.addAuxFile("1970a44efb7ace4e51a37f0bd2dc84d1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); - spec.addAuxFile("c321c542be25359d2e26d45cbeb6d7ab", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); - spec.addAuxFile("9023cc8939777d515cd2895919a99688", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts")); - spec.addAuxFile("3597b69e90742c5dd7c83fbc74d079f3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); + spec.addAuxFile("e69ee59f447816c025c09a56e321cef8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); + spec.addAuxFile("fa054b665d1ae537ada719da7713e11b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); + spec.addAuxFile("28dec9383b3a323a5ce7d96d62712917", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); + spec.addAuxFile("a836b92ac17b8ff9788e2aaa9116b5d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts")); + spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics")); - spec.addAuxFile("1a6ea3aa759fb154ccc4e171ebca9d02", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); - spec.addAuxFile("b492644ff06b4ffb044d5075cd168abf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); - spec.addAuxFile("77cef87dc4083a7b60b7a7b38b4c0bd8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); - spec.addAuxFile("8e1adbe37b98bb2271ba13932d5c947f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); - spec.addAuxFile("761d2f9daf2ebaf43abf65c8fd2fcd05", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); + spec.addAuxFile("4656c8797696cf5ef0cdc5971271236a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); + spec.addAuxFile("6f1d7f2120a4ac524c6026498f45295a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); + spec.addAuxFile("69c424bca013159942337b67fdf31ff8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); + spec.addAuxFile("6909d50a7da337cd294828b32b945eb8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); + spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); - spec.addAuxFile("0582b4681dbc02ece2dfe2752dcfd228", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); - spec.addAuxFile("0685214965bf1863f7ce8de2e38af060", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); - spec.addAuxFile("7a0cd8a5ebaaa82621fd3b5aed9c32fe", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); + spec.addAuxFile("185b910e499c08a8b88dd3ed1ac9e8ec", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); + spec.addAuxFile("d5d11b686689467b5a8836f0a07f447d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); + spec.addAuxFile("ad1a2775a31b1634daf64e691676bb96", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); execute("testBaseOutputNoFiltering",spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 7179680bd..32fa8679e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -189,7 +189,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("2b2729414ae855d390e7940956745bce")); + Arrays.asList("f0fbe472f155baf594b1eeb58166edef")); executeTest(String.format("test multiple technologies"), spec); } @@ -208,7 +208,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("95c6120efb92e5a325a5cec7d77c2dab")); + Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272")); executeTest(String.format("test calling with BAQ"), spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 65de6697b..b53daaf39 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -34,8 +34,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "ab4940a16ab990181bd8368c76b23853" ); new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941"); - new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "36c0c467b6245c2c6c4e9c956443a154" ); - new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "955a8fa2ddb2b04c406766ccd9ac45cc" ); + new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" ); + new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "932f0063abb2a23c22ec992ef8d36aa5" ); return CCTest.getTests(CCTest.class); } @@ -91,8 +91,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public Object[][] createTRTestData() { new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0b7123ae9f4155484b68e4a4f96c5504" ); new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "d04cf1f6df486e45226ebfbf93a188a5"); - new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b2f4757bc47cf23bd9a09f756c250787" ); - new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "502c7df4d4923c4d078b014bf78bed34" ); + new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "74314e5562c1a65547bb0edaacffe602" ); + new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "41c2f82f7789421f3690ed3c35b8f2e4" ); return TRTest.getTests(TRTest.class); } @@ -291,7 +291,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "828d247c6e8ef5ebdf3603dc0ce79f61" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "aac7df368ca589dc0a66d5bd9ad007e3" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -317,7 +317,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test(dependsOnMethods = "testCountCovariatesNoIndex") public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "991f093a0e610df235d28ada418ebf33" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "02249d9933481052df75c58a2a1a8e63" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index b9f831028..367f6294d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -103,10 +103,31 @@ public class ReadUtilsUnitTest extends BaseTest { read.setReadNegativeStrandFlag(false); boundary = ReadUtils.getAdaptorBoundary(read); Assert.assertNull(boundary); + read.setInferredInsertSize(10); // Test case 6: read is unmapped read.setReadUnmappedFlag(true); boundary = ReadUtils.getAdaptorBoundary(read); Assert.assertNull(boundary); + read.setReadUnmappedFlag(false); + + // Test case 7: reads don't overlap and look like this: + // <--------| + // |------> + // first read: + myStart = 980; + read.setAlignmentStart(myStart); + read.setInferredInsertSize(20); + read.setReadNegativeStrandFlag(true); + boundary = ReadUtils.getAdaptorBoundary(read); + Assert.assertNull(boundary); + + // second read: + myStart = 1000; + read.setAlignmentStart(myStart); + read.setMateAlignmentStart(980); + read.setReadNegativeStrandFlag(false); + boundary = ReadUtils.getAdaptorBoundary(read); + Assert.assertNull(boundary); } } From 410a340ef5ee55dc1463d9b9f339763b9cef9a94 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 12 Jan 2012 02:04:03 -0500 Subject: [PATCH 03/36] Swapping the iteration order to run over AF conformations and then samples instead of the reverse minimizes calls to HashMap.get; instead of it being O(n) since we called it for each sample it's now O(1). Runtime on T2D GENES test set is reduced by 5-10%. More optimizations to follow. --- .../genotyper/ExactAFCalculationModel.java | 71 ++++++++++--------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 8da72ef7a..e54ab1fef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -35,7 +35,7 @@ import java.util.*; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { - private final static boolean DEBUG = false; + // private final static boolean DEBUG = false; private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 @@ -199,8 +199,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double[][] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result) { - if ( DEBUG ) - System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); + //if ( DEBUG ) + // System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // compute the log10Likelihoods computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result); @@ -209,8 +209,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( !preserveData ) { for ( ExactACcounts index : set.dependentACsetsToDelete ) { indexesToACset.put(index, null); - if ( DEBUG ) - System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts); + //if ( DEBUG ) + // System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts); } } @@ -218,8 +218,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // can we abort early because the log10Likelihoods are so small? if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { - if ( DEBUG ) - System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); + //if ( DEBUG ) + // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); // no reason to keep this data around because nothing depends on it if ( !preserveData ) @@ -262,8 +262,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate // over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early) if ( !preserveData && lastSet == null ) { - if ( DEBUG ) - System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts); + //if ( DEBUG ) + // System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts); lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue); } if ( lastSet != null ) @@ -323,36 +323,43 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { else { // all possible likelihoods for a given cell from which to choose the max final int numPaths = set.ACsetIndexToPLIndex.size() + 1; - final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it + final double[][] log10ConformationLikelihoods = new double[set.log10Likelihoods.length][numPaths]; // TODO can be created just once, since you initialize it + // initialize + for ( int i = 0; i < set.log10Likelihoods.length; i++ ) + for ( int j = 0; j < numPaths; j++ ) + // TODO -- Arrays.fill? + // todo -- is this even necessary? Why not have as else below? + log10ConformationLikelihoods[i][j] = Double.NEGATIVE_INFINITY; - for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { - final double[] gl = genotypeLikelihoods.get(j); - final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; + // deal with the non-AA possible conformations + int conformationIndex = 1; + for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { + //if ( DEBUG ) + // System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey()); - // initialize - for ( int i = 0; i < numPaths; i++ ) - // TODO -- Arrays.fill? - // todo -- is this even necessary? Why not have as else below? - log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY; + ExactACset dependent = indexesToACset.get(mapping.getKey()); - // deal with the AA case first - if ( totalK < 2*j-1 ) - log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { + final double[] gl = genotypeLikelihoods.get(j); - // deal with the other possible conformations now - if ( totalK <= 2*j ) { // skip impossible conformations - int conformationIndex = 1; - for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { - if ( DEBUG ) - System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey()); - log10ConformationLikelihoods[conformationIndex++] = - determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()]; + if ( totalK <= 2*j ) { // skip impossible conformations + log10ConformationLikelihoods[j][conformationIndex] = + determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()]; } } - final double log10Max = MathUtils.approximateLog10SumLog10(log10ConformationLikelihoods); + conformationIndex++; + } - // finally, update the L(j,k) value + // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { + final double[] gl = genotypeLikelihoods.get(j); + + if ( totalK < 2*j-1 ) + log10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; + + final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; + final double log10Max = MathUtils.approximateLog10SumLog10(log10ConformationLikelihoods[j]); set.log10Likelihoods[j] = log10Max - logDenominator; } } @@ -523,7 +530,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { lastK = k; maxLog10L = Math.max(maxLog10L, log10LofK); if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { - if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L); + //if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L); done = true; } From f5f5ed5dcdb96752d37d00450cb0fcfc8369bf62 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 12 Jan 2012 08:50:03 -0500 Subject: [PATCH 04/36] Don't initialize the cell conformation values (use an else in the loop instead) as per Mark's TODO --- .../genotyper/ExactAFCalculationModel.java | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index e54ab1fef..986b2a800 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -324,12 +324,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // all possible likelihoods for a given cell from which to choose the max final int numPaths = set.ACsetIndexToPLIndex.size() + 1; final double[][] log10ConformationLikelihoods = new double[set.log10Likelihoods.length][numPaths]; // TODO can be created just once, since you initialize it - // initialize - for ( int i = 0; i < set.log10Likelihoods.length; i++ ) - for ( int j = 0; j < numPaths; j++ ) - // TODO -- Arrays.fill? - // todo -- is this even necessary? Why not have as else below? - log10ConformationLikelihoods[i][j] = Double.NEGATIVE_INFINITY; // deal with the non-AA possible conformations int conformationIndex = 1; @@ -340,12 +334,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { ExactACset dependent = indexesToACset.get(mapping.getKey()); for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { - final double[] gl = genotypeLikelihoods.get(j); if ( totalK <= 2*j ) { // skip impossible conformations - log10ConformationLikelihoods[j][conformationIndex] = + final double[] gl = genotypeLikelihoods.get(j); + log10ConformationLikelihoods[j][conformationIndex] = determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()]; - } + } else { + log10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY; + } } conformationIndex++; @@ -353,10 +349,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { - final double[] gl = genotypeLikelihoods.get(j); - if ( totalK < 2*j-1 ) + if ( totalK < 2*j-1 ) { + final double[] gl = genotypeLikelihoods.get(j); log10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; + } else { + log10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY; + } final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; final double log10Max = MathUtils.approximateLog10SumLog10(log10ConformationLikelihoods[j]); From e7fe9910f7d8e3df40f2848231dfadf301f09424 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 12 Jan 2012 10:27:10 -0500 Subject: [PATCH 05/36] Create the temp storage for calculating cell values just once as per Mark's TODO --- .../genotyper/ExactAFCalculationModel.java | 31 +++++++++++-------- .../broadinstitute/sting/utils/MathUtils.java | 26 +++++++++++----- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 986b2a800..1594c92cb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -177,12 +177,18 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { ACqueue.add(zeroSet); indexesToACset.put(zeroSet.ACcounts, zeroSet); + // optimization: create the temporary storage for computing L(j,k) just once + final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1; + final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies]; + for ( int i = 0; i < maxPossibleDependencies; i++ ) + tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY; + // keep processing while we have AC conformations that need to be calculated double maxLog10L = Double.NEGATIVE_INFINITY; while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods); // adjust max likelihood seen if needed maxLog10L = Math.max(maxLog10L, log10LofKs); @@ -197,13 +203,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final Queue ACqueue, final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, - final AlleleFrequencyCalculationResult result) { + final AlleleFrequencyCalculationResult result, + final double[][] tempLog10ConformationLikelihoods) { //if ( DEBUG ) // System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // compute the log10Likelihoods - computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result); + computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods); // clean up memory if ( !preserveData ) { @@ -309,7 +316,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final ArrayList genotypeLikelihoods, final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, - final AlleleFrequencyCalculationResult result) { + final AlleleFrequencyCalculationResult result, + final double[][] tempLog10ConformationLikelihoods) { set.log10Likelihoods[0] = 0.0; // the zero case final int totalK = set.getACsum(); @@ -321,10 +329,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } // k > 0 for at least one k else { - // all possible likelihoods for a given cell from which to choose the max - final int numPaths = set.ACsetIndexToPLIndex.size() + 1; - final double[][] log10ConformationLikelihoods = new double[set.log10Likelihoods.length][numPaths]; // TODO can be created just once, since you initialize it - // deal with the non-AA possible conformations int conformationIndex = 1; for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { @@ -337,10 +341,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( totalK <= 2*j ) { // skip impossible conformations final double[] gl = genotypeLikelihoods.get(j); - log10ConformationLikelihoods[j][conformationIndex] = + tempLog10ConformationLikelihoods[j][conformationIndex] = determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()]; } else { - log10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY; + tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY; } } @@ -348,17 +352,18 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value + final int numPaths = set.ACsetIndexToPLIndex.size() + 1; for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { if ( totalK < 2*j-1 ) { final double[] gl = genotypeLikelihoods.get(j); - log10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; + tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; } else { - log10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY; + tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY; } final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; - final double log10Max = MathUtils.approximateLog10SumLog10(log10ConformationLikelihoods[j]); + final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths); set.log10Likelihoods[j] = log10Max - logDenominator; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 5ffd634cc..408faf61a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -63,14 +63,18 @@ public class MathUtils { return (d > 0) ? (int)(d + 0.5d) : (int)(d - 0.5d); } - public static double approximateLog10SumLog10(double[] vals) { + public static double approximateLog10SumLog10(final double[] vals) { + return approximateLog10SumLog10(vals, vals.length); + } - final int maxElementIndex = MathUtils.maxElementIndex(vals); + public static double approximateLog10SumLog10(final double[] vals, final int endIndex) { + + final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex); double approxSum = vals[maxElementIndex]; if ( approxSum == Double.NEGATIVE_INFINITY ) return approxSum; - for ( int i = 0; i < vals.length; i++ ) { + for ( int i = 0; i < endIndex; i++ ) { if ( i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY ) continue; @@ -582,11 +586,15 @@ public class MathUtils { return normalizeFromLog10(array, false); } - public static int maxElementIndex(double[] array) { + public static int maxElementIndex(final double[] array) { + return maxElementIndex(array, array.length); + } + + public static int maxElementIndex(final double[] array, final int endIndex) { if (array == null) throw new IllegalArgumentException("Array cannot be null!"); int maxI = -1; - for (int i = 0; i < array.length; i++) { + for (int i = 0; i < endIndex; i++) { if (maxI == -1 || array[i] > array[maxI]) maxI = i; } @@ -594,11 +602,15 @@ public class MathUtils { return maxI; } - public static int maxElementIndex(int[] array) { + public static int maxElementIndex(final int[] array) { + return maxElementIndex(array, array.length); + } + + public static int maxElementIndex(final int[] array, int endIndex) { if (array == null) throw new IllegalArgumentException("Array cannot be null!"); int maxI = -1; - for (int i = 0; i < array.length; i++) { + for (int i = 0; i < endIndex; i++) { if (maxI == -1 || array[i] > array[maxI]) maxI = i; } From cd43f016ce1b7799a52bb105ea15afe420f86fcc Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Thu, 12 Jan 2012 13:29:11 -0500 Subject: [PATCH 06/36] Fixed NPE in getNextOverlappingBAMScheduleEntry() when mixed mapped/unmapped interval lists are used. Added integrationtest to verify behavior. --- .../gatk/datasources/reads/BAMScheduler.java | 2 +- .../interval/IntervalIntegrationTest.java | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java index dca4cc771..bcb726607 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java @@ -256,7 +256,7 @@ public class BAMScheduler implements Iterator { // Naive algorithm: find all elements in current contig for proper schedule creation. List lociInContig = new LinkedList(); for(GenomeLoc locus: loci) { - if(dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded) + if(!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded) lociInContig.add(locus); } diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java index 3fb330853..a8364419d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java @@ -83,6 +83,28 @@ public class IntervalIntegrationTest extends WalkerTest { executeTest("testUnmappedReadInclusion",spec); } + @Test + public void testMixedMappedAndUnmapped() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T PrintReads" + + " -I " + validationDataLocation + "MV1994.bam" + + " -R " + validationDataLocation + "Escherichia_coli_K12_MG1655.fasta" + + " -L Escherichia_coli_K12:4630000-4639675" + + " -L unmapped" + + " -U", + 0, // two output files + Collections.emptyList()); + + // our base file + File baseOutputFile = createTempFile("testUnmappedReadInclusion",".bam"); + spec.setOutputFileLocation(baseOutputFile); + spec.addAuxFile("083ef1e9ded868e0d12c05a1354c0319",createTempFileFromBase(baseOutputFile.getAbsolutePath())); + spec.addAuxFile("fa90ff91ac0cc689c71a3460a3530b8b", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai")); + + executeTest("testUnmappedReadInclusion",spec); + } + + @Test(enabled = false) public void testUnmappedReadExclusion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( From 28aa3535013ff99b1317a026c520697c1e0cd428 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 12 Jan 2012 15:07:11 -0500 Subject: [PATCH 07/36] Added "unbiased" downsampling parameter to PrintReads * also cleaned up and updated part of the unit tests for print reads. Needs a more thorough cleaning. --- .../sting/gatk/walkers/PrintReadsWalker.java | 35 ++++++++++++++++--- .../walkers/PrintReadsWalkerUnitTest.java | 34 +++++------------- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index ac69738d3..f029b768e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -30,11 +30,13 @@ import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; import java.io.File; import java.util.Collection; +import java.util.Random; import java.util.Set; import java.util.TreeSet; @@ -70,12 +72,21 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * -I input2.bam \ * --read_filter MappingQualityZero * + * // Prints the first 2000 reads in the BAM file * java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ * -T PrintReads \ * -o output.bam \ * -I input.bam \ * -n 2000 + * + * // Downsamples BAM file to 25% + * java -Xmx2g -jar GenomeAnalysisTK.jar \ + * -R ref.fasta \ + * -T PrintReads \ + * -o output.bam \ + * -I input.bam \ + * -ds 0.25 * * */ @@ -95,9 +106,18 @@ public class PrintReadsWalker extends ReadWalker { @Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false) String platform = null; + /** + * Only prints the first n reads of the file + */ @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; + /** + * Downsamples the bam file by the given ratio, printing only approximately the given percentage of reads. The downsampling is balanced (over the entire coverage) + */ + @Argument(fullName = "downsample_coverage", shortName = "ds", doc="Downsample BAM to desired coverage", required = false) + public double downsampleRatio = 1.0; + /** * Only reads from samples listed in the provided file(s) will be included in the output. */ @@ -112,6 +132,8 @@ public class PrintReadsWalker extends ReadWalker { private TreeSet samplesToChoose = new TreeSet(); private boolean SAMPLES_SPECIFIED = false; + + Random random; /** * The initialize function. @@ -132,13 +154,15 @@ public class PrintReadsWalker extends ReadWalker { if(!samplesToChoose.isEmpty()) { SAMPLES_SPECIFIED = true; } + + random = GenomeAnalysisEngine.getRandomGenerator(); } /** * The reads filter function. * - * @param ref the reference bases that correspond to our read, if a reference was provided + * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return true if the read passes the filter, false if it doesn't */ @@ -188,12 +212,13 @@ public class PrintReadsWalker extends ReadWalker { * @return the read itself */ public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { - return read; + return (random.nextDouble() < downsampleRatio) ? read : null; } /** * reduceInit is called once before any calls to the map function. We use it here to setup the output * bam file, if it was specified on the command line + * * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise */ public SAMFileWriter reduceInit() { @@ -202,12 +227,14 @@ public class PrintReadsWalker extends ReadWalker { /** * given a read and a output location, reduce by emitting the read - * @param read the read itself + * + * @param read the read itself * @param output the output source * @return the SAMFileWriter, so that the next reduce can emit to the same source */ public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) { - output.addAlignment(read); + if (read != null) + output.addAlignment(read); return output; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java index 8cd10048a..484641981 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java @@ -60,19 +60,23 @@ public class PrintReadsWalkerUnitTest extends BaseTest { private ReferenceContext bases = null; //private ReferenceContext ref = new ReferenceContext() + PrintReadsWalker walker; + ArtificialSAMFileWriter writer; + @BeforeMethod public void before() { trav = new ArtificialReadsTraversal(); readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads; + + walker = new PrintReadsWalker(); + writer = new ArtificialSAMFileWriter(); + walker.out = writer; + walker.initialize(); } /** test that we get out the same number of reads we put in */ @Test public void testReadCount() { - PrintReadsWalker walker = new PrintReadsWalker(); - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - walker.out = writer; - trav.traverse(walker, null, writer); assertEquals(writer.getRecords().size(), readTotal); } @@ -80,10 +84,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest { /** test that we're ok with a null read */ @Test public void testNullRead() { - PrintReadsWalker walker = new PrintReadsWalker(); - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - walker.out = writer; - SAMRecord rec = walker.map(bases, null, null); assertTrue(rec == null); } @@ -91,10 +91,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest { /** tes that we get the read we put into the map function */ @Test public void testReturnRead() { - PrintReadsWalker walker = new PrintReadsWalker(); - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - walker.out = writer; - SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000); GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); SAMRecord ret = walker.map(bases, rec, null); @@ -102,20 +98,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest { assertTrue(ret.getReadName().equals(rec.getReadName())); } - /** test that the read makes it to the output source */ - @Test - public void testReducingRead() { - PrintReadsWalker walker = new PrintReadsWalker(); - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - walker.out = writer; - - SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000); - SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); - SAMRecord ret = walker.map(bases, null,null); - walker.reduce(ret,writer); - - assertTrue(writer.getRecords().size() == 1); - } } From b06074d6e74f576f549a6a00411cc026640183d1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 13 Jan 2012 09:24:13 -0500 Subject: [PATCH 08/36] Updated SortingVCFWriterBase to use PriorityBlockingQueue so that the class is thread-safe -- Uses PriorityBlockingQueue instead of PriorityQueue -- synchronized keywords added to all key functions that modify internal state Note that this hasn't been tested extensivesly. Based on report: http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic --- .../codecs/vcf/SortingVCFWriterBase.java | 100 ++++++++++++------ 1 file changed, 65 insertions(+), 35 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java index c299511db..84ecc7fcd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java @@ -27,10 +27,8 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Comparator; -import java.util.PriorityQueue; -import java.util.Set; -import java.util.TreeSet; +import java.util.*; +import java.util.concurrent.PriorityBlockingQueue; /** * This class writes VCF files, allowing records to be passed in unsorted. @@ -39,20 +37,26 @@ import java.util.TreeSet; public abstract class SortingVCFWriterBase implements VCFWriter { // The VCFWriter to which to actually write the sorted VCF records - private VCFWriter innerWriter = null; + private final VCFWriter innerWriter; // the current queue of un-emitted records - private PriorityQueue queue = null; + private final Queue queue; // The locus until which we are permitted to write out (inclusive) protected Integer mostUpstreamWritableLoc; protected static final int BEFORE_MOST_UPSTREAM_LOC = 0; // No real locus index is <= 0 // The set of chromosomes already passed over and to which it is forbidden to return - private Set finishedChromosomes = null; + private final Set finishedChromosomes; // Should we call innerWriter.close() in close() - private boolean takeOwnershipOfInner; + private final boolean takeOwnershipOfInner; + + // -------------------------------------------------------------------------------- + // + // Constructors + // + // -------------------------------------------------------------------------------- /** * create a local-sorting VCF writer, given an inner VCF writer to write to @@ -62,16 +66,27 @@ public abstract class SortingVCFWriterBase implements VCFWriter { */ public SortingVCFWriterBase(VCFWriter innerWriter, boolean takeOwnershipOfInner) { this.innerWriter = innerWriter; - this.queue = new PriorityQueue(50, new VariantContextComparator()); - this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC; this.finishedChromosomes = new TreeSet(); this.takeOwnershipOfInner = takeOwnershipOfInner; + + // has to be PriorityBlockingQueue to be thread-safe + // see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic + this.queue = new PriorityBlockingQueue(50, new VariantContextComparator()); + + this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC; } public SortingVCFWriterBase(VCFWriter innerWriter) { this(innerWriter, false); // by default, don't own inner } + // -------------------------------------------------------------------------------- + // + // public interface functions + // + // -------------------------------------------------------------------------------- + + @Override public void writeHeader(VCFHeader header) { innerWriter.writeHeader(header); } @@ -79,6 +94,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter { /** * attempt to close the VCF file; we need to flush the queue first */ + @Override public void close() { stopWaitingToSort(); @@ -86,27 +102,14 @@ public abstract class SortingVCFWriterBase implements VCFWriter { innerWriter.close(); } - private void stopWaitingToSort() { - emitRecords(true); - mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC; - } - - protected void emitSafeRecords() { - emitRecords(false); - } - - protected void noteCurrentRecord(VariantContext vc) { - // did the user break the contract by giving a record too late? - if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc - throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added."); - } /** * add a record to the file * * @param vc the Variant Context object */ - public void add(VariantContext vc) { + @Override + public synchronized void add(VariantContext vc) { /* Note that the code below does not prevent the successive add()-ing of: (chr1, 10), (chr20, 200), (chr15, 100) since there is no implicit ordering of chromosomes: */ @@ -125,7 +128,43 @@ public abstract class SortingVCFWriterBase implements VCFWriter { emitSafeRecords(); } - private void emitRecords(boolean emitUnsafe) { + /** + * Gets a string representation of this object. + * @return a string representation of this object + */ + @Override + public String toString() { + return getClass().getName(); + } + + // -------------------------------------------------------------------------------- + // + // protected interface functions for subclasses to use + // + // -------------------------------------------------------------------------------- + + private synchronized void stopWaitingToSort() { + emitRecords(true); + mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC; + } + + protected synchronized void emitSafeRecords() { + emitRecords(false); + } + + protected void noteCurrentRecord(VariantContext vc) { + // did the user break the contract by giving a record too late? + if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc + throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added."); + } + + // -------------------------------------------------------------------------------- + // + // private implementation functions + // + // -------------------------------------------------------------------------------- + + private synchronized void emitRecords(boolean emitUnsafe) { while (!queue.isEmpty()) { VCFRecord firstRec = queue.peek(); @@ -140,15 +179,6 @@ public abstract class SortingVCFWriterBase implements VCFWriter { } } - /** - * Gets a string representation of this object. - * @return a string representation of this object - */ - @Override - public String toString() { - return getClass().getName(); - } - private static class VariantContextComparator implements Comparator { public int compare(VCFRecord r1, VCFRecord r2) { return r1.vc.getStart() - r2.vc.getStart(); From cec7107762937832f4014f3df0830a45da4757d9 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sat, 14 Jan 2012 12:13:15 -0500 Subject: [PATCH 12/36] Better location for the downsampling of reads in PrintReads * using the filter() instead of map() makes for a cleaner walker. * renaming the unit tests to make more sense with the other unit and integration tests --- .../sting/gatk/walkers/PrintReadsWalker.java | 19 +++++++++---------- ...rUnitTest.java => PrintReadsUnitTest.java} | 15 +++++++-------- 2 files changed, 16 insertions(+), 18 deletions(-) rename public/java/test/org/broadinstitute/sting/gatk/walkers/{PrintReadsWalkerUnitTest.java => PrintReadsUnitTest.java} (97%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index f029b768e..0702b08c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -31,8 +31,11 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; import java.util.Collection; @@ -40,10 +43,6 @@ import java.util.Random; import java.util.Set; import java.util.TreeSet; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - /** * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file. * @@ -201,18 +200,19 @@ public class PrintReadsWalker extends ReadWalker { nReadsToPrint--; // n > 0 means there are still reads to be printed. } - return true; - } + // if downsample option is turned off (= 1) then don't waste time getting the next random number. + return (downsampleRatio == 1 || random.nextDouble() < downsampleRatio); + } /** * The reads map function. * - * @param ref the reference bases that correspond to our read, if a reference was provided + * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return the read itself */ public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { - return (random.nextDouble() < downsampleRatio) ? read : null; + return read; } /** @@ -233,8 +233,7 @@ public class PrintReadsWalker extends ReadWalker { * @return the SAMFileWriter, so that the next reduce can emit to the same source */ public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) { - if (read != null) - output.addAlignment(read); + output.addAlignment(read); return output; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java similarity index 97% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java rename to public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java index 484641981..0fcaad3bf 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java @@ -1,20 +1,19 @@ package org.broadinstitute.sting.gatk.walkers; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.sam.ArtificialReadsTraversal; import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + /* * Copyright (c) 2009 The Broad Institute @@ -44,11 +43,11 @@ import org.testng.annotations.Test; /** * @author aaron *

- * Class PrintReadsWalkerUnitTest + * Class PrintReadsUnitTest *

* This tests the print reads walker, using the artificial reads traversal */ -public class PrintReadsWalkerUnitTest extends BaseTest { +public class PrintReadsUnitTest extends BaseTest { /** * our private fake reads traversal. This traversal seeds the From 3ba918aff1f7cfa05fe2c87852cd164d9a1875cf Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Tue, 17 Jan 2012 11:05:42 -0500 Subject: [PATCH 17/36] Error message cleanup in BAM indexing code. --- .../sting/gatk/datasources/reads/GATKBAMIndex.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index dc703ff23..244438a59 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -88,7 +88,7 @@ public class GATKBAMIndex { seek(0); final byte[] buffer = readBytes(4); if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) { - throw new RuntimeException("Invalid file header in BAM index " + mFile + + throw new ReviewedStingException("Invalid file header in BAM index " + mFile + ": " + new String(buffer)); } @@ -112,7 +112,7 @@ public class GATKBAMIndex { openIndexFile(); if (referenceSequence >= sequenceCount) - throw new ReviewedStingException("Invalid sequence number " + referenceSequence); + throw new ReviewedStingException("Invalid sequence number " + referenceSequence + " in index file " + mFile); skipToSequence(referenceSequence); @@ -183,12 +183,12 @@ public class GATKBAMIndex { public int getLevelForBin(final Bin bin) { GATKBin gatkBin = new GATKBin(bin); if(gatkBin.getBinNumber() >= MAX_BINS) - throw new SAMException("Tried to get level for invalid bin."); + throw new ReviewedStingException("Tried to get level for invalid bin in index file " + mFile); for(int i = getNumIndexLevels()-1; i >= 0; i--) { if(gatkBin.getBinNumber() >= LEVEL_STARTS[i]) return i; } - throw new SAMException("Unable to find correct bin for bin "+bin); + throw new ReviewedStingException("Unable to find correct bin for bin " + bin + " in index file " + mFile); } /** @@ -352,7 +352,7 @@ public class GATKBAMIndex { fileChannel.read(buffer); } catch(IOException ex) { - throw new ReviewedStingException("Index: unable to read bytes from index file."); + throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile); } } @@ -379,7 +379,7 @@ public class GATKBAMIndex { fileChannel.position(fileChannel.position() + count); } catch(IOException ex) { - throw new ReviewedStingException("Index: unable to reposition file channel."); + throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile); } } @@ -388,7 +388,7 @@ public class GATKBAMIndex { fileChannel.position(position); } catch(IOException ex) { - throw new ReviewedStingException("Index: unable to reposition of file channel."); + throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile); } } From cde224746f1c4ce58862ef9891582eca671f2d71 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Tue, 17 Jan 2012 13:51:05 -0500 Subject: [PATCH 18/36] Bait Redesign supports baits that overlap, by picking only the start of intervals. CalibrateGenotypeLikelihoods supports using an external VCF as input for genotype likelihoods. Currently can be a per-sample VCF, but has un-implemented methods for allowing a read-group VCF to be used. Removed the old constrained genotyping code from UGE -- the trellis calculated is exactly the same as that done in the MLE AC estimate; so we should just re-use that one. --- .../genotyper/UnifiedGenotyperEngine.java | 167 ------------------ 1 file changed, 167 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 5d73e8d28..ee5aed3e5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -858,171 +858,4 @@ public class UnifiedGenotyperEngine { return calls; } - - /** - * @param vc variant context with genotype likelihoods - * @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use - * @param exactAC integer array describing the AC from the exact model for the corresponding alleles - * @return genotypes - */ - public static GenotypesContext constrainedAssignGenotypes(VariantContext vc, boolean[] allelesToUse, int[] exactAC ) { - - final GenotypesContext GLs = vc.getGenotypes(); - - // samples - final List sampleIndices = GLs.getSampleNamesOrderedByName(); - - // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward - final int numOriginalAltAlleles = allelesToUse.length; - final List newAlleles = new ArrayList(numOriginalAltAlleles+1); - newAlleles.add(vc.getReference()); - final HashMap alleleIndexMap = new HashMap(); // need this for skipping dimensions - int[] alleleCount = new int[exactAC.length]; - for ( int i = 0; i < numOriginalAltAlleles; i++ ) { - if ( allelesToUse[i] ) { - newAlleles.add(vc.getAlternateAllele(i)); - alleleIndexMap.put(vc.getAlternateAllele(i),i); - alleleCount[i] = exactAC[i]; - } else { - alleleCount[i] = 0; - } - } - final List newAltAlleles = newAlleles.subList(1,newAlleles.size()); - final int numNewAltAlleles = newAltAlleles.size(); - ArrayList likelihoodIndexesToUse = null; - - // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, - // then we can keep the PLs as is; otherwise, we determine which ones to keep - final int[][] PLcache; - if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { - likelihoodIndexesToUse = new ArrayList(30); - PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles]; - - for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) { - int[] alleles = PLcache[PLindex]; - // consider this entry only if both of the alleles are good - if ( (alleles[0] == 0 || allelesToUse[alleles[0] - 1]) && (alleles[1] == 0 || allelesToUse[alleles[1] - 1]) ) - likelihoodIndexesToUse.add(PLindex); - } - } else { - PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles]; - } - - // set up the trellis dimensions - // SAMPLE x alt 1 x alt 2 x alt 3 - // todo -- check that exactAC has alt counts at [1],[2],[3] (and not [0],[1],[2]) - double[][][][] transitionTrellis = new double[sampleIndices.size()+1][exactAC[1]][exactAC[2]][exactAC[3]]; - // N x AC1 x AC2 x AC3; worst performance in multi-allelic where all alleles are moderate frequency - // capped at the MLE ACs* - // todo -- there's an optimization: not all states in the rectangular matrix will be reached, in fact - // todo -- for tT[0] we only care about tT[0][0][0][0], and for tT[1], only combinations of 0,1,2. - int idx = 1; // index of which sample we're on - int prevMaxState = 0; // the maximum state (e.g. AC) reached by the previous sample. Symmetric. (AC capping handled by logic in loop) - // iterate over each sample - for ( String sample : sampleIndices ) { - // push the likelihoods into the next possible states, that is to say - // L[state] = L[prev state] + L[genotype getting into state] - // iterate over each previous state, by dimension - // and contribute the likelihoods for transitions to this state - double[][][] prevState = transitionTrellis[idx-1]; - double[][][] thisState = transitionTrellis[idx]; - Genotype genotype = GLs.get(sample); - if ( genotype.isNoCall() || genotype.isFiltered() ) { - thisState = prevState.clone(); - } else { - double[] likelihoods = genotype.getLikelihoods().getAsVector(); - int dim1min = Math.max(0, alleleCount[0]-2*(sampleIndices.size()-idx+1)); - int dim1max = Math.min(prevMaxState,alleleCount[0]); - int dim2min = Math.max(0,alleleCount[1]-2*(sampleIndices.size()-idx+1)); - int dim2max = Math.min(prevMaxState,alleleCount[1]); - int dim3min = Math.max(0,alleleCount[2]-2*(sampleIndices.size()-idx+1)); - int dim3max = Math.min(prevMaxState,alleleCount[2]); - // cue annoying nested for loop - for ( int a1 = dim1min ; a1 <= dim1max; a1++ ) { - for ( int a2 = dim2min; a2 <= dim2max; a2++ ) { - for ( int a3 = dim3min; a3 <= dim3max; a3++ ) { - double base = prevState[a1][a2][a3]; - for ( int likIdx : likelihoodIndexesToUse ) { - int[] offsets = calculateOffsets(PLcache[likIdx]); - thisState[a1+offsets[1]][a2+offsets[2]][a3+offsets[3]] = base + likelihoods[likIdx]; - } - } - } - } - prevMaxState += 2; - } - idx++; - } - - // after all that pain, we have a fully calculated trellis. Now just march backwards from the EAC state and - // assign genotypes along the greedy path - - GenotypesContext calls = GenotypesContext.create(sampleIndices.size()); - int[] state = alleleCount; - for ( String sample : Utils.reverse(sampleIndices) ) { - --idx; - // the next state will be the maximum achievable state - Genotype g = GLs.get(sample); - if ( g.isNoCall() || ! g.hasLikelihoods() ) { - calls.add(g); - continue; - } - - // subset to the new likelihoods. These are not used except for subsetting in the context iself. - // i.e. they are not a part of the calculation. - final double[] originalLikelihoods = GLs.get(sample).getLikelihoods().getAsVector(); - double[] newLikelihoods; - if ( likelihoodIndexesToUse == null ) { - newLikelihoods = originalLikelihoods; - } else { - newLikelihoods = new double[likelihoodIndexesToUse.size()]; - int newIndex = 0; - for ( int oldIndex : likelihoodIndexesToUse ) - newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; - - // might need to re-normalize - newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); - } - - // todo -- alter this. For ease of programming, likelihood indeces are - // todo -- used to iterate over achievable states. - double max = Double.NEGATIVE_INFINITY; - int[] bestState = null; - int[] bestAlleles = null; - int bestLikIdx = -1; - for ( int likIdx : likelihoodIndexesToUse ) { - int[] offsets = calculateOffsets(PLcache[likIdx]); - double val = transitionTrellis[idx-1][state[0]-offsets[0]][state[1]-offsets[1]][state[2]-offsets[2]]; - if ( val > max ) { - max = val; - bestState = new int[] { state[0]-offsets[0],state[1]-offsets[1],state[2]-offsets[2]}; - bestAlleles = PLcache[likIdx]; - bestLikIdx = likIdx; - } - } - state = bestState; - List gtAlleles = new ArrayList(2); - gtAlleles.add(newAlleles.get(bestAlleles[0])); - gtAlleles.add(newAlleles.get(bestAlleles[1])); - - final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(bestLikIdx, newLikelihoods); - Map attrs = new HashMap(g.getAttributes()); - if ( numNewAltAlleles == 0 ) - attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); - else - attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods)); - calls.add(new Genotype(sample, gtAlleles, qual, null, attrs, false)); - - } - return calls; - } - - private static int[] calculateOffsets(int[] alleleIndeces) { - int[] offsets = new int[4]; - for ( int i = 0; i < alleleIndeces.length; i++ ) { - offsets[alleleIndeces[i]]++; - } - - return offsets; - } } From 41d70abe4e852de5cf3da11b80c8b5a0f509bbb3 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Tue, 17 Jan 2012 14:47:53 -0500 Subject: [PATCH 21/36] At chartl's request, add the bwa aln -N and bwa aln -m parameters to the bindings. --- public/c/bwa/build_linux.sh | 2 +- public/c/bwa/bwa_gateway.cpp | 2 ++ public/c/bwa/bwa_gateway.h | 2 ++ ...tute_sting_alignment_bwa_c_BWACAligner.cpp | 36 +++++++++++++++++++ .../sting/alignment/bwa/BWAConfiguration.java | 10 ++++++ 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/public/c/bwa/build_linux.sh b/public/c/bwa/build_linux.sh index b3631a28d..8683bb377 100755 --- a/public/c/bwa/build_linux.sh +++ b/public/c/bwa/build_linux.sh @@ -1,5 +1,5 @@ #!/bin/sh -export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa" +export BWA_HOME="/humgen/gsa-scr1/hanna/src/bio-bwa/bwa" export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux" export TARGET_LIB="libbwa.so" export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread" diff --git a/public/c/bwa/bwa_gateway.cpp b/public/c/bwa/bwa_gateway.cpp index 00f5aa5bc..088ee43bf 100644 --- a/public/c/bwa/bwa_gateway.cpp +++ b/public/c/bwa/bwa_gateway.cpp @@ -233,6 +233,8 @@ void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_s void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; } void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; } void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; } +void BWA::set_mode_nonstop() { options.mode |= BWA_MODE_NONSTOP; options.max_top2 = 0x7fffffff; } +void BWA::set_max_entries_in_queue(int max_entries) { options.max_entries = max_entries; } /** * Create a sequence with a set of reasonable initial defaults. diff --git a/public/c/bwa/bwa_gateway.h b/public/c/bwa/bwa_gateway.h index 2d26ec650..62756ec2a 100644 --- a/public/c/bwa/bwa_gateway.h +++ b/public/c/bwa/bwa_gateway.h @@ -60,6 +60,8 @@ class BWA { void set_mismatch_penalty(int penalty); void set_gap_open_penalty(int penalty); void set_gap_extension_penalty(int penalty); + void set_mode_nonstop(); + void set_max_entries_in_queue(int max_entries); // Perform the alignment Alignment* generate_single_alignment(const char* bases, diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp index 1ccbef0d4..90d70d4a1 100644 --- a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp +++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp @@ -8,11 +8,13 @@ #include "bwa_gateway.h" #include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h" +typedef void (BWA::*boolean_setter)(); typedef void (BWA::*int_setter)(int value); typedef void (BWA::*float_setter)(float value); static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment); static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name); +static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter); static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter); static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter); static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message); @@ -100,6 +102,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner if(env->ExceptionCheck()) return; set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty); if(env->ExceptionCheck()) return; + set_boolean_configuration_param(env, configuration, "nonStopMode", bwa, &BWA::set_mode_nonstop); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maxEntriesInQueue", bwa, &BWA::set_max_entries_in_queue); + if(env->ExceptionCheck()) return; } JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) @@ -357,6 +363,36 @@ static jstring get_configuration_file(JNIEnv* env, jobject configuration, const return path; } +static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter) { + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Boolean;"); + if(configuration_field == NULL) return; + + jobject boxed_value = env->GetObjectField(configuration,configuration_field); + if(env->ExceptionCheck()) return; + + if(boxed_value != NULL) { + jclass boolean_box_class = env->FindClass("java/lang/Boolean"); + if(boolean_box_class == NULL) return; + + jmethodID boolean_extractor = env->GetMethodID(boolean_box_class,"booleanValue", "()Z"); + if(boolean_extractor == NULL) return; + + jboolean value = env->CallBooleanMethod(boxed_value,boolean_extractor); + if(env->ExceptionCheck()) return; + + if(value) + (bwa->*setter)(); + + env->DeleteLocalRef(boolean_box_class); + } + + env->DeleteLocalRef(boxed_value); + env->DeleteLocalRef(configuration_class); +} + static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) { jclass configuration_class = env->GetObjectClass(configuration); if(configuration_class == NULL) return; diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java index 73441cb6a..e453c7f8a 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java @@ -41,4 +41,14 @@ public class BWAConfiguration { * What is the scoring penalty for a gap extension? */ public Integer gapExtensionPenalty = null; + + /** + * Enter bwa's 'non-stop' mode (equivalent to bwa aln -N parameter). + */ + public Boolean nonStopMode = false; + + /** + * Set the max queue size that bwa will use when searching for matches (equivalent to bwa aln -m parameter). + */ + public Integer maxEntriesInQueue = null; } From 75f87db468e27b51d124bf854eea7c7b30284d9f Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 17 Jan 2012 15:02:45 -0500 Subject: [PATCH 22/36] Replacing Mills file with new gold standard indel set in the resource bundle for release with v1.5 --- .../sting/queue/qscripts/GATKResourcesBundle.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 8c9063c29..22ac52453 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -134,8 +134,8 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf", "1000G_biallelic.indels", b37, true, false)) - addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf", - "Mills_Devine_2hit.indels", b37, true, true)) + addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf", + "Mills_and_1000G_gold_standard.indels", b37, true, true)) // // example call set for wiki tutorial From f2b0575deeb833b808a1aca04028da7733467621 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 16 Jan 2012 14:07:42 -0500 Subject: [PATCH 25/36] Detect unreasonably large allele strings (>2^16) and throw an error -- samtools can emit alleles where the ref is 42M Ns and this caused the GATK (via tribble) to hang in several places. -- Tribble was updated so we actually could read the line properly (rev. to 51 here). -- Still the parsing algorithms in the GATK aren't happy with such a long allele. Instead of optimizing the code around an improper use case I put in a limit of 2^16 bp for any allele, and throw a meaningful exception when encountered. --- .../utils/codecs/vcf/AbstractVCFCodec.java | 16 +++++++++++----- .../{tribble-46.jar => tribble-51.jar} | Bin 301252 -> 304586 bytes .../{tribble-46.xml => tribble-51.xml} | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) rename settings/repository/org.broad/{tribble-46.jar => tribble-51.jar} (87%) rename settings/repository/org.broad/{tribble-46.xml => tribble-51.xml} (51%) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index e44c10f1f..43c878b2f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -18,6 +18,7 @@ import java.util.zip.GZIPInputStream; public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { + public final static int MAX_EXPLICIT_ALLELE_SIZE = (int)Math.pow(2, 16); protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column @@ -252,7 +253,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) || - (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) + (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + " tokens, and saw " + nParts + " )", lineNo); @@ -518,8 +519,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { * @param lineNo the line number for this record */ private static void checkAllele(String allele, boolean isRef, int lineNo) { - if ( allele == null || allele.length() == 0 ) - generateException("Empty alleles are not permitted in VCF records", lineNo); + if ( allele == null || allele.length() == 0 ) + generateException("Empty alleles are not permitted in VCF records", lineNo); + + if ( allele.length() > MAX_EXPLICIT_ALLELE_SIZE ) + generateException(String.format("Allele detected with length %d, exceeding max size %d. Please remove this from the VCF file before continuing", allele.length(), MAX_EXPLICIT_ALLELE_SIZE), lineNo); if ( isSymbolicAllele(allele) ) { if ( isRef ) { @@ -572,12 +576,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { public static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; + final byte ref0 = ref.getBytes()[0]; for ( Allele a : unclippedAlleles ) { if ( a.isSymbolic() ) continue; - if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) { + if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { clipping = false; break; } @@ -589,6 +594,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { protected static int computeReverseClipping(List unclippedAlleles, String ref, int forwardClipping, int lineNo) { int clipping = 0; boolean stillClipping = true; + final byte[] refBytes = ref.getBytes(); while ( stillClipping ) { for ( Allele a : unclippedAlleles ) { @@ -604,7 +610,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { stillClipping = false; else if ( ref.length() == clipping ) generateException("bad alleles encountered", lineNo); - else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] ) + else if ( a.getBases()[a.length()-clipping-1] != refBytes[ref.length()-clipping-1] ) stillClipping = false; } if ( stillClipping ) diff --git a/settings/repository/org.broad/tribble-46.jar b/settings/repository/org.broad/tribble-51.jar similarity index 87% rename from settings/repository/org.broad/tribble-46.jar rename to settings/repository/org.broad/tribble-51.jar index 401fcfc3a92c558e6975e80ed948831447bb6bc2..04121804f22936341a786a08d97a58fc4293f595 100644 GIT binary patch delta 19167 zcmaic34ByV@_)S}c{7ujYeGU!l5l5o5+Fdh5-`Y-0|JN<6#@hZV8RhZ1RR1Kf+E3D zT0Bvs?4lwV$#DF{6WL`w&|Og!5p;JwRsoOoT3r6$di^GupugXLKA(QCUw3tNb#--h zRdpw??sXsgi#uZcI8ApEIf!1Y%PZO%(aZ5X-bX?|E#5k`Hm@LiMqc0Sz61MZ_sJ{k z+b_R=TJPe83m4R+jj5ZNlM(I;ceS_Yc3V$$(XIDS-L1DTP06w{E#nnKlSNbYi?V2H z$T(Hzb{VS_nagwzkzg(JO}Aav82*D@wjz3m-6FNq2sNC0+C5?X-OUc5}n`tRqwFqw0V|`8eVZQ14E;*#S{3&+s z&qE$pEdu^J^V?ND45A)}h~5qYE>uLfJU+a?Asd_?bwamtzWw9!tR9vOd?u$`2oa|| z?m0oS6%%mn=E<--CxiO+F2El1^DJ%BCEH;ta}jQ8@MC zM?Tl|bx?#0?|%Hz-$8DE7Z@}^A^nj^S4-2?a%c!0B+zkzSO*a71_m6eTKS zAc{&PbdB#2a#ClCCxsG3@oXh2HW7uP_#}D{RY{a=RV|vWBzuFaQUG9DU}4mSQt_oS zS$HNOKbka900>WJvY$ddDf`ISYr!y%2-FgtLN^~#7@a1@6^E9Lv8zQT=a^27x}j91 z?&zt9mwGZSD9FW|DGc#EN5)eYL_2L~Eqs!;5F4L^dQoq?@pz1xe|d*UuW~yy%}>r2 za#>$tx^8`$Au=AeySxyhj0U33R+cF~=*#h@k^Sh>|fcR9$%pm+24Jucctf^+v8w4ICZ zbJG3v0GB-IB=7z75Pv_+h<0${Bm8)j@jqrza~M5NPZ+e*pg%b1k1kBeE{1%Pfu3S) zPdjKgzn|e2dkorZ(6a`$bX@8RnPckeY8H=VZNh6b%39sfF(#DERlIO{rTdCckMi{| z>n0w#V6Tdl;8ii0mtR<*B^rteKdEuQs|=j8YG&5Yswu6XsZc>+y&Jxqb%xJZ*4y`e z*>c}IrKw@AhWhz6btCJS)-{N9p~m^{Dj(^p09(OKaLae9e2Ex&QA_q!RP^mWx2Ael z&EnG9x|$`{FrNck6pC1k;?aRBU$iIQeGRCbiQ1a zpnX@YB9mUFBPP8@M-BRuNq?qeCjEt8S18q&UY?p~(i`-qLHkU4i|S1}PH&s^4(I;L zkH68oCY_-7OnRSAnp8{k#I#z?(qYVBXXtY#eLx3A`d;j9Yu;C5e0e9j4N<65$>PQJ zi_;cXFJ4kxHz!Rpld1a9gkt!}gjV?2q%(97ODJbX%`B77(p-~@s8}4o)0PI`=*mpd zx0|i_!h&Q&M181wd`l{Oy6xQ~NJ;y`KTR`27u3|vX_(vcYGsY;bnu+Qc57i?M@`+VB~xk}=0dgQVnMP7PXDSlF= zx*LQN7itJ9)`0FRL{6$E2Mwkn_+lJGZ5(3&$*|!bKWU!KR`TlmD5M>eRGewhD(S$| z)xstfXSj_GZD|+{2R?|Lfi0!(g+&qwGct-BqH@u z=uQ}NN;lv!TFG%Ag(!P6-Og5W-AA#R?l3E3X1Yyewo@4Q7d}PV6BYS9MU-V`?Wf3L zDIvh3?8(yd3-v5Le?LV*i-$}v2#F0T2u%rTrm(CKE-eT>MBeDC1U)t+Hq=k;s`^l*nP~h< zZiA*M|D~=5U9Bh=si#3zR~dATevuMTRDz-yCHiMFVq?(jRJv9QE#=_ND^ri5ZkNmc z9YHHnw<%Y^?(1b+uaX23*^7P07Ikq3*dwb8Mu;qKnkv~-X#{td!Pm@)C=On(EC<^ z-3<$78@>eu=RQcJ2I*icY_dHt&Q(K76inAR zYjRlKOr)9ClrX1r2yBKPy9BdFH)s~-zlOODwH6f;atSTD6otqs2F(U~Y&4Qr&||C{ zp<6U*lC^7?r-C#LO9AUW)$ zGDfTcl~U-CFK1SiD?h;z1qG8aB`Yc{Kfx*U39#Oho*;njBwa6XB{&Z!I1cZia;+dN zHVj{n(#WI)XKWZFonBzZnh6g3L!VU;?#+%hW5Zbi#kyL_V`Fo!bzRy?p=({r+Az%R zAy&|GfsciiP4p~=&;qM^AEi+%7P%jM-47NVz|oH>y^dK!n+Ypd=HxvY~xHP&QYN9`uelJ=JWl@2(oI|VnVkuU@4Y&#@ zmZI8augB0!5E~an0YWeZ%g~@36-DN!_|m4vdY7R;9rBQXbW@vz48tt5gt)LRWV*3X zvM-mEH-aR^)iy2{lpZumR58}b&9=1tpL?2@B7+bGZ)z)SL`A=kDCcqo@qgmMU{+!{ z(4zQk)ITedB$a7ZWq3-3Qn>BN8dT~}X|2=)DPhaCoRY@QJa z4TyIlEg*673bm_m$8o2xey39+Sh+JNPUdz$zsZga3_0?-o$LL^A7n@3KO@jG?}Y2F z6E@O_6HDw!#=4U;WzP52ss0jIkB?rGWVL+x#n-YY--qAj%gX5QZ?-wH<%h-DR-~fT zckE6dl&mjA?7dd$BGTSbLd5u+^-%x7?#hE9Ry+U648?BfpOU4-bhDWB%EG4Be}R9T zvc*dql_as}CM8l_zd{cc-jS-)3tuOlu607duj9w{EnOw$7%>_xH zZO|NpY8|w|g#}jU!m6t`Xd%Nb;%18tTH>Gv7u?gO-0ns$T*lQm@nbn-z1g4@2Ca0! zk&A~Sg(sIrs|{*2XpKR)8no7+b^hY%aZ?fZQVgWRXJ~29+fbmLa z7k_Ux3I4Yzo~#T{&ZWCd>{z2r4jY*;2PT+Me`t0$-6P$EXC^Dl9J#d3#HKb{jPC+W z%pGy{Rmu~-yS9uC4Vk&1eo2jZWS15vj;>aEhyhcSIx%vj5-*Iam5pAGLufFMyCH56 zg4Ky3S*w&|00a#!-E=n(+A0Sjcsrq3*u+<0ZxFo<47_&^hAFOivi6bc$=nB{+>_}i z9Y%Q;8?6T@RHgFlqZGqmFfpxa%Vt5|8ZLb~HeRMXSQ}Sk$X%nSd4pKqWgLvAks$@aBv#lDE(xOg(S1 zD$l7sqWgi9N0f1hoK+Zwt86E^%MX!R%Yzp)SffC$k~Z6t?dV8dx{5x~(;8}%7B^}JMyvFTe|JrJm+293N( z5eC${!ZKk~Is$L*IviYezea|Rz1y-WBo)$Y`IY)^yGH5ag^bwEuEd*nmoRQt$`g<& zwKA>!_);x) zFd^`e3=5g`>e#Yp-%x>FTm~=}r7V0g3f0q2jx`1)EU0C`t1P$BSD*!!2A+&z956A4 zI1jv??c~A$JMXk!Azt`8PopY`HA-|4+BlmamJ+Omh*3IlVMW`R-?J77(FBi_;P`#i zSt*tIgwo8{s1sMHFntqM*fVapRUQRz6uOU+k$Kd6>BLToD?LQXRqhm&!l#OWaCLcM zFM0@rKo5n4Na8G0R_RP(N&m2@gpy3#tKeYqdnzn1Rv9Yb=E7uiLZ8pYcJJ&E(Nl{2JFitfmiwT8({x41 zQw4vI8OkJk|5`Oixj9U3W&c^M?33HBcy6OI*xJcf7ruF954L(vx#FcAN|rxslk%e6 zfBhHkQeF(V_R8?nf4KP*SjeA5h@#JxzW#4^C_hF>v|l}=6sO5-pX18k?0%gmlnl2l zE%;nHW|N?Que>h-#F?L!L9#yO7o|Q`W}6b!>yl*ln|^Aj?19aI0*eaeZ+&hl#VnL! zq|5b5gbUp`X3Rt}pDjegdsc9qQ(M ziJIhf;%gm05Zv5Aw{hi0gEsMz0%DsUy2C}A=}xZO!Vg5{w(?GVmq9)U2RLKt9)q?S z9N|o$dtC^+AjY|w?&IA3PUIdi=s_2(Xs?4F;x_`FskFnvVa`-~6s3=GrrE`DPHu!A zb|Tp6f(8RR563(`biadkIXLL);rgcxdfK4fh?k9+JZe-)<+O^cN-9T{R!o^TrhH^& zN%6#zX`_lqPO7Mc78s*Y_(%i+8>;JMoRbx$igSa~G36zrN;>!PBgLDs!aGahegv0B{@qUQCXN~!nb*mA1^5sn}#Eink8u* zWy?uRt3)6)ZKwtDvUp^yni}>B`wy?0bc7B_yWz}OwXZgY#`@#NsS{L~Pl?c}>|GA9 z*5p8Z4}xY1d=JL=kf86OfZ`Y!|2PK5Kd{4kgE!V2oUz^<-QXX`TciPxet~%$zIYqM zej-kksgeClt!mz0JaCdSq1*#!77okR!Y5p&Ke_=r}h4bn_li2~8-e%eB7c8HkH_02NzKAfz|Y zLMT<6H^j!MSTPmV8rul;#l7)X5`qv`*wt*TNE9+&IRtbL4H~AajV^Wqd9R&<7VP73 zv;{JN*5I1biXDZ`j32cjz-_2IL}i8giX)xRYMh7Xiq!FHH!li+ zY!Prom_&Wl){ z&h(y$Eu|~DoqGojTD9UUpj$*tPzR3X!ASe)R_uFg;ef7#TyDUAu_z3{AF3qkEpN3l822jiPCr34Nk2-c?+f zuZD}UJ5=et`>)=ieq$Q~WlyQQ>_-1Npr+aOM(WF|VZ(Y}SLaEGU80IQu7(F)=-|5v z{&(M0vqLPyAn}#`2VTeOemz1oF4y7(pp!-YMtI}nOH`+S;@{LaA}qk&3&wu%0~q!L zi5DaEc=6(=YNCJor)sEeInDdK`ms$x*?ILrc~`-I?mP8_ZDsxGNA;|2;ynGU`dOwm zaB**j)k7KbfO{YzAY!?#hGj^RScf4MfkNmZCJX5E}5X6v9V=L z(T>?{_;H#RYxf*AOS7#Yf8t`TN=8Wh%a&^+Z0GOdVH%k!$wvhv^VS#8QZl- zdq^suKCGRT{cd_ed))@T=7?5fSiJdS)&Tb|SWLSj#g;@pMVvXMMT_%W!NdoTXimTD z4ed!8(eWQRrM($tSz39wq+k77I1wknz|Jvft_$j? zmOtk4V}2MG%?&Orl6tN{xMLA5=EoBLM6^TWXon`!-q4c9Gwez(T4k{57DuzW#LJJ> zVbn-#_`zWg7cc_`23<0(chCkG-9{T33k~LQ4WHP6bIr2qg_EjhEI=f>NxXGl`$BAv)+5Ey3tCsVLXijt)YZ%yV;NO5YnF&E z`AUKb!_Z)}a4ov`Rlk)e%x|lE)&Xt+aG{{*&jEPjpMizRwkM1_1GbSq(J+J!4>fEIb zwF`1bS1*}6v8KV~({>XP*oVSFpb71URy*hswtTvZkH5i`OHuP&2Us7S#;~J!&HS-)usiq3b6sbJc`;-6eK($3bb{Q7wjpHeE%x zy*M3zc^6_ozCYrGwr_@-Dh__DZ5B;WYCS|t54BVr-K|xGAi7dhEw;X^4H3r;HBlV- zP8&Bk8M4E>JscC&<-h46gd#%qncD3a8*biv> zLuVIYzZn1nXdq0WfGw3~8G87nGz+0L@puy&h{Q%8BYfd|Ez8RY+$|33i-OK_dxo8%a2XX9W6UzTJpFAtIA{^lN_>)fIVod;NC~siN@U>@ z8|0F)iRd4+#2~(u2a)4!BgYBb=W=q`3YNo~2oiZCv&hLz#_4DHQsQr~>5h zVK^TTDl8mfR``UisxUBo927+&6Z#F z>+(jRSO!b?rl98T4pPQKmH|VR<1S3i&zMn`kuirTuF6lH$Kl39=mCmXX^Q6nB05yc z;UWcwW;!xS3eLEa%W&9}vXjEgpQTiHS~j9q={ql-vf6c3h+{=r&=-B-C*j^8aLa({ z3UGNPqA#mZ>qY$<)U5`cjgYG~Sc+>Qbn7g-SseR8sjwfS0XiB67<)_!5J-6v?7*VC z4&xxU=)731@Vc#{Ocb%Y=uY~te2_fNpy`*;5K(T<8t|E6zRmcBoWB5}1Aa$<5q8&dJduy{S%YwvV{r8$Qap$N16AkHYyVIdW{Q?a@C(W_h$zkY%@Ix9_^(s=mj$_Z@W7K_5&t=tF}}Ip`x7*5he| zK6YU{I^&?TjP(=l;!_9x-9i6w&_7-D8GX*w;StRb_%jarf{FgpLI2|SR|b8}?{fy7 z7kA|9^P^1!tFD+fX-rv3#pFp)IAg@WbM>eEkLT&(it?PlrH?*UaeZyj1&|D4M0{Vp zML8u_W$80SPCvc>i0f&Z33q%B8$VM_tiY)zytC^#rZmfhk68_C%`%AUY8rAU*3X>J z=Fn{V#ttEUOW%n-E+sD9gvb7GuKA%Y?$qgVtxNciY@5xdpZM{!@9oM&0qHX%`STZ( z{zDf{IE-_|qLXTRU+2h*va;gxD>xSQEB)7`OSm1UD5jz+nyKhY2#x}l;-(y`8>r4y zLg}Kot-qe$&rlpD-9;-+#mP!Hys~EDg5_y^eK3uqR=+Cn_b!-9m}2rH+*BfP-uO{} zy=#}*N|dQYb0Y-@Thpo9P~4^xqjWOim{*CE0{!OD+1zn~I9sXXm@}|yI%5P%EC{MB znK;SnBhFMhgP<=9^q!&dCgSCDMBD(qo0f$N%Y^7__LKF z{S>v2bd*F_9U^y?R~E}l8g~dLPVe*Aq3S6KZz}3#E7tA zEJg&O-Oz(<*~6?w*SuY1KTC;+CA|E|IPcPr0srS7=Vm%XPA zpa%fJK}jG0+BxX9P+g4>d6gFgT>~agwTKEFUCN2(t@S30CeAHKPDPV$2!$*JHEn!L zznpI?uw)65__{XImt z1Q4k+&^P2qsqnaa1Ch5Bvz8G}pFB)_keU&#?AifK4fln(BD0Lkh6T+!GZZrqZu6{X zMFB@nfu0qWfmp-zR?6j@kPp$pEdGoQX{EeE+=T2?l@RLA_fy|OBQQIB)36_KdENc- z41E3XALwr%6?iPb{WJiYYHlDueIE@BqHqxKK^hD2au>?{z&3(+Qd*(o5DmfFlTz#* z3J_aL{50$Zj5mHeI(m+jqm;|*nQ!+f9q~N?(fe3$x zsL)`0rCyI{Fk#K|j3hyKq+!kVz#{WN$VUQp9Np-J0$YZShVL0IN6`ud5Le+Yp%;kO z0M%-oP&X9e5_e*$_hVA-yJ5*B7PCV4461Pr%g zHbTcKIwI6nSSM3}d;@YJmcVCF z$VEzm!0VUjU2+(NOBss|P{S9f__T}7&D(#b~AW>*bn_7AR^f9s$%#Xu)_x;o>AYSfbIrF$PJHisx zZmdj4LP2D;D~KxqSbK8cZ4{P( za#moh7!V%M?!WZ+mc$2lf=ewn&Fx3gSQnSm`fL!bA-_ZGHf*S=iBpkIxY5$S56AvaA26N3RA%p4n;$bF z6Z&h^lV7bg8ZT(0F?eAU9*dXqnst8Uw8uAG>?mFly!AT!_7nuzU?z#}Tj&V(Cq|3dgZ&ybZV$fczVROz&d%ejja4Qn43* zVK9nL!N@w@HZeI6FE2!nkyJn0PC`>9$_QJP{n>4-$IGey`oykw!KWM!Aak z-$cCm?}dhAYp*2g=yv&?C3>bEk{(#5zizvRjtP2nlyn$Vs`cxo>n6N2^ntSQ&YVoTrz| zT;_azg3PU+uUE+2U+3%NWv?Vv^aL|N9fNWWUK~^wRh-N-&^yrNyI6u1DbIgE%G8)$F)NT0YJ3{ zeo^#waldW8;`~m0-&8Dbum>~y{qJH^KmaF>s04n^jq?J~5#nfr-q{+ux_sMzZ^sD- zCmA-D@Xd|Kci6a-EK20G4PA=SJh4YSvQ$sbx_qqbmRH^Xc`Om9j-xtlwXj9~W5yA6 z)WYG*c*zvqZnU|3mv2b=!#JMj#IftZuetG|Hzm0F=SZ?WfLx8pCwc8cHg0ZQVuLPp z;?Cw1%L60+pXC_VHYCtLKnS?^;(-S9@~wnB8q zspI+^+?Sye#hG-~A?7v)Iy{In^+11Fcj0EehlQrYt>O-y9lu(F1}+&(;!e;0tmsh~ zQJgvw7dl6rU7@FnXI9wbSY7a8$V6;PoID*+e=(0irRo+bD?!)V1yyIcy z-3%pE+`Lk^Pj)vk(5XcMplESoWnf%ian&`e0#$~{U4o{^eD=fybqwDv@Z+T&bGOZ> zdlctyR$7NUYJpaGH#b(cH;Wb@tO}5EVHL=zc(4s+ipX4JGot+-8(BA41L&F?W1f`o z$M-^({&-6O*9*4**SLeSu4p^%1G~I|x->89nm&+q7mni(u2&vcerK_5Ww^O#g5bUk z;MlerRrh?`hCWe@Uu~0m)OhFf+d_yqbrj>7|CR7(z%6m=UQE$*Xe4#k`E@`zTo;dQ z)H{hhj~f0PcK$NqqMn>e#EJh_Ch%)+T-n|%T}=N4Q+L4|nDtv4f%iQADAvXsFJ5aC z=Tlraet1w_USGwqm@bL3^wBkC762~oxx|V!0lc@fs=B+as;l@2Qx&9s2UbY5xYX?s znYY@K)Zv0(U;ubGH!f_4?<_vJ)#l|tA9{G$dDL_2DF17wN&Hch@t2}!fM51~;Z~4! zU^>?&{f2{a-FHV7#Sn2C)}i+cvV}O)pt{BCwR)+;H{c$xvuwgxq}0Df5? zn8m0K0NCUlH7WNmbCw;dCT7nZ_xDNc98?RMG`dv^2QQu>X z({9(}CCgXb4m8CMSyyx%k}oh{7Op^Besc&!{#Zw6^<(%15phR=nl5($O|>lEmZf<_ z7mpSy?ptO{aQg)qj(N8Knj1$KTBy(lHze+~dtUea`*+`t5pd#zvB0mn@#NG1c%)c3 zNsAQYHV34udNW9PzFJZ?ehU7gW!L5ac?Y@f_Ia}I{ANAZ8sO=;$^DOnLn@GPYTWi~ zZfvfTpqJ)rF42rGEck>qpWl%T6OB{HU3T|M3IEicdb|a?>$!}-e2=BasiW+Kt(Blf zH{u#Hk66xwXA3Btu(hqu6c^UxCdR5Qfgx;WfLRYnfD1m{pLlOepz+sSckGe2I;Y5N z3e*)hq3-_2WL^COnkm+x<4LwP8o^#i%)T9DnhjC4AF3tcufzF_j-R1qk_?5q3Ei z1dz4W9`y~$u37P54yTTW>AANUK3zl%)FQ+(5N;9Be$D7|dvVHp4DaDPM-ucO*ZDG6 zM-g%A$k4LS7<7b%u)`gsw)y0Bp869*brom6(VToG$(_z#O1IXt$6p0TP8|*J{7_w- zy({2{ru+0nZDePwC{3Ih42JL^gb5nXV?wqr|9Ny?e?vA18IGx4Rah{lALo~J}1vA?=5b_yjeqS zJ9I@q9WB$^z7}k}Jcwm!mx$QJ*Z$Mbu55Zs)5Y<7Y-}CQVLLFoUw^y7W#_Ui>J}j# z&RIJ&%`N;*Q=Geoil2ntYtOLE&7VWIIAwHb`q+fP7)o#k{KYoALE@&P;x^dCoH}my z?^Ff%)n5ek;RJy}KYE>Axp0au*Na8@1y{+fxbH=rIx2Yo4R!-?DOP+dY@rPz5%&rL yKh~fL^4U*8)D4~av03y5@6fZJ>0s*2R zHV{b=6w9-V%6mS$Pe7ldPkA8vP!Qq!pPjwQz3BISH@}&kopR>PnKP&D+^*EkUgPw1_sne9^?Q_SqXp!5hd0uXd7JXZ`&TE_Aq|2;X z(@N8O&KTb`Gtv|3sTt4XHhomru%Ev*u%G^Pkz*GQ4-lrVszw^uRMp6^-kK^MFqa8c z%64~D-45Sjd7Zkl$jeUMlGt)*L|Uz`PN_8EH>b5E`KnVlw$3I!it%Xp=!9om=|rt` z`O`=}(cipjeYs+Z5q`KJqq8%rtU)I|misO|v8C|=jfhiczsP9h-<27E*wf^g1N6?cLlSaW<^Myyv5dI5n_G6!)|I>jp;qj(MmNO)_b+n;Oy-53;Eq(&!G( zrnzW3KWDgTri*5IXg1C9&|I2l(tHm^(gGJPbkQR2awoqp_Rtbq%B5u{E$3>Vn^sU6 z*WAUWyE$9Qa8_~MY7aGuq)PY013O?u%y5m@8;rlb-&Pia0szIW5jVU`m{AJ zfAP=_CJ<<1(kB1Kp(*~UTAHpcYVKdsFHO&+EGc@4xB`nBQxl6?QEQ7HAiqVM=|P}S z-=ZxvPEgpqIVI!o5M+*leSy*5IyV{8ycf;ChfqWdVb6Gf9jUdlzuQoX4uHN%^r=G4N05jW+l0{ zQ+&49Z5OO;uLp%~6ebR4XFW^$QSwD}N)6k?75uK}_2Y+;ca$unxQ!8SvH-TN z$ag>OqCFH(y1jR6sU9|=9yp)@_P#OpDhEW)rM4(_2HAV#tv|J)!KfKa`81shX%2Ov zGU`hA(yf4Y8xBHGz#IUmy$GwMedzrJ4WTL;N++p^KA;iwC5@zS$Tx~E(ir-U#?f^e zFHD*sVrjCdO;bcBO|y4@CFDmLnV7OA@~ud1sU7kPAlx1Zm;l>_I@qlzfowAx7>62N-XS*)QDT-%@iMcq&e!|Gn7Tc|sj>sC-D zm!cYqYZNW6Q)82Q2$Ug3ZYwgWr*Vx^QPmw)aU%8#nTfSQ+Fk&vj!aWL`N$Y?lorJ0 zOn-s^t~n(}^(pB?eVxH=@YK*x6G21mkqhmS>VRm0Wuk9CtbxZsULePr6<=Tt&=s^l zCnntC@9>-$A2X-b#>|Ov+XXNj4s*U+Jv=!vp1dS?OwLhCAC?mnk(cCA<)kq1s5f4|PjP^2A#Vw75-Vd}NZ_c`+umiOP+SjE{;Bk2m9e z5tWqTKy$B*xUrAIS4N1Hme_aWILi~olf58Eh?Y?VEvH&gICa1f4d^bc>uxZ^O6=8Y zx*gm%iq_C1S_^uvL+gFCk=BD@HbC@lqTOiQM-STc+6qd{1+@l|uaGqvrD1p>MS-Dj z1FIH7V1ZE8%<8h2{vG&stIV4QyS6Nu$BYW58lyEtAFypjPm5ZC{2X&oH|C%hU?I89(U_$#$1o)~NqYRd}_k0?FiSWu~x<05S9 zX2;vJ|L7V?#HqP$-87${czF8UL6NrACNA&&wI3|fel98Q5Q+ZcNnZcj(&)qCW&K^s zU@BVkjin5x-hb44V0XBG>K4c5TC`=HQ~I*}AJKLnd24^Io=?2p9`d2Za8lWW&i&CeC5 z!c?(mOX0Lvz0q34)wk(4ZO_7U{{<6s%9RM?OfRjo*;Ms8|jrV{hCvd;g2%1QfxHQ>=YzmjAnskSYrg?BYVOG|q z8Qgm&XS2BXY?ELZ&2_PDnM@0~$3ioT7MXOXNsCQdV$xDsutaMsFBOT)vV5dyB)hz$ zr)((_@kR%TFZuZuJzXX**Q26M9>zu}&;_RQY^l~>HW(#7HGG&wiqYbqk+}rp$2Qy8 zjJrNsr0Th}Le?1r3#?2QjuDSXEVFRh@3z3GE2TIm;$?Y%t#$^#ud=8vb?;upaJvP zRu`XF;DdIy!*X%7gl$P(#=eE2wlCXZqF+|d5z?}l0Kj&LN$_ zY-dsT754XQ^!NsSzC*8Xv9ITV-S<$-KSCCthYtq_xNjeKTg}kyPf3cBKhG-;Ekc(kyK>Bxafb_tV#bg8n>BD5mR=Sqg9|ceZBPEQk zKx|*NK`EUZp;m*Ia43mJHYVL*F{u4%g>j6T=Qw$bym3`j>qWe2Hcc5V5aD_dULFUS zfM&TB-Pl;pu|Z-`Rt-A#ii8$4QgcT3u)*JkF-*nxBJVwes=QASXxC zXs|^xp7Y2ZtlgGJ6nr`HT;U#Y&mLX+az$Nv@LnTA{%}dSs|HLGdzB)n`gN+1!WP^y zKUFUN9mj-I0SoSQIjcZK%8i{xbk*n6M1&*t|1(=Gj!+6<@-lHm=_;AAS#(g9#hb+y zrJ|}v?+`CVs@jalMJuK9s)~+@F6pXv=V#)d4%h`}M5fn9m*G8wP;+O(0JjyPz#%!aM_&-}L?}I() zBm6TH5@q>rtzOl)eYH_4sKYwBm5oXLM?Yz{5tb{bW^4g+RVVWlET=4vPvp@u zH%iMnLxduWR&Z9v* zTyhsZ;DI^iH)*p;4|>3*TRfCV5+i)Hm5bZl7`)x2hghjDbJ4^6+`-vS7wzI_xr=u5 zbB~9eejbDP$0H^^>VZfHxGdVo*)|vLchO^9MEe1g9yjR;_~d&T{t7Mjge*v}ZWg3Ycaxs7z~zWKjHO~b7Lv+C4_aVySfYVI zgdKS(L60u2MM4+{A&TX!!CFhb2UW?8E=G!cd9W5=y9XWR(a%|Qj0Re;j)z%voSv6I z-J+%D7+nYDr(5&_o#5<6Sgz^Qrpzdvn~s3agr@1~gAmq9?_~FQNwyxMrA55V=E^G; z{SA{;t8ck(h?a_ZZ>u^mL>s7;@=k{SU_uLvt&N4IV5vHzS{7<*mWIX*1%O8 zfvJJ7vp$V}g?KN-nns~EmfY7!6Yh*gaQ8Drv)Wlf%hvdy@UrE>ky^W6yqE;6o?v)J zz+a7q`&^US&=I=f*{eDQr z$H4vvu)9xSJD$XXo}$I{G>oM~Sn4yt`!Mi*7Q1@{y{pi26o1db3_V6a0M<`*!p2|} zShWGYf&HHX!f(X(O~r|{FvnXyx&yq*HudWWGIhsNyJ3>)D1~FXM`#9^#RdKFZ!kcZU( z!_c`Jo{imd8)?;W0^4^dCR&W)8Z8O2D+;PMj@4Q^n(~cet))`4*Ne5SajLj|o|cuW zif?YzIywe`Yo~Tdd7AQt0|AwN^?>%1qtY)_Y6qOIJ6_e&9Sh>+lbWf#+p3cvX;YOq z$^bHD`KM6qH=(xhQ!UrAC-#4-B|8Sk>*uwu3gIg2g64S3RTr*kJJgqrs*TsRj~shw zs-~ZHAoO?X=dzXMkl#vgp;k1rmHxHUJ~UsyKSR}?8>ydhYTqf*-*%ASHdT)|Y=h&= zgdwe8gCp}=r1Uj6>d2R->K?hTr4e0qX@>r~v+U=W>P5=SuS!~}cXevVZqlP8Y#^@= zZ2xWoPIE$-Jat}6l<95sFqyVjkFR>_VZD(v{U^Kicb&C9wpZWNP~qSIE&Wr4Y0lgF zNvG?z_w`a|zS58NOAfI{p3~opv8|sLhof3FgjnRHte*j$bu6GOa38i(kM_YV$)Hhg zh|AFk6?q^%#_~%sXC)DQhCL96rQ9&lgF^`KeG-2b()qKH&Yy*J_TVA*ID-eTvj?y9 z$040&GXQ)c(rGT2=0!kiF5ry0mhGXrb>PfJxvb(OplfyUDc2Krw)mgdw$o`aUjZU_x?`*-#|sBM&N zpS=&YvLFgD!&+L&iUlQ@fI!1S_!@2NX#+lY%$zf+89winOpuZ1gj?Elzj00X%72YC zq8eEcDHykj`U#4iTC$*|+5CA^rZu~*WbWkKOYuGJ0gL>!S(f~v_mVB1L8Rg3KlIwN zyrq^Vt?T;za_~g0fjo6Zc(x=6)BnPW+KJJ#=1iF}@Ai54urwH7m+(a!rCjHJdj5IP1ya5)eJjJEbXn;avIx&_=>eN*ZJ0p2wT<;ZH3 zBeC-x0H0kqMww>>L)bu$3nSH6Gh_Fl86zA3!Nd3qf(np!0S5nN)K*t0l_!s!8h(?Y)q#MbtYdB&&4#R~13qC4@A)wP+fi^i?j1b0~{1+1n zmJ5urEJWv^X{N*Snei6`f)@)xVh>$!81-#-Q+C=M@a|mLjPrtqd;he+Oc=N99^`=V z$M9sfYxN9;KSv3IdS^XHwFOMIR1^{gy+lcUurbMqwDirv7ruQr&e~n;2pCBMwq?2x z@*xF4m{9dVB7*&q3hOwXmZD`bB*zlSilvYd%W<-OHfro@Gyo$GgiD~Iw*@=9Il#FA zVX$qH^2x4C_|^rPJ@f|TKTMXPCX8sln=-x!5gIzXTdym9tqgY+)ITH)w;~!}Po{sbyHi3@ zNEyE(d^Zj!r;c{`p|Blp6r5~pc&qMhZTz4|o`$eHgFQYAL;5SZqP;OW_FGW#9O(BQ zEZgrv$sgpE_C}$NMf0yyFTMfG_YFCFnMjf=_8N)4x33>Nf#v6kL^t?vrw1x&7njO8 z+s)aYup7>g-}Z7=fiQvx(>`i)1i?f5J(%z@6+7VfCp=ICPkMmVQzkv_fpve7vqK(? zeTD%XHtAW`;k$e;I>LoY&Z<0YO*5=xE;`PgpXXsOxafq7UUbn*E_#_OUg4&{ap_eT zy~ZP7_rThIle4!td)uUUTy)X{PJfrP_gwToPkYKkf2V(N=>r%2(?uVeBj_WOK6cS3 z9!Ts@P5R6O0sOg(zF?^T;wip#(Pu4O{Fv*RMMzj!5S4DR=Mhee&Cj9^$P5MaAD=-d=kL644jB#>p zq0u^H1dX)d{YF4Xrdhok;l6jR^!X`tA!}@6_c)7^gCU% z=nuLMD_}k@n50|qK<=>MS^Q7_w~LVhfhH`te|K6!qbrCE4Jw^A?auV^GbgB!$z`Eq zd6qCl7-!*@FojEY>uS`iJ6^ag;o(7|sRe&_v?(GiVTni!e(P9ycUNOE7IuA0H={%R zco78z26exEFc7#bqAd{v1O{|78imJN!b=n6(r!lmx^bcwJpNmz%pE^-#*EVO^GYYA zPn!giE1@RWxPwWDoZ0ca6keAA~w;4IUiKtW_ZW>NueZb6w!<>ba*cA1j z!}<+4-6^(j$f9T}974_hKKUZ-8Wz37_&bUl-%311*)j1|6oIcQc(aP2UJ<{J#O;qG zWS}Ox=s3kVwXs#?#fO!+BNTR=YH=lQiNv#nPS{Q+Ld~^vIMxhH9J!Lptsy+x0JpXX z0kwnKkdJsrM@ZmKkhg_4t}K;-kAo|hc;S4vAQH~&bl^P$?f5WsjfR*s(+8hbSwQ*d z1fFpgHU(f<7Uw=giN`4k!Z_K>mqVaNQV|2Mq`E-Yz*|}n>MAnM*eX@l3kR|{Y|uV7 zW-8K^Ta8&RT;$Pwqahdr_LdcCcK`#nayo!&YX0=#`7`?E;tTb53d=oM-Jw1@uzs@< zM28ycd2LWb5KbKme1^eV9B$*3-k3X_XtehX02Fr+?|;dEeAO}GpYE(GgE zP?ZYn7I^Y#Gt1imcNJ@n1n|_!p3hZ-#Uepd|r78p00X zY7#xNyo~^eErG_d^wlnkM9J4URvajY8CXe8xFxeMH+K%&b+n{nzfsM*o)gogl5*M@ zIWd{>hEZHexoyJY!*4t%|M`7G#FxGrTah`!)^^Oll)z2`f zF2e=>jh2fzx=SR}-6Dfliac5+y5pV-i=c_nT)zSIQuK2H#N#l;Mq^{=k_)ag$|kM0FO;k{=^i%V?!7{(EkbyA4IE^3ZKE@wvlt4`N;^U>w%;_H zOv)>}57XRluPTAkSjdDs=yGWt#5KDn_bE{%WBbA4Q9Du`^_Rj>9sP&xxs>Z`IWq*G{EgMJw-2B(9*KdYnj|1twMd<}m zN!|IAuBh30C4discaOmkdmcvG3qf6Pw?Q=QcPC<4g|{6}=S;;ew~)kUs4H5RPdsRH-`3`A}YjNUsT_ zG>YCt;NvZr0<d-(V+X;c5DKbtvp3}k6 zMr@27$h_$3ezzB-@(L%A`MsW8C1x1)lshCF&otWE5y-8p&;I)tL_9eu2dH?q(Lg@+ zh!MW;>6u2j!T%Q^56?H+R{v*!yfoj)S3%F_3yh|!G;V>>N|m-RFgmKzXA6v>s?=(s z(NmSm78?Cj>4SyFKvin8$mp+1OBNXeRO$Uiff1>98dFr=n(ET!JBn%Se0H` zY)n$6+$Bb-Ds5R}%vPm{rN(4cTDa60q)P8DHF~I0;xc2DDlJ+T7<+b^al5K(x7--6 zN}HD(g*MqjF6hf|mK)7v#VW&_&c5cxsHSDhake=%!Y&4W6=h$H^mmz?=wDG9BTug| zoKXE2J|kJ%IR-7+vi=GOM#vSp002BI%8pDy&%B%gy@ggw$Hxme|2%2Ur8zzxHNT3o z?hE~YUy&0u;e{2L@N=Jyif>JV-C`qNGYZ12`Xz?WcX{awE0zjMiahBvtUpbe9OwzF|9qD=`=q$rNR>Rr{|Tx8(bGIn+9}_Q8d>m_(d7!V>sZl$F20P#kV8xElbPzQ#3ae&0VV zx%zIyt2XvwZc2XFp3mnC0L~fLi!zO}qThN~5vDV8f~3#oe_h zRy$zcFcRwLV)r;@2f?f;E4!@vZw$l5m7(_smX>l4VEtNOo?T}o%fhW9Ob)##&^C?R z-pN#L@5jJXJy6~D61VltQEeAmBW4nPufrQ5*BnD|DDJQN^Q}iY;oiVJi|+-jZKG5h zuHw{IbbN=amlmt)jmv;qd4Vux?Uzq;|1LIixo7`Q3!ykTbqncL|CEASxJyJy-+j)ZIoA7` z+uw^N;?yOi|L-3L^}l$ZgGIj5k?V;R1Mdoe_fa$aeVEQ@LXUg!o%)cqH+2ejJIt?OoY7u~fXGPh6 zrYJCHHW>}(>JzlIc_p%(t8?waYibvKvN;0RDv&}sy_el9LtgY7F>-_7NVofc z9^T@Kk>CJM999qfD#{9;V5oNgdUn^IH-35GStu?}g982VtSCGGoa!3$mX<45YKN5{n=3pkbx4Wzb^$R|nEzif{^L;8b~Nvr$hLJZRKW ztY|NL?RyL*#s2s&cL0iNd-6f-+#5A*t!?J8SCIU-qtBg$70#(u$O?Y@K*1{B5>RvN zwg61R*Qz>3*JHQ*S98hFw*;p7f!jy@yQW4RVsr_eK2 za3N^q>ECh4n_tC9mJl|o3r_uS-fF09PHjR^J@a$W$Q#l~RLf4;3Ve?J8r0VG41#7}`SJ`s@lR)P?dzi>?tn-Khy-wd>R;M1mAC3{HP~*IbwEJermU|4Z4flojePV1w zB2JkhaIb82usO9CHwM=42%x%i2Tc)s&Xng#O zn#N3fnIV@@LXcT@zNW8RuIP$`SOe{ChIZjfFfud4Ap?Kk>5zKsmd&%r;+$~`?0vBCII`X<$?9BCQBGre3Z&4f@&Ikmpix_xEMxXSx2NqklGFk1xEs|ubyW3J znT91JwmOG>%+)mcI79@eP}?a7%7MYW)WmSfFLyhV)%Q?L*b9)7oEnD^qCo@h-`XZu z^@qkkaZsbi4(9yvQBHa&uq-z1^`lJ;Ik2^n`X^op`2-m<>rZM(ry5SL{=E1nqw>dG z8+=3gz+Q)Y8%1Q_kB?x)DU^GEzs2sGCQlE6Gdtw9 zZ~h)UI!>X`t`2Ypq)jlw5pWb7sVAKgbqe*gdg diff --git a/settings/repository/org.broad/tribble-46.xml b/settings/repository/org.broad/tribble-51.xml similarity index 51% rename from settings/repository/org.broad/tribble-46.xml rename to settings/repository/org.broad/tribble-51.xml index bb8df5c87..b38fc4bdb 100644 --- a/settings/repository/org.broad/tribble-46.xml +++ b/settings/repository/org.broad/tribble-51.xml @@ -1,3 +1,3 @@ - + From 62801e430a409ed9857bb310911a2d0e65c8df11 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 16 Jan 2012 18:49:58 -0500 Subject: [PATCH 26/36] Bugfix for unnecessary optimization -- don't cache the ref bytes --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 43c878b2f..836ba22bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -522,7 +522,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { if ( allele == null || allele.length() == 0 ) generateException("Empty alleles are not permitted in VCF records", lineNo); - if ( allele.length() > MAX_EXPLICIT_ALLELE_SIZE ) + if ( MAX_EXPLICIT_ALLELE_SIZE != -1 && allele.length() > MAX_EXPLICIT_ALLELE_SIZE ) generateException(String.format("Allele detected with length %d, exceeding max size %d. Please remove this from the VCF file before continuing", allele.length(), MAX_EXPLICIT_ALLELE_SIZE), lineNo); if ( isSymbolicAllele(allele) ) { @@ -576,13 +576,12 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { public static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; - final byte ref0 = ref.getBytes()[0]; for ( Allele a : unclippedAlleles ) { if ( a.isSymbolic() ) continue; - if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { + if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) { clipping = false; break; } @@ -594,7 +593,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { protected static int computeReverseClipping(List unclippedAlleles, String ref, int forwardClipping, int lineNo) { int clipping = 0; boolean stillClipping = true; - final byte[] refBytes = ref.getBytes(); while ( stillClipping ) { for ( Allele a : unclippedAlleles ) { @@ -610,7 +608,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { stillClipping = false; else if ( ref.length() == clipping ) generateException("bad alleles encountered", lineNo); - else if ( a.getBases()[a.length()-clipping-1] != refBytes[ref.length()-clipping-1] ) + else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] ) stillClipping = false; } if ( stillClipping ) From b0560f9440054ebe893e0a7209756f085f399f16 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 17 Jan 2012 15:56:49 -0500 Subject: [PATCH 27/36] Rev. tribble to fix BED codec bug in tribble 51 --- .../{tribble-51.jar => tribble-53.jar} | Bin 304586 -> 304182 bytes .../{tribble-51.xml => tribble-53.xml} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename settings/repository/org.broad/{tribble-51.jar => tribble-53.jar} (92%) rename settings/repository/org.broad/{tribble-51.xml => tribble-53.xml} (51%) diff --git a/settings/repository/org.broad/tribble-51.jar b/settings/repository/org.broad/tribble-53.jar similarity index 92% rename from settings/repository/org.broad/tribble-51.jar rename to settings/repository/org.broad/tribble-53.jar index 04121804f22936341a786a08d97a58fc4293f595..02865df435a3e43dffaad91132c34afaec455d81 100644 GIT binary patch delta 8387 zcmahu30xIb_vg-BnD-t7JRd5X2#C0_sGykQ2Bs~pm}#OSxPYcCZvS)c%tNsMGW_n$d1t@po_p?@;p8dTp%X6u zjJ_&&0N4QR`AKX2Td6jDblM~z7b4~3k60CHMkGt5pH%~yO(nn>&TmV^aG!pP1YXh> zG6}@lyGfR+iF%NUDe(Wp#LR6_ZZbD*_@D`lYkb2b<%HcdF{2{ZsR3vqE%t?RuXa!Y zI;i6M2sOlL*Cs|3%;UaQJ)*mtG6l^o;Al(L#2k;Cpfw<2sB=XaEV_}gCfU!(Np?sV zCnpaxffHR{Fs)AO@uVh6Uq19HmoxVCO|p@unxeBWRMhn4tlGFfupO%wd7afrBR!+J z{XF=>N8kq^8&fg{uuqJ)GRE6aLjatCCQxl84oZ-outu%G=>j>LOHXnJ=*Fr+f$np5 zNPw9Rh=W-Uh=ETpH~1_d@KB;q0so^`-tSmJ=CuuOyH z4rl-?Y_QS>tBCbE`n}o#MNmx88V%MGdYv5%SWlP@cIah53H=tNsFaW!>9&dVZ`NQ7 zp|=ud8;NeWK^Z;EiTu0<6&h4(u%l|~W78E!A9M$uKr6-%kIyjrZA`9ue@Hyz$n=6y zGqhQwfbqHcgY!pE&mWkVHzPNnf!eC|qlWW|p%3kZR2`C_6Wx+^coC-Q&=%V1&;z>a z@Dl9OVK+=;z+!aR16d4w@~4j;JBfidX8O~!X5>!SVI*V;)`tZ-HF#Nvy|7P%{W`n? z2Mku+62+3B3gw$I03BY1gE|}nj|Q*l@H)Jq!(n*S2p!hE&0Fv`daJ_`7^lNKa8!qP z33`ui@52W=9E0OJoPZB?7zNpa<*SX1{euIdbT|p8bf|{YIz`FyV0F6L{#*l&#$k2oKpxq#_Sl@{Nl>1it$bGVS{!9U#Uyx+gfW8+4aaxMU<% z6)~zSbCc0c&;sx6(Agw3+5uXk*+ewg7S*O8a~DX5H0X(cz2RYa1RjSzFbw)a7W9K$ z7zp_=1m?g{SOQPLb1(wdB4s0nzYM7rkPSOv9A?au@G?wV|*8 zG`I#c;CGk}w_pxaU@p^PA#;J7MZqE&fg+5{5{zLQ&cKGMHFT&aLOU>84?-`54kD(v zq@Kj>`4GBkM@6KTbxZhRQ53X)DJ~C&haetN1PAN z^b`z0$QS3i4`-W!<_BTCk|0nI({6)HgU8q%Xr{qn#%@6i4IUq=!4Q56LJ`#)Q9-Q1 z4bU0|Vbq6Wh|CphBiF(Wz=I&>{smhhr)?$+uT7R=kZB2b#3wDc!tR$`EhWvAQM{8z z-r=||nDn#@>+0xAO4L1cm-%Yu%hvxzi5WSCb{6utH)f9M<=^~?1gBqesNI?H8Zh==pp&&z`fnV)$$Oux_`mmuCrjlj@pSjaFdyPekx2e z&hHM&@VQ^|T`6g1nvW^yOvkD|-O7^xyRhg~SdwOt5tQokKTEZ~KS!lhf|SB}<>Sm0 zcn0H0<&xp{jf2Fr(0fe}9`99|xknT_@2kQmH1unLK-$)m({A_l2cXeT9@y=k38|Vq z<1zmLBD3Es{uqEee}GM}Ni4+38SgSy<_1>z77nz@4r17fuXO^9^pf@xF{@;(*=>AT z_HiI#MDKx0jN$y;Zd~4Pmn`4!Z`)V`5yw}VeaP-J-b7F5X_%`(Ht_Eiy)Y-BD^^|Mk1lwW`ctuGwXm zlU@=dYV6FnPAf=JDY!k)?2GQ)ogo`PPktgh??XyFkF;kd(Q_j&0rcD@p){ z8%Lsu9?!x>;4&6yT+{;HB@T#&k#KPB52_n{tqjr%@XXmg%qpRvOnmO?-6Rf33ugEEdC7iHK3y zH9RNMMzOuN)lfu@NwHwxutuV6pc0I{ZDEZVv`w2G3`jf*_Kgq_m-r6Y4}xPSXejO{0>`qb(O*F`_y#BREmp!Gaijf& zC)jm)hR2zf5Ko-~+QgnC0XGCet#MN*C>1QXsES6{N-^St|9x`|nuI zZXos#tf7BGIQ%7kAInCBIgWwGcLJ9OhRJUp758-|buhc|&t;vNzj!#8wT}te2o4OWwo^VR#4jU%9h)pj zY|Uk^7rexJxn<;#|5?Ne-sxlzBH#^(9S zEjxYzJJgu=yNFuPx=K{`diIx%P@a)n*-?K9MeSxOff5?_2J0Cmp{<{@3qBHh<_e2* z$qM;)Kuz^*Jo#vJGI#sbf3h&ozkg=S{UquS`)fkmS_i z`RZ_aaCkx%s}K6iu#H-)I?Y6wwnZIlCda4c>KW6~7c10?SmF}{K2$%KO5#tdr%huw z&#HNv#Ett@{l%Q)@Nd+!PKitSQ+1kVd-(Ez1`?MM%y*b`{4G;KndhZTuYUC_RxKJ`sS^-6(|B{ScL(>~x$kN2OQytuXLu(Wy&s?9JtWX} zCT}Z&(KC69ircaxaUX7VQ_(0&&0BY}che4qqAn#CWJK+D-YLjrSW^Fb0gGuvVg zpTj3g%yKXA+Z>)NFBcm>z zY0xX6_o^%`A8Vw~2r&wBo#Ju6y-v_Hq0xJ&&tBN(~vVc zA)_x-8mhIQuj({rfM?phtFqAHHqI9$nD!{ouN9!ZvSrc{_lm}nAl<$v1Q1gD!wEm8 zuUK!SCpTPI?kn7d=rw66Dnu_N_*S; zT<_l$4e>6?9;?kbH7rzYT5Q?aw-_~by;`fuCjPUSx0L3$JddP`uSC)k-dZBE zmLTE_iHI=X`8r7KT4KfEb)uACC)Nlf=lei$%x$Gh9XIYs@6m`E$do@3}Ti!_NyYm!XT zStqlJU0%`=A_a6XNk65qP!YJ?TKalDa~=0FPo6OOsf8BfmRnZlEJsz-b4?QV0Kqa= zCkZ>`NfWzf0lHPzgK5Hlg_Rr5>xbsv#72`wk9t;WCe{k+BD)iIwR2HODlJ}tN*|e8 zhu$NNIm8!U`gNion^#MBiPV)CTq=i;u0)c19g$+h3dG8cDM&r@!G3&cqS3aV7jH|} zcyVQ=wUBu&vx_I;s~3&>$#r-$X^j^(%a|@FC=HHUMFw9YQd^OI1Ou>dm8ELv?8CqQ z8j1BA2Yt)(UX_KO*N8t|7MjGX=V2CBcp49SS@2m|c5wMv_+vJ}V3VoNj8lvX(cO32`BE?$xt80FA-*SX&gE ztOe^(wR5c!E;gcRDKz5KRt1?TK%;v-p}Z}o&^zmR(!Es=e~GY(5|NIRP=-lFw5Tz- zQxqFKN~*o+o3v*H?i?EC%buybvatU_QXu)8N!Ca2{<`i}JTYkCN2A)Svhe1+Cad#9 zB~cWtw<7Xp*1<3K*#T(O_c-#DrMeoYnX$nNF57@!#$7N;JvQ)&dy^^sl(6BlsTNzk z0((f{!aY)oTyE)W*n6+NUX9-tXwZjc?^Rjo`=zNWrvy(E+9NAVP)qY4OsrE?8c`VCs5f(2R57%?#bh1d>$cIRGf2eqGRM%*1WO2~H z2)tKi;j5j9tGx)@VtI0VBTm2faKg3{dmmN92VYI{*yVe$UDv`N)!e0`u%zq##LCV8Vp z`{%h!&g-qSZk8PjK%=!+DLyL;HGG|?CRF^2)YI(YKD|-6!af`M+2anx?4^`j9 z`(yv^f$)?i=q!!IdWu(`s4HAljnnM3&05+1+fe)PGu}AhH%GC*ry3wOdKvZ-!+RGj zhP3Uxf%wf!b8JW2>;Jw-Yc2EoDMGLM{T|vb3VPy}s+IeY7`Ab4PPc}PKJAM|>*h8L zC@p?z$@*#8xQl4i&!c^f?nwoRiDlL_+-0coiBO9c6^^CkT$z;}myxElh!xJ~@%W4^ zw+_iJ<%qr5(!}m<&2`~dYVG4ue?^Mj*d5WR-da!uJ{) z^;7lZHpHJI=T!SD`?n;ykYxEyueW0SW@}E=1*_xRb1^7|H~HIg$M)$`RE`ePpnt7+ zugbzQQ>2@j>{I~O!=hyp?PwiZif>>(1_AyX4SClj delta 8941 zcmaJm2Ur!?(sOnXx3Ls2f`CX7D@{Z(ia{l6o)H9$F$SXoBK8WFSOH^+ih?e35DO@z zsIf$%*Yb=SV~G{>VvKoFAuloJkI__gPmCuBpGIMB}w}YG3o$V1OIo3OCiMsmY{XJ(@bs{3W=_;UppRLLi|^2;_Zzsd;JuJAUzV}y5&@SX{_Tc926;FLQ# z(feH5E)(qL_dT3ruMs{l!agI^)Qo&NN3x`#hNu@VtVXe1)^`*QP`9!eZ`bkJnPakZ zk|s>e&J`>l;u19rhDH*yU)}X;YjsFzjOshA^@|6fw*uWDo?o6*zy=2u=nAn4^n(Ni z4#8msj=(_zBvgTa!gvBMGjlStCJ``>&Y3nRH#_C1wP}@zxd^IxT?Sxa7}?P;kp77U?RH`L%r3EYWZGbeY#)N#R@Ke!(MR^V&6p}y!s2NDPbs`W{M z>UU+~tRY|WQZtf$g(76k400>Cjy$HD?j4sD(cYNxG^XMBeh3x}k1L*^VCFp@y z3*G^I!3cg3hmfMtK~H!dpSX;8E<>F^BCrRicLy1-!9XDdXTn2an`U)WSgh2l}Ic0q_qg-=UWS?YG#AofDGbNh9|db612#Y= zRKj?ug3*{Fj4Tvwyo{E`f(zPq9)=)fKt=c9{7_u}RaB}ocqsnH zCtx+gF!B^y8{suV9z#1Lq`hi{;q)>1;Z%Q|@*>Ut3r24*^!n>Cg8LrXqr_h10Z=ap z-HQ`iz-U`o*d$n}t~x<~kJ6Y)OVrQ>d4|fp!Vf_k0%?OzgLj10d;l!v9B>tY5@A&k z*Z}@v5eL9+m^QxwTq5=^k;%Iar(5N1;SXeQ96bW<(yZ~F-H}l65Xcf+!d)E&=x0o^20S^a5yluHn|m7*&eO%E0U7m4 z?ouO+Me$hGG*@AU@vKGn%J1{5P+xQM`>0l^JyCcR@RZ~vT7XErM&VO;bxlS;w~oUi zJY8e_%$^Ykp`&^$V-N{bBQslx{22&VI_cFf;7oJ{!3wp#v1848q)r#k3!d!9E2c?)#mZ zzTAI1MWFJ`--!{;1DZoq(ZRfy>_neVLyx$_W|)r5E~sJ&CLj;!U|5A|7>fbj9+p5Z z@|tkjg&625U`CaP!YssE^l&#c;HFs6{pjOi80AKoO-w|47^bF<_D-LJ>rwQ#AC5O1 zJJ1U?;yPKv4?wsT12OXNJ8@B%tyL_GVc2ZN+gb}OG+Twd}@oZoGx782HPeKn$2b^+~XwI=SAW9>3+MJ`D3v%j;*e z+7WVd`((}MH*+*oU#cG?T1s{Ax7~z+KYaVKFtBF#FZay4rCkK(>ZAFRJ74Dv^$CrP zMC&41LWb-~ z2g67|iK57CcK>_g$2zVe!L~WW$ytJj#Bv2zz)A&EE>XdZO=Mdi$^mjGC{)wV1{;j9 znm`YAZ5!O*5#%iscB~49S+QD~?ak_jNG&YQ@}VO!Emq#f?K8CdF*aBRMAGnd0;7tO+kF~ye|Z^QC0(vx3 z>PkdKLYFyyek`F#YQ;vTOQQ1GCZtOnyac^Vt`r_1pe{vHyk=DHCTX82m8`>dUB&F} z(yyBS!}m)&H8RslDMa%}#Cgf2>Gs@JX_6?7oHdNqU6%|kop!0ur2)~Oox)`1BbkNV zkUFyqUrGKfuLLXQq?3}_HtEMSKLXJ$fu%6UV^*tMWJ6rW`iNcfz^F?ClsiMwtFpjQp*J zJkecV6C&KV^yrr*tFVvdqbD0&Df_eIFUlTlcNz7zZ5b}F*BH8G$mcaD&Wp=28libF z@ilr;Q>Wz!YSq+^o=nr9jpaaXGxk(bbJI@4c4-P7WRUT57Ym$8y9-C9&7{2ruyH2s zB!G)EsmK`SGKKWdW$OX{rEj&!&R~5I09BUOtCT z6f?iufy_5(wwU?!4Vo!{$hma90Ltdlu>v6TXpR78&%?0cvG~nAIz#~7=hFcKcym4- zDS-dX*SV4w&{xDv)dHF(fQAJ$!SRuc4OmED6~Wf{-HN@xkoF741^N`bM-?o>R+kU{ zN^SpD7bIn~6N{*?`g5U0y_IEPH)c^qu)H_7DC!)3MdYKCogVMi1@8D=W)Tf%MT@9a zkZwLVsFxl`t z+EPkfp=-9nlU3yDOEuxcj~DizC;XJqwUk}SqwR!e9&-MPrD{S&YqhS-h4sqU*`2yr zJ2&2|3u-Hox)ZC&r&{sbosS~k*{c?u3W!KeBvNx(`=ZLdxpZ#Y#yj2sd_*`&xhWV} zrve%%6iF>WDV}FgN)+2zpo`wYC%0U}$so1&C58Q3plf%3ko(2g(K~$bp5FeeE?D^m zSA_LhOxp@y+A;hd;CSF}=ypr3zY+M1%8n))8|j`725$It&JfSy7wFoxd=s_%{u?eu zjrzu$O?p$`lzfgzzO5$2G*>fjo7v$vb&gLtV!0jR!#tPh_VnnnXLFSqfDh-mNcv3| z(@Z`3gON?Ob2QQHZ`&LHge43k{_OA)`>IQD)m&}1bSVuJ{*GLVlfN_wQk-;SKFf4n zlGJl+RIKKF;JdZsS6whJl7+0G{wyPgxUi3x>hx1CCY8ovw(#McF+ICz^v6uZ#4dFv z1{SbP*C~7%E`KOi!|zy1yx8W|x;&>>1jp(k@?O*E?XL`4PTd^)eV=W(S$sI_YZ#}| zACE_U9riVLJ%7)xA0}$_KgSbA=p#I}W;v?5WU@n)wR8%;vD{v+Z}uaO{(6Q3?ZXDV zrK3i^g{a{R9VmrWBI=p23~?CA!_upS(}zA2m-%s*PVL2T=fk;6y95vz3ZGghJa^}qg|$gyo_ zjGx_k3qz8RZcdW3o^qy8cDVufrCp&eFJmpr>rGj~8rqx%TO~I(w@@$G4+lS{0YO*~ zaPUuI_Fr|uf<}5@7A;F2tPYt(7~C&JdG|wgd8_p_8nhZ`Q+qpB@nzFy<68m`<{(}G z?V|bJx%>=g`D>`RXj$1$tMx@5qc~FHz;s}{*XtNjWxJ>Fg*pg*-d}?&PZtvbEIk!f zjb*y&<|0HbPUo}%tnQ#>VbP-`1MA6jS(zMvD1$Xyi}>;)>MFL*r_FB9KKL&VAHACB zU0v{V2J2i*BiX2Y^kPGi9>O1uzMFQdB>*3to#+c^a{Asv>eZlm^_Q`s<70>u)!|~U zKpb=5CV8`4#k$njuXV^zLs~wZwRpIh(trq`lzTh{8v?c)1>w<)UP zuoTG5>vZP?u0wqNfhMdeJ0(vx*^V#Z_@Yl7_#k%tl+>Kvwd0A3_?in%@DA1~9QDx` z8mjIKsac~`p>V`?_CW~^VdfG&5S^NQC+AEMq|uI^U#l98jXvNTc;m0Sp!_Bm{ep1$ z?#TncdVoki_zSN6S6y)MCr;Il)!mg$Z0cRf@Jy8%-^ItP5YKNuoKxfJJ&iZxI)>VN z>veA)UC+JwUx6wur9nc0*|mSnT!z({59d@}{8SSX{Y0`pv-a`KSAHwP?B;`iUbp|M z3r0vRqzu!z_`Vd(u9fO*-z~+p6HFSa@=M9LF#`rNL`8JvC_k2OkNYA%n<-|yuv?6_ z7TVseIyvfjtigOdXI~TV)dd6MxPbm5;wF?K@AcRD1ZHI!4H3LU!YW+xZ%h8yC4LBL z{8bkm&E%wktfD72_dXkRmBTlnlvQ}laJ_m3Q45`I8`$*?dKCFBzEc>0p9}eLJ^*(w(3n!@%L)rC*E6I?IV$10L_^^VR!3pr zMm{@utv1{6AL7e?DA%=q%28uBX{hwgXi(556!_9c9Y1O#;?Heug5QiNVbIabc_h6p z03RKl4BA+w(Jy=(Mg49UW!{9M5-m_NrVax0W_-{tkuxlDBbu$eWx_viN4bS%k;KMna27T`!ZN?hDLj@hqj$Rb< z+G*S!KAex?puaSx;}7sN3D-(25_+J%_7hxy4`=!q1G`d*VNkpryQ6eAuCZM)Er5=BK&0vGK;u2kB9TaXXJ(gM<1NaEXcZzf1y`C|u zooo2&JD5LwILB$#US$^upIOrbqN85A1$$I7`_!vWb)i zyA;1FJnn6*z6zV_x@_zQ##HIsHWx9H-!zR0-{HN)j+HH||MqvRj(lkUbZOgGT`;tf V)X1Jp!NL< - + From d5199db8ec1684a50f067387f3722d123496a64b Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 17 Jan 2012 16:26:16 -0500 Subject: [PATCH 29/36] Be explicit about setting the snpEff -onlyCoding option in the pipeline When run without an explicit -onlyCoding option, as we've been doing up to now, snpEff automatically sets -onlyCoding to "true" provided that there is at least one transcript marked as "protein_coding", which will always be the case for us in practice (and indeed, all pipeline runs so far with snpEff 2.0.5 have run with -onlyCoding auto-set to "true"). However, given the disastrous effect on annotation quality setting "-onlyCoding false" has, we wish to be explicit with this option rather than relying on snpEff's auto-detection logic. --- .../broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala index 259856c17..8660ea757 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala @@ -47,12 +47,16 @@ class SnpEff extends JavaCommandLineFunction { @Argument(doc="verbose", required=false) var verbose = true + @Argument(doc="onlyCoding", required=false) + var onlyCoding = true + @Output(doc="snp eff output") var outVcf: File = _ override def commandLine = super.commandLine + required("eff") + conditional(verbose, "-v") + + required("-onlyCoding", onlyCoding.toString) + optional("-c", config) + required("-i", "vcf") + required("-o", "vcf") + From 0c7865fdb57fc44708fcd43b24e508b7c6260bba Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 17 Jan 2012 19:18:04 -0500 Subject: [PATCH 30/36] UnitTest for reverseAlleleClipping -- No code modified yet, just implementing a unit test to ensure correctness of the existing code --- .../utils/codecs/vcf/AbstractVCFCodec.java | 1 + .../utils/codecs/vcf/VCFCodecUnitTest.java | 91 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 836ba22bf..4726b4e00 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -617,6 +617,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { return clipping; } + /** * clip the alleles, based on the reference * diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java new file mode 100644 index 000000000..7681ed7d1 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.codecs.vcf; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.*; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + + +public class VCFCodecUnitTest extends BaseTest { + + // -------------------------------------------------------------------------------- + // + // Provider + // + // -------------------------------------------------------------------------------- + + private class AlleleClippingTestProvider extends TestDataProvider { + final String ref; + final List alleles = new ArrayList(); + final int expectedClip; + + private AlleleClippingTestProvider(final int expectedClip, final String ref, final String ... alleles) { + super(AlleleClippingTestProvider.class); + this.ref = ref; + for ( final String allele : alleles ) + this.alleles.add(Allele.create(allele)); + this.expectedClip = expectedClip; + } + + @Override + public String toString() { + return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); + } + } + + @DataProvider(name = "AlleleClippingTestProvider") + public Object[][] MakeAlleleClippingTest() { + // pair clipping + new AlleleClippingTestProvider(0, "ATT", "CCG"); + new AlleleClippingTestProvider(1, "ATT", "CCT"); + new AlleleClippingTestProvider(2, "ATT", "CTT"); + new AlleleClippingTestProvider(2, "ATT", "ATT"); // cannot completely clip allele + + // triplets + new AlleleClippingTestProvider(0, "ATT", "CTT", "CGG"); + new AlleleClippingTestProvider(1, "ATT", "CTT", "CGT"); // the T can go + new AlleleClippingTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go + + return AlleleClippingTestProvider.getTests(AlleleClippingTestProvider.class); + } + + + @Test(dataProvider = "AlleleClippingTestProvider") + public void TestAlleleClipping(AlleleClippingTestProvider cfg) { + int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref, 0, 1); + Assert.assertEquals(result, cfg.expectedClip); + } +} \ No newline at end of file From 763c81d52002cb3e6af5310951b49bc03fde54ef Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 17 Jan 2012 21:06:02 -0500 Subject: [PATCH 31/36] No longer enforce MAX_ALLELE_SIZE in VCF codec -- Instead issue a warning when a large (>1MB) record is encountered -- Optimized ref.getBytes()[i] => (byte)ref.charAt(i), which avoids an implicit O(n) allocation each iteration through computeReverseClipping() --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 4726b4e00..1bdee802b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -18,7 +18,7 @@ import java.util.zip.GZIPInputStream; public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { - public final static int MAX_EXPLICIT_ALLELE_SIZE = (int)Math.pow(2, 16); + public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20); protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column @@ -522,8 +522,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { if ( allele == null || allele.length() == 0 ) generateException("Empty alleles are not permitted in VCF records", lineNo); - if ( MAX_EXPLICIT_ALLELE_SIZE != -1 && allele.length() > MAX_EXPLICIT_ALLELE_SIZE ) - generateException(String.format("Allele detected with length %d, exceeding max size %d. Please remove this from the VCF file before continuing", allele.length(), MAX_EXPLICIT_ALLELE_SIZE), lineNo); + if ( MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) + log.warn(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo)); if ( isSymbolicAllele(allele) ) { if ( isRef ) { @@ -576,12 +576,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { public static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; + final byte ref0 = (byte)ref.charAt(0); for ( Allele a : unclippedAlleles ) { if ( a.isSymbolic() ) continue; - if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) { + if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { clipping = false; break; } @@ -608,7 +609,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { stillClipping = false; else if ( ref.length() == clipping ) generateException("bad alleles encountered", lineNo); - else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] ) + else if ( a.getBases()[a.length()-clipping-1] != ((byte)ref.charAt(ref.length()-clipping-1)) ) stillClipping = false; } if ( stillClipping ) From 11982b5a34e20e350cd1e8d04b27c67610d5a143 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 18 Jan 2012 09:42:41 -0500 Subject: [PATCH 35/36] We no longer calculate the population-level TDT statistic if there are fewer than 5 trios with full genotype likelihood information. When there is a high degree of missingness the results are skewed or in the worst case come out as NaN. --- .../sting/gatk/traversals/TraverseActiveRegions.java | 4 ++-- .../walkers/annotator/TransmissionDisequilibriumTest.java | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 01bfe396a..384affcb7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -147,13 +147,13 @@ public class TraverseActiveRegions extends TraversalEngine= maxOverlap ) { - maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap(readLoc); + maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( readLoc ); bestRegion = otherRegionToTest; } } bestRegion.add( (GATKSAMRecord) read, true ); - // The read is also added to all other region in which it overlaps but marked as non-primary + // The read is also added to all other regions in which it overlaps but marked as non-primary if( !bestRegion.equals(activeRegion) ) { activeRegion.add( (GATKSAMRecord) read, false ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index ecdde1e4f..43d5f0b28 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -18,7 +17,7 @@ import java.util.*; /** * Created by IntelliJ IDEA. - * User: rpoplin + * User: rpoplin, lfran * Date: 11/14/11 */ @@ -28,6 +27,7 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen private final static int REF = 0; private final static int HET = 1; private final static int HOM = 2; + private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( trios == null ) { @@ -50,7 +50,9 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen } } - toRet.put("TDT", calculateTDT( vc, triosToTest )); + if( triosToTest.size() >= MIN_NUM_VALID_TRIOS ) { + toRet.put("TDT", calculateTDT( vc, triosToTest )); + } return toRet; } From 60024e0d7b08907ef4ffe4781c398ceaf1b51b89 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 18 Jan 2012 09:52:50 -0500 Subject: [PATCH 36/36] updating TDT integration test --- .../gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 174a46bdd..14f7457b8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testTDTAnnotation() { - final String MD5 = "204e67536a17af7eaa6bf0a910818997"; + final String MD5 = "0aedd760e8099f0b95d53a41bdcd793e"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,