From f3c94a3c8751b4766e6842b78aeeb9e2ad1e776b Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 1 May 2013 15:48:05 -0400 Subject: [PATCH 001/116] Update expected test output for Java 7 -Changes in Java 7 related to comparators / sorting produce a large number of innocuous differences in our test output. Updating expectations now that we've moved to using Java 7 internally. -Also incorporate Eric's fix to the GATKSAMRecordUnitTest to prevent intermittent failures. --- .../VariantAnnotatorIntegrationTest.java | 12 ++++----- ...perGeneralPloidySuite1IntegrationTest.java | 4 +-- ...perGeneralPloidySuite2IntegrationTest.java | 6 ++--- ...dGenotyperIndelCallingIntegrationTest.java | 6 ++--- ...GenotyperNormalCallingIntegrationTest.java | 4 +-- ...dGenotyperReducedReadsIntegrationTest.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 6 ++--- .../HaplotypeCallerIntegrationTest.java | 16 ++++++------ .../DepthOfCoverageIntegrationTest.java | 26 +++++++++---------- .../utils/crypt/GATKKeyIntegrationTest.java | 2 +- .../utils/sam/GATKSAMRecordUnitTest.java | 3 ++- 11 files changed, 44 insertions(+), 43 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 5866075a7..961a28bcf 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -78,7 +78,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - 
Arrays.asList("fbfbd4d13b7ba3d76e8e186902e81378")); + Arrays.asList("42889072698af972f2004ccfe8eae15e")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -86,7 +86,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("19aef8914efc497192f89a9038310ca5")); + Arrays.asList("213560f395280e6a066d0b0497ce8881")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -112,7 +112,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4f0b8033da18e6cf6e9b8d5d36c21ba2")); + Arrays.asList("7e755bb09169699b76850e76b71a5f5a")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -120,7 +120,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("64ca176d587dfa2b3b9dec9f7999305c")); + Arrays.asList("d8089c5874ff35a7fd7e35ebd7d3b137")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testExcludeAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " 
-G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("f33f417fad98c05d9cd08ffa22943b0f")); + Arrays.asList("e17596007d0db7673d138a9ae4890e82")); executeTest("test exclude annotations", spec); } @@ -136,7 +136,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("0c810f6c4abef9d9dc5513ca872d3d22")); + Arrays.asList("0ed4c7760f6e7a158b6d743d257300f3")); executeTest("test overwriting header", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 34b19ed2d..88506fda3 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -69,7 +69,7 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testBOTH_GGA_Pools() { - executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "71f16e19b7d52e8edee46f4121e59f54"); + executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "dac2d7969e109aee9ad2dad573759f58"); } 
@Test(enabled = true) @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "603416111f34e2a735163fa97e1a8272"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "66a5a3eb657fac5c621bc0c228ea9caf"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 8a165cbeb..e26822e07 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","13de8558acaa0b9082f2df477b45de9b"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9f960977b1b8d90ac75ba4306336553c"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b"); + executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","296917e690616682b79229ec5f923bdb"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles 
%s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8"); + executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "545d85ce79841f01886a5cf824a4a12c"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 52970d70d..6b7631039 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("d8b0c5be39ec6b239641c2f2646d2bc3")); + Arrays.asList("ea4c4e020f59f48901d5820c8e4f6001")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("16d975480ff1e689113171805b916b62")); + Arrays.asList("79f7fc64da8f25eda1f1ee139ecdd657")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + 
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("60ed3f8d5bc3f765e6ce3fa698b68bb7")); + Arrays.asList("15508fcf61380a91b6611307f182447b")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 8256a8496..5734d670f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("09a1a4d4bf0289bcc5e8a958f783a989")); + Arrays.asList("21d1d3c6a50006c723cec738f19caeb6")); executeTest("test Multiple SNP alleles", spec); } @@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("57a1bb44967988f2b7ae7779127990ae")); + Arrays.asList("314b99eb146de1fdafed872ecbe1cfc2")); executeTest("test reverse trim", spec); } diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index f7ac87cda..5f9667cca 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -74,7 +74,7 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "9a986b98ed014576ce923e07452447f4"); + testReducedCalling("INDEL", "19bc6a74250ec19efc4e1b4ee6515ac0"); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index d3f3a9936..90d7f493c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "27db36467d40c3cde201f5826e959d78"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "0bf5ae740bf9bd14c8d60d7849c45eb3"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void 
testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "ed3b577e6f7d68bba6774a62d9df9cd9"); + "5954a46971b7546d30151b068cded42a"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "a594a28d8053c3e969c39de81a9d03d6"); + "b3684c670f68f5a3a348a7fd2b25f10a"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 50165bd01..03d3e8a17 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -80,12 +80,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "aeab5f0d40852e6332b96481981a0e46"); + HCTest(CEUTRIO_BAM, "", "2e10ab97afd4492c2a153b85871a2c2d"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "18d5671d8454e8a0c05ee5f6e9fabfe3"); + HCTest(NA12878_BAM, "", "affed81386dfe60e0b0d4e7e0525918f"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "28c3b1f276ec8198801aafe880e40fb6"); + "65188ec4e3b91796f62bfb5b965ccf1f"); } @Test @@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void 
testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "bac6f98e910290722df28da44b41f06f"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "125e91ebe43108b2b514c58a9b6d3a4f"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -149,7 +149,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "65e7b1b72a2411d6360138049914aa3a"); + HCTestNearbySmallIntervals(NA12878_BAM, "", "2d295ce36066d9d8d9ee9c67e6e2cbd1"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cb190c935541ebb9f660f713a882b922")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("153d2251de7d22f423cd282b1505fbc0")); executeTest("HCTestStructuralIndels: ", spec); } @@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("0df626cd0d76aca8a05a545d0b36bf23")); + Arrays.asList("0c29e4049908ec47a3159dce33d477c3")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -196,7 +196,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void 
testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("8adfa8a27a312760dab50787da595c57")); + Arrays.asList("3306889b8d0735ce575bee281c1b8846")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java index 7171edf20..003ab6cf9 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java @@ -82,23 +82,23 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { // now add the expected files that get generated spec.addAuxFile("0f9603eb1ca4a26828e82d8c8f4991f6", baseOutputFile); spec.addAuxFile("51e6c09a307654f43811af35238fb179", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts")); - spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions")); + spec.addAuxFile("520720a88ae7608257af51bc41c06b87", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions")); spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics")); - spec.addAuxFile("e69ee59f447816c025c09a56e321cef8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); - spec.addAuxFile("fa054b665d1ae537ada719da7713e11b", 
createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); - spec.addAuxFile("28dec9383b3a323a5ce7d96d62712917", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); + spec.addAuxFile("6958004a8156f3f267caa6b04cf90f5f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); + spec.addAuxFile("ebbfc9b9f4e12ac989c127061948c565", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); + spec.addAuxFile("e003bef6762833a5cebca25d94194616", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); spec.addAuxFile("a836b92ac17b8ff9788e2aaa9116b5d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts")); - spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); + spec.addAuxFile("0732b6d2db9c94b0fcf18ca1f19772a8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics")); - spec.addAuxFile("4656c8797696cf5ef0cdc5971271236a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); - spec.addAuxFile("6f1d7f2120a4ac524c6026498f45295a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); - spec.addAuxFile("69c424bca013159942337b67fdf31ff8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); + spec.addAuxFile("3522f7380554b926c71a7258250c1d63", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); + spec.addAuxFile("2cd9d8c5e37584edd62ca6938659cf59", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); + spec.addAuxFile("78fdd35a63a7a4c6b3a043b946b04730", 
createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); spec.addAuxFile("6909d50a7da337cd294828b32b945eb8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); - spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); + spec.addAuxFile("aa00e3652dd518ccbae2caa00171835b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); - spec.addAuxFile("185b910e499c08a8b88dd3ed1ac9e8ec", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); - spec.addAuxFile("d5d11b686689467b5a8836f0a07f447d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); - spec.addAuxFile("ad1a2775a31b1634daf64e691676bb96", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); + spec.addAuxFile("0ce5ebfa46b081820d013bdbbfe42d34", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); + spec.addAuxFile("c7c5bad6c6818995c634f350aa66fde9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); + spec.addAuxFile("949c9ce745753cd98f337600d3931d09", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); execute("testBaseOutputNoFiltering",spec); } @@ -115,7 +115,7 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { spec.setOutputFileLocation(baseOutputFile); spec.addAuxFile("6ccd7d8970ba98cb95fe41636a070c1c",baseOutputFile); - spec.addAuxFile("7d87783b3d98b928cac16d383ceca807",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); + 
spec.addAuxFile("4429d33ce8836c09ba2b5ddfae2f998e",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); execute("testNoCoverageDueToFiltering",spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java index 9c9248669..ca7314ca9 100644 --- a/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyIntegrationTest.java @@ -130,7 +130,7 @@ public class GATKKeyIntegrationTest extends WalkerTest { { "corrupt_bad_isize_field.key", UserException.UnreadableKeyException.class }, { "corrupt_bad_crc.key", UserException.UnreadableKeyException.class }, { "corrupt_no_email_address.key", UserException.UnreadableKeyException.class }, - { "corrupt_no_sectional_delimiter.key", UserException.KeySignatureVerificationException.class }, + { "corrupt_no_sectional_delimiter.key", UserException.UnreadableKeyException.class }, { "corrupt_no_signature.key", UserException.UnreadableKeyException.class }, { "corrupt_bad_signature.key", UserException.KeySignatureVerificationException.class }, { "corrupt_non_gzipped_valid_key.key", UserException.UnreadableKeyException.class } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java index eefc92799..e9af685a6 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -42,7 +42,7 @@ public class GATKSAMRecordUnitTest extends BaseTest { GATKSAMRecord read, reducedRead; final static String BASES = "ACTG"; final static String QUALS = "!+5?"; - final private static int[] REDUCED_READ_COUNTS = new int[]{10, 20, 30, 40, 1}; + final private static int[] 
REDUCED_READ_COUNTS = new int[]{10, 20, 30, 40}; @BeforeClass public void init() { @@ -200,6 +200,7 @@ public class GATKSAMRecordUnitTest extends BaseTest { @Test public void testGetReducedCountsIsCorrect() { + reducedRead.setReducedReadCountsTag(REDUCED_READ_COUNTS); final int[] counts = reducedRead.getReducedReadCounts(); Assert.assertNotSame(counts, reducedRead.getAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG)); for ( int i = 0; i < counts.length; i++ ) From 4b8b411b9227a42629ee331484251689f177c778 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Wed, 1 May 2013 12:45:03 -0400 Subject: [PATCH 003/116] - Fixed a small bug in the printout of molten data in GenotypeConcordance Output didn't "mix-up" the genotypes, it outputted the same HET vs HET (e.g.) 3 times rather than the combinations of HET vs {HET, HOM, HOM_REF}, etc. This was only a problem in the text, _not_ the actual numbers, which were outputted correctly. - Updated MD5's after looking at diffs to verify that the change is what I expected. 
--- .../GenotypeConcordanceIntegrationTest.java | 3 +-- .../variantutils/GenotypeConcordance.java | 17 ++++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java index 2ebb1d7d8..d5f75a8cd 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java @@ -47,7 +47,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; import java.util.Arrays; @@ -87,7 +86,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize", 0, - Arrays.asList("370141088362d0ab7054be5249c49c11") + Arrays.asList("3993709e38b033e89017dfbb63226e94") ); executeTest("Test moltenized output",spec); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 35213af34..3110d25b9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -25,7 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; 
+import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -298,19 +301,19 @@ public class GenotypeConcordance extends RodWalker Date: Thu, 2 May 2013 09:28:40 -0400 Subject: [PATCH 004/116] Fixing the bundle script 1. someone out there busted it when adding high confidence 1000G calls 2. new path to NA12878 bam 3. updated clashing version argument --- .../sting/queue/qscripts/GATKResourcesBundle.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 55e56889a..ad83ac723 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -40,8 +40,8 @@ class GATKResourcesBundle extends QScript { @Argument(doc="liftOverPerl", required=false) var liftOverPerl: File = new File("./public/perl/liftOverVCF.pl") - @Argument(shortName = "ver", doc="The SVN version of this release", required=true) - var VERSION: String = _ + @Argument(shortName = "ver", doc="The GIT version of this release", required=true) + var BUNDLE_VERSION: String = _ @Argument(shortName = "bundleDir", doc="Path to root where resource files will be placed", required=false) val BUNDLE_ROOT = new File("/humgen/gsa-hpprojects/GATK/bundle") @@ -57,8 +57,8 @@ class GATKResourcesBundle extends QScript { val SITES_EXT: String = "sites" - def BUNDLE_DIR: File = BUNDLE_ROOT + "/" + VERSION - def DOWNLOAD_DIR: File = DOWNLOAD_ROOT + "/" + VERSION + def BUNDLE_DIR: File = BUNDLE_ROOT + "/" + BUNDLE_VERSION + def DOWNLOAD_DIR: File = DOWNLOAD_ROOT + 
"/" + BUNDLE_VERSION // REFERENCES class Reference( val name: String, val file: File ) { } @@ -161,7 +161,7 @@ class GATKResourcesBundle extends QScript { "1000G_phase1.indels", b37, true, false)) addResource(new Resource("/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.highQuality.vcf", - "1000G_phase1.snps.high_confidence, b37, true, false)) + "1000G_phase1.snps.high_confidence", b37, true, false)) addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf", "Mills_and_1000G_gold_standard.indels", b37, true, false)) @@ -182,7 +182,7 @@ class GATKResourcesBundle extends QScript { // // Test BAM file, specific to each reference // - addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam", + addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.NA12878.bam", "IGNORE", b37, false, false)) // From f88a964e2cbde0d9017629ae5aaf073c52f3eaae Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 2 May 2013 10:54:32 -0400 Subject: [PATCH 005/116] Adding .fai file to example fasta since we don't generate it anymore --- public/testdata/exampleFASTA.fasta.fai | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/testdata/exampleFASTA.fasta.fai b/public/testdata/exampleFASTA.fasta.fai index 1cc63a5ed..905b3aec5 100644 --- a/public/testdata/exampleFASTA.fasta.fai +++ b/public/testdata/exampleFASTA.fasta.fai @@ -1 +1,2 @@ -chr1 100000 6 60 61 +@HD VN:1.4 SO:unsorted +@SQ SN:chr1 LN:100000 UR:file:/humgen/gsa-scr1/ebanks/Sting_unstable/public/testdata/exampleFASTA.fasta M5:b52f0a0422e9544b50ac1f9d2775dc23 From 13bfa963da1a207c64f7b8a95e7ba552feaf9bb9 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 2 May 2013 12:59:20 -0400 Subject: [PATCH 006/116] Revert changes to exampleFASTA.fasta.fai for now to get tests passing 
again --- public/testdata/exampleFASTA.fasta.fai | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/public/testdata/exampleFASTA.fasta.fai b/public/testdata/exampleFASTA.fasta.fai index 905b3aec5..1cc63a5ed 100644 --- a/public/testdata/exampleFASTA.fasta.fai +++ b/public/testdata/exampleFASTA.fasta.fai @@ -1,2 +1 @@ -@HD VN:1.4 SO:unsorted -@SQ SN:chr1 LN:100000 UR:file:/humgen/gsa-scr1/ebanks/Sting_unstable/public/testdata/exampleFASTA.fasta M5:b52f0a0422e9544b50ac1f9d2775dc23 +chr1 100000 6 60 61 From d981fd01b88c643196bc0eb6f44035a5f7477cfc Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 2 May 2013 15:18:13 -0400 Subject: [PATCH 007/116] Now that we don't generate dict and fai files, the resource script needs to copy them to the bundle. --- .../queue/qscripts/GATKResourcesBundle.scala | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index ad83ac723..1736adc17 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -234,8 +234,7 @@ class GATKResourcesBundle extends QScript { for ( resource: Resource <- RESOURCES ) { if ( isFASTA(resource.file) ) { - val f = copyBundleFile(resource, resource.ref) - add(new createDictandFAI(f)) + copyBundleFasta(resource, resource.ref) } else if ( isBAM(resource.file) ) { val f = copyBundleFile(resource, resource.ref) add(new IndexBAM(f)) @@ -312,6 +311,20 @@ class GATKResourcesBundle extends QScript { } } + def copyBundleFasta(res: Resource, ref: Reference) { + val out = destFile(BUNDLE_DIR, ref, res.destname(ref)) + add(new cpFile(res.file, out)) + + val oldRefDict = swapExt(res.file.getParent, res.file, ".fasta", ".dict") + val newRefDict = 
swapExt(out.getParent, out, ".fasta", ".dict") + + val oldRefFai = swapExt(res.file.getParent, res.file, ".fasta", ".fasta.fai") + val newRefFai = swapExt(out.getParent, out, ".fasta", ".fasta.fai") + + add(new cpFile(oldRefDict, newRefDict)) + add(new cpFile(oldRefFai, newRefFai)) + } + def copyBundleFile(res: Resource, ref: Reference): File = { val out = destFile(BUNDLE_DIR, ref, res.destname(ref)) add(new cpFile(res.file, out)) @@ -389,13 +402,5 @@ class GATKResourcesBundle extends QScript { else return ""; } - - class createDictandFAI (@Input ref: File) extends FastaStats with UNIVERSAL_GATK_ARGS { - @Output val outDict: File = swapExt(ref.getParent, ref, ".fasta", ".dict") - @Output val outFai: File = swapExt(ref.getParent, ref, ".fasta", ".fasta.fai") - @Output val outStats: File = swapExt(ref.getParent, ref, ".fasta", ".stats") - this.reference_sequence = ref - this.out = outStats - } } From 0c30a5ebc614cbeb377a50f0c7ff3af7a842b5c6 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 1 May 2013 15:25:23 -0400 Subject: [PATCH 008/116] Rev'd up Picard to get PL fix: PLs were saturated to 32767 (Short.MAX_VALUE) when converting from GL to integers. Increase capping to Integer.MAX_VALUE (2^31-1) which should be enough for reasonable sites now. 
Integration tests change because some tests have some hyper-deep pileups where this case was hit --- ...perGeneralPloidySuite2IntegrationTest.java | 6 +++--- ...GenotyperNormalCallingIntegrationTest.java | 4 ++-- ...lexAndSymbolicVariantsIntegrationTest.java | 4 ++-- .../HaplotypeCallerIntegrationTest.java | 2 +- ...nt-1.90.1442.jar => variant-1.90.1446.jar} | Bin 556173 -> 556173 bytes ...nt-1.90.1442.xml => variant-1.90.1446.xml} | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) rename settings/repository/org.broadinstitute/{variant-1.90.1442.jar => variant-1.90.1446.jar} (96%) rename settings/repository/org.broadinstitute/{variant-1.90.1442.xml => variant-1.90.1446.xml} (71%) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index e26822e07..ecc1ab66e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9f960977b1b8d90ac75ba4306336553c"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","296917e690616682b79229ec5f923bdb"); + executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", 
"MT_SNP_DISCOVERY_sp4","71b7b9eac1e4a9b2b7e8c3689d1f29ec"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "545d85ce79841f01886a5cf824a4a12c"); + executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fc36e925e269b035d4b27edb661be06b"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 5734d670f..38ab8ee36 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("21d1d3c6a50006c723cec738f19caeb6")); + Arrays.asList("1ab95513a3abb5b760578831c61ef94b")); executeTest("test Multiple SNP alleles", spec); } @@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + 
privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("3011c20165951ca43c8a4e86a5835dbd")); + Arrays.asList("94bfccbd06043e90ae1b1c66fc3afe07")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 90d7f493c..9d4c52798 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "5954a46971b7546d30151b068cded42a"); + "7d2cc5c4ece386beedf6b07dfbe5bf26"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "b3684c670f68f5a3a348a7fd2b25f10a"); + "a17856f709b546eaed486841d78248d2"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 03d3e8a17..d5e163a88 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void 
testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "65188ec4e3b91796f62bfb5b965ccf1f"); + "e2d32d0dce2c5502a8e877f6bbb65a10"); } @Test diff --git a/settings/repository/org.broadinstitute/variant-1.90.1442.jar b/settings/repository/org.broadinstitute/variant-1.90.1446.jar similarity index 96% rename from settings/repository/org.broadinstitute/variant-1.90.1442.jar rename to settings/repository/org.broadinstitute/variant-1.90.1446.jar index cf06f592eea3a230403c301e1d09305cdb76ebe9..9ca3c4f38aa4910227f0f704183965f3be8f1431 100644 GIT binary patch delta 3147 zcmYL~dr%eE9mntPp51c*d0fy7hN6&)LG1;uqGBKyxhRB(2=Y)K@)S_GJVjLS0VY$M z0*wUBLW;3AGeKi)RAN>lHMU04F(aX^#Bn+;zQV*dC=;nAW6lLE(r~Q9HHbPl9J2^>4MNqZih8WP9Lfre`Rr0Cb>#WPiAon(=HNKtcUJ zp6Kq5Z=FCH+5eEkz#9^G{;d6N7cgz=LDTB@eCC3ZGbnRY@eqcmjTpFM{re|g;1l?y znCEGII>aGk+8~3e>c`h)S9|sDUE^VXM5puX40#f&1rs3{Jhgl#lySN~OUT#TgR~{{ zgi8|8M1as11hh9nsDf72iBusHB;y954GK^PON6x`ky61U5Xie#5d49BPYRV#u-5vC zFcS`1`(vShF;L74qQAuK6&@VlcvP$K6AJ_-`kym1GncfHd7>gn`)6jfJM+a03&FoT zRy+-@X^BbV6z}b=cUp$0Y&$cho*t4-YC)Okq5WoqIEO*R``Mx?8tBe%#XUSr)}O`4 znB|&Cd9N8a#J|h}zs0x3Y6YnGA7YH6w^wI8t@xW|ozDKmtNM?n2-V}D&9ld8QvE8>Eb333c)ZX^m!bCNmNkKAYE{-DflOaYKt6InS4f^dwO{%RU3K@GRuO zk!$yD&iA>q-j_V(&TKxJfU~Mi`;!N*_G;C!V3Gu7hlG%Fo~d^kc_9F_fppT&1L8Bu zRVNsf<=FV>VcSi$KS^pKkzY}pI||*? 
zqYdO954g}mlKBam)lS0s9>w(#cYeOI2FNM6Pny$ZVw??2ton*bJT~esIqw15_md>k z6DWiF}3MgX(m7Fbc8+cgf>?HJ=mm z89s(DK9Ch2aqAEAcYLZh&dPUq=Hut(d``|6-(G1%Lf|!P)XrK z?s=@N;5&LFHzR?1>U8$Us1|-m13TSck~Qb&iZ`GA0$2JfFS=S!GkMY1yr~2qW9{Y& zD)Vp9XdLb2R%C{7Qy-MA+t~6@mXW&{cjpcFr zVp_T!N^b0?e{usd_fbz7a{at_!KlIZ%`i|k#FYlU74Gu(BI=}mw9_`&-+f5^DR_;f zdYLC}v!5iYd#UiE_N1TA;rqr0Xf!`1NmnTmz+3*tcHeD(D2Z|J<{|nLAN&Wy^dmmy z+!5NThrP8vij4ObbUOR<3XDp9^J_X=TXmlXao;TuXf1qTwcF2VlOJ@DIft#})RB@ju?MRU%*+fB7{y`%s-jpTz+e>11@K+X ziUC?yvuc3vRSI_I*eYXK9l*U9 zRu7Q12H{Hw#I0q!!E|{onqH1&<)HM%vTXofaV!U*)&au~FusbWRtJ3JfVg-p!x@)I!Xj|mJua_X!^?65MCWx!fj5?LX@q9g_vKMJKbd)508_`lTF?>)zKFGe>2MK7{j8X^fpW4K7hSxCy$Qd2$@YEWX0Lr^HI8~ooRsc#aoRncFep=ytpT=6c-IBlUO#q#X#`95B2C!&f!nT8A6hX2?9%fJZ%h zC;EK86E9iNF08P17t06dZ+D?7bvK5e*^O&?H?pnZbD$BA@l+!#1tqTu+quw$hi%Co GR`fs6;S~V@ delta 3147 zcmYL~c~Di?9mnr-?mLH_2l~KJ6cRB{En7r|KpyZ=2#W}^L>5^E1Rkr13NBzWO;eze zfVq%pY?_&%F*YhOR{}LQMA0!Lp{>MmGA(X#VjGl+)DknX_x#Rz%^!E}XTHDX{C>Z4 z&b?i8{#|qaE@{a!auft22&0w#-7aSQd1b$K=QkeRQJG>`clW%I6{`bFLRJNatq2PY z4PCb)A|%4k99v&+EAvaNEe%@g;^^Y|OO9bZ$JIqLeLV`2M=JY!%;G9|7mJZp9y}~s zM1U!&O?K54tF5>3xkxMskytIdEcy4OA-BPI4276_opN0o2j0IY(kLPdotv z9+V=0j)!9mvgS+*D78PothngTR=3`}VZKNZe0zc)glIuX3<6KBkcbtGE;);ZMjE6o zUnriFfu{Y%PBX1T+nprVVydar>0%TZ12>4BP=Gp8Cawi9R4%#)0C{zYq93^V920Ax zV6Ed5@kuyXyY7pF%fK4ox1L^jBx7ZrL@d&H!}qA5xsbNOwQ*gu&0E2m*Z)RpZBUr0V{v?4+%_ zBc&?f`|3TZxDV)G`{endu%i629LusNXmV2oXlEbD{Y=Y!C|lX=5C2zQ%joV;G6Fh1 zjiQ(osGvavo49q#-88KaWLlm1NnWFcI-(~s7(QD#m^+{w2AHAU6`f&AM0ucSmSv9# zSy>je@6g3ty7MjOZ1h17nKN00rbVc6&JW#op`=a)p%jpUgHaXBbZ`ZF${)0$Ow`2! 
z60^|-2P54^>$`U+59l|QJ}GK3QeIHodJxgdiqKI3;Od_{CS9 zE+9s2vKifE0mJPmm7O5xE)?zzNfP>z8#`Y)L+H2zXbz(&a2`;^*GOiu(KpZ;chG*E zLD?Qa8A!=s`Hs3NzO1QBo=O=z5upo|Q_#Ei?GoiIvkoj*&YK~&E>jta1`6s`?y}Xq zk18kG7{2^KF|dfMe^P#6Q@wmjxy~{lIinOZay+Z-XLM-P9`XK|g4u~ZF|O=YU_gtb zcK`gYerxHojt(eHZJSg~s-KM*wEb6=V{9?Yb)~}qMSVG?TwWUt1C769 zNMl3pzi(K@cJy+7Ruc9Q1o{N3#UJ8;9=B%|&GE6pi_Lzq3x1ImU1!AEtmungSY8Cl zudKof`vy%W;2vhRrQ*LKS9-L5^zEz)aS((M2lYk*Hmhx!*j??ofZep-3>*sIRPACu zj%Rn*Sc=P6Le`c({1;as%K-LJAlEYo7f+h>Z-xo0!7kYRM!3_PORkIFe>%+Z}B`W;ubbD-_5si1AJh$YmacNFLaPSpR8uq z{U&mcO>(ynIsLS({}4qY@qP-Ec+{Vh2z=yJ=VZM@>Qj?UvY?*x$)p%yX$pZ0pmwK_3h;TH z!Y5U{j+c669qIi4zf3Jj<%M5K&4T07HlHB z0QxqO?EoGHdyhUQd!hd8eLF)w_WdfzsN*@5I*)`c - + From 2bcbdd469f1c7a48a0c933e652d1c488274280b0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Apr 2013 16:11:29 -0400 Subject: [PATCH 010/116] leftAlignCigarSequentially now supports haplotypes with insertions and deletions where the deletion allele was previously removed by the leftAlignSingleIndel during it's cleanup phase. 
--- .../haplotypecaller/DeBruijnAssembler.java | 14 +++++++------- .../DeBruijnAssemblerUnitTest.java | 18 +++++++++++++----- .../sting/utils/sam/AlignmentUtils.java | 7 ++++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 12a4841bf..5a5946183 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -410,9 +410,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { // extend partial haplotypes which are anchored in the reference to include the full active region h = extendPartialHaplotype(h, activeRegionStart, refWithPadding); final Cigar leftAlignedCigar = leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(h.getCigar()), refWithPadding, h.getBases(), activeRegionStart, 0); - if( leftAlignedCigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // left alignment failure - continue; - } if( !returnHaplotypes.contains(h) ) { h.setAlignmentStartHapwrtRef(activeRegionStart); h.setCigar(leftAlignedCigar); @@ -548,9 +545,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { final CigarElement ce = cigar.getCigarElement(i); if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) { cigarToAlign.add(ce); - for( final CigarElement toAdd : AlignmentUtils.leftAlignIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false).getCigarElements() ) { - cigarToReturn.add(toAdd); - } + final Cigar leftAligned = AlignmentUtils.leftAlignSingleIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false); + for ( final CigarElement toAdd : leftAligned.getCigarElements() ) { cigarToReturn.add(toAdd); } refIndex += 
cigarToAlign.getReferenceLength(); readIndex += cigarToAlign.getReadLength(); cigarToAlign = new Cigar(); @@ -563,7 +559,11 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { cigarToReturn.add(toAdd); } } - return cigarToReturn; + + final Cigar result = AlignmentUtils.consolidateCigar(cigarToReturn); + if( result.getReferenceLength() != cigar.getReferenceLength() ) + throw new IllegalStateException("leftAlignCigarSequentially failed to produce a valid CIGAR. Reference lengths differ. Initial cigar " + cigar + " left aligned into " + result); + return result; } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java index e6dea4d11..e1559a13a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java @@ -52,10 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; * Date: 3/27/12 */ -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.*; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; import org.broadinstitute.sting.utils.haplotype.Haplotype; @@ -125,6 +122,17 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { } } + @Test(enabled = true) + public void testLeftAlignCigarSequentiallyAdjacentID() { + final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT"; + final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT"; + final Cigar originalCigar = TextCigarCodec.getSingleton().decode("18M4I12M4D2M"); + + final Cigar result = new DeBruijnAssembler().leftAlignCigarSequentially(originalCigar, 
ref.getBytes(), hap.getBytes(), 0, 0); + logger.warn("Result is " + result); + Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different"); + } + private static class MockBuilder extends DeBruijnGraphBuilder { public final List addedPairs = new LinkedList(); @@ -165,7 +173,7 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "AddReadKmersToGraph") + @Test(dataProvider = "AddReadKmersToGraph", enabled = ! DEBUG) public void testAddReadKmersToGraph(final String bases, final int kmerSize, final List badQualsSites) { final int readLen = bases.length(); final DeBruijnAssembler assembler = new DeBruijnAssembler(); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index e48d1ca4c..59a22c550 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -664,7 +664,7 @@ public final class AlignmentUtils { if ( numIndels == 0 ) return cigar; if ( numIndels == 1 ) - return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex); + return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex, true); // if we got here then there is more than 1 indel in the alignment if ( doNotThrowExceptionForMultipleIndels ) @@ -709,10 +709,11 @@ public final class AlignmentUtils { * @param readSeq read sequence * @param refIndex 0-based alignment start position on ref * @param readIndex 0-based alignment start position on read + * @param cleanupCigar if true, we'll cleanup the resulting cigar element, removing 0 length elements and deletions from the first cigar position * @return a non-null cigar, in which the single indel is guaranteed to be placed at the leftmost possible position across a repeat (if any) */ @Ensures("result != 
null") - public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) { + public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex, final boolean cleanupCigar) { ensureLeftAlignmentHasGoodArguments(cigar, refSeq, readSeq, refIndex, readIndex); int indexOfIndel = -1; @@ -751,7 +752,7 @@ public final class AlignmentUtils { cigar = newCigar; i = -1; if (reachedEndOfRead) - cigar = cleanUpCigar(cigar); + cigar = cleanupCigar ? cleanUpCigar(cigar) : cigar; } if (reachedEndOfRead) From f5a301fb6354f0f3fad6cd01bd1c154ee15a1086 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 23 Apr 2013 18:26:20 -0400 Subject: [PATCH 011/116] Bugfix for AlignmentUtils.trimCigarByBases -- Previous version would trim down 2M2D2M into 2M if you asked for the first 2 bases, but this can result in incorrect alignment of the bases to the reference as the bases no longer span the full reference interval expected. 
Fixed and added unit tests --- .../org/broadinstitute/sting/utils/sam/AlignmentUtils.java | 4 ++-- .../sting/utils/sam/AlignmentUtilsUnitTest.java | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 59a22c550..fa35e3f53 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -934,7 +934,7 @@ public final class AlignmentUtils { */ public static Cigar trimCigarByBases(final Cigar cigar, final int start, final int end) { if ( start < 0 ) throw new IllegalArgumentException("Start must be >= 0 but got " + start); - if ( end < start ) throw new IllegalArgumentException("End " + end + " is < start start " + start); + if ( end < start ) throw new IllegalArgumentException("End " + end + " is < start = " + start); if ( end > cigar.getReadLength() ) throw new IllegalArgumentException("End is beyond the cigar's read length " + end + " for cigar " + cigar ); final Cigar result = trimCigar(cigar, start, end, false); @@ -962,7 +962,7 @@ public final class AlignmentUtils { int pos = 0; for ( final CigarElement elt : cigar.getCigarElements() ) { - if ( pos > end ) break; + if ( pos > end && (byReference || elt.getOperator() != CigarOperator.D) ) break; switch ( elt.getOperator() ) { case D: diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java index 2a2d80206..e7d54c460 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java @@ -792,7 +792,8 @@ public class AlignmentUtilsUnitTest { tests.add(new Object[]{"2M2D2I", 3, 3, "1I"}); tests.add(new Object[]{"2M2D2I", 2, 2, 
"2D1I"}); tests.add(new Object[]{"2M2D2I", 1, 2, "1M2D1I"}); - tests.add(new Object[]{"2M2D2I", 1, 1, "1M"}); + tests.add(new Object[]{"2M2D2I", 0, 1, "2M2D"}); + tests.add(new Object[]{"2M2D2I", 1, 1, "1M2D"}); return tests.toArray(new Object[][]{}); } From 0587a145bfdaca6fcca2acae9a259cde3997fa76 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 23 Apr 2013 18:28:41 -0400 Subject: [PATCH 012/116] Utils.dupString should allow 0 number of duplicates to produce empty string --- public/java/src/org/broadinstitute/sting/utils/Utils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index ff0ea958c..97a91179e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -294,7 +294,7 @@ public class Utils { */ public static String dupString(final String s, int nCopies) { if ( s == null || s.equals("") ) throw new IllegalArgumentException("Bad s " + s); - if ( nCopies < 1 ) throw new IllegalArgumentException("nCopies must be >= 1 but got " + nCopies); + if ( nCopies < 0 ) throw new IllegalArgumentException("nCopies must be >= 0 but got " + nCopies); final StringBuilder b = new StringBuilder(); for ( int i = 0; i < nCopies; i++ ) From f42bb86bdde92a6439284f77d9c378868ac32bc6 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 24 Apr 2013 16:01:43 -0400 Subject: [PATCH 013/116] e# This is a combination of 2 commits. Only try to clip adaptors when both reads of the pair are on opposite strands -- Read pairs that have unusual alignments, such as two reads both oriented like: <----- <----- where previously having their adaptors clipped as though the standard calculation of the insert size was meaningful, which it is not for such oddly oriented pairs. This caused us to clip extra good bases from reads. 
-- Update MD5s due change in adaptor clipping, which add some coverage in some places --- .../ReduceReadsIntegrationTest.java | 2 +- .../ErrorRatePerCycleIntegrationTest.java | 2 +- ...perGeneralPloidySuite2IntegrationTest.java | 6 ++--- ...dGenotyperIndelCallingIntegrationTest.java | 10 ++++---- ...GenotyperNormalCallingIntegrationTest.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 4 +-- .../HaplotypeCallerIntegrationTest.java | 2 +- .../sting/utils/sam/ReadUtils.java | 15 +++++++++-- .../coverage/CallableLociIntegrationTest.java | 8 +++--- .../sting/utils/sam/ReadUtilsUnitTest.java | 25 +++++++++++++++++++ 10 files changed, 56 insertions(+), 20 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index b5963498a..15b54dbd1 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c459a6153a17c2cbf8441e1918fda9c8"))); } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java index b435fc2eb..84020e2d0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java @@ -57,7 +57,7 @@ public class ErrorRatePerCycleIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s", 1, - Arrays.asList("dccdf3cb3193d01a1a767097e4a5c35e")); + Arrays.asList("6191340f0b56ee81fb248c8f5c913a8e")); executeTest("ErrorRatePerCycle:", spec); } } \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index ecc1ab66e..86961988d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9f960977b1b8d90ac75ba4306336553c"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","71b7b9eac1e4a9b2b7e8c3689d1f29ec"); + 
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","eb8b008a463f9fa9ad0155bf2f5b78b3"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fc36e925e269b035d4b27edb661be06b"); + executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fb988e9b93bc73b5e532584c83cac833"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 6b7631039..856e97ebe 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("ea4c4e020f59f48901d5820c8e4f6001")); + Arrays.asList("19f77f557150905ef3fa4713f611a1b9")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("8d9b8f8a1479322961c840e461b6dba8")); + Arrays.asList("bb3dbad9666ebf38d338f0c9c211a42e")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " 
--genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("79f7fc64da8f25eda1f1ee139ecdd657")); + Arrays.asList("8052390ca2b6a57c3ddf379a51225d64")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("15508fcf61380a91b6611307f182447b")); + Arrays.asList("b6b9dba97fbabaeeb458a41051983e7b")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("3d4d66cc253eac55f16e5b0a36f17d8d")); + Arrays.asList("38730c7030271f5d0ca0b59365d57814")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 38ab8ee36..907af0f34 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("a6c224235c21b4af816b1512eb0624df")); + Arrays.asList("5e8f1fa88dc93320cc0e75e9fe6e153b")); executeTest("test MultiSample Pilot1", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 9d4c52798..90d7f493c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "7d2cc5c4ece386beedf6b07dfbe5bf26"); + "5954a46971b7546d30151b068cded42a"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "a17856f709b546eaed486841d78248d2"); + "b3684c670f68f5a3a348a7fd2b25f10a"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 
d5e163a88..03d3e8a17 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "e2d32d0dce2c5502a8e877f6bbb65a10"); + "65188ec4e3b91796f62bfb5b965ccf1f"); } @Test diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 0eed80f3a..0db3aa043 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.*; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; @@ -47,6 +48,7 @@ import java.util.*; * @version 0.1 */ public class ReadUtils { + private final static Logger logger = Logger.getLogger(ReadUtils.class); private static final String OFFSET_OUT_OF_BOUNDS_EXCEPTION = "Offset cannot be greater than read length %d : %d"; private static final String OFFSET_NOT_ZERO_EXCEPTION = "We ran past the end of the read and never found the offset, something went wrong!"; @@ -209,7 +211,16 @@ public class ReadUtils { if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; - + + if ( 
read.getReadPairedFlag() && read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag() ) { + // note that the read.getProperPairFlag() is not reliably set, so many reads may have this tag but still be overlapping +// logger.info(String.format("Read %s start=%d end=%d insert=%d mateStart=%d readNeg=%b mateNeg=%b not properly paired, returning CANNOT_COMPUTE_ADAPTOR_BOUNDARY", +// read.getReadName(), read.getAlignmentStart(), read.getAlignmentEnd(), insertSize, read.getMateAlignmentStart(), +// read.getReadNegativeStrandFlag(), read.getMateNegativeStrandFlag())); + return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; + } + + int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read) if (read.getReadNegativeStrandFlag()) adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header) @@ -218,7 +229,7 @@ public class ReadUtils { if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) ) adaptorBoundary = CANNOT_COMPUTE_ADAPTOR_BOUNDARY; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor - + return adaptorBoundary; } public static int CANNOT_COMPUTE_ADAPTOR_BOUNDARY = Integer.MIN_VALUE; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociIntegrationTest.java index c07bf171a..336c15ccc 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociIntegrationTest.java @@ -34,13 +34,13 @@ public class CallableLociIntegrationTest extends WalkerTest { final static String commonArgs = "-R " + b36KGReference + " -T CallableLoci -I " + validationDataLocation + 
"/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s"; final static String reduceReadArgs = "-R " + b37KGReference + " -T CallableLoci -I " + " private/testdata/NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s"; - final static String SUMMARY_MD5 = "ffdbd9cdcb4169ebed5ae4bec797260f"; + final static String SUMMARY_MD5 = "a6f5963669f19d9d137ced87d65834b0"; @Test public void testCallableLociWalkerBed() { String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("42e86c06c167246a28bffdacaca75ffb", SUMMARY_MD5)); + Arrays.asList("9b4ffea1dbcfefadeb1c9fa74b0e0e59", SUMMARY_MD5)); executeTest("formatBed", spec); } @@ -48,7 +48,7 @@ public class CallableLociIntegrationTest extends WalkerTest { public void testCallableLociWalkerPerBase() { String gatk_args = commonArgs + " -format STATE_PER_BASE -L 1:10,000,000-11,000,000 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("d66c525d9c70f62df8156261d3e535ad", SUMMARY_MD5)); + Arrays.asList("d6505e489899e80c08a7168777f6e07b", SUMMARY_MD5)); executeTest("format_state_per_base", spec); } @@ -64,7 +64,7 @@ public class CallableLociIntegrationTest extends WalkerTest { public void testCallableLociWalker3() { String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("46a53379aaaf9803276a0a34b234f6ab", "da431d393f7c2b2b3e27556b86c1dbc7")); + Arrays.asList("7f79ad8195c4161060463eeb21d2bb11", "7ee269e5f4581a924529a356cc806e55")); executeTest("formatBed lots of arguments", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index 331121c55..abe0c394b 100644 --- 
a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -151,6 +151,31 @@ public class ReadUtilsUnitTest extends BaseTest { read.setReadNegativeStrandFlag(false); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); + + // Test case 8: read doesn't have proper pair flag set + read = makeRead(fragmentSize, mateStart); + read.setReadPairedFlag(true); + read.setProperPairFlag(false); + Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); + + // Test case 9: read and mate have same negative flag setting + for ( final boolean negFlag: Arrays.asList(true, false) ) { + read = makeRead(fragmentSize, mateStart); + read.setAlignmentStart(BEFORE); + read.setReadPairedFlag(true); + read.setProperPairFlag(true); + read.setReadNegativeStrandFlag(negFlag); + read.setMateNegativeStrandFlag(!negFlag); + Assert.assertTrue(get.getAdaptor(read) != ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY, "Get adaptor should have succeeded"); + + read = makeRead(fragmentSize, mateStart); + read.setAlignmentStart(BEFORE); + read.setReadPairedFlag(true); + read.setProperPairFlag(true); + read.setReadNegativeStrandFlag(negFlag); + read.setMateNegativeStrandFlag(negFlag); + Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY, "Get adaptor should have failed for reads with bad alignment orientation"); + } } @Test (enabled = true) From 874dc8f9c168bd6516fa92363bdee58a21c80b13 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Fri, 3 May 2013 14:59:16 -0400 Subject: [PATCH 014/116] Re-fix md5's that changed due to conflicting pushes --- .../UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java | 6 +++--- ...typeCallerComplexAndSymbolicVariantsIntegrationTest.java | 4 ++-- .../haplotypecaller/HaplotypeCallerIntegrationTest.java | 2 +- 3 files changed, 6 insertions(+), 6 
deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 86961988d..64568d714 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9f960977b1b8d90ac75ba4306336553c"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","eb8b008a463f9fa9ad0155bf2f5b78b3"); + executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","5d55b71688a0777a7c0247c376401368"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fb988e9b93bc73b5e532584c83cac833"); + executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "cf336d66a109c55f90e9ed2b3bc196c8"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 90d7f493c..9d4c52798 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "5954a46971b7546d30151b068cded42a"); + "7d2cc5c4ece386beedf6b07dfbe5bf26"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "b3684c670f68f5a3a348a7fd2b25f10a"); + "a17856f709b546eaed486841d78248d2"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 03d3e8a17..d5e163a88 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "65188ec4e3b91796f62bfb5b965ccf1f"); + "e2d32d0dce2c5502a8e877f6bbb65a10"); } @Test From 6ff74deac71fb405638c85612ea762acbaee4e39 Mon Sep 17 00:00:00 2001 From: 
Chris Hartl Date: Mon, 6 May 2013 12:11:04 -0400 Subject: [PATCH 015/116] Add overall genotype concordance to the genotype concordance tool. In addition, protect from non-diploid genotypes, which can cause very strange behavior. Update MD5 sums. As expected, md5 changes are consistent with the genotype concordance field being added to each output. --- .../ConcordanceMetricsUnitTest.java | 2 + .../GenotypeConcordanceIntegrationTest.java | 14 +++---- .../variantutils/ConcordanceMetrics.java | 41 +++++++++++++++++++ .../variantutils/GenotypeConcordance.java | 10 +++++ 4 files changed, 60 insertions(+), 7 deletions(-) mode change 100644 => 100755 protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java mode change 100644 => 100755 protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java mode change 100644 => 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java mode change 100644 => 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java old mode 100644 new mode 100755 index bca912d63..bd9ff4f80 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -567,8 +567,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest { table[5] = new int[] {12, 0, 34, 20, 10, 0}; double EXPEC_NRS = 0.8969957; double EXPEC_NRD = 0.1071429; + double EXPEC_OGC = 0.92592592; // (100+150+50)/(100+5+1+150+7+3+50+2+6) Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7); 
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7); + Assert.assertEquals(EXPEC_OGC,metrics.getOverallOGC(),1e-7); int EXPEC_EVAL_REF = 124; int EXPEC_EVAL_HET = 169; int EXPEC_EVAL_VAR = 62; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java old mode 100644 new mode 100755 index d5f75a8cd..ffd358a6e --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java @@ -64,7 +64,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"), 0, - Arrays.asList("0f29a0c6dc44066228c8cb204fd53ec0") + Arrays.asList("6fe03c63a76cb61a76e550137ebf8c5e") ); executeTest("test indel concordance", spec); @@ -75,7 +75,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"), 0, - Arrays.asList("fc725022d47b4b5f8a6ef87f0f1ffe89") + Arrays.asList("6246d81b25a9a96e379c47056177a65d") ); executeTest("test non-overlapping samples", spec); @@ -86,7 +86,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize", 0, - Arrays.asList("3993709e38b033e89017dfbb63226e94") + Arrays.asList("ee1da9b0119ce7869b2d05d81cef255e") ); executeTest("Test moltenized output",spec); @@ -97,7 +97,7 @@ public class 
GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"), 0, - Arrays.asList("352d59c4ac0cee5eb8ddbc9404b19ce9") + Arrays.asList("a1c48b041b0f0b8bf9387d5db337e5a1") ); executeTest("test multiple records per site",spec); @@ -108,7 +108,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'", 0, - Arrays.asList("b7b495ccfa6d50a6be3e095d3f6d3c52") + Arrays.asList("7f52e70482c30031bedf2fcc6bd359b2") ); executeTest("Test filtering on the EVAL rod",spec); @@ -119,7 +119,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'", 0, - Arrays.asList("6406b16cde7960b8943edf594303afd6") + Arrays.asList("1402712d1ab18bafa5bac130af2f974c") ); executeTest("Test filtering on the COMP rod", spec); @@ -130,7 +130,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'", 0, - Arrays.asList("26ffd06215b6177acce0ea9f35d73d31") + Arrays.asList("6b83695122481d2dcbe3c792caf743a1") ); executeTest("Test filtering on both rods",spec); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java old mode 100644 new mode 100755 index b3b4857b6..005acf27b --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFHeader; @@ -81,10 +82,23 @@ public class ConcordanceMetrics { return Collections.unmodifiableMap(nrd); } + public Map getPerSampleOGC() { + Map ogc = new HashMap(perSampleGenotypeConcordance.size()); + for ( Map.Entry sampleTable : perSampleGenotypeConcordance.entrySet() ) { + ogc.put(sampleTable.getKey(),calculateOGC(sampleTable.getValue())); + } + + return Collections.unmodifiableMap(ogc); + } + public Double getOverallNRD() { return calculateNRD(overallGenotypeConcordance); } + public Double getOverallOGC() { + return calculateOGC(overallGenotypeConcordance); + } + public Map getPerSampleNRS() { Map nrs = new HashMap(perSampleGenotypeConcordance.size()); for ( Map.Entry sampleTable : perSampleGenotypeConcordance.entrySet() ) { @@ -110,6 +124,11 @@ public class ConcordanceMetrics { for ( String sample : perSampleGenotypeConcordance.keySet() ) { Genotype evalGenotype = eval.getGenotype(sample); Genotype truthGenotype = truth.getGenotype(sample); + // ensure genotypes are either no-call ("."), missing (empty alleles), or diploid + if ( ( ! evalGenotype.isNoCall() && evalGenotype.getPloidy() != 2 && evalGenotype.getPloidy() > 0) || + ( ! 
truthGenotype.isNoCall() && truthGenotype.getPloidy() != 2 && truthGenotype.getPloidy() > 0) ) { + throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy())); + } perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef); overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef); } @@ -136,10 +155,32 @@ public class ConcordanceMetrics { return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total); } + private static double calculateOGC(int[][] concordanceCounts) { + int correct = 0; + int total = 0; + correct += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_REF.ordinal()]; + correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()]; + correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += correct; + total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()]; + total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()]; + // NRD is by definition incorrec/total = 1.0-correct/total + // note: if there are no observations (so the ratio is NaN), set this to 100% + return total == 0 ? 
1.0 : ( (double) correct)/( (double) total); + } + private static double calculateNRS(GenotypeConcordanceTable table) { return calculateNRS(table.getTable()); } + private static double calculateOGC(GenotypeConcordanceTable table) { + return calculateOGC(table.getTable()); + } + private static double calculateNRS(int[][] concordanceCounts) { long confirmedVariant = 0; long unconfirmedVariant = 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java old mode 100644 new mode 100755 index 3110d25b9..dd9e822c8 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -293,6 +293,7 @@ public class GenotypeConcordance extends RodWalker entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) { ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue(); @@ -378,9 +379,13 @@ public class GenotypeConcordance extends RodWalker nrdEntry : metrics.getPerSampleNRD().entrySet() ) { concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue()); } + for ( Map.Entry nrdEntry : metrics.getPerSampleOGC().entrySet() ) { + concordanceSummary.set(nrdEntry.getKey(),"Overall_Genotype_Concordance",nrdEntry.getValue()); + } concordanceSummary.set("ALL_NRS_NRD","Sample","ALL"); concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS()); concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Discrepancy",metrics.getOverallNRD()); + concordanceSummary.set("ALL_NRS_NRD","Overall_Genotype_Concordance",metrics.getOverallOGC()); for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { @@ -411,6 +416,7 @@ public class GenotypeConcordance extends RodWalker nrdEntry : metrics.getPerSampleNRD().entrySet() 
) { concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue()); } + for ( Map.Entry ogcEntry : metrics.getPerSampleOGC().entrySet() ) { + concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue()); + } concordanceSummary.set("ALL","Sample","ALL"); concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS()); concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD()); + concordanceSummary.set("ALL","Overall_Genotype_Concordance",metrics.getOverallOGC()); for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type)); From b53336c2d065b2e139446a30a99f0b836bc32a18 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 6 May 2013 14:57:01 -0400 Subject: [PATCH 016/116] Added hidden mode for BQSR to force all read groups to be the same one. * Very useful for debugging sample-specific issues * This argument got lost in the transition from BQSR v1 to v2 * Added unit test to cover this case --- .../bqsr/RecalibrationArgumentCollection.java | 4 ++++ .../covariates/ReadGroupCovariate.java | 8 ++++++- .../ReadGroupCovariateUnitTest.java | 21 ++++++++++++++----- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 0a4899f1c..5a2cdc7a6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -219,6 +219,10 @@ public class RecalibrationArgumentCollection { @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of 
EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") public String FORCE_PLATFORM = null; + @Hidden + @Argument(fullName = "force_readgroup", shortName = "fRG", required = false, doc = "If provided, the read group of EVERY read will be forced to be the provided String.") + public String FORCE_READGROUP = null; + @Hidden @Output(fullName = "recal_table_update_log", shortName = "recal_table_update_log", required = false, doc = "If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only", defaultToStdout = false) public PrintStream RECAL_TABLE_UPDATE_LOG = null; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java index 350cf5d33..664c1786e 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java @@ -93,10 +93,13 @@ public class ReadGroupCovariate implements RequiredCovariate { private final HashMap readGroupLookupTable = new HashMap(); private final HashMap readGroupReverseLookupTable = new HashMap(); private int nextId = 0; + private String forceReadGroup; // Initialize any member variables using the command-line arguments passed to the walkers @Override - public void initialize(final RecalibrationArgumentCollection RAC) {} + public void initialize(final RecalibrationArgumentCollection RAC) { + forceReadGroup = RAC.FORCE_READGROUP; + } @Override public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { @@ -170,6 +173,9 @@ public class ReadGroupCovariate implements RequiredCovariate { * @return platform unit or readgroup id */ private String readGroupValueFromRG(final GATKSAMReadGroupRecord rg) { + if ( forceReadGroup != null ) + 
return forceReadGroup; + final String platformUnit = rg.getPlatformUnit(); return platformUnit == null ? rg.getId() : platformUnit; } diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java index 0878fba82..0b2df6369 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java @@ -75,26 +75,37 @@ public class ReadGroupCovariateUnitTest { final String expected = "SAMPLE.1"; GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID"); rg.setPlatformUnit(expected); - runTest(rg, expected); + runTest(rg, expected, covariate); } @Test(enabled = true) public void testMissingPlatformUnit() { final String expected = "MY.7"; GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(expected); - runTest(rg, expected); + runTest(rg, expected, covariate); } - private void runTest(GATKSAMReadGroupRecord rg, String expected) { + @Test(enabled = true) + public void testForceReadgroup() { + final RecalibrationArgumentCollection forcedRAC = new RecalibrationArgumentCollection(); + forcedRAC.FORCE_READGROUP = "FOO"; + final ReadGroupCovariate forcedCovariate = new ReadGroupCovariate(); + forcedCovariate.initialize(forcedRAC); + + final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("NOT_FOO"); + runTest(rg, "FOO", forcedCovariate); + } + + private static void runTest(final GATKSAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) { GATKSAMRecord read = ReadUtils.createRandomRead(10); read.setReadGroup(rg); ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); covariate.recordValues(read, readCovariates); - verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected); + 
verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected, covariate); } - private void verifyCovariateArray(int[][] values, String expected) { + private static void verifyCovariateArray(final int[][] values, final String expected, final ReadGroupCovariate covariate) { for (int[] value : values) { String actual = covariate.formatKey(value[0]); Assert.assertEquals(actual, expected); From d242f1bba3fa0578ec9cfa7aa35a5bce3e99616e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 6 May 2013 19:00:58 -0400 Subject: [PATCH 017/116] Secondary alignments were not handled correctly in IndelRealigner * This is emerging now because BWA-MEM produces lots of reads that are not primary alignments * The ConstrainedMateFixingManager class used by IndelRealigner was mis-adjusting SAM flags because it was getting confused by these secondary alignments * Added unit test to cover this case --- .../indels/ConstrainedMateFixingManager.java | 13 ++- .../ConstrainedMateFixingManagerUnitTest.java | 108 ++++++++++++++++++ 2 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManagerUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java index 5411c5d98..c98fe4d3c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java @@ -212,6 +212,15 @@ public class ConstrainedMateFixingManager { public int getNReadsInQueue() { return waitingReads.size(); } + /** + * For testing purposes only + * + * @return the list of reads currently in the queue + */ + protected List getReadsInQueueForTesting() { + return new ArrayList(waitingReads); + } + public boolean 
canMoveReads(GenomeLoc earliestPosition) { if ( DEBUG ) logger.info("Refusing to realign? " + earliestPosition + " vs. " + lastLocFlushed); @@ -233,7 +242,7 @@ public class ConstrainedMateFixingManager { addRead(newRead, modifiedReads.contains(newRead), false); } - private void addRead(SAMRecord newRead, boolean readWasModified, boolean canFlush) { + protected void addRead(SAMRecord newRead, boolean readWasModified, boolean canFlush) { if ( DEBUG ) logger.info("New read pos " + newRead.getAlignmentStart() + " OP = " + newRead.getAttribute("OP") + " " + readWasModified); //final long curTime = timer.currentTime(); @@ -265,7 +274,7 @@ public class ConstrainedMateFixingManager { // fix mates, as needed // Since setMateInfo can move reads, we potentially need to remove the mate, and requeue // it to ensure proper sorting - if ( newRead.getReadPairedFlag() ) { + if ( newRead.getReadPairedFlag() && !newRead.getNotPrimaryAlignmentFlag() ) { SAMRecordHashObject mate = forMateMatching.get(newRead.getReadName()); if ( mate != null ) { // 1. Frustratingly, Picard's setMateInfo() method unaligns (by setting the reference contig diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManagerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManagerUnitTest.java new file mode 100644 index 000000000..9bcd7a3a3 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManagerUnitTest.java @@ -0,0 +1,108 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.indels; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.List; + + +public class ConstrainedMateFixingManagerUnitTest extends BaseTest { + + private static SAMFileHeader header; + private static GenomeLocParser genomeLocParser; + + @BeforeClass + public void beforeClass() { + header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 100); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + } + @Test + public void testSecondaryAlignmentsDoNotInterfere() { + final List properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 10, 30, true, false); + final GATKSAMRecord read1 = properReads.get(0); + read1.setAlignmentStart(8); // move the read + read1.setFlags(99); // first in proper pair, mate negative strand + + final GATKSAMRecord read2Primary = properReads.get(1); + read2Primary.setFlags(147); // second in proper pair, on reverse
strand + + Assert.assertEquals(read1.getInferredInsertSize(), 21); + + final GATKSAMRecord read2NonPrimary = new GATKSAMRecord(read2Primary); + read2NonPrimary.setFlags(393); // second in pair, mate unmapped, not primary alignment + + final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(null, genomeLocParser, 1000, 1000, 1000); + manager.addRead(read1, true, false); + manager.addRead(read2NonPrimary, false, false); + manager.addRead(read2Primary, false, false); + + Assert.assertEquals(manager.getNReadsInQueue(), 3); + + for ( final SAMRecord read : manager.getReadsInQueueForTesting() ) { + if ( read.getFirstOfPairFlag() ) { + Assert.assertEquals(read.getFlags(), 99); + Assert.assertEquals(read.getInferredInsertSize(), 23); + } else if ( read.getNotPrimaryAlignmentFlag() ) { + Assert.assertEquals(read.getFlags(), 393); + Assert.assertEquals(read.getInferredInsertSize(), -21); + } else { + Assert.assertEquals(read.getFlags(), 147); + Assert.assertEquals(read.getInferredInsertSize(), -23); + } + } + } + +} \ No newline at end of file From 2b86ab02bee8d5fff0175f4faebdb4083e229266 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 7 May 2013 12:11:46 -0400 Subject: [PATCH 018/116] Improve queue script jobreport visualization script -- the Queue jobreport PDF script now provides a high-level summary of the de-scattered runtimes of each analysis, so that it's easy to see where your script is spending its time across scatters.
--- .../sting/queue/util/queueJobReport.R | 53 +++++++++++++++---- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R index 36e6343cb..2bc0a2fa5 100644 --- a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R +++ b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R @@ -3,6 +3,7 @@ library(ggplot2) library(gplots) library(tools) library(reshape) +library(plyr) # # Standard command line switch. Can we loaded interactively for development @@ -14,7 +15,7 @@ if ( onCMDLine ) { inputFileName = args[1] outputPDF = args[2] } else { - inputFileName = "Q-26618@gsa4.jobreport.txt" + inputFileName = "~/Desktop/broadLocal/projects/pipelinePerformance/FullProcessingPipeline.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" outputPDF = NA @@ -35,13 +36,11 @@ allJobsFromReport <- function(report) { # # Creates segmentation plots of time (x) vs. 
job (y) with segments for the duration of the job # -plotJobsGantt <- function(gatkReport, sortOverall, includeText) { +plotJobsGantt <- function(gatkReport, sortOverall, title, includeText) { allJobs = allJobsFromReport(gatkReport) if ( sortOverall ) { - title = "All jobs, by analysis, by start time" allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ] } else { - title = "All jobs, sorted by start time" allJobs = allJobs[order(allJobs$startTime, decreasing=T), ] } allJobs$index = 1:nrow(allJobs) @@ -54,11 +53,11 @@ plotJobsGantt <- function(gatkReport, sortOverall, includeText) { p <- p + theme_bw() p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm"))) if ( includeText ) - p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2) - p <- p + xlim(0, maxRelTime * 1.1) + p <- p + geom_text(aes(x=relStartTime, label=ganttName, hjust=0, vjust=-1), size=2) + p <- p + xlim(0, maxRelTime * 1.3) p <- p + xlab(paste("Start time, relative to first job", RUNTIME_UNITS)) p <- p + ylab("Job number") - p <- p + opts(title=title) + p <- p + ggtitle(title) print(p) } @@ -182,6 +181,27 @@ plotTimeByHost <- function(gatkReportData) { plotMe("Jittered points", geom_jitter) } +mergeScattersForAnalysis <- function(table) { + #allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts) + + ddply(table, .(analysisName, iteration), summarize, + jobName = analysisName[1], + exechosts = paste(length(exechosts), "hosts"), + formattedStartTime = "NA", + formattedDoneTime = "NA", + intermediate = intermediate[1], + startTime = min(startTime), + doneTime = min(startTime) + sum(runtime), + runtime = sum(runtime)) +} + +mergeScatters <- function(report) { + newReport = list() + for ( name in names(gatkReportData) ) { + newReport[[name]] = mergeScattersForAnalysis(gatkReportData[[name]]) + } + newReport +} # read the table gatkReportData <- gsa.read.gatkreport(inputFileName) @@ -192,13 +212,24 
@@ if ( ! is.na(outputPDF) ) { pdf(outputPDF, height=8.5, width=11) } -plotJobsGantt(gatkReportData, T, F) -plotJobsGantt(gatkReportData, F, F) +plotJobsGantt(gatkReportData, T, "All jobs, by analysis, by start time", F) +plotJobsGantt(gatkReportData, F, "All jobs, sorted by start time", F) plotProgressByTime(gatkReportData) + +# plots summarizing overall costs, merging scattered counts +merged.by.scatter = mergeScatters(gatkReportData) +plotJobsGantt(merged.by.scatter, F, "Jobs merged by scatter by start time", T) + +merged.as.df = do.call(rbind.data.frame, merged.by.scatter)[,c("analysisName", "runtime")] +merged.as.df$percent = merged.as.df$runtime / sum(merged.as.df$runtime) * 100 +merged.as.df.formatted = data.frame(analysisName=merged.as.df$analysisName,runtime=prettyNum(merged.as.df$runtime), percent=prettyNum(merged.as.df$percent,digits=2)) +textplot(merged.as.df.formatted[order(merged.as.df$runtime),], show.rownames=F) +title("Total runtime for each analysis") + plotTimeByHost(gatkReportData) for ( group in gatkReportData ) { - print(group) - plotGroup(group) + #print(group) + plotGroup(group) } if ( ! 
is.na(outputPDF) ) { From 52c20a8416e577f89b1143d38705641930b94dc1 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 7 May 2013 14:21:15 -0400 Subject: [PATCH 019/116] Rev picard, sam-jdk, tribble, and variant to version 1.91.1453 -mainly for the seek() performance optimization --- .../repository/net.sf/picard-1.90.1442.xml | 3 --- ...ard-1.90.1442.jar => picard-1.91.1453.jar} | Bin 1644252 -> 1645927 bytes .../repository/net.sf/picard-1.91.1453.xml | 3 +++ settings/repository/net.sf/sam-1.90.1442.xml | 3 --- .../{sam-1.90.1442.jar => sam-1.91.1453.jar} | Bin 617595 -> 620475 bytes settings/repository/net.sf/sam-1.91.1453.xml | 3 +++ ...le-1.90.1442.jar => tribble-1.91.1453.jar} | Bin 265519 -> 265537 bytes ...le-1.90.1442.xml => tribble-1.91.1453.xml} | 2 +- ...nt-1.90.1446.jar => variant-1.91.1453.jar} | Bin 556173 -> 556361 bytes ...nt-1.90.1446.xml => variant-1.91.1453.xml} | 2 +- 10 files changed, 8 insertions(+), 8 deletions(-) delete mode 100644 settings/repository/net.sf/picard-1.90.1442.xml rename settings/repository/net.sf/{picard-1.90.1442.jar => picard-1.91.1453.jar} (85%) create mode 100644 settings/repository/net.sf/picard-1.91.1453.xml delete mode 100644 settings/repository/net.sf/sam-1.90.1442.xml rename settings/repository/net.sf/{sam-1.90.1442.jar => sam-1.91.1453.jar} (85%) create mode 100644 settings/repository/net.sf/sam-1.91.1453.xml rename settings/repository/org.broad/{tribble-1.90.1442.jar => tribble-1.91.1453.jar} (82%) rename settings/repository/org.broad/{tribble-1.90.1442.xml => tribble-1.91.1453.xml} (76%) rename settings/repository/org.broadinstitute/{variant-1.90.1446.jar => variant-1.91.1453.jar} (80%) rename settings/repository/org.broadinstitute/{variant-1.90.1446.xml => variant-1.91.1453.xml} (71%) diff --git a/settings/repository/net.sf/picard-1.90.1442.xml b/settings/repository/net.sf/picard-1.90.1442.xml deleted file mode 100644 index 4ec267817..000000000 --- a/settings/repository/net.sf/picard-1.90.1442.xml +++ /dev/null 
@@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.90.1442.jar b/settings/repository/net.sf/picard-1.91.1453.jar similarity index 85% rename from settings/repository/net.sf/picard-1.90.1442.jar rename to settings/repository/net.sf/picard-1.91.1453.jar index caf2bc09d6fb177f5165882867bf8a16fe6cb5ac..f196ee5a43bcee0abd96f130acb28135c89792a1 100644 GIT binary patch delta 99563 zcmbTfcVHF87dCv(&fdFw_ilPf0!c_hZwV#z8hY;_y-RPQcS4mSJ;Hz>1O$=Zgp1OP zNRuK67C^8dhzKG!q8qJ3p>@SQ7)xKmr^B@yO%7LSg7gHZaoI{ zOsqM0X!_ux-G=uaJg9JxRexamfSv<;4jS$xB)`&g=rBB}m{>BULdilUOP5Jq_g*a89O-c$?5y&< zeCGU!x_J9)L~lJzedSV~4s{pjBT~QRnbnWyk|lb^w9MRIyQmC3s0AVGuQ4*8lXcLI zE^j;wB~riims#6ohE_F@E2ip;&|p>I(Nl+`i$xL@iv?%#pT0pzJI+mYpklJ9GhB%Lo&~-JsI1CBJk=n(&#yPm?$qZ6=RLcvDme7 z-CW(Ft59%`h|Fpm>*3Xz8=J&9X=%;Bzmj?5DAHuNM`6*j>vT3VS6wV1vmS+cf;~^+(*6~svC&Qh*Mz3+mFz#grdALuE zX}+vi<`3t8+<9i9?&K|eHe=gl0RbfAzpXwI2J^#Az1|q}2zccLl6d6<@ zyLMig>ei_D_gyz;R{2tKdd`=%O^0clU;ne!)L^2i!7{15mCdvT$3o`>h71}-$VSLx=Z{?ytO}`KRt6vG#-!dmniI55XW4v-zIHS`A)`#;0c33~?;b*_g zm62QvsozBM6*qOUxy~z(L~(VbifAsZJGAEhecj@}K*nF>(5l9cyikfGW&c=4rQEh# z5X-lnl5^}|H9}5|NQ<;ys&2Z}5P21&}G)6B9L{{fpkq#C6a*kLk zpS9*HNUih=-xN;SGdC{%x^@nt+ByENNoLxaNZG!#5J1mw^ewk+= z*UtMF^+C_>&lU~jf;5=RQ#x0rVceKT@&FpeBWSdoGLZ9&OZ0l+oq_y}oq6V+!CcAt zYUsM#dH(W)e|}kVuU0^Yj^^-8XA}j>r1LYsL-1$gQs&8ujNG*}cEBsD@t700oEe&Rt4wpYa_c$zCvYx|a&}#N&T2&Q46IOBAaHojI{nPr( zPCK{;QnPk&oYyI0dbL)E7Yj^d;^nO4lM4=Ugj{f)gQT(7;YW_WEQQmxhUZ$+eRw>P z`gNB(KIba3*@zfZZXXhp=uT*XBKvt{yi@j+b5llc!ko7$R6bmv-PR~OZ}zknc=|bS z33X`8`?aU1-0=f92W{dWhq=RZ;-{aKxnB;UTB|M6XhUE`wDvT_Db%<+qK6z)Q0s=SIdNP_?LcB7ZM?^!?Dvi1^TwhI zvGFo(JQt7`lC``j|8cSw?~-2T-YCDL&B~4uF)MF2pN_ZHQWAV;$V*@mt&`ot zwH)XY+A>jO`<>i|bw4R~>caqOlUF-NZy%9_Y7CyT%}+?hfrbi>*{t7Tl#x7aizQVAC#bv^(@@=CRGP z=w7WMcra?O*1+XV^v)Y6`0k# zN`rXLyvY4z;_awV*{rnY4_r8TkR z`Z;Y`nA7q>m%Di7#G4u*Kw$Sx?Sbi}p1(98Rc^lPl<>=4E!CBiF{eshk}3DKYREh2 zzP8t`(VaG5EtM&cG)(G&36Hcx?qCru>lA!H57GO1Wc6snA=$s=u@)r18WZFFTf_vW 
z{i)T@#e)OqBE(bXHv(sJh#o8;9nnigej#akpmnsktvmdv8YfCR{3yQrbWC&bqq!!V z-xYCkQlbczxoZfYJe?;TL)qcX!NFFjyiicYs3&%ybG+E?vS;eGi#KI#g23o@=i&Kz zM6{ffAWl1V-#g^3VX||gP_xR`L=o%O@{j0p@5zh#gc^2v^NXE6hmd?9@Qy!56a6t- za$-oNOq>woTT)!aXQb-24Q+%)tc^6aQR+I{;?mj}QyXhh4{e;OjW@LkDt)5KeSApD zP|qh>+GK5tMP0P1rZ&ycrdt%IjkLJF_L_QahF_bh&GKurwbxZJ$I#{)+B}P5wD~Gn zpaQ%dqh&1gYm2l@Lwm!}7Mt1nm#MvHYP$_>kE(jW()Mcas|tRgZucop?6ST(qdR_KD)#CBJrAC4cItKH3#kzpIA!nW=qlQGe|VmH4H4@tVqe-PCTF z+E=D_)6{O6+SjV2Z_EtsTNU}v)NY&F_ojBo)b5(v52kicCI4t@_to_$6XQ*Ls6Ozs zsXa2aUsTl|tKe5viz%k|n|k=Wsr_MUf1278Q~S%*o~pEGDtN9+c>%r%LQFu~_7Iv1 zG6Yy6bQO3Ml?zqOYYId4xaX!YP2p2XmJ0lauu&foq;7&u5rRx2)D&UrK3oM6rifHL zS#OFQ>S2^AqCusIF-1;OLytQDHc~0#Z^#3-Ig>&DT_{vRCGsC+7x9#ktnOk zx~4u{&J^WMQ9)ItqAvppl}u3?V?k6gL{&pnv*DsXCZegz-b|%6H$@9mv@}I4Q?xe4D`+aw#t>~)t=gHQ zy(v1#kW`VBp}O7|rs!>oKB|rSs85zi6f^u(K@3(we^bmf#Vk|IRv*W#5F=h!RhXlKxrUf$iutBkV2Xw6(IP`+`njZd z!xW1RvBbw^g*3!cAD36xWj?eHu5bFdvbrw!aaDC);o~%sfyi4vdMKnSd8Gt05(2Ula#P!ay79+|CFhwTs<^2T0X4e3a*@ipfr)H0S{HLA@=f{@<|QR)<9pb zZ-@^A?P`kC8gG>a>xh9oMJ}!*M)1b<{i+if1sBW!V6oH4{% zTYQWZaZW}y7QGYCiwicN;?ssWZi|cJ6Y$Iym-wVDE(_EpT3%eFMa!LyMWGy5#8q2- zCO)^t7vf7p9JR$Yaa}%WEIRNF*|3Rd$6qb!9u*}o{2CK2KWQRj`KJ7-iKyVMqNOs$ zO3p2!GSfDDJhoO|OO*$|6A5xaQ(<#;Ii{&tSmBoV+7RE^;#={ZEpChNZE*+0iMzbr z5I@-Bp7@c;vi{ba*nCdaqgGLM(VDk|j{j@da>&99W zTWh1Wm4km2IsXN-uD1sFL*x&wMUTRvV5SzK=^(ynQCknw!)-l6kF@n1dX%k4D{{j0 z7^bpvM>$bQ-fSR(E%k|A~!PZc;BB> zj&CD!=18#hMBWb0<+b%BG_Ib<*7M4ZZA4*_R~srz_Sa(MPi=q^^VxcSy@0K=UeMMH z$>g?TpeU>t3B1)-oZz5)eAL!zXf^Q_U6<+YMQ>pu@ zygF$^7lxpi*3H(F#nZsh4q}uRl!7j%7uQSJdP(uAT)0m}NxQR{B}(b3a%pE#NR-yg z$YY&F1uiR}b;itIP8CvKuOLTu5&66o6@MzptS+LQw=&Ams|2oh5eXbrRj+3A4)iQN zP5df--9&Oye{F!R8LFYHtF9WNf^q%Oc*2hZ%Z7+J7K1RD_6`-T#Se5RU=I_Um`4Zp4i`mPbjFCiJp!Cug{4>UNJN6U zxGXyoJ+-eaJ4zf%C_p|UE=>lPp%5+yRaJSa#}%;1uS6@jGS*`vi`&Q@qs1Np9gOTZ zMkI*^+%zzIj97xsu8|-t6RDF-86k4ZdgDaBggi<^%Ka!e_opII43y+SRGJ4<3r?qY zJS4DwoG=Af3T&S!Ita0cl4bA|F`-~8Kg5FN7p)~f*4p#0+I;>)Tf~2A%lI#C4?opT 
z@iXmrjRNmX5$TZ{ACxgtB&lnq>}zK!(zv`_CB+L-L8~ZNFBK77N$y@M3VSR2Nzb-CE$uY+2LQJPj)t0#M{5c#Xr=TRh_GV_pVYG@5!U|H)i_=Ld+4ThKkq-{WK zXbqoG39S*ntrL^_9EM}yeb^M@B& z$(WU*AGelMR|0^&f)otqz~+_W9Zsl&_Gm;wT05;hV(lrBcWJE=8%G5Y>mY|@iHKSq zkpL|WALHY=s@PV38`L1CbwWK{-b9nPF~}|6!O&jmYG|E%fjzwptqZ_rSFM{onI$Uv zFt`Cyi-WDY>JC2uMy)CMU9@?uiVJ(D7PdR3-$j`mxsqA~%Mr-+7t#5XljH1YAl-tny$ro$IP=T>0 zhp!X+{SdNf$Z)Ep4U(1K5iNMIoc9g{Yq~sxix`3l$XXBVGYntSwBhpI^&)RV7H4AA z^g-wR&g=V+D!xYH%(xfwhxO<*&T=*I?z_Trwa7(Y^Vy3E6D=w%SD(||MJ1Gno-p~- zMiG>e%0F6Yq`wTpLO`RxP(D5L(QEWv#a=+FFeApAj}|c20eu*=zlXglW>{>p&%$W* zt6SR#4Nb6$g{WAliiM$Y4p(}+2z48&0)YNL9A$Ad$5-yMw73u#wzvovRY5U}lNngwVqkrfOQ;Hz zw73+fs-U#`Kp8(w6y;P<-pJqz7M0#9ffOs;Qn18%5-MkY5lxrxb5O>Sm#bCX+`+|uM$Cbw4gctvrcjlpeI_1mF_ z+}@j(abJx{!Zg^yJ{aVOf65~;2x2oWlX18HX$(hHJfMfOq=G= zT$^XnBAe#ZB7az;E>y7<2zU`^ z+VnlYVbdzgvgvJ_VAC#o&)~%lI!uz5cuaeDw z*W+Zd4@5{t7O%EBi`UqguI?DT*5-Bm4yIO{-{lQ9Z{$q|Z?<_0OPjaSR-3n}*mhO! z4x4xKE}P#|!EW9IPT3sbWj62S_nGW|-9~mR-gj{EI(-N9wD|+|V4n*1^8uR=@&Tsc ze#yv_(syvm@PX;C_8n^TA-ZGpVLoE>QOu>p9e|cPB^rF(<_|&l_aBJF$P@e#o}?MT z2?|)-e1`7G*nMJ{_z@8xjCgUDKbAZ8iGu!feBJ;A<%@Xo%|1~r*C#e#;>$LF${(wG zU$OZre`fRNc%D8tDk{{bU+Fgk$brA)YX)DJ7jK0ZwQLx|&e?o}wq_1#7pe1CK;8=B zM%wfa-*OPIP4^Y;-|}~vP3ISo5eGy<+-?5erh7=HyYz$2cL;2~%Rk^7d{161t>=?n zXR}8>$jV^{rXIi|l^gCkCHe|+j#A}-v!azwFKA`p@LAEH`Lrx{PUIFKL^eJrPPhA< z2B4uw(3j|B*J&e$S}$lf+G8F)Pgw|SQGU8WJE07&k172kErzA15yevgF_TJD8Ty1a zLu6K>6uN|b9x6yR=rVmO)6a{f_E%7#4GCt#By^0fBBmr;74kfzXoEg8=%W`X#AnbM z_s*cto%mUU&ca9s!sAH%{gmhlX)*X3x~9B$UVND28i+JNL_UW|ohm|dUz8tS5GBF% z-!F({{|)*I^Y**=>K0l2qDae#a&Xa~An6I}_7@eQr<4ryiK=@Ml${^XZ_+J@uW*V* zJ@?{uqT;j(^;9YKmqig*tKRi~t`pM!0&9v?in2(^ zn|Z>U1onL)p1F$P-4iNQmxr$jrMV8dE=IbV-l-oRG3DG_5bddf)3<s-0vC!q{dfuDfg9nJT94@Gf@j1#px){*xf3T2}! 
z{Ih7`+Gy@v`#EbTOsG49<>3?Ik#hT_n8<&~c>g!?o@+rZxoi63aWHI*D=OI=7P(|{ zOfGq_rEUOIT(w|-h7!fg9u)vBc-c^aY5WZsylh)8VDJJk;901JAk5-$j<7fqFafbB z^&r~f7|y9;xl{nnpqJxR0NtS1#S17X(V}Pt7NRp8w4i_j3II@uI%V^mvz4b8&F6wRUGt$tvSU2a==Xql~U5+QZUilSuoE6Weig^4&y4S{81_xt!}~NF{XkWW5F4a zJkH|rJR$Inp)b%ZA08S!(cp~0BuhWXL6d;}(12(=117f)wmyiVx1wXHZ{R|ZUXJx< zOatVOQ^Aq4>l=|i`LJIolxN$5ymHwH7|394*|LW!fVJvJSrL-K5dUZwOhg-n(Kn@~_<_g+I1hxY6-c<8H8yzmMJ%2~;)Z2AmR zg(oRVz1rqA{I(pDhjYsgk$TPOwaA1|uCw_am3_U!CGYZTxjRxX8omL@g|y-&ZQg_& zbd9ddN0Iu#0-Mz`Tp9Z+C489XX+)X=`e`wr@zIDBwoFF>2bzhTgBoA+=)!5`4`K7XL_&RZa(Z7Ub_ z&@uwdI|w$;KNA$26KIDIL&Kwjqx5+;NWcnG7LU>k$1md#ZHNo#(*Ss~!OmGj!4`lT zIU-8$>{q~s9YnKQ-iXp;{Fix#!JpdjxmX?GX#E2puao-|^(1p05EEeJ2Kim0K2B_; zO@SUsdJ1Q3rY(+8JwU7B_oHOWCfZ8dkU5dwqV2Q;GNup!!%hH9wJMwgr>xx&wR?~^ z3eTz|Z8JozTJfnj_u@^Z1xO(Af?g%BLF){9-=Gf++Go&ygAOPp5E@TYR6n3VXkPsd zE#oj90oM3|%E9O87@*4oDh0GL32^2S#P^4Y`7x)Rq7(EHFhg5@gH9r*R3ZbQmMl_d-4kr`*7Qb9nOYLT(e z$#2ld%!U3S&uCQW1vmlR^Y4n`De~eUY0iUl>Hk0TRsNG=H2^jRH7?+X5rc3AQQ&w1 zJ%KYWp`Knm-TShhh-KFkG1Yn+{@df!Nx-3JlT#-h{9mS^9P0V9rBoSi`>2>|GtFtK zNYbBUiU$2xp3)dp0odmyMOVGZgdr&V6w?1p_yTKZrRB)_o0cr+y9{782C!~YVLeJ< zqy?50*2B5w4LFk-$iCt4>>B{+7NUcww411y2J+SdugyfUB~XMyXF64&bYQY?@T@BJ zr|+C5T7!~q)A#au5j|Jca}KN58(6Q<9ncMg_=F;w8+7gkiZt8?_)i|yD(5f|6$}r& zQdB>|8TU}Gfu&PV`Vlb`xcFxPcytF2qnvIG-7EPe?TES9QKc#B0A>(V)Cow7C`BD; z&&3h+6Vg<@h>B8(U5B5m&hmiyDNSJy{+?0{F#qyoJyKwF%bm%3wH%5D#TOH+g-Iw| zQ`2$4o1#m1i0Ystk{&w^dJ-U94FnyXrL;2;0P#3Dn^KL{3PzO{2DiE2NP)(dv=5!( z4^aBDGxVSqP8ZO#pZ$qvisB;Vdq#D^(&Yar6ZVqlZJR6e^Zb&|^eU8 zYcQqTG(@H6Rr$+P1sltz3QYOr!ctgFwwk8rkWXLH^90N?`gSco5UEsJ<|(hA7G+>}cu-!C2`#6xl|Z&K$cInJ=n8sufzQYeh$K`} zZAg{>yU;iy_K(umf(XsQdoMy)N)`1$`4c!2afI(M6)*;29Mm zcU%n3e(&&rD8D>kNmt1JdL=z3!d+MRO|N{`L@$rjvQ71-?i%FF@5=O)b6e`=eW_}a z#z3=V$yB|he9=;0=zOBpS>McWF>8G*T>l!Ew9Db)@^mZb6Q8xxZwqJfaHOqX!tvpJ z_t=B4U&o+&T|RsWb47{vVC(j&s8aEEz}KmPa_#hFotMgAt!~JlQncYEQ;r!1gYA!@25Wqa(Etlpiru8K3Z2Rym_Pbk6m9L zZ{BHxO5t0jgyTf%ek*<|Q0QAz)VCzQ)j_6gH9}=qzitHZ=%~92?ESQ5C%A6VFFslF 
zD%^fuDrdupwiMp8_(;Cj^k03>r+@mbi6xU3=vcqWE(`P*PP+_<{CZES4o_m4l-J53 zCoRSf$@V&p8Db_4Bp!y{M6MrM~^TSM#xBusx+}qlO~)47fJ$r- zu8*uk%ETElaWZX%)5i5y=$qVbp?tb!3C-}y+*!KiG+XqPnyVh>BYK!mx$?!!jE#Df z?3?9$V^o%2D~Xfk{v-NR(SX9`zN2~-IN41P{C-r2Ng`PObxeOfp$KL`qPdVk^C$!u zxfmp4X<7sq>`b^|FP5v1>xZ36M`s=X`cX8T?Bt*i^;%egVufCrrWrI{X|<>Mpuc%d z1v6AI(+9LZ+oIQLj=G(zZs)1n`RaCoA0Js{P^P+hL)|P^&z7iLse+{{SZ2_h7Bm&` zcFI93EWp5(Dz-|+vP>0SjX)MZp~q(UA$&KOw9!&J7WHV0Nn1_YW)R#E3K858pqEs^ zZWZiNLBIn1cwfa7CREq`D&>Hs0HL})Yyk}(wLr-+AIdmx0RWy*!AC0Xq(P?)It@7l zTXo>l3B82|U0>E|Z%k&^pb**kl-}M#24IBq23-iecS?Vs^^2&I4DIDf3M@XO7iZ%W z&}YyH*{zDVzKr}>|A8;bu9gagg?Z_@NQrebXK;~LCMT>dhzaG+dvB68g$VHQn(GBt*Z(|42DIe z8T?Uc4H5jLf(I&isDhvAM}r>OPz^m+%ZDZg{c0;&ZbOLwiIf&~5!9#&F)fn@dWy;g zo4mOO*wk1FY^=DRs>Q@J6+BnL3$=zKo0&Bm(pMRvJJhh1sl$fnwh3n0>|^LS`?23( z+vXru&7RcD=3s`IEY#*OhAlsWq3#@lMJ|<)KX25d<&WpFgvcR>tn-u!G`pZ*Ga(C; zKi6ZtT^J(PjYoF@@QL6j(DU_hM8~V>wJ19<_H#WaXY_{rQ^02>L|`=sL9B!vtOW$2 z()uHY@s8L4JoRA6yrD!Q5OHUub|UiydISJp7lQ^H)K!7lt_I*EuIVq3+v*ZH&Y21E1bj~1)Cs8PDdpUR-VD4*JGR&lW zJR6FgIh4ZlD3zB$39^)`@EWSY+kql?P$S++&3PAf;POQSf_or^=>(i16h6uicSDde?*C}nik?yGUK+sAm=9(#Fs$iWh%&@!cKmLs`FK- zmp+q)zSq+n`Lcc8!~rt>dl*RpGQQWl3BDt%-P5Z&(|h6cIb-%Xk24>`Q~L8Fbx(N;>0+NjFsFD;3;S!7WQI zOjJJ1Gv8V00kjuqLE9*~G@Qa0I#jlo{T~zRg zy8Y8a7yL`v5}x{~Aw4tcx#<{8GZ;!iAb3-0KpQFzXhZhG^;T&>8?vdM!Dy>vBDLF#>r8{;ntHs?QB886 z8d}KJ=}}P`ExDD!t!;kAvAx>dj@#QXm~}L`lMN$lSA)CR+?{(E4ElOi*`hVlg7Q7D$UiNP?=4ddZpn9U=2B+?Rm zHXIv9+B}-N+T4=I*gRIn#zC9N<7uYr>oepJJ+{~cN9k$vB!*^iiZZA|54jIUO=YC9 zdAfS-H5JUDuJXZM{S`1I9TrobrOKbpjq$l;wT61Bl|HEKtEh6)eK^58Eie zk!=Z8qA8aezQqG?{i%QA=h`ye<5^Q2oCNs5*`HP)Tw4HC zV%!97Se{3mQJI8Rg}ho-@@Z+5N2^Xnv>MbJ@eW#D>ZaAB-daO?U28;3wZ^ncYeH+a zru44XoHlDMXqVQKKG0gxLG2YfqP3;RS|@srZAYfookO%9Xwg?WS?dkw_dZ-n>(6Q0 z0B)!a=QY{{-lI+8BidyC zP@BRRwAY+2@Q7jny8N^r$UYR7;eu#))dNx?Sya0RqkAMc?H+=ZRHuK~=mSli{^5hx zsezL(Rw-2KIvM?F%mTClF9)HhRN5%-26^H$Hlh8&Xh>?+6O=yflGAsD6T_tH^odKf z8KBAo#q9+sJNtt1Uj7)0Omt8T-_uZ4cSRrT#zTR2x}uNu<^fPzcSTQq3~eK3Fx2Ha 
zaF6PX%1h#e?jo?L(5oDKHmt(*NsVan_d;dxFu)osQzDus90 z`jaeJwTq|mGpdO6H9@(P5_0AQx>bXI4}O^4WZ4i;X2v{ph6V5;TS#%*B3MB)DOG!e z8f%NGC+_>>*${1+!~YhjO#*(+{5Qm5x=!P|BSR#4pMiIvHHJAk`cLv;V1P!oHCKZwz7wu4d^`pysX^|ycTjc& zFhnmf7E2U{quPJrlGxou&`YK+`N3x84c! zG>pylF}zf?SX+zJ;%!vtJ;!gw)^cm{0e`aRs38w`4-U#q`yjUrpX-eaM3(V{2NxKN zriQ{oUsvQkDzKLd;!@}%@)yCi@NO!kZue4=K+Bq*DSAeMJv3NshH|k6j{|CLi%#?# zI#fT@hR`t$OjN;`s-n;#;xQQ&MdwS#1f(XMc9?SdVy+p0d1fM9Yo=2ca<4&-x7E0& zjbP^mgjdid@6i?xr)`{%wsQ*Yz}&PG?wz|hqXzBa78Kz2^gcfN0gt795QaxV%0Zq? zhau69@auG(1N0$Zp%aiFA8B!PQcEE0Rio2dDxJ}4(pjxGox>FI38sTfT6?;T;s2>N zm9A)W>8d8NMPoI6uC1jnwGEi;_tSOl5Z%zeqpvcw$8=MBLAQjLz7amCy+Y|bkwo8% zx^zdhq8~&X`bl)82VyY1{fE-eVktcm>**JTeiz5+4{@2E2%ta+yk{_* zYI+C@Js<0O1NP``*{gS8pWclveFppWd2H(oIY@t#GlKQEIRsi~!t$UX*cz*p#At+y zFzGzU#3?XO2z1zsh^=`U0~;gN@)VFh9D-QjsR8*?!+FCCzFAncdeD2HQX*u?c8q*% zNpzl?;NoECU4swP%nbJxuqro3-i(p1nbP|ru@*sluIXETx6zO}!h{;v6tez|RlGD|?;uD#wr~`&mvr%Y|pF zDNmHV-N3Vt3&}MNJE*MoWaHxsr zI%hN^z*;K8t+0}8O`W(6b>?ZNtv<_3Xjb6zGAG8nq!*l@Qv+r%PprFBb;Y3Sjb+h3o}Nfu*vIpU>w(jz_0|Tm ze?LzLq<+-T)56u1Kj|os$fUs@wOez@V9!#Q_HnJn1sOI%mEfFD5jZfyv)0v+o?RL7 z>!6%C93!V9qw*5zvE)w~9Wi1Tt@`kdqR5YkE^noM04ahgNXSY{-4uzEl_pM-_Zy}Sq09Q9# z<7vQEWi$5X2y9>D$)kmo!JACJmVJ8<>Nb4DP|5FjqOb#c$48zM2EC-9Yp^o@mitCwMJmFcyW@S6!2VvoH;yO2wfIcExZIG+XdULs}(tQ%%hi z&L1|fQ4wAHuNbcN#>&~>;Ov2K2>%g(Hzxa&^E-XU?R=)qY>w;>vC#&7OEYH~PT{5RO_oMM;$Tf6(M5Y|M*mY)Bfch*PMP4e*LZe;F#ToNm|JP_3@B04X_w6uo4E=`#=uq#WC{9HjiCEt&#VuuN-i^W^Bw@M{KxAAb`@zpbu>bfsf>o z6(Mm_-{~n;`XrsQp+Gug(^>k!^;uQwCPxANiKA??fnHwfwc(2Sve)_@7a4mtc^}vj30IgF(VDO=qRR}!cvS5-41H8Xk8}V0 zxNi#mV=}HibPX*&j6O}v_1_*`a@&=<6OxJ4uL0guNAe8oCC@f8!{qiAp5*_Xes>=7 zoK3<>5gE5U?+VybWRb5uNg`7V3pDuJvyr)s{Pi2pTd|In1vn~LYk=L}4Y3SsN@-d% zs;@N~ zQ#k`0G&#)R@NECL2n)U_ktXLblxZiDqZPyllQVA`*hT=f4*$T8oJ&25HIzQAqw)@n zApus9K}fe7+A`v~IHCW1ViU%gU51@0yS#~V-u#SG3!wjgR#)Ln6dOBmQh0I?fp zGG^Em|rA_4wn8KK0li?1?<>3j4prWPJTB`go^>!6MMd2vS5?r{P`-T)m4XW=1$1W2f1MfgUuVYr z*BQQjzyX5{3Y(zrl&;GH#6Uv~$y|LbQpU13UJmRHeNhX@h)7sDT=C(G6YTHIo^gco 
zt1G$7lf%@NcrkdS+{WG*9wiTt@FWDjXKyjx9>e6RIr!C{Js5rrVRyVKfyQ2MHxUON z+*pIh89d(L36Am+-hl84gkp8K%qnCQUlMKQlpTe4Qectgjqq5Lz)XjG%>z3_ymLI* zrcf=1H#fE^44LIA6M-pdExhiPw#4RXJe^^thd*b9xNL*dT6gck6vI)bWxMssBj+Py z3eLq=1H6o_25NT!oV@X=1<<20b|y@)Ia9g+L67FB8qFo@PQv7^oDp(Zz4{IvHeA)d zigMs{z4+vYC~w~4%5x8;t~B7h_tI%^wZU)Oyw>&9TW=_LzL$FoWYf2-TynuwPXgrT zT6d!X9P?_)g9_qQU9Qi*(q&)WR43eKtEJi{OF!rwl4XH}b&4|L4s z{f7CnJTfpWLQeWPICo%3oVRBX4+>l@>|M#xv$4#EKLTda?|~!k01Mp3eECCQW>N1d z=5J-8WN(~+xlqHA8#3$Pd9ajpABDn?T3-M6BjrWpkkTyHFl4S0|0?Up>y5JMY%Lu+xjPbkwi2+|d)Z)@CETrnJ+%h2*M5e`9i4-fOx? zu6@*E>h5d!_%#Q;r>N|aa#Ay|CI77FRYJn3@4ex|_qngXC@JqZ^uCJJ7LB~`1v_ZE z%h05sWN-(sg75u0csE*(K>4D`y^D`hl@~x#FKent)Wj)vk8v|MT#6DPawfd&qSVNi~-hmuEJ3+k>D28@&(QMa6^8d-}`x?M@BGZ1?su zof;G@+3TxxFeY7weE}HcQnbn&^O7$$-}er6*I@7YJ~?r%Hdt8O^{%+N&#O*S()W9( zRKiyF8NYis=5;sO68WL4jex=+j&&H13{2=I)B!$P%(iUyhj$hS1-|&hTV50MD6h=% zm$!973AJO6%fO6Mmb-8{o&lLQAB)tjkYk6qT43^DUYr*&TsC^{-6TqpEuTF1#*02Y zCJ^<)yC1tTam?SRY*cV6wLV9;Prg;7Stcf+Qt%Qf1m?EWjN1-h+w%EuS7H&kQkNGu zK)gJRHB6c4H4DGU;EPqvVm!+GoRizjjQe&DY@tL)BUhU>Sri7 zm3;E%(wGQ&D8x|fz>h+V0fy6M+AUljFDpeGIB!Ufi#9%SzP<7A(w)Txw!gsfX-PDW zi_?rSIVr}7z^iY>7$w~v(yMmKgYx58L!ICi8fWZt{ac0vr@z?`hk*49m(FoL&b!Ut zj0yh7G#pvb7!dA!Xvf+n*x?Pms_q=WralxZ=T(Hi%)SZ+Dk&>(G2C2)4AE|-v z6^&8u$9wMHK26@LVx-~YttK1r9#a{lcdBtKush9&c26Yx|! 
z^ffZnh>C7xv<`EIYoB}9Y81dqN&VW%g%ylE^6>JQivLi3tCLa4J(zLD2p%KD`WWh* zq8@#WUhdIu5er%$kr&4+b~t1QK=zle-_+i{3)1^xjPwioncjLZ1%BigFnP3{lhQJobuF@AVqHYeB+s0vn5IQ%gjK{W`wE3+ngo}m7!UNM`op) zAu_F!sq*(&Zp;dEN*(>xp$iFU(gerzDxb{SZ3N3=TNIN6U~-a6=yw;+6^uupi8tIs zunr$X!!Ouw;QYV9rtL<8dzx6sz31~cLN4_yp;(n9FYGXEY3_A+8nxGG;&!f)hcc(i z0f!t$9Xw=YpLSQ|%&w06;ib9XR7ZbBN_<4^`1$RaanL;(Yu5Ip{c`_V19R1qkBwPQ zKiyO1);hWHW1}&0e)X~Ojti^XJj@&zlaDASpUZ`tSusWa;pyj}7&&voo$8l+##7Or zqU56=jpmSw9RoG)8(UnDv`zaOy?+#_`KU*3t8EsO+aKYuhOC^XPgecisZFEbjpOd= zi&gs;`07D9Qv0)>Idt@YW(-N@dGdM@^QyBQKTy<+7rp7Uyj0ZOfURf~ikbDYo%p`Q z`u7^-^>yenZv-wEGbg!q^}V_GiR@U$tc!ZBEn{|Y6(AEYLc~9GFz!Eaj$ZMc?ftKWJoj zLTXH7vw+)@7r)JS;$j3es`%BCNzaVJ@W)07KZsXnxRWh*nBm&jGU%zMsi zX>M;b38#;YkXw42hn>Dv&u)G8ctN7$1(h}?UT*)$EG|Flv5dj3tt>^5>{1>~U#jCdsZVOz7QqIbVLfz-$=>_*HMcS>J60 z?L9KJJusmJU_#j&$XajSbtlll&DU&@3pblxz?`2qn+Mzp^slb__Y5veG`OrXP+$Vx zWyUK`?=pv!b*7BIeZQ3TZ6ft+>T)mfM-0BKccA!7EbCJZM^|iB|VPqtv7L{G&eX6^?8}IwVJ=w72 z#rJ_|;qxSi6tNvq6e^2O!gQMT?!OO`JCn!fbLrR^v$wpwme;3t^%c$M8{&>eOgtAK zf{ur_D{;E!HiP7sg?wrzx?9Ni$USm1@A0bMUm+lV4OhAH%MukpXs)tOu0myfzq^v* z{-x-#IQ54vu-m+oHpb+alae4Fs&0fgt>P~Hb|VdCs127@a~sjpm=qZl*k0fFj!Wa6 zmT^U7=Y~F|Tiw*qS1^auS7SC_YkU$?_aw=m9}2*$jWG~y`!H`>-4qEjrkl?_qb|AG z7Ab$|=2J(J7Vqx6=#I>Y110K=$K*BMbR~-OezQ$6maNjpsY1g(zKQP8KG{LZHOA`iVNS=~UI^ za`9B(uTCv;GSYX?H!_AO=6i*m}1Ykd{O z=ad=vdaduW&@`5N-}QYal!IyJ248h?l(oR64ZayZr=MQ07t?YNT2cM7XZHn%L0RGn zpDvf~a>#pUmv67zn)+w$dpC#rusM8c)v<}71ji-@HXru2%;CTs|DsnfT?NChx-bXN z!{oLrK1)`<;_$n{72oGh*F3i{$H%hhRiE-0?sL@_>jJ{^?>?9s&lAaovfz}~hj&G5^X*L}+z?#w&6py)=Zk2iXin{vJ-je(r92`_!ax7i)t z{QU0XTYw(7beC`iJ&@BQ(~~_unfRSk&y??c-?&5M_y;qp^@aTGD|g(*!I%zEIebwM z4k`IR2)FAGzW%N-4gP5B`i&4R8y(S-Ppa%PEzAm$xqfm=FZ7ddvnyII6sq{)NvLm6 z%3HrU9X92$@2gD2h5(DQ$(LfM%tw zW3B|cn|7?0j4f}e6EnM)x6Zp~K%-}5|H*})<-l=X{j3<-zmerixKVYYy2uHQtU-9^ z$41sPcVO-HXY`VzTU*_b`bBH&lsmOe9A9gk+|=E|dD(%d-K|otgj=-qwYnRybWp#l za&l{{uzZ-?2$y5~Icm3={j5#yit6!iUzd}W20HbdHqiReE%bPoHl1bjv6g~*Z;!PO zxT&+x-50XY3`9k*h=G0BI>R$$n|QaHsuB4<Ahc{Zm 
zFZF(5>#Rm@#)m^rb}ouut9~8j!UI-KnXt=>dO3%Gy2DELIs;|Dz4xZH_F8@Mo#A^e z+ijoVu`|lanFp+Xm?x0BFfi?)HA8ns^j{-xjfq1?R6jslRdSbORw7=Xbj-@_u2#!w zch=6rm({PWo9E$SEBs|w4L@bQ?v9&sk$-K6&A!l_VdYcewfdytUbF z*5M0lT!BUsTf*H{q0_9-?^r=E>;2*zE5>bOPp+@NlnFmsy+O~6pR6%%$BJ1VI-x#1 zt?K)o!+PC*F{v2Nu5R+!`BulrRzttDqT2YvD&{ce$V!~Nd*g+LHNox#|14KHj=uKI z-Ik!FrRDAuIj}z_I^*T>+k~CFF;rKOaRK-&n(Cy@`6{xA& z1r!ZjhM)!Z0XbWMg47nEXtf2XjoSUwj@r9Be@yD6JXtzBj>28xI^A~=Q;tUcy2-LJ z{>Z>^;ort^d(P+aH|7p<1TNlgHgw|M{lpXqjhEnWM4mLImR{Ds~9_NxxYL>7D5Zv3wDqW#TKj;9*^=0m&u0js|S zuaqb11_$;um1FArOJmo$0W%Yuu%WZ10>7o~2PY@e!j+Na`w48TQ9oXmr0F^fuZvdJl?I+~{hmxM0eTzvD>DAk+ zb4qYnPd%K}1Ap2EhT2}=3ES)8E(wa7audJOue;~8zNJ?*ir9w{A6I;{@u0WDCshjVA6q;p=6de#kxf#A>{lY5vrwFe13RnW`i-Z-4s z#8!F~*3aPnN}(PY)6l>o%N75SyZ6Q`O!PU;bC!f7ti1mR`p`x%{tMHhkM* zH^cT58?KB;rPGkaglIMRh#5to^w>{Hhl2U%4SUfR>Rx+i}|yCR8i0pdE~>WsKs4HNDfu5Gfy}7%&JA3!u zgoK17B%vihfY2ed&_OyV2+|?+UIoQ~Ac`ocl%Ysh5flLxE{GtY^d@2hse%Y1U{?^x z|2exifV{r%_w)aunLE2XJAG%)IrE%z_!1m>e*UOg%2Mil?wV3k~e8b(8% z=xYuw{4qdr;3ja`_z7jk-bY+xyd0u5Sbh7ML(BfxG@Ql)uP?+(x)-u{&^tUydHS0R z{jorhc{1>~86aDx-s*2|sJ@BEu&;oeIS%B^O^!1-{FlHJfInrR!JL=F%|td}MD`); zBvfv{nm@oyDiROV9{K;{2?jr0^agYh2v(>LeLcY3B$&0{V+SC5(b%%#*V>gp$7Laehh z&Twr=1Z1UJnYrc|09(SWHnxb zUBjUN7K=ePft#!f;W6lQ`)IiNNb=99GyO{sw}WE8pn`z2v0w4sRUBbv=)WO>z!6HH z5oQsG(AoJ(FQHxgs@eqFo{{w<24p<+%3PD1RsYFziCR8@=p0Kgn9J>Bl(#Gdp zbfP=ZRGO}uzhTylo=2uy<~jQc9gz=(9-~_LhFSZe>15X}jpLAgI-f`Z04r(yX@gHT z07OOf86R0bG5B*PRGDC_Wf~=diRY<&x(~=UJb6Lq0ILj8MOT^~n$IHxayFl#^JPAq zAT9T?;(P_{q`@{if1QFi$kxkBgQyt*%_vx<^VJ4_i+0qxbp193Yp8}hR+@$Qcy)56 znG`*qZ#DQfD)J-Bb32tN-!z*w7|*{l_}BCf)b^kOG@XA#!AS~EnY;@hOaat@rc-yF z@v&E&=JT_3=bX;J^|863xo?u{?V`2jvz+IJimx{(2rCu4-k_m9deOnq4q2P zP1UVfAX#1CXx0H}rCi8NwBBN80U}{nC03`m?-IJhkn6dSSu61^Ji%A-n-pI5wyV?# zWapo-+ltg#o6MH*4HKuPZ!#BXZ;9x+>pd}kIFP{J_?C#3q9FT3HQQ`Xf<@K@b!4;I z$NXH1VxqVth(aj)0}}`7accV(vlD+Sl<=Vm5#;B}zs)RYzz!pA?3>lK`o5GleP!a=3o=zUvCOUUc7eHC!*g@45-Dm)v(<2`dwN4*4y2zWEin zM*rQtegoI&O2&(Y6TFEV=+?UlRzs}|HWL_7%)k$|-EG!u`VMk(70olSTvr52tP&Dc 
zhVDcaIu^z`GFB)ca>(!r%v=5A&U?7xSRpq!*`3;LCe^3x9NiQ^o9J~yhEgW(>l6bZ z3CZteM9#yvgYMAlZjXF`tbuHDvQc}?q}+Si)cG&< z*$T43tQifF5tw68mclLI~Fv^rQo)M2k9b;gdH8wGG9pW0ukNnoxb1R+eCi*{W zy%VVbyZV=`J)4+(FzW{YKl;Pmg{+A@p?=-E*DTYLsz_WBV@)v5P4CMq@jq%wcc?2! zA_{V-P`HzcA^{Z4$+Ox%v*tj`Gs+=8TB6fhVT*6eNe6(AI|{>-0Age^hJ*fa3p}$gJgROZ9tG;J<9!9)Khqv8D1WBW$hrmH z%l_!Uj%?(=jBFRSwi0UPei#4OeEbYGKL$dHKDV{|&4gO_W=OC5y59F6T~BwYQ#e#^ zzo4E*{G8Gcm@S+sT?ii;1HW%Tt)K+-2rGa_sR50Y+R$90UatgVump%9`q&8lncJ28+oZ+#V=9n}j{n2!&s)R@C zG*X@c-A3*GawoBV~MEeO2t zkdI)kLEwEKuASTKlq!E-RwH%i1fo?h>Flx&N)E)=mHPOD>h_O$7^euG70!Q#Th4P1 zGc!(AuR2xjbI5!QRo!{Wtl?CZy3fEA2EYaBf-IHPTT#_2m~B*PfvVQKTh)4~;4jGM z-kL_w@++t_0ifs=6E9(NLP$e-9KS@Yx5VO~ zAB~eq{@9dfmURbemLS972n|f3+fXPn>0Cf1q>j#USF5K(bJtO80-b%Y1BkkC$5UH~ ztRcbW?`>fb9Yf5EaSeTa6oNb6Qk6JnruS{Nowd%2jE{tsG5<) zOAX)q9i=(l&!+;MsUxpQJ{JqhO{DQb1*xo)4l*0mgwp9yQR?ETns|B5T_F0TKUN?OueRqlC>(d_I)j38t9%$$b}M=qx;k$4DCSlwfJ(VlISind zTY(@Ngx!~lrO|eaSe;)4CZ@+(#~LyFHgRdRj+MYV1|wBTI>(JuX}G#YA_L6(>bICg z=NZdUmsmG2Z#?T)bx^YoFzb>Ohf4=R*srig#t+sd!IRxs9yjeAYfb zVmGt4uudJL9t*L~AGr^@Y-C;OJ-t-C?g_DO8(H`5EG8>rB`ca`Y+*?qx3Iz}H#w^3 zMwS|{BiP7_#v374?4y-=6XQL(Q5Brr8^=fNVab$x{tLb61yqGzaLS87iAYQ(Zf3m& zqs1{kVlz~&Ajd$7c@^$2FXtbx*F|-Av)FKB7BESz&+@Fy%e{gJd~vw(cwULB!(T)U zA739&1!&c08R5od8Pu%E`-|yE#pE|DlA1+1JMU&il8_}HR6_p_@sZoMvH@8>_(O^@ zaj~2=jrSpDq{m0bm}u3&b}L!=EO~#{gD7kFX=>D9wf!qIAsZuxc6{RwQhR5dyeruO zsx80?LtrvBOU4_{AXLwiD_MywjM%+F7^)J!Hp@5cvY5q|i8o`c3aEsI(uZwi!?XO( zMZApnZ)77@vMO2j{jZLs+PJB!A{sRcjJR2;d&m3wlDd+eM_rmYLwfvfA377{pWT@H z&u$!jUpIzW-Fuy=iXS&qs*Z6-848^%^S*H>E{f_Jo~sMk!_N4*z3wX$@6#$!Ck$6l z9yjB1$5yv65Roy`(|a=j`D{D0i|--&iBV!Ke*c8ZtH%1W0@{b#N7(wDZ5S$K z?Var;G8BEnY{!Xk;PpV)JPG>u98bh@-4DY1B=BzlqO-!#Q%lA=^%iI`dP)`sS~|O( zDWXAO&^=3x1E)nXThznbv_JYqRqTPZD_R1)4Nu{z%oHB7YlQt#Lia7#Eh87rc+w&1 zU?mooABrGhB!7Wdbd>?M2m= zq%>%X-`HMWlo}%bWV?7VkT38O2KxoYsSH+7d!3hHxn!!dkhYws@pR_Z$m}MOO4zrx z!})8!c9@ssr5L0I+Adz2XJDBOYFnLMKtu0-01dze;k6$mSh$rRp_NY4pc@EBT27^c zsFYI`y62XVo4;uC3an@zcpIHa!hA&p*CsLOg5q&`7$X1${^J>vfHvfi4JV6IMG^{< 
z{0?Y>e@jOWSdg?zGVELGWS)cbjyOxIR0bqOd>*)V*a7%2@43mH5ELlhDMGSovh@I` zi%#hkkVzh7o&R2cRIBvARtx3Be281Gq}!+g5U4j<+8tKjDXakhin( zv0cZ2H);~X1U>3${1{P_5k^K$LFkKmrd`X@o7hzK#A!3L(X$)bbIxW$3%-V}WL;Kd z>8NQN+4JHxR<=}lTcGbcH414{OFIvr^K}2mOX}y-W=u?9$eN)PhL3VK*!`{e#Yd?U z&X`pl0-k8i=kYye(S!#RpC9W3V-Gbj#|&^4qj^nC-w241hw)nY9K1O98Vq`c=+CAs z*ZEk8M&Fx1d+`y_*NjPoSZ&O5flp;BEU=`a+wSh)xy}pu8~CHdo-`(t4(Rj>81Jgi z#36q(?i_UPVJgvZA?JIO?jH}rbF&Jpq*S@dFq9E@DUQBj@*o@J0W80e;`18S?yOlv zOmCqkoiz(YmBM;DV*!iM-uAqWx>H9jIvegNm))y&w~4U0jYWs!*gJ(cfXK&X3SCl# zQsuV`Y?W8uVt%wMG7=@j(hS}r*5+ZnL;R7ScJI;-doxpjE5TQKPd>uGV{3D9%as85 z?-uWO3ca5dp=VSm=Z*ih2m?hJo7uk<;R5&ndkHAtZP&dbn*3`KktibaUyHcv5OV*$ zi2p31sVfXXgZk%&J1T)XYBN?-Rx^;C&<|H=53ah`Ot{x;E&=%qXzpKer2E@_6XXpwJeWCeZeSHs(24SE0md*iFB~Z~iyo zq~fjdj3he{4|b#-(WPqJA{RFXqeFNq;DS36zhox0HN@k_gX$DRMs7XjodxX2EK_lR zSnRJ_?}#hYF#r6YwT;BklJYJIiE5CdD0Iu9?&uhMHCZ;}j?(0_l&tQMN^op}x(I~(*LIch}zo})(% z9ih(NfZTX{f^4F87MAI1>SeQ3N)yL)pU#>|@afITie5_+kF}Oy$YBn=jr#Pm`DCcm z59TaS0}WII$-1gAV2JTs4O_G8h(V&3G{r3ZR(?=FcSF{NV?S7o(O(S}G-Vl8=LY~P z&hEG$nmP2k`LqcW=fB)EYuT=(ZZdR*rhpK5CY1HJSq<)))W}=?y4*O5h2H7_ywIy)!OKo#Zl09kgZfP*Wk_g)d8|KLjYHB2NDlCGH1| zC|P<=htyi2BIzvU!Tw4AyJY+GsP=iPc6n7NMMkOZJLA0M*7|=pB7{b~^uGuBU+e#b zTkFI~x%vOO)=6)(#$Te=Un=Go2Ar5DT7Xr zhtT6+@Vftxot-cwwpxccAdTZ(^s94*_tR}NT0OPP?Q)zd7L1bL7T_n<%@VS)d#VU# z9Beg=?8SgyhCY3ooXKBRXVYXAcRFI%P2z2eESuTG`Qs^Mp)@hk(Pi87(& zlCq(T?R~q^wD;AZQj)Yk)|8UF+`E-~m-$PL&p@Jzq3s#+wreQ3U)JdI zGw7sevr4WdYvAr)@G5(%5-vCRil$ka-G>Psq@Z$)qQ{@HJud$8g`Zf1g@LU+B!1v0ExSO*AYrmA7Pr0%Uq zwi+x|yJuv4XzniA*u_q_?>gw^s)Dhq@CQ>3c)+v!fqg5y-)VKmep%gxbAA!~>a%L_ zK}lfDtq0{-!dc@sRBZEFF>F4?)UqRzPS1ZhBA0=MP%nHXU)D$$JmqVds&!xwtL(4k zL1)mOUG;Pkoa4f%feU3sd8O|H)>Mzr24!}^#!yKtcP^!|AF?}`3d6tsU>Cu(VAFs^ zhwNLIW7+7$Y!89zp#$;0re1DD6UHK7QX(06qWn|^)qyTSc)f$yP-wtShet7kj} z`@dd3Tjwyk;N#Qj0w#dH943Ihd>(L=%Tg#dpMnKC9EC&n_$pn%NwJs1KQUlO6fC35 zhVwM@~F-~)A=zUYsx=|OARtj z3GdHe@~=$(H9t-Pj^>-t(Y%+A=9}vYcOEs&KYzL--Mqx_yvQ1N70K0|K7(h zQJPEqGQIZ$#ja5BBPF;>_pVXwI>mmX2R~En7toXZH=X}(a6l0MH27cS$!-Q!=Z0fM 
zK<76N{Gh>ZG6YN&q=;lgq!=QV-Yf#cgoYrpM1U;uiV}1)%_q`DNeW8Qz0wq9_(T~| zmV$DWB$EP|D`+BMtDuRf=o6JhWuK@bs?udO%n9MuMRoXGUnM|QdPNPoRg*43RW?z7 zUXhuwjwAq>Y$6__pc%EMIi`YW;S()ID<9h~T2qc~bkUYB+R??sh5$Oai2ypdiRfSm z0EA16j#O26B03`;^NG%)3ud3_>L4E%{U{d*02+Jr4nC@0x-KQ}uAWK>WQD%EF1Kp2 zFX^I(E_&*smo9qiqPs3~I6R{M+#9RH;$LJX_31nV3bLPhz3RoBV0qPgLsX_J+b7sF z^uurRh)CR{3pfq!tBZa#M@4`5;MB!HT?_*5B=p6fGJ{7Hg^B-k6+8=@`V%KuRw(U; zJi&Q&CH|H_@WJY(Ub0cB;w`BQIMacjyu|2;q8UYt7L~AkF+|c?C}$js#@R8pBgQm70*dA4aiRMyqGS<3*yB&XRV&*466A|3EsVh>mg{-;9!ae_g+#n zxmBTDKPg@ov(ax-%;Cs=u253U6R>wcxDfcm#k~b$oGunh@v2y)e&SXQZ85AYloi$p zsO3M+S!LB%SjE)=VMW#GE5%Z=OhSm(REp(dg%qz*@H(;~^Q=<5L9s@>F${^|0dEN> zSK>`6R*BW>+-9?ws;OD^fp2-+H3_3M%kr*~;vKP8t=6m}#yYWHigz(KpKDfzJ{N#q z##2@5BUXAh@t$~Jq8m0~)TP)+bq-0fNoLJlW_i9Qi}bQ<^VnGFU3JHM>9NDV(cO6^uv&?FmHzuLr1~Ikr*S6 zsO=sLTK)p=w~MJ0f5~DhKEjIc)S0&;GqbIv_*5L##b;6+6F}vBfsW;qs2?7o)_*C* zSK@0ajte+t{)YPdBzjPsqTsYxri(LDoE7JE@vRi+#RVz86VuiD?ok=J1n_nJtBZ?b zkrdyHOHy2h1zGU}Cabu@O62(33I(tyeLH;@D+|bCyi@DYMSOz zoxN7QLJ^q6npe{$yf8=V8eEW@nv^iX_8Depj2d=bCWJ0|t=A)=trXjRrQMb-R7>ygAn2!T1yrbtKwGMsZF zxH%3unK3)$OR~;*w636HeoVER^4!V%G0#-q+4Car;(3L4^Bm&capul=0O#-vV2Cgo znHlh|Z1NPuO#U`xQmKf+vId(Oz83l1pww+XMxG>Ubp*M}98V=4PA)Hr0Kz27?1X z^jTq6n!H&-?H)r_Q|GmSB6U3BF)W7@bpr2am%zbf2}p?%O@&3!Ty!K)y-n}XQ*xE; zM5LAtIdUN$BV-H-dd>gH(@jTWMu?bkxQQuI7x!*4s3K(JxD$78GUIR7F!=_{64jEk zc^>%f2lL5=#1mF|@K=xF;2tJt@NQ$go(I^N?>`E4(n9L&Snq+&8C*BaLs$Q|l-_@& z#79A^9YV}S948C-kbJwi)wnu!KFOL2IO8guj&{X)1u!r*FnMVgr|QKWP)W? zWt6a5mwt#hLtYFAvhEb1*u9ScBB~}jNz9yOe`N`)y@rK$ZkpAzaBIvlDmMWt6F^4f ze=~&;CG*)6)2tUNw+pAK1*=1Z`+##(PE(xlH?^E8?s49Osb8Nd;k%R;GF3O-`aZWk zW=#YH(UpN{J_3p3>bzPBqU(-`kw|*R-CED!op6sTGy?{@^D9)6d5h89Ak;F? 
zrq~@;8zB&z_(nBnjOZ?2^T7B~EA$q+7X}j5%97T`=th7F5SG0#-_L6B10Ylnft|MM zw(bHOi5`daqfdyM18rL86QY>Yf6l$`cWY9R>4d4^^_&xq9x!oAXpX3N%2q{CVI&2s1=J_uPNip@8vIc08ax6 z7CKbTD$bpk<>s;Zvln7e44W$g<(sfrESJ}FoLNcsY}J_hD;4?93P9HiJG_*fw; zhp6u!c7$SNlaEx&KBM=JnK*AK_XUMt!tG2Xoe%pEfE%mEzA<5Q0?cJa^88@4Gjw@I zXJ^&$kGzkmN6rWOswOS1HvF6lY$=eWGG_)OL)%+giJsWE&<#2?iO8n;VV#{vkvhAe zes632n)IDy!0Snv;u}Ras3riFM^0(3NFbG9U#gqGh&CFLKzW|lbTmP>yO5pBYZU8F zvFjA;L$RMI){A0rphup}5c`EBnVzmE8j{0ys#B%lI;IV7?af@kZ16Dfc1>%;gaw`aRxbtoPC#3XB!7fsg$aA zwmPsRDh5V*ta!L)F3?UL)}lMBDLf`F)Ukur3qqC9)-G08t`V57a5t-?R*UsfW4c+d z^Vuq~yOmIXHWr8k7`4>l%$?1@iP03Gz3AXzAx_}jbyw^9TVyK4 zKft;i;oNN)HP%vjL!D2*s)nSxH)V_KI4ltSwHi+gI(VsJhOxDk9fc=2>Um zH6V7{vLz;{I@2+V;w&>hwe-`t1eJ2lv9+;dwPm>#=(0FHZZUSR#hP1zVH>Rn-J*qH zl3}UlRJZmsFZ@y&Y9^Iy%~}efC)O5Qo!w+vu@yxXAMAzJGI;Gs>|Fp0X5&}g*<=;R zVLT)O7WweCIFyAg-pu6iwLt++`5_GOaHtAfybV2rIKU$HNsB)m2~wcFK~9Mubqo~h zFi!|MAj;@Wzg;NkNBwX-s;^OkrOrOy-jFVasto90}{ju7N@-YN$cR z2`xUHf)Nyqq}w=G9u3p1WFB!0Wi!@sI}8JYaE#?TA|6k7p761{9KPgTuN(CCM7lkR z?mSJGlPNaE&oM~Rr6pJNWF7_cDd7SGvP1m7YVbt{$qoDRB?d%?cyk#A%b}!W@fCFc zH6N@pzhUr|I_c3^e3ijhlZ@*vgS2NXzQ%B5h6YIt`%y0I>8*E-u(Z(PSjR{6jRuJc zNApbv2?=io{FYl9!B>ux@Nf`6m) zlR7`;SePsm8%PRm+hV;Wc+*hZZC1JnjM4dNeCkl+L7$_$L(gxwE^vNU4c%!i!IEx& zY#rrqhR%I#!E*omp-Z1wUwL|bONL)pG6+}CNq!zCU-D|9ipPUh8b^e3oSNM-A zWxrL;y2`IfewAOB{3rgi%Gq!ANcx4tNAy|Af8&3EHE1_<#Hf^3BVcV=g6B(9=l5Hc zBmd-o>HLP|@Jx2V>TG>V#r{UYUr<|o{(#lW_?zFN+TB*?4p^}iIWkc1}|l;CeYVrbtHJqD+^28|`Lv+C^PkqTQELhW1GrnKOKJ&X8Ut z^vR^+3$z=S(+l}L@T@%Ef9Z(zP;vzT#lka)e0N_JK*Woz6kSx3qA~{E*;8BhkhZdK z!*O}*qj8a;2A^85MOu!x57ziQ6Sv0JO=mZ^$vuJrK z(H6|g39up+MN@VjF%J|>8v#Q8E=c_E@K!0@aVvEZ`MM>1kLKpOuU@?S>O~|xfEN}s z;ODB40{20JG|~T1y1+M2UggR`{R02r0JMo<)O2yVWae%1oJ04sx#tEDL$?SWcjYe_ z+Jf^O_sI<)+8l`Wl{|nXplMtCpSdGFG&i6j>n10dl=*LUz`>uOuUWM_XI<++eU6F& z22MRqK!Nd~DOaFPCR~~PjF<`1SS`?$@1s+xosLhFSf|9R5#HP2(0G(Y z9$0Wf^S-s3@ixCBE%D;p@&^Funm^nDfRf*z&HxkxO-7xqIeEr0;|@eEPPg%-TR0?& 
zgT`+p)($~ySg6tkE6BUwKwUk6hGv3v{S5VD-~qxz3f^!Vz5G~*S`Zjc^hOtzi1^w`#LX_a?ye@9x9l+YlTy=fa|EF`?B^kBQZWI zDve}JK}=|XQSm+Hd~f{*k(I!U1=u9GWuO5l%0}GxVjPRS!ywu6;D1OUlCpUKjCL@a zBs*Bs)Z9zf;el=zj13NF5rMr}2j4|Q)sq{Dd4QX(>tyTBTxyMz3UvvmLd>0{&}Y02 zEDFa$?z;_cpO??Z+{RkKv0R3ZT(&rGW6_5OioXS<1ACBDdoZ@PGyEN4{NyXdbo9m2 zaMvS7#z36@Sqjq?L6lw#yoo!wEugF$tx~U8v6XPlS?Uo#b&3%-SP_5l%rLtYgQt43)Cu- z{?RR175>pJ`3anE!M>-)U$ZXXof%WYGlL8X+;C<_5G>|RXJ*vFh}~P$Xl6L6z;|h2 zrI8A;`(_3vA)RT4G50XUG6VDl`wu;E%TI zk-u20AbwrpB=3U;u5hyG6V8+pWvUv#T7A>Xhf7X`kkTzVF`S5UhvZX-7!&;4#NTjG z@M$-nuD<)#O5v5<8wtNzX)5A3h)7OXjnY-O->ei>k1k*R8WtX3|II3>a_RCjO7|hf z?B5Z)K{3ZZbi3cJBK(L7o;0G>G(6&2YW?q4z3Mfb=G24|55u0RL4g_2G+38z&L^-_ z-bjen`tsi&ZUc$c9^%^8?T;RlFZl=V220}0`ASLGXJ#V zA2chl#OL20!%v2~r}*m#jjT#8FXAt$Db_Qzzli^54FI_hOZb}NED8~O>7^iV^8J<~}ws^$B$7lH~{=yfe1 z5*0UYWQ zj8oS?kBd`fhT*#f4rqXXk`4L!`Ema3j``v@#-oGYV(+QLdGyHM zOf_JRza#E`GRJ?^>)hQo=VMDPdeu)jtlwYtS8&@`_p6?l|E$f}pSACUxFE;)+y|^d zz(SGYwP70x{$Ge;_wl;kI*T{8x$;b5i0FL09tHI&Xn>OnADuqXMH51_)TRIiza-FNb$UrL418Y8AEEsyXeHy zr_!-cH=TF)!HOTEJ%Fm@J*li-KDZ&ufeV7LTZg_rcv-*+Jy`s*lyZQNW%7X(xO_fU zZ75wfBTodmVC%u{S0Qe{3LiHGz6b}r#G@z2L$#fXBL z^b}0LkHc$tKmIZWuNWLgvBC_$0d}9yzK0b!dNKE8^w&RtC*q_>lZ;UB=)P?x?#d9Eem3jcoS4(^{Af7I(; zsy~FdKK?#sxOnQklBq1FuTEb zkSL(~4*oF}`3YU`q7K+i!5%8DB?S=c7UlbNf(^~$2Mm6Ydiape536-^{pm`3&L0(e z_zi!^s(J(y60L=2089o*k#xcm4ZtAN#DU`A%&2G#B+1%Fd;i(f+HCBMz@ z5RVT&T@xOcn zS+z#}hx8xWFQ<13`FrX=q;E=@6t(svf2&gQ@S-CMxn2hOw>lgR!o6U!!<0+XYm%Y} zJEQlURGuA@E2?W0);A!oH9juDJqDHlF&+0RADQns4A*Kh5#u* zubh!1(F&;Ur}QNbA1t(}4ntoei-H=WrWCbADOG+zFh=F>u%jZRsLdA5DxVH_Z9DuW z8oNnL7IJ&M(*paJ^;b4!YbqLCDh1yJs*;gU!+<)@^0NI(G4Of;9G1qU2M z6D?JW#!EztRuBy_{gEL9hYTGHkw0cpu+t9#WexKm&RO<`&$k63I z{->fm;53=4b;6%U_^~+f9kDvl3yV(pEAm3>=m~#0{P1u51*`-(uhGfh3Xzbi$T$8< z;U`u8@dUdV^t$0vnit0ZM4Q7AYXpv;K-#m#M~R$zQKfXAf=fEnhmKaN)?9-NLg&Ohh7`5_MO>|_(XBY>>{8)Fp`%*%nME_ho>P1P(8S(bhYD@zdS@brPS}I{GamD zYWHb>8qZMIPWv0_Wsob;oi)z*e>XFsilO7oxf?I9YMk{aHLQReM7KTHfH&iYgJ%1DQ=QJp{QU&E`a 
zm(TgDCs%`Y52BMh12nM`ZK7BfVjfhb3h--}&-s%J$7ArJXXk|EM7NNYO)O+hq!w`3 zRN`BI(aNLQC3xeQjO7`IVE8pG<=4HD$XN;gA7_>0M-=aXqB>}5SwaCAkNPUkua5=&@zDz%c{EumhdxLRv6K%2%UO3VI!le>3liwSu{!X?Dqh2$`{@ic~ zsH9`rgRa9!#`Q$k3Rd9jxTpsR7_vgupB->qzxQT`B7q(`Ou-TC0Q7ql=tMgF^Ri<; zj1=5M^(GJSpEHP%r?Ri<^0>}UV1w{Mg#!7Uq+c8Z2H0tX+`m^~XAO3a(tQhy13s2J zm0d8%0epa6bR57#s^i0KgABVc$#t%{WC{BbGz_~67gX#T1=mf|^QU{i(B-dm`72fH zSDpQ4kYo4^_6I%wlVX1v?1s*6Qt5yDz%!F)c)G|#`M0p1v0HEu4{4b&G4rk=AbCOr zAh+zI0Y~we7C5idseoH>6c4h%7l~P5`XEt|I$Xu$RX@H!A8X75j+ZP-L!Ud^$$>d- z#$)Ivr#s>e`=LcIZ}VyATHt=)gYPl(EkU3p;-%oNYl#5Tl`LW!ldc zN=IjcXoo^6If<)f~Xn!R#rd7MaQTTEo3ZkAn~(?udk+#jNrv)WnPL`wHx3> z@YVA6AB7#K^W`1mR}$`O;V?am*O1VdtVM^eiti4@suw?$aDiUIj*Eh`E(wiD`XIZc zI1cEQucBS7Kz&{h$E{r?Z@}w;c9BF(JBO}5+Hubz zJ^a7p?2nM7F<~Sr5+)`NsheGZ)Iuc9x0Kqs{~?YEl{#| z9oAQ(B6>Se&cY-Hs7Ma%kzXGqBbzCbSxmvqw*@*1!1id4~D4nQZJ<%Rkkaa7)os!hSNS4Y}5lX|K2I1hNkq+8I(hi zM?h2SNj?I7v8=KktBr(D7(N)s21hE}rM#nAu7u~WyvlZc4RSxVBgrlr<=7>~ahzk5 zR01tB!4-Z_s~#PVr6myBU&VgS2i|^YU3*rt(fnaht zTNFyFXKxew4y>%T3*?H9J#yP`~Xa1 z;^qhAcSv?2tSFzeXCLI|Up@RwJgYXSlTp<3010Qyq-ox@@(em;;XGlUDi!eA~?D5=# z>s(q~EUYYeuG8WIb44iIqQxbd`wUuIlDRZ!X|bH81>3j*bHTWZf+`%WtwW%W)nyLJ z1F^Q04X-Vkw6K)r@u(4S0%SY55>G^Zh(>Uh6rRFUad20}ak^K7tOOT_13ag#SW9>s zPp8!3H3SO@ts%6al=ZO&yqv+|*VNMN)uS8X;K4d8vz3A8q2IkNQ zpLexT3tFZt(E^%9Tw$(x7ST;F@ht(n1_8^qRG z#MM}I#BGI-xE&{aDd7Wd%H)AH#c4 z6MC9($=jQPCX|{u;=8Axs_bL3IW3Sz=u`{46z6?ufvgj1+{*sklT6IAbA|v_2M}d_ z9j<90df&lZ7ExIb+cgF69(w9w`xw^-u{Rv!gqc|QaAMsL=bU5?vcH6<-=XwX5C0Dy z2`6>*6%)Fp>|#Djf^~Fl38N&*$MNwJNG~PfCVZyz^%7VZsdC}iGuQ^cgscvBM|)xz z)jXNJ_3&OKGt?`T&_f_FZh&zF1wTP^uQ>XLyl8BA{YM#D;QAi|DO^xev0 zuZw{?kHpfrNawpH-$T{d%lF+y z?9`aGWM+&S{3!&aYla0&hE{j6Yk1%}1{}+Z(6pZR&ItZ`sChqobA+}4C>>*n-Ct|V zCWM9!v2Sa|z5t*~XUDSVEWqF65X)VF8{n7z&B00MXuGH@XM4P0)i+ee6E-<^tnllu*?44f0NPULF`-h58N zS=mzuZA5SIXFz8t!=1hd8|u7UN& zI+eZ5CVBFU%j~WZ4za)F@uV?Vux(vY=ZZ$gP$6aVu|xG%+E?8j>vV}u&pq4)W)8Zz z6gpBpNTF+oeLPxNx&TLOivai_5!9jG1k5#di^?L}N0N& zQMHT|t)L%;n5!SPO0?4jWVH_oSd$l|A0elnWA&9=%S0RFqtd5 
zQP7&zo)q+=OrZ^x1GBl*(oUieJCa_#_6y zmn(HuA2CE1Lp3&U?nFZXL)Bjlr^=5o#mHRhr;*f8qkL?b7)>=CL$R^++T#=(N5Ocy ze1d`}>2dYx?6c+C*6gNhMv7-FT5Ez1?%DE*tdSVh09b@3LZ zhPZX5Sfh(~eC%zpRu@26y(1uHeOtUsQ|&#UcwcOQ%Psm18DbOtZZ^aR^t;8I-D;xK z4nursh^@M#dE{f;=)2lZ>T}L#vBL*7q>poHJU^jemo9eu*lw}JtAbEwz!(&*2#Jy>T2A+DWCU9GWei2C4J6Z$yEt_ASb=o%Kl zQP6wD=cH>ip#~q@UhXM?fmRQHVsGaILNUASj+|c#4c%>f z1ox;>d+m3R$cbl?`g?eii+NEZ)}vR716b# zQY)qvcaTZC7A>KLTte6UQcKg)A&Z7o?4Rg1T`MKE(prWq(ACOl8OlCj57x?RXOd5z)?aD^Ae3X#^>ToL%SjnJdepGdqf&Ye8agsZ zY6HQS_dIMDGzMvdr8YzxswN$_EA5yF>b_@G`P=@~NK74&lfATOiJ~s)LF_q)q<)lcPLta6l<#z^ z)C*F3QG>@0E2YnfoSYUpgNKe7mxV5wsX-oK*MKNsRF6UTZofpgtr{tVdpv&c=F3`h z_135M`{pZP2enyJo2@A|;HX_ScbpX+kc8 z;3Z3H%V}N}klG6EHA!aLXlA`3wUrcmlY&(gtfu~a3$RY@Z5n<$=F!&R9{+)N##s-J zc39-YBYN~22>wH*d}b%ZSZ?-bc7w;(^4?NgM}5DZf_F8r34o?buqK!u?`fdcpMp?^ ziMEZU7NTjfNt+_I&DsZ2+oF9awXNDVbV>MFAfzL41r_-O?Qu{jk{@)b_dVzA=LA0#!l6UgSRKb@uhWsk#hZ4zhc48)n|2vD7WKEC z;l}FyZKs>)b_kerK-1SyqyM(=Xf0W^8gt8TsD00WQ6Jp0i<@8YBnUf~16K5<`r(#c zKISWEQbq7ru~L(sn)85vtuk-hDf~0l=C&Q%>@)IN{~56g5p;yd--L{F4epTY-)B7Y z4r`$Eci>wb@WETGI27y2Oxy!WNg9q3Q19NhcWC&I)tEbWZ~le)=8l~fe;Ret_zDyN zvr4yE3bI3T47WL_!7<^suX7k%!>;uipu_c% z$YMu;B&~m6$GWM0QIoVlD-F{{9n%6!l7B}I)U?jP7gJso`xEa08p%rHl<{RxAYQ|q zQENPbw$Xp{*+>nud{77zG|Z4QMnwdsYgr731QCJw+-hPzt05M$j^b6;O)O&l#9}sF zEM=p_ayCw^fLaa*EK-l>{UCLuf8%)`T8Z+Z z)XI2g{Ce0JfS3=U<>BdLgp>UO_uzr@I#72s*VQm@;FZU|L$l!c5-5W2fpPW#n3s&x z*b?a5f-nI&95L{Jtj*ox+JuXPY#eZCG7ctLL*YfbyTv)_^Dwi!=z)U8;4}*p61^FN zK~CgF@D$K${NXbUch?3rPY;BmaW;ZyAAl3v0;7r_vP`j6bv4j^!^IwS)jsx$*bkG} z2iS6Ph`lb3INe6J`;vh%q;B&dp6+xJp|9L-^MZeLyDb71qXSMlFHm76opcdUg&FL0 z8(D%L;B*^!I;bb+ckdfKudvgHb?B#=6WNw_j$Co;XwvGHFI&tb^yY_tz3`Wuo?y~F+#l{10_}VFh|eD z^aUPql~E=hi(7K2I1a^%yTG9QqyeQ0IGCB&3j~f7cNkQ|*Xf5x3y}xDNm&8Xe%X)} z7#G8fhRU@LT;$qDRz}6O4UE$sW79$}wGBKW9Oiktr((t*=z>2yu1htifIzoEyh?o2 zDWKycfxkQsU)*ZziX7FkQ(!dmKGG>L#&DoIUvzk7+8mgmra!>F3*gORkAgj4a*>#_X_01@P6vg#{+AfuVmG@KzYbRT8EC03p^uG$8JvscDbeS&X>xnj7b5&dR%7z 
z%!`2v>d2(PUrr^;ml@Si9eFx13YBOuIqxX>N1$nScDHM!QnL7jIGrQT&z!EQCrLOD^)DAn? zse#Y0CZ>Na7&EJ_s_hG`a&qaKFzDJ82tcMlYilT!kdO-?Uk&|UW$$-tk+eTB)m7b^ z`Q40D>aC-JYAE2s(ZFuY$)KfPuxcrMvZc7A#N@oc?)?2O#xI`SvJT?LfK6KtvtV<%K|Pw)ueqY@*6IpOA< z$2xQY*6zN8)aRmlDI(a*naKNwMDkQD4XJazL4tf$^#+ewm?LI^U<%iIvN%;XCRkTH zftfxeCV0u|-Cp}@EmvJ*gXF|?acr==dq69z>DAuHa{m6TB|L_@B@vHn6$)+?PC;`E zmOC&ThV^F8O$wI7lgy;xpm=Qfi>d@?2rUY8rfl_Kf2|li=*+Die9`X0c8c!cAN#XC zVmR9yDwP#{i90zZmoC;Ygs&*%o@osGHK?n=qrsqRdVy?9DV%8(4Hd5$oa>5-&aTef z-2}6yNt8Oe%3DBD`nm0qd|T6Cf(q0ndrE9qB)Vn@7-=?CXAmi9Y^aF?nh_KmNwHBT z8_mX0Fji-e>o78f^$?m^CVPS&KdCdw5T0TXAJ{M!z$WSN-o~CGuaZ*@2C+djgV-R7 zO`~$2hav*~LU3TS7b97lGHeDtoJj$s6;bRZoxKbukG%pQ5}QqT#`^HW9G%TIm?9&8 z^XM_e0;y~P1qNPS-44E!EjFoh|3AzUuLbQBVa|`hDt9PB2C7 zZWwIJUsi!e!4GB~EeXqJLp*&|i^jpMB*=|du-9O=k$eHY!B*mZ$vU$xl68guqKs@& zQcbL28&-ApCix2L<)BW86iy*#SAYTYdOJpUy0O|#%FZ+v$m>rk&p zbk-f^Qy8f0H0`Ih!R)33of9^JbG1fDR|Feh8IVE^u^L3iubSwLhHTu(b!H*-I$=s}eX5UNLb1tc+U4pbw9Pbi*J%MM3 za>oU$3T-Yct~NgrOig$m@Q#<@7ho+Ea^Git(OLbV-O?XgDg)HzC-8-Ch5g8qPX^<> zAR+5L9qi|dd-+4m2m1<=H)myd3sxUyK^yVbYRA*TYtCZy>GMr?Bt*c5LWB!$&3qY( z+m|N?2?pDBT5wTl&XnLpcdxk6`JJFjng(@&iXl9*-8FgXw5f+;>P4uZS_|hJG*#<4!4eN*e<#igHgMHgv)lZ&P92;VY>SMN z=LcilRb=9e(Wmyn*!Lb)cVV!4$%5NPPN*kSs{1%U1ApCa?qRO&ol@N5T6#sUQXk zhq!_uh(W$75+@!XP;3hYADVnCffs|s7<@#(+jYK!Ztf(nWVs*fz#d}Z-$nWE)(Lr7 zhAO%j+TzfiM0yu|SbYr;foEjT~>v(6RX_QPOQR@L5dx=T~>#GNiRW0 zwt_=OwgL)@2p}U{0Rv_vM*#jb2*YHcJOOQliaDC*gISidFl0tIzojOgr`s3ko$n}i zk+S%nQthHXg{-U`)#@@mLzAlT9}L(wW9>;l@<+<#szJuh68Uulrp@sCvjM|qcn(=v zd!7p&_#DT^8975kW{DhPvqb)ZKtkd2X{v$1@!2?=L}As{cSLMYD_0z$JYFmr|gLbD)jo$=hag{cck2vHQZ zf~0;ca5BIqIa-Cl!gXgC^NjG@szfZ zk1c>jG^9(U_kl@SLFWZ49BXL0NHs(eLlo6TaxutaG2UbkZ3+FBfW(%5({x}G;s5>T zwZV8bba61qONJ&b4vzKkQlT5mgLzskQqWleF`)t$y>QYDOv$QHaAk14(Dwkb%Gpr0 zqiIxWHE~JMQjfeDEI#XO+XA7i)xjfL07%)ruy2P_Fm{{`ejugl{;}l`O?f9+mK(og z;&R?TU9Df|FRIop!$xsuL(s3*y&Wu}UKtQbo4X8$kxP>!43&L`2Scx{3tsZ*B{3=( zrO>1e!3c2iWP7YoXjv$TqufB~^yXk>1Wa26&--D^b6c=h)UR+22Lr^%U6=>k%jc+O 
z9|a%vmXUz*uXSWR5^L-l2{S95bWv6vsU_;BO5T-slA@f*gbF4(m&3`3qY+t=Y(fEc z>4@oI?+T{k$m9T0R1?*8ktIbV)U!Z|1lmFS>x_Cs2_M(Fnk0NutU!inI z(FyxfYh?MD=qyDSyePV&jnFRKAw@R<;a*W*Oo|?;UBD9;kBYStJ=Nx&!A|v`7AZf`I(`_JxINFBednukkXWUh1waJcLOUVau81b4Qo>Egk2~I5C zO~Ci>UQwNzGg0EO2A0^K7L&=G+%qKXngmzC6uiJ+3YFg#1ZHYWXzkRqF3@2d zZG-6OBQP1;ApqF{jn19W<8~39=^!tGpHYiH4^}VpC*%;9@xafZtr_E%b(5up^ZyGu zW6HbvL)PT^B6wb_&206<7r`X+|6}dF(4$)~N2)AY|vKV<;&`W8Y>8)D7V2z;5<0*5I8h=@LX}A^x)+k$0NB`aP5$hC4 zRFxE&KStyU`2hKBLJr%wA`bo-u^RWy+Wiz!5VN%v+>_5C@M2@XVkX}MzJeWlm{9%! zXnY7l9Xtc)*f_R_YO|572ZzcKT>PB|m1PhF!osIBn;_%DXgJtZVW?m*Z_VaF$~OWE zGOMskTxVBs?XwIPMU)$qSh9{XJxy_^L4|-55y7HS7UH_uX|z4PfkL4ttja8CXZwR1 zNSQ=Zl4zhvWiZ(eH2*~i2Q#0bA&hT9z?5*{vrWr z6HjH2<$B88=&^qx*)6dcwhfe*tP_1eMw{Jzi3`FcNF9s!=d2#k4FEDi83# z4D2eS$K$}s0f{ntTunebkQ%EZ{;n4zw!+jpbs|fLVTS^E%K;O^5X)o-u^HNi>zjq} z7S0mbL)keRm`Jxd+1j!)Tsr0Pc!Nq(wCC@DwnLB(V$@g-hd@);H@qmi)4>X|m$Ey9 zvEK2@?!?j)P-Y*d;ci7SbPgH{Z^aQAyH>oFK*lP%AP#q$sS~N|$o!nlf|o=VM#4Tq z2}%R)o0Z0Xq#)S~TMF1`0O3GuCZuu0SGFf)%^-&kY0>*qY?MA3 zOTv=~3ok`?d|i;{gkm@jW5-bKpPkJAl?nt@UZI(S-v%aibGUqAbW&+_5@sR)(#sKB zl(srl=h?9RN;Mm{60VGvM`MSux~x7_>G{#HgHehKXq!;9!~kQ&pkt>SBZlWeNfQwx zoUg!rsfvi<#P}@3D~Z*=MmXzG201XEK9fm=@l>b`li(OVzwpYuDw4g9Th%n2VHFaC zl(L74ByN$}hd>osPBcYT@$at@(P_zM5B|!lfof6@{%Q_Gyvbh0t^|J6m$+QZe*}9G z=HS>eNWS9Q1O_7Y?;4<5aj>&>UK6#KHHG1cteQT~u4yd{NLef30Tk7-GifqPR(&1= zca2)qMig0MB$HL*I!Kxdm5kjOAqDy(ghC~{)Afym1xySA`~zyfu^W}2qgQ9S{6P8#353Edk>Uu#n<>1 z079V}9f?qic!#k5O9lw|Ag6oE3o>m}D)^=w;#(|k}d!&JdrFj2(#6-mK{UKx2 z{Uai8E{gGt<$nVZ3ltOrQ@E1PKl6_W&b`q4j;~Uf5XxegQI3^@l_+Rz6f15-G{}<* zdQPS`^O#gU9pV^9I~%??;E4vslbQqE$H{HbaI6+yQcv?fvX?-(`|$El zS)?6}5sK*&%PX<-W<-y|{mtf&$1RsXkPa~`2VyXi1y$qBjC=<0K_cnTh{&+PXhAI6 zP;fL1e=K_b8FA8C4g-NHgV8QSSYc%-E3FI@Rc}SO8?8~s;DBK)+oOzQ`;_sr!$t!+ z3!?wgZa% zsNJZXWEDjML*Nq>8&P&+c}MnB32H1tsg3H>q=)N6G|4?M zqVZ`UZW*#11OY-&^}jG_5)_g%Nb5KpwUM5{Zhu82M`Hyid)LlML4*P^ZTcgCbaCvj zh*lwnH^*u$f~D}1pyc0ZzGv8|Z)2^G5WD#75qV7yg)Ti`wF zozMNZ5a$*nG4^OtGE&bGmJoRHn2tJb%R`HAe@$xJ6iX9wP%53M7ZCSu`r^gu$Dt&F 
z>Hj;TxuMdm>CqQsHsKU$lRcY>Tv~kO)rrkDAV}Y0+@vm0;hX4xfNJEDjpVNU>B}t! zgzJ>th;aQ$u*~<*-3$K#uZc7m;=a7h7l59u6uoV@f~lhfk@&S)i7EC&ZAp2w(23`fL9L(=$=GWGZoL zm=DYz4w*`lgo1W^peh8w!tqOto}!9wg#ajiU3R*3su7o`3`9Ul!@XUJP6Ibb$puD^ z5)GM8-a?7B(max33wfiwBCa^Kp*urlId)tG$fx!AxdNZir$)#-@6nG zN26`{D=IbVzYkGb5QnX$rnItEMsBT5A!RWVEWn=@DC`;0Ae0MY}2L;#tb3FHM^3yW{OgU zQijCoaH}$cdS|4jjIt`DDRl^J+LWLlrlwA zra~HVjWSJBrV{{3Y|{5ED&lOLGDn$f#{-8hqm&F>22`)@V*bde4OYNjLo*rN-+mw}7$YUz2l{J*sTJkZ3F-IvMP=9X#3XdW~2BS#qvAzQG zQD50iU|XytkU2`(W`{O-ng0&@+-bw-F6z|X8bmVLm&%7WWiNy>5$30O`>o0WDi#>d zUn+;}%16p!LYa@LL`Ntf4?!KZD#xhZj$4(qPprzP6dl~#QOb!BZX-k(rGW38s+^?H zcc~G-pvRXK_bV%jW0q0A2~oZ^n)Q@bN@UXachnGP3HEzCG`~M2Y(i*lWYA(+iYKy4tF zsoYfl)RbHDUP+gldLrcgR@B7(w%g)m72WH9SW{mWZ24VN?ubS8^;DiiM8`Q}L}UZK zAipb;8|a7Z_cZ0M9EbOXt)bqXSMm>RsP|ODt82;wkP1iWqWwLT3$F>=&;-2 zcel{7EPUcG*i!c@xcegSjCW|QCj~!sD9@DVVqI%Jw&_36Lyz0@3iQxx$}5Kok|(c5 zkdpl_4pmWMJJ4Qp@ZbT1W1Ec{*p0KG>H~)LAL`)os>PuOslg6aQ?01HL&es{q1x3D zaXQwO7xHBeHPoTzAj!pW6(13*zOd1}d=SfYlJuFI9uV7fs1Vt7KpL_Khne%$x(;S< zoE{~Nkm6TiSE^(yJq+@EB*~fCRxc7&(xE1xz^ELq z8}PtkF|WAgYV;h2mgdVx6xxG9*_Y=*SAq|K{aPnai+G{DT;U0?YSnXZ8ee! 
z?VxsasGZbQP3;UJQ1G)M1X@Bg@2J;Tr>NbGt~l6HPe5097frhBPLcSo?h)Qjy2si> z?dec^0laJ{J-^L78olkQXpBm6Pq z6&}K8P`?b-q+qBz!l90&7^BqD4t0z=)}fB0$9RC(Ox24NdqN_i6C7%~I#E+6In>GO z6b+J|>NIt_X2^BUa;US_ISzF$qAX9<-wB^bQ8FCrd`f%)e1AtmITxz$In?**8qz*B z(*YH>iyZ1=wXH*40*IoW^=45^Rj#Sa9O`m)1qo`R6sy$LRMs`3WM^L9gSWMsA*l(0 z(ht-P4s|2q{?S=45x0rJH>+DTbt{B3)rp424t2Y_0}`0j8->N6o%KAS{iWv3SY#We z?h@U*=y~((rXJfvj}MVC1`c|0FSYAFY8O9p*xW_W6S5yHHT8f)JqVBUUG&-&O$kki z=6 zyN;q*+Bf%Kn*@TXlKHeJll8*tJeb;xoucPy{{m_fUxDLS9IJT}M4N#PJqeC!P$L!t zK2S0?b-f{y)|=GGW4|^P``7;1b<}5lICRBhN0J+I4rW2M>pi}rkGpLvRRj2BA48gII(=jB|z@YCm zU7wRiRStplMFCl5J1dW)i3vO%m9GV%xruxd8l)kP6JZ1cd!x0iF77KvV=HaJJc=zK z8m$cYa*e?i4@RT7aeZ+Hj_R7SQt+DzziD8SHs-TvhYZS60{kuf;SF0t+O@*W6F2sJ zcF>^=?D|v~wpfTg-$Jx-UTh<=pM%UNH6pPhjG|A-lw&QaW&q1#Ho4aZoOYB=9_*Vgzd+@xRKmXtJM z<4=3VHE^JqqTo#ZgL2eT#93);Rsz}uNeQvpi;$5dh7jLS0dYW44V+}Bg*r>`W1P-_ zX_*HVC<+#23uw4#4Mrs)B=Lpw;OKob-zw(M(xVG!Wz`IL&N8+F7cZb zBHjLTv-J0P+I9{Nk*p;fk1Lj>!mI<%uy^5&x}jipHx!gzjS9ezx*9tDpnhdHx)7HW zAm8}nE91u?|nX5;qW>u|iRuNczzMmgL5puCA=I|?qi_{+h?}8;F z5F5YZ7#aVv5HO)I2=bs*nBjzHp1!PDRyM88Y;3GCj-t~cSxopN zrKDQ{KM>_8KPE2E(`S1mV#;91Mn{Ff^e8Q8M0$qaM8#Ao&SmH~Ti{A5+P4{6>jc_7 z7)+aK{Bvkr1h17X%4%u6W#zx5JPZ-;Sb&Yr zByc09!de~SMs0NLIJAU8ta@Y4?HB4_^DwBb2ljjnmDNXB96u_`ys!6?GzAxr zU`Ao%pwC91A-NG$hni`MW3u`2~S7KSufybNWzLx$$-iY#+`(X8-cjAi<>n zC5@lKo0?4wj4%kzn_*{-aVK(S>b}C`aHtW^pR?A|)(zBzf;^I+4!^dBA0lEkDB5T0 zeh;;#K}yiq5w3Ue0YiNvUg+&KD9oa2i`Zi>w^$?eJp3n-HXcivpM+jk{TUZv^+kGJ z^%s6gbX=sD3b`z)oRwbzvMgJq7t^ky+Z6sQjI!xX8$|P#5bo-z)Z+c1!UjU*d<%xd z0Wd8NCPiY4Rz^+4Z$CCnZI9n;tdjcG+gzwyBa#LMgZ~*EYH*=uE!IgIa^qrsNeI+! 
zR9~-elY{T?^_cPIuzDi;1AV?UV!3f=w>@8?rN6WoGOkj(@pLfkZQTBVxJ?50f2;T_ zY|!T#MmE1`S+V+dbn*5mDNQQD?`_fD$SruQ-Yi1=R?X@RtB3kQh#Jp==;YG=k%#p} zn`9at+}bGbDHyFhRgNS~OQe;iJFruEagCQi1X5+7^OE$IVCAK(ytI{}4=SQ&(4&tPivExy_#1`!Y7UQ2Ll}C8hppq}0DBhg@l4G7qZr z%q%^B_SW!+s=uXt8G$XgL(AVvJ6}b*{L}avdaR|#I$Vm88vaJ{4OA~MI0{RB{+iV0 zZzVyxG`b+O!_eoa@!cY|NZ3;U4`1kX$F@$xW;NffUo*KyP3&yt2c(i(_UhX=wnsOp z-bsaOmel^Sz4}AvPTy{+!=baKd+Ly(gJOGij~$rWYjC%&MPq3@8r!#5|8B7(C|*xM z#g<5j#fG^=DMQ1{kS?$Yj^LiX0NJg7*VtaT-kZxTD^zSNt2KPb%P(ti|yC#SxlRbGviL?3u|M9bWCncr|@D}%{wnA7dJ4GgGcV{*Eg6`R{TV;%EjiyIZv3R@5fh5(x$Ne? zZa+WZFZr{6m=`#RZ62EjEk7VU%N%SyJvP!~7n@Hqv3PvUmWis@poDbCce=fxW++>L zXzWoEEBcrp(fCmZlq?)~uvu)j#y@dzOo4_}6Q;n=At=jF8k+tV{Gt~L|AK$1@vr2* znryT@H;k+t=ifR&;ZLJp4p8;)P~Xo6=>;Ba3u$M9B1Dn;Fv!ztJATlLVDA7n7Razc4*mqH5~ZF*sWeg1scqtPEDBK&#hGRG z05{i;UeSwIu2em>fBykPq2YgM?CW7hBVy>_VKl6|Vh9Zxg25G=8r!sH^+pYAbgT)N zlH9ae6Y=L2y@BdQmu&yTp3mRms;&izFQ2%5V#sv}^^d-;=UvD%VWqrV4y*r<>pHij z={TgL16od4i{}h{LEAB#iC<0_X!?cYcMhhYxkK1ImNA6Q$2Ypf4E4!Sl>!zL$b0Pl zwu}z}#=!7$HdHPNWg)0V1htqgNoxabO0QvoVP=Z4uVI%H>$DgyB6zaTpZWT#nwPSoMQnd~MQI`M?m;eS)uN1^_#Oh8%wo4^4Af&M-k zZvRc-BMPm0I4cE^B@>3M;L+w0!AEd)0dxttV1oyAT;cgCzE+{Ffx)Z=#y&1VmwX3y z(w2A6;Si;7>yc^dUKSDWWcq&Q+{;|M0H;7ylHu9|ww2g9T5#-G3|q}o&|Xp=OZ!R? 
zj!yP;_DK8ZECN1}+5uKdWi2%YK#=Nr{BPCGTU^;v;hek)H$K;c(UAw}ItC zq4*p5z7ZiZh!U>Ra>L85Xy2NLSPHtK7MdAlsg09!R)=FBMU7ApZO$6eTOlxIjP|o0 z05^pt+Nj}VcrkjDeNqKY7#wNe$LcL%u1Na63pkXM728(y8-@M;^n0y4KNSRrbi zLrg)|s@`ZH3o-)svBr86eC=a)YB+VPL@h$|G+d9v&p7NO$AhVohOONM;C?!|4wHcA zlOfeG1rlXbq5f|MAZ8+DHfZTwn7Eh+%9R1-dGj&u7hv4I2fWXOY=nSN)go*fm+)X{ zU2=k7R1~7;l_A~VkgbP+#Rq&iWH3ju&0ya~3?m=(QHM2KoJfjGkOa~8igcW5GX{lgu|{M>0`b!lt^6#_xpmJsIE$M zC9$v{F>hT3;u2=YBRM1CKeH456A2T3jN<_U0ut8n>YhY`_fo+fGUApCX*`E1j$*yqfOm$nUy?Pu*o&xd-xwDtxC zBi!k5g6W4?N1D<)V1nzkfrZc~UQ-j%hd_}?3w4AlJ0cFnV<=bo7nOYgmp^;ygkF)Kyd_OZSkk@_uRngv>v zRX^*WsKrNG|96OW1M4P}7$B3#mz6|#!~JaFf0Eh2-m#>8WNocdl>Kbbe2mq>>OMB4 zoINWCxBY)ar1Co!Gan_mm_0nIJ_2L^ET)zF+y}&tF9tWmUUL0isjc8lc;dOV3^=7 z0-+cMc+)`{AW4NcC#F&iOuV@e8U#926>|U6G0uwd3fz;1R}D2N4MFqiz-XWyAI8E% zS_A7|u@szf5VgjoDx&LL{=f*T{6}pdJ&4f9Jcw?j{@eU{jQ6u?NW{LI`0qMft z+8E&AYg;6R83!~UQ7*`BSoZ(cm<+J!;8i40Pb7107%Y}-N0#w}qb@Iznl#8;j- z*QVn^@Zen%qZ(sbu1a5zS{!Bpw`)Tnw=PC&eOXfP)4d zrsfF6;Q}v+aVd)?jX+VoSAY~@hv32wRuZ9>!z`_}s!vF3x)VO#PcS=vdY!;#5MQ55 zFnnc!rAsJylp!b`G;`t-R!F{s1W#OyeG-eLPtZneQgYNclbx7%ueX#I0rY$Mfj0I^9-U@=N?Xgkn2>xdmR+D$dAxbZ>Py6Bg z8F$EeUnt8Rgtg;fFug{~=A!e_zF53M|3A_t3w{PpP$PvgyDv1I>Zoet**!`mhjavi?GIZjh%7L#)pH>Iha#2&;3k zWxp+&*M z&OXL<@F6KT%l89RKU^AzMt;#a-f0y_i#c&>VVrNciaQf5Qc||g^hr}Nk<(<3>L%Nq zp3Jh&;Qw43FXg-#!k_xvS3xb*Hq0aXRdrTYPq5BnQ&s0Vb?u-HB0b4jTpqEEJNx5% z1F>lz7$%avnoF!oLL+3{_eF}kNluc}eVpVR9A;eu?X3r)^-K(E?5qUe?TwucgJc3L zDkZf#jt)G|$zVaGn6u3pB8s=52J}~I;r!Il-4VUKY4SA`{2CLLLVITpQMY+` zwuPy-t(>Q1qn(~qU@dqKP#z)9eBzXlIYqzLGQBaao#}>lniri*)!Plr>bqT}?o>jB zijrL=RF$sI4_&fs_d{8$Yis~9IrR@bLBHd%(juz^69_)tF6NDLx>Ki;UJHdy*BH(piZdKO zgfkpIM3Y{N;%p9u;qak2skJCBHCkwFfyQw3ke~Ipl2(iS45ttI8Kf-pky49rw#Z5< zEtasQn$%?>zG)m%-k&_$`4i{MM9#6!dzKkcj3L6uIbHsOIaCF*Hdj&ek3I?dTq?ZUc>ZPT1hwQjh!z#$!8Vv@@va!q%}s@;)Q(sXB{ zih_x8)1B?yy&3VSfbf_?n&LN@MY2BP&U9yFOkaFLH$@;#=yK-6&-%nN)+d$@@mwO~ zoHJM?&7kq*Z!yDJDiBb;)eL~kwhwHSZcSr<8eaoG7-mB<)W{Ig#+ zVxVl~VpnIFd_%dMBK(mr=W87{k;k2zxlCfM&vr$8^@)>o{rnB^=Si z$t+u>=rk)h$J@2obY 
ztY&J|Nd-`60CfSkrmI-=yYpiyoZsxh^EA=ny0Z!jvi!QUtwApTURdf><}E6np=hbY zE$2Ns_$zuJbw7wT=|P*gx*H|D^3-X`{LASQbN+I=dzWT??aa$k>{#@r*zi_MW3{0& zo?-rnly^ObGtzpj0Yrc_hVxMzFVdqi%xTgu4oM+E4rzJTjN(AyNee4$sWF_AdKpeh zy{wJK+G(^a?BI9YcDCeNUFp$E^t|3U+T;${j4JioRW*{gKMv?PtepU{9dIj{3VKt_p>E6i+O)sTc1ySdbvxYm7Rg`d{ z=%pS3_C~u3*)Wzcs;@V_T@X*VX>_hLie#_zj1UA^oQ zxXH)AT>r8-*mT7WIzf{IV%Ws=g04`=+>Y@tD(Fh*PGa3X#z{Z@zJ#F?jh7=aY=2T= z*GCY?5&sr*owUHVRkVmG?sEBmDej81@CuowT}9oA4qlO0G7Xhf=2aYY{4XE`ou^H* z1FF(9v8Q8899a1*ao`$cbq7|?c!Mbejn{H;7$ebmc?Yk{p+>bnZ{PsK5W$To3Ql`) zBE6EWpeRtr zwJWVXnIdV=JHiwRgZlkcN@y=7)S2E6QBKF$acB~S0!wTqdq{Hw4XG%-Nn*GUps=>X zqaT$Kb_h5Nmzos%ruOeqqT#T9oyl4_OcQ7vzYsBmnC9#7>Y~`LP@5MD`_UA9I5p4+ zDMg467`Pl&)}>hZXp!)>%_rWg;JPmg^iy*9KS*>zr0{^qTg_F&q(0k#blX9Fb^zIT zis98<1Na_sr=hEeh^X#zdOcWnGt5y6&TnE-t3vD}7|8rsRBhxc;_qDD^%dteMe&-h zxoRoq5JzgFnJTes;%rUVCWy<7OL4U~^Py||kFc}&2jk|kxS8S_1>4>{tss3#ZTuI!hfT@2C0#FLp&#_oP7ve)i#YU*$Rk5X!tF@`wXf(pdU{^5~ zx@5*dA;5U7!_)lHja{eI+;uQzf(^|29%A9(3s!0nzYpw0xC5-2!bEU0SKqWaehgf% z^ zeZijUfmp~7tUyS%Vq)kDc3}@RT~GMZbyOp@+1aVhE}Na)1SCgMyF?~nJ!vex5H1c%y!h3opWOI zG8UVc$_RZl(y$CQD)0Y@plMHTDOt)Iz&BS+GfFe}>?C;9vB( zjeDE4;=OAp3t2X>AOFU+6|#txOct?{$s$%V8Lqj-p3>tP!96EG+){hUpqA(sgIc0j zoIwjG{iZ1-2c*)&BCTTOX4mK=7#6y?W+m%2xyf)%6t~kOgdVt~1`ZXyh0z1|)1t_F zO)}S!JXyg*L%QwUL-w$WbDs@wk)-Jp=w#>(?S ziV3FS4dXTVEdYH^XdqtDN|vyy!hj9xgBEzm<7x8=uGKX=k||1$I4ha7ffN?x!C=sa z5+y+}TxBB5V_3~#mZ0#mR$k7^%On4HND?fWLprgdl}y^?hDjR^t5j6PB#kF)yqc9) zM*`BQtHx8PX0_~)HpIFnm4v{ma;$8U(LuZ(85x5z84AEcCzXuaq>xdYC=v!sC8IW} zBo3C!lcZ&vrrg$yGHVWFuVVE~S26$33tVn)fsjl~k<`UeO3d0279w)|5ESXJ`=0AI zw?m?|HM&mYZT!iZt}RMV=#8NUj|rxkcf{uF6_XDe2Qe z3|#EG?CI!W&<%*|=nkC6&2&Iuq!VW)M8qQ=P52vK56hzr-dV%V9`F${pJkr=!2cN*ULY$1$;!Gm5Hp3iXH%%c8(bK2| zi%EcGtJrlQBvRyR8k`ed2yvDf95oupXF-n+!>(r>Y}pt)q;Y(%gU`cFYDWFq2D(@{ zia&-gaKMJm<)yAOR!D|BcqVT0?sB+dbC7m(0=7B$V!p(X`LQl{<;yh&;%3Z-yAs{{ zb{zuc+v2x;3+zgtMT>@CQu)c+AB*gri-yQA6U;6!vb! 
zxD~FFAy^^oq*mJnxwQdbdA$BpD_j$F_jy`;evh)@kcf_E(y$iPMb}-f$7%=W6&-eC z%G<%i#fsgo3ZXyZP{+bNkQ4ctT@=@LyUMryMPuK+fSPD_=@n>HFcd^;?6Stl8-5xD zNs#_8uaNTp3d5fA7jQaZixZU1h)eM2VlY!&5ySVm%9f#%B3hsR21NoS@yS8a9`del zi>Goi_Uu;>PbHTqB3d~;A|u1)5Eq{NqF-;#@_p#~#!y+hZrH@jEAzp0cRmrJVUHq# zlV12jM2X{)%9lUx@{+ahMnAh;ss;S~B+=uNYn(QY4Fu8+7QbC`B}cc1O^i-#w$g{q zQToGl!fZBASqO6&E7*L0`OB{Pnq2C3xL^Gpar(N8*2+J{yEk+HksGdRnk4)G-nsov zPy#rL2}U_~{^5!c74FH&q1HW@mLQ?hE>@iT7Y>*HvKsRUph89Ce|@m(_Do*uUYW;z z$0{>B(Z*hjGAk-pmT-rNkN?HQWdb4ZzYL(HS}Co?x24^*PWO~?Ck4r<;>Qve(V>Ey zbT*0#?hes%^H<3KT~6_DGk4{zg}S}1+bw!DcOR9(Y;2(fQKE&r4kC?e;qK^=zEvO3 zZYi#Gc2kw}cX1~g{GRV-hDT4qbT}ng4EzUNp~kJ;VWQ{+w@s|*=MJ@$W|eGWe)SNi z|9C$)gpI-Cs@deV z z5Ou(tNgnM5zGNpIY$O}yV69nQ(ka@(K>|FmK+Iq~@LPI7OhFv`3>!~sTWBZiNqR?n zQ%GM1GgxP6Se|uYod=(hMA=8O??Fi@Y#ci$^~PCnX8eP=_sUfes!xtPKo;7HGp{0| z{J*BTukeLk3Rm=pPjpY?xc;|jlDn%4i%8A=c^Zm&1ZGMk@cxwIGg5}sk7vqD6AH~n!!P`iiE9D z4?L^FBL%svBFOD4gp>z9V$1u?U zV7j|ME{}-RS=j({QQmpjaAyEuzJJ#&_i+srTGOEwm|RXt0@Lab=n<$Vg6!~BVlWGoMvPC5rPE^~OpUY2^akQRm24kQqmSui z8E^)Df)84m_@J|i1v;Bppc=72E3piUIiDW5WG>bER+1Wh?U2q{giIE*_cYSxE7(#S zb{6=+&=d@J%~#ULD*9MW7ZUKXmOj?mFk|531NzutgQOuoFf0YbHFF4~XlyHeY|~g5 z`o-X%O@n_n+i9mMTw{BzG}05W#9{mFg}~<;Z-+$Sej6%qfQk*iRAqLE9v{);u*N>d zKo)1VsX6`ct#=nu?Njlsv7-nOUvG5Zj2!z1SX(`@N4!Bjm}dJ9Kn90u`><`XjFILFbVUn(iZ%xa}FAE z8ni6oyBdA`2qU`m0kHEhw$3inM-BSG%zBAFQs@IS?Ujo3g>s!|S2gx4eSkEaXTNE< zXo!!d^zl1Au2Y%*a6kY8RRTe}$^I0>ZLSz`cC*`88tOLRFc4k?^6JnD=zyrjZ3m9L z!02dC)aEX|-J{2SFnrIHvE&n-WM?kcy#-*k(+ven-A8JgKbH5W_vxSNDP_r70!{O2R!#6xez+>~99wXsl+4}~>5H?D5*z2w-)1X9$!(;oK z9wXrK@=XtrOVMy&b`&d}1Bey--0|923}A(g6Q}mMqvWJD@0`+I+~4OegW*@e?|yER zzByby#*4zo+zsJ7@|b(RCI__ovT2=4*iu%)i3=Ttl>EX?*I^ob;oc7WY~saPcefZr z9;6`}qY;>CO|bcH$_j#>RfGPu#;k>Z?Dy{83O15|oOf5w-bhAcBN>B@WFc%MOaI_* zZ0IrB#{W+F8d{mY78w_0`98bg{xAps(?9wTcPT|(!3v5UH{I=3H4=m!*BbllmXV_o+MX#bHos+UupeziC&mKUQG_Ai**az-M?yu!?p~Z%%I^yG3ZaPVN{>p7NG{k>j92VswYID~C9BUeQ<>Erw8mJnIde zIj!b*tthH?@l=QJq%NKlxn(UjWX^%bmx&%QR{b+3dhQuYt$in;P}`8#d{VJ$ 
zy2k@e@gU%N7#7lvEJV$u6r}daJS=aY~D+AvA!_)i%qpW!Nbjz zcRSW>B?dJJE41(rOAQhGM|ijhU+f7M;j=wf|JfOyKqP#wMihP?D?k=f zVf2~}GPZ%QW_wCP!PM%%RJXt89MA6HLRhkBI8(=IL`_<|(9%X>wF9~8?hjip7BTNh zG)aBW1C`LU2EOt0$Zn#~`yNtLzV&@i17l@8e`%c~XV9Z(L{g^LZ5;W=F7v=FuRm#- zrYb6OA@X z{A<6_v(|u&h<~{ao15R3iCrsx-t8eHn1%Lu zqKpx-Z{pp&{fgj}yofx?%}OU!eDO>gE}7={oXrbmT5W&!e4;`wLFBpUDW-0RxwZxu zJrg+};y?Y1rzz(v{rN6?zBR_4b3X4Vp8YCEZPIU^LQdIcEi*F9W0TXu^Ck#80^g@a=!vCN_rxz-tdX%ibE3m7KI5o++r~qcJVPI2$0^p>u0}cyD;)!!ch{S>#KmckMP9MjB zB7pMp9c*A4COAl}@Kx9ajmPNp@i3=Te4%n+?E zkAz{z841<{f1IuhfoBHt2&Nh2Hb>#Kj;;&YKqw5kYa?CSY#8azGt-Tjh@A&r3#a7f zBe_wcU?*=PPY|6ud3VUJdzE|6rGXHZ80a$i(&!G0=+nb%6G^EO#~P-32fJk7#1$)b zXfLMwz2ci;UOKdO4ENp&!7101bng@uQea}jL~kVaYcXQcMDIo@JxreD-54zefpFAl z!TF09Cz3H(MG0;}E@n-JcFmM#oMLfyq4BJ&?t?qDNxyWw0WGrg^7^ z$P)jO9<;PQX3q918B#DoGnRVYZ{za!3%nhT!FL4m+%0iFM^xpbXT@N{zo~gcf zLf2vNd=>H4rVr7SABt_MzOpgP;f1g4KWviBzi0Y8Sc(X1Q2!U}-(lzxv`=Vi*t7m6lt~g3$Lio+<1u%BV<;_lnv32Q_k%%}` z%bH6}9qrTQ2=4Mj;b!7eXP-y8r7jGyM2YY&K8K2AL`)Z7v@$<*;jYl=!ml%=40^3C zk*ie@EHJ@U4#?nyn9#*nKbC-YSAO|UH&m2dnE;?sPLbZlS90OyF2SOGcb`LopN(r< z>-~H%uLQR=0KBBn0H5INCr%IYIb^Z>U+Vatc-0lf9$z^txRU5M%%{uLj(>5~TNF4* zF0h2QMp@uC5&p3fER&RR{;F4?RCS!EkgFI_f{ZR`gvfEnx8cx85| zh(UM65N`Lsci}Pi#I!GdEAd65aUWljY`OQX>w^md`N`qnKl^W8`XLXc2{E$c{Ea@& z{doQR&jCd)u*Aiq4fxZgpm(c6U$chpk5&$=fTaPsHwnkO^pQEU;b9;=H`m3ctVr|9 zuj3-S&FsH*=@EsqBk3ZzpNX1}raXH`#P$Pn?5-%o7p{EQD|#YW*71vLTXtT? z7$X;mm!BmW-~-P1#8+A1ha!h9x&%T?EcYp(jof2fU{jh+6sB)p^lMG*Cdmah zNv<|V@=0I0#EGY7x4e-ei_QKZ>bXns0lvIN-lUw=kKyaD$34d!LGGRG2vnZC6rq+J z9~Z_HpDP*ep!F((ti_Y2KXAvhFa;X z+o}xOoE+&?$@*KD{?iJXXy!`peNBrR^hxRy1qL*^z;5b-s-O%2w~B8E`kWFUr&Mir z^nJ7`xxiufW#%B09uJMm>^w9VW8^w#Wsl!==?4!{k}&am73U|7hxx2xCq8AdCw@C| zxFN&ESW-Zw?LWQ|DUgun{WYjt7?7S^m4Hb8KV{)X#$VhgnqM-JX#BbF%HBq!kZThF zABS$B1_wOM-VO1U$V0uR{V=S{5fB)14Flk(6;u{B@%hBjs!FT~3soY-#vwjkMtZz? 
zY~E;8om{1WNV|&}ksANaV_r`QGE&)Ff9IWmT*w8IoPza?NLMlxk0j&<{QO-_z+5E; zE2|lh$&<#}@P)+np=O~f_b-!N7zvYW9RT00RW^LA*s(-`P%7p&naSu6U$%QtgfVh~ zH3^!X9qEmlNF-*{KiMko5A%7{Ci}C2!oNH+L*Unc7!HTu zj?{lCe`5lb3x;FEebMT}lZpucLWvSjhx;6|ybF#WZ&VLU5ptzoCkn(X@`Z#gEoP7Q zIposi_Vgp~Yz7>;zy?3@6&B*jUn_;gjuAeutW(n&G3Uj%BYYmqv2U{i!$+EpGD<(pO4mcj|24(l7(g$OUHi-AyCKTmlEI;bMf}r^^_=FG`)=2fQX% zG$7e#w-ICYIi-N;bQ{Zxu}946-)MPx(P0c@a)Favvj;{~Wj;jr4tRh_vqqVb)TSu~ z55z%;2re+2-cPe5g^J6ge7bD?guU`|`_iTVn5*z9wixa)EStbxtv8oG&u7 z1avXxuBO^vuLLJuJaa{?8Vx#E6T(t*?+Lz;7=KRHMAdO7e5cz{CmN#Z$<+=h@}QpxzJyqUnH&yHFD#o~e-%9#DPF8#q(5iuGhZpT&CDaMR9E4`y{ zsTeVDJd!F0l^zrxI|#O1vRC;n2rKPWs>gs1uP$+o!p?!boE(4FK0QDx^% zBdLBj)tJ{=-B@0|p;(!Qr2e=g!xtuP4HDm{nV5Jp4ZDWi_r?qlM8II<_4@s>^SKO6FVoFh5UVQie(ovB^TIY zg*wW_Jz`ErOQ`sKvblbAb?mZE#7;)FW4f3CSGro9;%;X&&Nq`ya`%`;=c|q+$px;C zu6H+MRqbKPDUNhEW6AN;EaKh+ah1{@-`j)=pW@3aYZiUDQ0cEgWXJ^$puHna*xplo zu@W{o@8tfttxPwT-`c5uKGB5DHOUewewt#oLXQ_s+y9M($pt1}Z)#R?X9Cfv6=V)~ z{%MGt;dJrx?RK7$VG)DoTfAbzWJH)T)r`>B_3OUU7?9)wJNV1_GD5M}jpv#nk}jbG z$>l=^S%2%&Yh{|HDItKs*?n@a(deS0(KNJMk7Xvn_-VdE*_(c~<0@PsQZ%2qJk9JN z@5JgW?;&w=fdufqEZDToFmaGL-K>Cj)u+Ec@M4hy2Nh@8f9ulEeP$*;`!h?72n&le z_Wx@Zber22?Mbd$HaL-i=u6`CbYEjh2X?L>-**MNiCo}<>Aa{q!`D+%w88^tkJ^WV zlB*w()U8{%n&GPr3(6*)&A6U)CcZj41+Gj0Xh?)ep6RP56C9Oe(1Lj?T z?Es*=;>>JcOF5>VwG+og$vMbr{Qx7U38_IAF=39`U}KuAr*EJc$OY!~{;+KLsLZ)$ z!MDEiT(Z@@3LMjHF&`p!Y)J4avx1{a9s>+cxw6V?XVMVIWV!J^u5GiN1Bls%fQoG<_k>fqH6hFn^I`aS&h+N=mWpNLJ zPY`?Wn4mLqGKg;eFP_<@r36}3blQW-{d*)WQNMcqFV=`V?*aASS#E}}47P}J@B6&6 zYNf4AX=y99m}m5jUg{ z)~bUx`(j07;R}(l8{DbiT>}WYKz_ppz+x^aDYgo8CDV$Zx+aba6!C&Z99`rqCI(n= zl{0dYIVM7Cp369g@ktJ)%>G-K{SBDi!NC?fY(0Nz zuG2`1T+!?_X@2yK7y;(+&_@ za)FH+Sy95qh`EWHP58H%oY-!AcTN_kmY~RktG)>kzSNvmzk892fL06YXhz8Buay;JuuznPVA(L7=;T<2BCP548X!t^T4ol( znbG{%Bh0?!0*lajycr>Exz8pJE%Ui#E9~E&F|cYuV*6T3dqVnxypEmRcUhW@X+!bJYxPk&obbXbA*`Dd32m9bNE${n(7z;^pj z^eqyAuD$;5bRUTsyR{GLRs!Fc{ zm+xXCA{WRT-S#p&k|wUKHXFybWdF0)ps(ZtJLPFPh!n5#6%nUhnnhGwV@4RaI$mwTh|sd3jF2E=zp%Q*ldK45uH`#%#tGR7 
zxEcWwerqlx)D#2PnFw3k(fsRA3o=G7a02%9mayf;l{jm-=)KM?&4XLzJBw(X_`%f) zNW8`Zv-q8sp)~jQU|hUhYi2W`$(l`vK)T5VW>amu8SC_8%`5)-$3&;+oj%$p?yd*M z_tAq5KTTnSRsgnqdN+U*uO9oSAF0sAe;EWnuSA(co-uW$Q`@% z+lnhm}$G-@5e9VdBho%xqUq0iy?A04|Q(V9qnU z0pBUNHB|J>g3tSDmEcF1o5%&iFDh@qPxuH#ji{0ApxbA?qs7%tDDOqA;;7{^HbYu} zLLOAA%4V|=H=5>*&yN)`xxh_|6N@@CQbC!XZ}Y_l;>c#CH?Ol1ly(T6@NZU9#TsuD zr!ct51(t7R7X$uuS6irPx5dobdAMZCSP(vPfz!?5UfJ-lvX%vZvaO|eCA2NMK=`SH zv*GQ+z13%2o=@%Z{g~-6 zU3dAI*!+-W4Hsezb_>7}AESn|+$+FS;m)gFj`_RGU0%>>5-t0&aWIBPEv$mNn zW4X|#M-y}ixxkX;J)RxO^*WP~1zpenfbx(FY?)7gFyObJw}prcE6w!3o$vl861_w& zkjZi50>Q_LW!rsjiKOprygSp2l{UG+K~nn?ff~E$xhFG6}FqZFiUj;4g2!e-+(GF0g{@uV=$s#K2-fHWB-~Em&-RVUmYW4^FGv3pATt>42Qh z-!fuOC>s@0gBo{a_WMFGdC6n+;5%w@KYo}Sf(}fa0I~fLk9bDi% zzW7mgq(~9_5GDI}r&+QG>oRhl!(yCVAo?E;63KoHzoTcsK6{KzKijjP$k>hizbN}gc;zzo5OMINS*e>zpVasU6BxO`=Dt$l%}9I9LcdW?d57Ql zrj5zV1_WQ|KgR|mQal|ef&!3h`$qJ2>D3y_7-C0#dsz|tp;@B+A!*$|$6zAYFaW-C zO9MWwzb#H~vt=`nA6sLL*!dxVhPR2+S@55BY}w%uV95oh?-^^rU-{aui%R><^k;s1TipVb zBNsS<6q{kd&z*@LCk}$FTF&9C!^Dw&s9M5eBmC7qU!1I3#k=GGg$O>~(1G~W>%VpB z(?1~4+A@KAgHCo3^ZiI5evgqr_z(8n;;!FkoF9DT*nhQpZp1ILSC*_Ii2c z#=03GF63GTl%&s>Z$!Fx6zxgFSXQsR>v?gJa1h1meZ~MfIs}PAa57kA95hoNI=|X^ z4_27u0w=%`zZ;QW-mr(hX4?csUH*9licYRsKuNCMHQ-NIvKM+Stm0c5KCVs(R%via z0r1~EGTH= zrAiR9&=ReprF_rY?>^_ve(!gFzxB`A&suw4dwBPL_V?X_8`0?=!H$;3&xY4m5BT#( zsIw$_&G681L+ZrP->@ZOEo*Xe{)Z@d7IoYR>OmQ0@^sGH;bV>_V_aiNP)uIwL$k}A zZeeR%<+DCZ(VSS+-rR2@ohoy7)4p2wxsuopV-1Tsv&?=SS*n;697nbp!68)kDlXXb zmtmoHX=vWENzFj>14&krP&HLki={o^H~7NaWX6(U+OWgv056$(B5zW_uUB@!1dE!R zEmDPS-!#E-yK=cUD9j&O`59`MB}Iw5tXPYKN_YAM=?ks!wukr5>(rW-->Bp ze5xrd2u54-tB?`AIAQCFxoFNT-Mnb7vKi-P1pO+U?ZqNsbBA+>E@OLuMfGW~UYFSV zNF3~~^%zetnD*})R0&I0#rSvkJ;r@aVU)E^I;qq<4-PyFFpH{i+mA+U$Fedj&J;@J z+hiMx{-RN@Bfzq#GlGs54PR(7g;2(JiH~|QA~V;CwFM+YNznLk#&;5}>5HB-H*Lop ziAC*(VJ#UIP1UZ(CMroYX$K>PgKGO+i`&$=5)LZC%^)Y0UJnMz8jgdA@KGI3zDXlE zyr8ufUSEkB{HjVBer?jk$yCna^=-Z39YlJ1Ex%B+46`d1wcQ@-=!FJSeI|-#>JFLJ zaW%eM#-eMnq$*k6m8~f??`$&Bf=x31ne#JEbae;fpVCtckJ;&r6(5_jcj8;|^uAQF 
z%NhA!WB%<=Tjr1jt5*ni54^t>)3#_Dl>fNNMYDIwcyi7?Q$+PU5zoq@!XjCRnr!sU z7@6*p&g=G$x8qncM6Dvzi;RDO-rMDj5akm4Khw7z#bky>osaJ+WN4N^#q>YDC=(MB z7S%Ugo69JCV|&BYl=|(K38D>Z3cl-c5jol+79yn~=hwXe&iV$^v_F% zm=fsfB^k-vYu-qB6&{F19ak%B8l-IaGdRo8bRRq+A9s%2hi}>QE}H1{CUYn~x(_>Z zd|M*#!i__ezYkjtm7Xy7IXDg8){1AqsaLudHqF5ec8FSPPyQl}LBT<{F2IGcLRQhg z^51|1U{T%D%7@Ha?EPAs6DgyW8TZ8Z%V;)Q&CT|pgR}JUwpw<4MIEKq`u)!CA_mh- z$E>^19$D1Z^o4r&(sO%l?}0{`y)t(aCfR%#U$n3L)2Q!*XvQtl4F2nWET zW^z=4u8>4oFPkk3#vrQF5bU~hq4o%wY|ce>v#5po^#olrk+R1_YwBTHxuZuH1=6qs zyvHDzAl>=Z&&SJ||sh98z@iBX(8t8H7Bx$6?ji3Jld?{CGl zl3uK)RqBa~%~4c-2p;f<9R4>6*H!T`jtxxgyJ_AM^h*|XIx^@fh9=UMv$9Sr7FTW+ zO~a@C5Dp5b{CvE2Qj%-&qc4AiCt`d1FD}W|E}j}sXk04LlRZn*U^Ox4WsvqEUm~=VbdD0Q0i8BYRwvqWGr12rSx|-rJ}bi8T6~C!5=)Z+f9v+ zAnR=wY2l@dEG_By53=0L=5IHsb;V8D4EX}L|ly+2hUw7z1(v#rMZF%@53mN`VO?(DE(*bnK1rD!rTFm zB6%!n3cj*$1N;u0-4lMIVCCeIRPr-YnLC-oai8{IW()oNGj^NUUa$1Ux$&qr7IoG& zb&7_MJLYUHpi>;}Rz<lEVcWS|Jm|9a4c#q;IXq-$DI$0q{NPTx$Q!K++Bk_q*zT~%BVz|y5DLR7Y#%y zbT_YBPX~{q6v9_(;RPRq(3i&nVXtIL^ea~|e_>I_f!&{J__$B4k(B?t%+<-1!S6gZ z6w7IdTDPY4j8CBBHPZ9NyO#~}tYEt;NnK&KQU4n5p(fjd`Vb==Y|$ZSNfpYAMRiHJ zPF>4J%TLH?0)hr#ei|1GS)k+nt(Z18ODZhQwwdS;D2O6?&$kT@*`LG!;x|YFZ22}9 zMd3@h06ekvljrFr20S**1IV-W7QJ4i@o?@(Gv`O6gF#epn>|u0ln$^tsKhgcX`FYd zWoHaAEIqv`WM7pyLn^r6w>77KpTd4PH_lcqCib9%r;vo5Z|MLxwXAifX=%**(?P>) zITN#VKyT_BDvY<;(slnRayIGwow5Cz;&2i~9S>{4AIo*N zV6`oQhMbfG?m^$#aesy2wkt#(r>1Z4;v>m)My6_Wt4B6oK>@I+!{-l`pyuP5WYGwF zydhOfQs-XwG>8<9tqYx_cj~8{+^W{YFN~`t91GdO6Qj<0SKqs9vEk=dodV9 zc~G;@&pJUFXMtLGSV!S3D26Oi8h!m8?b1+XEQw0BZ9Xd%)}M7o3G62sH@B~Xi)K-s zZrpi^?R45^A?rEWllHtA*f`z>{{vC`yB~;5&%ysi`5|M)nhgNJ5O{bfNU z<2#7{Y|8Y{+=RIai<;!{7|rUsNV`9^0I>nt&P&q^92xuO#b8qb60hVlD87Nxp!3pI z#il3q_!?P9O}Byp@*;EO&8J+TX$ zzAWjA{ZS+75ki}HT5{O;@2@-jNs=)e#f@w>e%E&WCIsg(9rQ8zKKV}gK?&#= z5E4w8p5zTn%fIB=P*t}(wq`X{So{l4GjLmYAUDun3ID*NuC+IHl?t=6LLA~UolqE9 z_sgA~Pd94BqW1gh?m{7vhWsiEOMgV!*t~0&3M@`ZECSm(AKw*Qc>3GPMt_|4xlo88 z@scvnl7xc(c(T!7^jI$y@RqWL#$J^kUwuPaPd^bplh;#_;rRc=!Gn*Cps4*6-uu;lZEjDJl9Yk 
zfp7Q_b*-|{A+dAph`~}Xv#P)3VBD#wYAF=lG!|IVu=qU(Dy&Dts%Rs^uwrU~*A5sU zVpm>qP|9Lrrcj8a0GxIZTPxAmv9+?HXDEG`9U76YZZYdmHyCq#MpMAj@H%qL-y8Vt zhR5J@SRPd3EFLI~1KSVJNx<<6y#A_#akY_=C-I(#9{gy^c$qW(l?P*kZZk}3iH9E| z7s@rX{@80g;ZfoJPeTi2aMg>qle+5!XL65D{llSHNEMeR5-Q7 z7DyMNAS{Zv0vg|h#kEHz;95KzW+h!fh_9y0~&uC z0TzEMzihG7>I%%~r0c8s`gM!9-#vR%5nWdFVkP)Bw|1<~( zN$Ue*BBQgi8pp@(yES~=wlFI_iW4cKfCRH@{$1Eh|5Xcj*M~L5MtE2-4f4hLz1*$naq7%y!E3CWM4#ZisfdPYHb-F#ghefq>iJwG(epoYMTmi!sW<1Z zcSZQTzLW#ti57-8)jmgYoI25_G0wbYUc6P`@MFr&r!g&ONl;2H5pVB!?Z4UBrG3NF z$vBM({dT2>2R&zuXch_|r(*h@&opR)k1JecN-z(0_Ccue2@jl!0sT;sEb25R)sH@I>WZN92w&ildXYOp8Q9ksxtnC>@ML<= z*QIUhDZGJ78NP4RpdJQ6tss~*a012+mTn6E`7jM%mB6f9ezM@60+rx4jB+gM=eYJx z#`h7$%hZBtK(MF_*a6X+Rkxq3wSY3;pc9{+SkyLuA)cX0voDicy824U)D?ub&VKh_{-Y-v8GLwIx zv@F;S;{#!N;|mQyg~M^%HwX6$gk>f>ti*l>i@Ha*X0%tyL3?0S;F$*TDjYIPl9G~d z-_h{4AX(ZvH=@x8T$!aoL6m5vFZ|>lE3@>0Ba8{64POi2TCCx-=7CDCD#)evUcujY zItcmu-z8eOpxTQIc4<4|UM9a_<8*7(at#k&Kc~22A+o0P_8DvYjdcy|d#L>U^r@B3 z<7Zq>PFSyLmTm7UXZ2Hf-F z<0oVp6Ts{ux)4H^Uut{Z<5qCuRW* zw7RrexR?D8!;LzgKO5uVEwtB%NXt$%> zzV9xLy_Drh5#7Rj;J_#SF`8p{#ff(9r4+<`AzrDYA&*hD-IXCwIuD)^_3lWv73L+y z^P^E?r1$VL;vuK1_h|o5O$oeo7McSdI}wS_a%b$~k9?rvL3x)d+6S}Tku4Ox+qB4Q zDx8u$U(cA*ZYg>;i@KQFRP4ov(r2MAx6k6$R1@lY$ftZYg@n1<`P8kZN5fp5e4T3? 
zjc{1h;h;~#TxmX!ub~5Bu3X~n796l*`%Ws0a5??XjdY^;{}0Id0;K=| delta 98557 zcmbTecbpZq7dL#)WbU20b7y;B%I?zJR(fB0XXzll!_sys(qSorh=sCBXwpQaDn+ruXPa__WC$rZ|1NKQ|0QNC(g)x=VD za&w3DOKdW%Ps+Ugg}rH|(koQRnD=cJu`7E~eql}!d-2TVBF?iPmp1WUckPnvH}e;8 zCC-qW*NQ=M@xh=_nL1k!+15c@&Q6vZSGq6Be2%l4YSx`o>DtZ1+wc zqE4Rtq^t`0p)tPv0>MG3ENHZ+;kKOMNRLy6b$i^o{1+n{p^CE+1N1OewAVuqzgCP$ z{g&ik2};k;Z7@LQ4vzK8)a7g->o2jgm{Yvv!Ndvqb8AjVT4b%qp+TznxOJId6pJJ( zmYn6}2cN-`}%z$U=ICR#9hKObk92&n)3UU3=Wit?NY) z)r*kxe(^`<=Z+7}pL;kaIHk{!UL!{y`)k)epc?J>X%OMG|I5a?kMnDdX^&=J9kWYw z>WHp)?!mHDqGhRacp0CnqYOST8DG8(`}0q&J`&TE!cog3(&#VpFi}xvD#zNkt#7`p zJ2f0Sy>MFdSfb{!a?1d(3q_&hVgH3S_Adr=^>|jiLtUPx?&Naq_bqj|1$VOBWZZhdEi#bWk?`gT(nC8~+9M)r$>G zr{)JiSFYq=`L-cyDD~ZpV5f${+c#7ikU%sb;VHsG^Do_rN0tY7win_aa&ste5E*34 z?qOU==7sS!r>=uVVxN~e;arb10^7p*7CW`QT+4g%Q(*N|ep;JPUW#C^42kC8|G*Wv z70Kt^xqP(i)ray#G}l38Au(Lq?QoH8&u*2O1-Skbw92Ig_^Q)*i<=XFk+TajhCXn) zAXm4Y#&cVzO>c+MZ5J-ns~c+Md~)-M*l^jYG-L7y`j_So;m$05=i9wk59&k*^XEns zmbVXRG4ikKP9D1kf8fr8){72xx`dg0De~z&@X6ElIrhK3NUXzGBAxbY?fBr2uLaT9 zV!^OrxA~rTI7q%d3X7+92eo(tjXLmUceW31*7A1MD59!S4yxqsQL#nMr;|J|sxzMq z<$ZyAgZUbZ*;GWP4CMv|{-VKb(NHc)Ib4NuxfYG&7BrfN&=|R3C>IaxAIc9|(E_9&D30mT^-wTzNTH3U?|< zSpR5ryq756>x`1yaL8)*$@N>DA=tTv3woUjXjr$x^2-2Y$_J8nvJ_5g&B*g%^7w9k z7O8dj@aRHL>ex|p3d;i*xGhp6zu--wPGd#)zegb$=n!3Qx$AoZZO>aABU8r38h@j^ zz)u|J(!Z^5uKMx&%0%kd8T2n+X8y)C|49G?t8Vd2ML8#s8lk--L_9T=SE99S*)m3} zA6O8h{Tbo_bgu(YyQrkr3$r(*LPV`V)l%AYch#1Ay?s`JLPP}${R8*6leMA>_7ts9 zA!m|KI2C(%3W)d=8S_|$4qIp);ghvKCLy|aC)EF zd`(DPd7`-T^7bLi>3wP0^EWo+)x}zr3?A}7q8|9Jla`X;`-r?03i9}-EZjIcMrMrF zq^L&0^7vRSJ;39%EzFw&+s13A5MNLwxo?uzPnMjl^$pCKthE<>>8o~8 zwS$38&uP6~NESZmbKrVeqU&Y<*W*(E!s&pQwB3nZD^PTk7O9Dulq9RYqxA|be@DB> z92JnU3%5)S9|D zdN)2}7t@Rqw zdQFCYCL(3xX#y~e5{p~#wksao~TpWqA(}Vh}+4v zZh~T~Us07OQC_<2%ilCQ1H|$@V+Bgkba3 z*myZ~YOEpOnQVYoI$NpyxQK|6H@!xW$o(krx0HtHKynMg*s`i?p zEig1`Y6~q&)E2247OT3(8rl+zifdz4yGsr2b&E>5FUt&VxkY8&mlf*sN<(|YqEt6! 
zwMuzY1#49BmI~G?XxFLOdQ;nAXd6{Cd7BLFZ54UP&^D{*78Pt&!8VokT@`FMwH>Aw zFtnYj>+f0GE^W8!;2!n5S7BnGrR~=anA$;A@O}02fvFu*Wj|CYhgI%%hW3$<+GJ8o(xEP6&;Wb!yuJ1O!WKYnazrz~VYt+Jo-(<<#nL;FO%oVBQd z_Nl_#XMXLRNV(sCI4({x772tsr_PVcT@wvn%Zxsc2{-ncNP4h zdhwj8-BTa$o7w|Y`_t4On%W~%`%9%gMj!~a2nJq+reZ<`IAZh>c^;rdcvWC1DDzd! zG=&c{PLL@qQ}|Vqt%4v!1fxG9M7@NXB1~lpH${Yck5oaFDWVll)|nzkeT+3l9H12O zrYK;Ff@)4pF-0MjwXi7?EXqq1iKa+0MG*x}Q56(3L~&D;uxPg^smhizMQKx%QJ>1H zzLZn3WK*Q5msAy`SzJw|s~|(YmN!HNi;jtkn2w^7DJlaZQAL6EwQ9JkDXN*Gy6Q*` zUmg-_nxYogf~akXOheSM=yUB$i_VL>DyV0Q`l`|fmS`v%nWC{NnyB=qhG=Gr<_eY; zs@9e&mSu@nqP5E2My0hiMLScpH$?|ibTmaL43&7s5S>-8x|pJ?DZ0syYE#}v61d(^zDDTbS3gegXvVwB4DtO`b(VvPDU zRt4i!FkS@{R4@@^D<+v@vRVjVn*uFP@l$m%)fCh4Hr)`2VngQEQvBMMrg`%B` zKw*j>Q#1aUcuxhpd{pe1*sVVAQLXP)!9G*$H~3jo95BT}Q@n484`h02BQlUwQGDz% z#-M8;53>7*A~jH?x~RaRhYax{KywJ~0;VQ1y|0LuQ)-BJ_!F5{Q%utjGu4vAL$RlC z9xOb8Q#A#c*pC$X5y3LMwpgm4XR0R;R`7GH}?^7L|laoMz`sKl4$)Ry8Iz9N6=9Ud(+ zt71bovqS;^H>irA<1tJuGkXKG$&EyWu(evUdzL7|Rprzyk<4}FJ6U30)o;ajhWOqV zSH%yuxF&wI#dUxue&WrBxM7Q%;%6qy>d-^Cw>xF_cw3XbuK z`{KMU9*95X$^yZq<*#i-mEfOk@ld5c5`Wp^v8>uw9ME;rSw3nj3JOj9B@4C_IyN4FD0ygp>tEqSe-C>C`@_hbAL zM`aJ~H!{UWxi4%zNG7!xCBzXuST=1hx`l)&Eb7Ca9Wun`gLnM|BiML?SZGE9x&G-|Z-3qLOUA2xC%mQClyH z5z>p}6%rm%Fc4DBMCcpGR9ZLNds(M4p$ltBl9q$9ST ztb!D+kFBSQzXEH!h%s7F8YYdNu4mYKdCcLo%OY0R?=GGf74(YoWOq?YRMIQU``txV zQH9?Cw{Jzr#yvpQtE#%H>DA@N9-^4HhC)|Od9H`3?5%~W^xA=lo+5#RGW9w(Z^7iz z>xw^Q?Oq}|X{a{L)(kbk_0${;QNaiWV|~4WtvA#g1YYhXRt0l)nU*Uii85r#&AFnu zypSvI^X0Vj1cVu%|?p#%(;QzM?rEYI#84>Hd>UHtw)Pea?)t=7x$AB z$B6x+2${0hSP>_?j1|kdt^9MW*x^WF-W?~B0vE=K*I1(mGIxR)788$C8}T42%!8>6 zL;)2zhuWZxM-zl8I5ptS6J3OJcjf zP8Ye6npQ=Qmm*0$Uw`sEAVnrtG%pm7W2$M@AwC;Le%{9KA~r?Ev>GyIktpr0=_gIA z6==8!R4}o&mPs0~29qB~MtH%rhg8AP>O|Kycz*)2)s<%#i+uu{$jkPB7$Wl?fW>X$R!l|S$gF)|` z6B}BK2b89@MB6$ssphbr0%cwo%>LGZI-bgDZ9s}EEfdKx?F_B`eeyn}5QEoP&Oa3B zAjdBggSeyoewip@bV3SNV<33BSi^aZNKkVM(z<9}5$jBeKtV^uCQu2)x*@ho#eg6| zM=1ko-4P@DlRw}?c&gaj{02ZnOzVM8xrmA(ZwtVl+Qra1^)$4e{ei3ghSm#*jow-x 
zVqT5{sU*v<6zP5tA2&ts706jBx@clP6_Y345OEQ!^e(ho?@ovG9&|?UNq6*Kl2>Cw z6<#gUWSiBZhUlmDm-AMGvJa3ytro?3pbUFcl;mt#^-WQr&_Hbvd3h17=I^w@PcS}) zA|FvOAP4z4n88F3mCvs7Pi&<2Z9-3SnPX-@dluN3hl*OgFjKyWSoC=aHPT^Dq z6WC`TgMDVsu(&)|P(el2KqWs{<|-iz;$;71U5cO%>EKxwd*{Dj@6l5Y#of zp2_tsZomyqZe(&}^{I);O)W?To2#IO$t_LJGP#w>txaxYa$A$zncUvw4yqp=6&5-f z{EX^;XY`P}cr){EHpgfZ8Qj(6ZWeW992PoqcZ+*)Pm_C@+}q?n7Wd_TD(J6zJU~?h zwt6UM8$3wWHrUTYc&ML;aV3LuR7$Su=x~EasOLz7!A}om@Y6$ivBlQ#0o{-FBu@+Q%nr^!njq6*3$ zZ;Kr~T_$!8Zx!hOjtF?-5)Gbi@N))NLx%w+z%pnnBJ#?g!Byno?P8HW1GP$Xhj?g= zW6G&fr<%NyuN4U-2SlEhH(>8k7-~~2= zI@@%SzBD3vhRut3vB67hUdkx8jF-#WyF_hXA-DAAf^x|&5t6r(SJ}Lh->^YVe>C__ zo7aGN@>-kM@p_v#@J53-*`QJjZGMN|w!sk~wnf#u)mEg<=Ituj!2w{)=AFFQ=J$9P zlRc={=w7MWIjIe^hxD^~xB9S01$%j)&HH&DQ}Cc<B;8wsb zC<@8MUBKdW!%%!^^I^cOQyj`Z9c{y+kBjUl(Tn>k>V!(xL}VcBu?;2 znYc%k^n(vH_>9e;AV-@$qIUdQn?L2xZ0HHi=fHVL^ zF$YMUs$moUL{>Q?(t+DSXGF695?uz>UW-=O$yH}WX3=nzC%O-i9-wc3QZkfu>yc6h z>BRy675YZ{J`n|~eoNm0-oJZLlD?(ypY$M@v_}+f&{g!{0k-+cs6eA1@bwyoy4xqB zn!u>Y#VUd^lIK4GD~jnOW6mOi!IT}(;?pgvt#{GN$ zyLfg=8EJhcpsWkD{!EndIQ!qAi)-q)gs8Qp%-iLSQj%7me0)h1mPap%Aes5xKeYpw zz7VZkrRcf|g*>Bx*-^SH1pd3&*sp{VPtW{HjCSS4ZNI2g@ev015ihs^eF<`!vcxw6 z`oKV)Z^UC)a$EG?qA4E%ijOpTW{Ka;wcrgf^SR$S)lL3R9Mv45x<0tu5}EabsB33X ztU*x*MH>_&AAcvJ1KWNOh6~@qW(6a^1AN~>O9Pdw+v?*RU{*qB{*A^_*8OA3 z(L(b6&p6~~1X6B^2d>Ji>@OnKk?#)LQixkYE6~cLG;uPqJzyJuM}*3+(sk9}6~Bl! 
zt}15gfcpow1`}-!cDv$aefXC!|EuOp`$N1N?2N(n-AQjug)U>N{KcbJ0uOS*f(9AF z3x~1rvZ-P|6<7xQEw;g1;3e4N5bzd=fw%A~-ogvsf+IP~;%LQDL~^Y95NB~b7f`W+ zDu7(h%Y{_{$()x#50g|y;45O(ZX1)w#SAWPf~P6;u3%>2Mn+H&XyYK$pPH{ZyKpo7_TW zfuPRoFdu?K>SlriF}RJvZ7pia?JR1+?NtDIU<>YGDpthnvLe7kXKeB9xr@nNReRl3 z&|RhUP}TNSuf0^v;Y0B3tNPhbrGT#nE7FI(3i|_9kgdvrBk?k%eC>IN$wO7Ihp7~> zB{2+1-*6t`*5t~gaW=G|h8?Q{z&KKIUsZUV3c#3*R5+esD$ZoQV!=Xq zlEsrbFVNZ4=W3yTd^8*u1#^X|vP=*4Ck0~t`g#tUhJnRcVi>`X1WwudFowX14uL5N z)DPAxv)+aYtoXvHLNYg4kCv|;)M8$1B@B5wM1QP*hkWvMsNRaN$)qs-J3R@IN?*7> zl{d;4!}aR*v1iZVnKtFqtABGUGYBcq(=3~Qqq|^oz_##<2ESzU%lwMXbLg&3FVHLi zZS!2ZE3F8+Q zkD6}tLSAHJ=Q?ZCT$*R|V%jLfBK6W@KOK-YBlVgUmhe)Wm+w5$9;w%hS%W-i6ua+Qm3tjp6Bw8;N^Mo4$OLU07u<0kdVe^N4SgwfHOBY(gAK7$= zApJVRM-4t^^T&MBV6ZEv`HZ|8t#|iB{&$u?wfQsjIST?J|2YOv>To`9NJuuK9@Nh>q2xeY{R4C+SJ%Ixt<}MAyp>N%{n_fi?y{P0~|1 zZxg+ZT^ZU%T8ZPX+SNADJG2>@6X|u@0-i&oQs73wq2lnH4lZ~*?Z93hK;CG4tAn&n zw9`2|s*>-aAo5$lp==h>n0E>BO(%Kz>A7a`EjaaI2ULw~D!N ztC$P7iXB7EI!?hi>0{LAp%A`8#}V^VAP)d5KFY1+X&1&&=p*Ffhg8#`6A!5*!rGBA z;P4xClDX79@{BcT-DBVdEY?4}iLc0uKhluFpi{a3?|cRSm$AluDxlG6z@vfZ($vBV zq?Xi+bKa-uFoZ-vwL4HM_N2pzsbSRce}~gKK|vTs)j1slu}CqD=xBt5jYnrQc7ui1WFUgsk`hZNsIaq$nz}bg>1l-_5A5cVVgRVbDm4-7Y|IVWZ zml{&g@W7Z9UAk*k2ON>$P}P7Q0jsZ8gpR3F!C4Vl$sGIyQjKl;!>KI{#&Y+-$ZFJ< zc4IO;0HjYRLmz76OacALw?FYsL0pD>kEjuF`tT7ogm4B6x9~&7lb^p&5#Y{K-3cCt zS{0BWr*cD@J}wckE3jetfCGj?Nog`6=bQ;3O{hLM%KGVgW~z=18hiKzo=|9FD<00r z|I0rgujyZ!jy0Zc{>=iWR)lC>v4D9QG}0h!*0m`UHBcQ3XBjXsg$N8xeQID*LmyHa zsi3h*P%bhxwWt|^bzuU_(u`V~l%<}nQ~>oNQyYWY8U%hN66SaWmL-xpr~v#*1a(q5 zAUt8}Y)}^vwyp+s3*=-#HxSgtpzeSM=?3+Xbvk$p$*9}msq%Dry$tu1f0ozVcIstQ zZ|Y-1h}2gF|JrJ86{5*1PBW19xhK!bYPlui9@8bkwatO4ZXvNjD- zNkdgIjB;dBtgnXrrh=XjlS{*G$WxGBROPQq)odIctI4UqiI@OT_lH&}{V_U#Ghd3K zhA>U7hjatfmQkdZskMCZXj%766J$Gz= z8OAJoqx29dpK;C|Z$6_Z>-?%b+fA<>;vRM@W71Ut<*O=-ch}qUN;$5(UiZnj#tEGR)9SfTr3|2;7mPJO5HwPHlqin*p)$WjhX++3m8PvX)t<9+?1Xh>l) z{R6#bV8jQy#%#%F59u!!C;`p{f@|y+uTTis-E!E&D$soDK>0G^L;Zk*?B<UEI{7OP;1K}#)2H(*c{MawKDU{tY{Dz*y8tSEXz 
z1*=u?rV7@m;4Kxvj3|oM`5~*>VA4iQF%&B09g{Yjw8bDO{!0-Q|KR>q5Ksa1{@dw2 zi*^z8{M#M=PCfUjl>HXCyMyZWeM>QQ5TaM74}GZQu%$LF6&$qy^RcHAsgr@0AL;Ei zJ|!OyjVviYKB{*Pg52t~L1zs51i1pK$MpBuIEzl=PhNTGNeToV*HhW}6z~}|M&7RF zZ6aSfp=uy}Vq?WNwZWlWvB8pjEB&YR)ZX9O*zB(w>YRfO-liYvy5i;$yP;w?>1P|f0)pEr z_(cVGRPZbPY|w8u-G$ it5T!&aNIja~Rpq_k;-pl&_vvvTjJmr}Ze<)SfxxsVHv`@I(i`KCK@%TlRU3 z8SlZShONl?>t$dU!*0+ACw%&Ue0@j}`~ZT(zRupz5Az+61nykW}lu|EyK1} z+hG=#VQ;GsW;yB^4o?*u3w9LZaImT4p{LT&rK1km zf=c3GqCnI^4tfCW9F$#zauX5rQg@V2Chud&YhCyz88rDR7W2^k2UO$%1wEoHbOnEf ze4(B?fMtqY{*_*{^;D->@dSU<={Z!VK_Sr<&mw5DGuBuaNFGb!gf>+!N})WRqId=v*5|?ezd*(KMTm7? zrWBq-7$2cT9QaD{G5N<;eQw-Y3gS-zQuFUt_U(kQd-b7~IP+TU1x^`BCERNn+u@8JWMuLuYro$@uNCig> zMamA*F+Wa0$1RM;36*?Oy`J*pGzU9J9OP60O7)paJ_klxF_oXImkX-!7pllb75mbl zuMGOyg5W#veUmP!$Ym8=QNcGB;ryZUeGdV-g~{-PrNrVEj?X`-hOhUe#H^ld^jh)p49E9)eclRK(;iUz#D=mWKgWVK~*iovO96e9p=156)RO2X{89|97>D8+t(D*|S}E?KmFC`B z8Sby8a<*2E2W!bZTT9`&TABmLD%j-Aph`5GdXj-2!=@MfDFuO~6jw|t5coSKLKof> zvbp;>0*yh;hFI?_B?!g__#woN^RYo1;I*bJ?$wcLjDRe0H2CPA?4<>$MW<&`qeRTZ zgHiKBa01xTyeKs*8cDa;-U&mOzKD(KGOzdL0acveh9}9rRdjzRut*H)w@H zE9Du>6PdS)Qy(a0Xfoh`qmDtV9|QiNf95l&tn;@@zvp|-t4TtuMP98o`LsGzM5{|>w0hJY@h)0p z>ZLWI0a|lzV$?uevDPDVe5w*-zDJyFezD6#>Jpp6O89AC$x zDdMwFhF!%rVp3o%5L44biP_FN6G}KSkX~n^oW)Tk8|y{zX~;mcF*sg60m&$4Emrq2 z+MI%@NxgUkq{2Nh^9I1aZYyFIBx{!xnM1>WK`N$Zs7>$6--11*OKmr32mTbR=}d^n zSc##|Y*6zBsZTP<1|goJjsBY(lHtE7P8FW=h%!_?)_t;|T~E#AM^p{zZv+)iN{EYt z=tr@20sKG_ohHbGp`HSb_B(JL zpl*1&?B*e`o*LFfXNR+m0He_#$h<{2x#xe`%||z%TO3r6D5#zct85mMnPHw{a0coZ z=Gh99t*QK+j|vA8!adCj#4E!Wt$?i+)C$?)VBfX1!Ya0l-VHpL?0MgiZ|u>-rF}6H z!iI_-=#@JM#ROii=m`%_?uG#l!dlx{=3OeWlS<-IYA2P(qs&L-FQZbolX{fF$H0g6 zJk#{NQadO|Y=pqE2~WfJ(+M*O{!84DTBDz@pu#zr)^l<0od<%J4_F!b59n*{FkRAa&}HpUx+1*v zjR>Xh^F$0?6{YB!Xh}bc?sQA^fmQx!`bCVRJ7PKgD&D5w#CvpCd_ecaF}g1<(x0$X zeyE4B(4$$`%d$sr&0f7XTlzru>#wk_FXUi-C5J%Y848Ay(2FLV~YUKU)Ok0}i&nf=>Vp4}i=&Vi&X$hsmRL zf)gaTYhb3A)(pA^;7#kRHRedfOno5LSLb|S3z8^DPv?}3;aKuK`51?f@%XrsiWHI) 
zn|XF~8Cj*drxk<%FEsbO&n0B@7M`0eiUQANfYY*+#VIt1Q*q!+r-__F&v6BM3DbKM zSHg_0Oy{^NUE*qVm#gDMPy+&rS{%c*xiA>u;^28pa(y|orRSKZF)-C6&_2ua73Z}k zaJ4XbZjbY32kOS1s5^JYxv~olu@pKs1vLu6Z!{(IH3~%HzS@z6joS8GNT#LdEbVHUZ2~NTwn%1Yuk~u%)%< zvTMOoJHywnBq(M{kL=aUQ$?O{=LyDf@oGCyX;<>Nqg(l3Z$bli)9Yji$@urB27$hv zJ-1w^=eK5;+9bR5@-zq0dyo*47WllEr@JfE^zAEtOV;Y|>4B=3_4gET#kOOXEN%v^ zx~CsRq~V}y28GJgQE=Z~k!HsPx@CKox=!{huQY~5{9sQPlR6jxHB}+u0N4)vuXbN2}LjE69`uu-?v)4_ii83Jh7` zDd*O)qfP&ta>Wu=$Bw0*1g}%YH!DBRn*wE^`Z)*fc)4N?jz;5Fd4if`D18kqHdFu$ zmvDlGi=Wag(D`&|A1tJlw?K_68dS-k$_7=%DRp$OA;m`Y>(#g42!pEQy&cr`a%vc` z*S#=@%Ct?Mav}Xj;EQ+E!0eI5P*IIQ`Wv3gnvR{hk{mu1tv}l0i7Z%?irC;Il< zObKgIZ5um5rVSESMBd)!N$}St2 zet#n@MV?#l35F63zIWxQzX}GZ0Isa%DBA~`1v8jrf*DLI!P#vn4KoN9Fi;RdCZ?WH zo{cgI7P*yZw58ZDOEC({6kDBeENtzQm3#usTLeu}!BiDYQ$?ry)tSbinN}ZA`R7fV z1(P6pL5PJzD!~$vjYj6Jcaa#9P|yq zqi0TaucF?+hd0HswyjDvZ1A!fHn`6UHqLETY*4^eHsoh-*x1S6G-!>D-F&UwGSG@$ zFx?XdeZ@K(d;11?un%OTQ+j#hJvMgyO|BK_g{_|Q1>UB24BBji2iOWpG?++-J+#5s z?aWW=8Y^pW^8^=rk9MhMcB>QcUK@-TY();J0LBjnePDy5`cMwv=4qLJm|zxigpS&D zjCR^erlfeIldvPOl`zSXR)0#LDY5o>8)lyuoZ0Dva2sbWNVR3`yPnnVWZdy1_lZl2V%PB*HA|*&rrT^;r(hho#C&2;b(Knv80;&|b_FD+>PkifH z#}KfL`QGz-jH6(|nJ8Fm3McAjI4fmQrq(Kudew7;c~#)v4<0zOccy6B{YOs|!`9y7 zlG@t9jvqbuT~gSta_pMkaNX}6Cg&Dp2aa;`(a)X`kiwu_p1xs@pzzG;#Rp!`AbLH+ z)v5v7qB6FcH%Mggxz|4S*1%D_D~G|=k;4s+cw*5OX~7sI%H(JRrhedBVnGD)PtU(> zz~T=Q%Xn3)fcjL>P-3jETo_7eW%O5&6D>}H#UBFKlc1n-BPhZpRFRS@aNP*}%8kI! 
zdGI5Eu*Jg0n5vRsf0hmc3!=#3CW8c~z?}d=MaRZ3<%x}-#Z{GoUsV|RDH)WiKU|fD zm0xYnR6!j>+4$AudMdfT0SiCMR2F`j+(-qD4Q^s^QAZds|8prFz*<1&-Yw zGC&puO4XeQsbH`QhNxgDEcu2hbNxx`O^Kx}Fa#Kg5st5@T&H;p@kq%JK^87+-l#D6 zRyst#jk^i(vx;QI2!ltIo%%5jYit$e9+N|-lT z&ON6411tRAd{59+;MGAS>>t|%YKD1V^1x5wShTk=ToewZ8_vF4AQU8MHB9btvRLHU z@X4w1X3YYyBl$sncwRXYOg9_}nB!`|FUxu9;W4H76*w6n1Dp($UjYoirzlSYp3nKp zdl9C4FFVpNM@|UeL8;zhqvu))B*c1y%*D#5U@`n(E5v$>roPV0RDH`0UZHI2SK0gq zuQGVG4Q^?T&2RBq$Aa*wKY`rR7(G~llHN_qvEUuaD?E*4Xq;E~!;^IjowT7OI4RGJ zj3_EQTonTS+S!z(_xfJgJ zq0HBZr+Fh}+f;9Y|2o7J6l&toaKlZV`_EJe@!ixD|4daj?M@9=+COT5nBPrp^v_gf z)Q-Fv|7!?>e>YpJS0W5~B+VQ9zfVvH$e+@@CB=RCBsvY8niXlt)bxMkfsVlalvc3&LDCKi-f-?up`WfDr zeHxC0*tuAjUu1X(mmEjSuobn!ap9uZ2cpso>Z1SXy0$*^C|nLH?^Rb814`r-CfQ#w30VY-mdDch>RtwdCgFVKkfNrg{UtfYf`@_>=rVn%2`dlil1K4 z)cdu|vwxTNM{7B;g|{zKue9*K8|;`eObpuBPQKpNt2p%gUA^lq$KL##%A(89xW(sZ zxp{zBUAOhy0B_Y4z7R-R;GM!ah{wn+(pxp~vxIlAQ%~KY<@cy7$r?YsM5WbI?;U6J zTH5*Yn!S*F>ytiDo zd_d&)%jD)Q4v6+vZ-4hyd&I0QO*6Ma*j73I^Mvwm1xz}TTtF! z<<;fvy2j(sJW|drWrWF0+enlZJ%-`} z8hDHq?osUh69r$BH+_b>aH*kXocB5v+-r8|he0@-3^J5`Nio^VG=gN;Fx5(6P?#~q za4>s&P{9x6*RckSh-Be7;}fUo`-9@UrDD3K3OQvtuB1WR5wdu^5r(3bDxbl-kw8Xz%h%1NLs#O5l)E!cvOWVlvn3t0?nU zL8ls2$5uMk=Xw~W+OJ-0GmLpg3 zu8z2dypjjQEsSTVT>hRBhYH_&&uHmRqhCXd-;f~(4Ry6quY<;uYvuMlJiKHN>>l@+ z?s;C7U65zQ$`?O$T3Pg=vCmz!yBd5wNA@^r)PvjrMg|^(ymHPAUr~ATq|x1(SD(d~ zoU|J2VYQB9HoD*Hx;HYkSo(iN7t>A|Yo5pwk`JAUEJhSr%te>VFXz1VPZPX}pBiy- zFqB$w(|9P9`}=D@8?6J^e>OJyoSC|>b5Tn!yJx6Nx_-T99CD>6srh3*{-H9_50#%> zI=419UDj%^D-4G}Hikp_B%dp1o)^wNg~^Z_=OvqKq24c*Vm5waOm`8d(l4>+eifLW zVoq^U&}neTv>oh4zDEt3bCie}FmWR@?$ez|9cdDc1g%%5qt5BxOK9HsFC+4BW+n^0G~ zMb9=X2C`3~DTF#zht_-BkH51&M&7yBLE0SLT+A{objmjVsciNRS zBcJwR+kCTaH2e~bb!HP6-P*f^3!#m{1az8KoQ%wz`ccGcaX1jAT-$5 z)ioyVJ7PxU4s7B(G?&O-7(uGv6mJ zB-+@w9~XyMsd$uPC19$UVHA{UTb|sZhO8r6_)11OgH-g{YUkz3-oBcOMECX`a}wX` z)Tx==(#HYo_ddQeE`PE2nMV)hscfIx;KB#_CY#RW-d=&fsexYCkZEe8xjfnDmnTN} zaO8%&Z2Apf>40~nFT(BaHmRYu5xn5`4+G$)PuU-VdI<9?Mbv 
z%E@1T@)ehTw)+&rAujtix~p*Vff>(V0p_oOC3?DREhyp%mbb1t>)_E<-?#4KK7MOr zQZ{zM>?fBNIx{o37>>7zH+)JMo_fPKSU3%L{cYi%^$>ulUmewOstogc8!J`>0 zPMs+X^+#cMZsU+0^k0Eeq}Q4g2e+uz30CtbJH!^~mbOwNZ3~D*>zeM&iw^Zq*h2t3F_BHrcbm=%L6&S)c@k>gO=8F?nZer zx%&??tD}X>OaogwT1VYk{p^&xcVNN-GZtJk9VtU^Td{J&?${W4d5xvE=CggR43}3K zdHkg;88pOFyh@KD)&`e!9@$hf0MRAHmoBQ(hMzHm)ZBZ+w#gdXtQ@3n+-4=aI+}Vr%3wmgXJsR` z*)Gd=hbHM^rwrL+zcmPp7OC?BT@F~!yF@AYMxCSb`XOrwC{{(;VwW{X=KXAX0yjUj zX1O_bxBD_yPC8}{MwLGvvtD!?3cY!{w48XxQpb$X&R82GoL7{Cd*XzmvdVC^W?GN&torqUUN?fr`WG}=ik}?f;)`mk7O?G0B|~3%JU*pPR_Bz z_`O?Ht8x^#LH;v;+ z6QmqzVw59IC*?!aSvksdh5w96-IaNWYhK(7Ce7Kyn4+3Ly?mlcpq9tKhm{MB*WW_# zi9)i8;cwyXZ9{O~M=mw|&7%9-lu2a_>gQNH8_M$~n+Ca8TqVxd3(NARznDH4Hwekx z96KQ}-t-UmX4F>(U69b!QkGoxIVh7e4JdY)8X{ezM*kLYkD58Evn&+i&j8w5h4@#y z{Gz943j9UfHGIGn5slM)!QM0C`~0 zpB8y!$7cQt5MyVln{u+)G+?X=!%52zeYCBDAd7KXj>(}01l>@ex_<1CZktc8#!;Cd z(T-3O?Fi+_9-(B~5nR|*hM5tZXeu}MST15Jg-;e2HI=$2i;J1`vPpAHnr}9td|a0U z$9BW9)3l&-xo0ZR_VUWDy*!sSxtyu|*~=?gcLt}LoMx&kb6{v`as|_oZ!0*;%dF;p z+~>%dCf7lixvl{}bwFKTy}(aBLy59IDi*Hf*&fKVkp`i595*s>R}N;~J;-D&Xoumy z-kF;lO1j+*Uh53CwgS7AAF_!jcjl<5tvhx~k7dw))}x{b%?XQ5tFww#cK!-$i| z%PJlGbt+GQ1*aqFR!?qzP8EAj#b!X-ySaluE@&q0 zv6ZWb4cCrC@>~c15c7GS1q)ApK{oB^Pch+z4)vffeE%`Dtf=S^w%M z_vk2s@$(N>f-K#|zgFy~J@V5o{^H)Xa6(tN7Xs8kfv)}zzKQ{Zb{g~^{#bd}fu1OX zFl9aXk1z0oh6^#wN^?P9P+4G2KmQAY_sXgTffyKI#4*fI(N>~c2nL$V@rL66hqUj2ucGMso|(P5ySY1iO9~`} zkc1W>34s6sLJPe^5EPMKLhn^j3@W04D2OtObW}hPK{QuDnn;tbAfQMuqNu1?Kp@}$ z?A}m(eBSTfAz~L!cr3CBCaC!=|%fq9^K9KE#putg` zangT_^iHZ%NJR}A&9plJQe&reb_ODUyJtQs4rMV~JZA;-tAvukDn*4D`XA(gJijxv4nXa7x>N!kna<;)soKAi9yD5d|(g{Y_4 zb&v)1E?60SqQXX-sX76M3xMj>7;ToccPFL0#TK0p24=q7!H%J|Q~mrLf|o{{#jz%? 
z9BnRipw3IY{%KZqjL7QhOf^rmDoh!tW!=YOfMtW+!e?)7 z;88y;Gp!040w=X|_#--@q6mU&n-)T83v(_WqdT1usFKUg+L3cOHe_@75KPK+=nSQf zX==oBv#x&{hsH)S0j5pklj!c2_2OFCan>(dedlk=rG|MTGV_H7E@fo2F_W^h;tjwd{o@qC5O-!=G304wu3biIm# z_o&EuE6ieiyb7)`<0HrOO$N|AQQ8*Db1M~gd4<`s-gth@;K%8m6FUEz2!q+nG=7qT zZ;9Sh6hniY_CWuhD#*`Ja8~E%UyHIH655JcZNj71YcPT@IrnkDAt)11y59y zJ~9Wpb)L^z`PskBiVTW;vzgFVfd9i95(V82%o_HpXe}X~kGCmU#+It-ADgjmz-|1- zud8isInMt+Hk&~kE_}UN+*(1lbl(zqM*%Xj(Nk$_IeXHy;%f4GGgDh3!c<^sRHFKJ zy%|@01yb6;9>UGuwT=HIhpH)1Nbz>m2D6O>UXT_=VDeV8m!3BbYB&tr`&i6huVbov z!~RCoaXt6G>kM4KdEa#nT+h4jx*4wL-*;UV^ASzaoGr5RTa0V8`Q7WcaE<0?ymTnR zJGgAhVG4wNKjc4~$ZTt(>4IHW~;U!uj|JkB+hYDGQKY@J-C!Hmm>N=mBvGwoO zzR`cH{oRc3WwRakpz>sAqdqs&+E6yc$G{Z``n~ymSrz{eis%kywG)3D6z1(^lTsUqU>EvlrTg2@5j9Zq5RkH zHRC(oqxn(jyLI|6o%YXy3I`T-np4ms6eO?>;@HX9ou<>nVc08I~~&kO5GD9tg$m zL13SU*;!V_v$EL4&^P30JJNOwOEp&1!Y?d4wDB^a=Xe~Sxv z(-kO&UV#(?z%!E0;Yt)fBVBZQ&Ix{nNWE&4+UTI!8Kf>cXx6exrRFoBEDEI$qzgdJ z1d1LYIuoOch^DWi!Cg`tfWTjn&%LRNo)r?PGcg7w0>zn@AcWM7)Sa}iZbkFfdKLi; zl+^XuRI)b&tEIXeGArcevnu(lQpc2hR$*Der<9w^p4f1#kN7*-zAuAC#B#6|E1-k> zF0=$!Vq#eZn~>G^5Fm)hczjkg1jsc;ee42{+LX7tOL;51UiVW@&uq$*Q3zIO`Th&f$T9i0JX z7gVqoScC%P;$mwCfEJpLo$NLtJ13LE9;wVM>keeBSUPwa4cwYj$=leJWAlBB)q`O_ zKzSh5(9&kpE$ib<6f_#?LhT8G{<>xJr|&vdKsdx?^sie(5|TL6VO(f zf7Fas8&Fc-2b>@w7_t;WV#HFT!E6X4itf3gS(@TBBsXD0;oSQjTBUa5 z!qBB83h!RKJDzkysmd;&BXDIJ6*D4o(laSqZKNUC7VRU77lNAf~Fv+i!sth?JY z>q0$)9Vsj*mZE36@<(7pK};NdAYHJpaY)_=k{v?#A!CO;iaA92K$JtuK9J%N;J3SS z9m{jZs8hu9!XjXb;j)*ACd_x6I3}8#%A)rGofHqi1zaWL>K1Y87|}IvF^R}+0EuxV zMg-Q%Q96%qr2WN;0K5EwPR9n(>2w%r3Dx_US=pY0y(pY8o+7yqX8QrVQ=B=fY^ZJS zO$v0Ua0p@*!^ge|?ICy!h9NeBG!bF7Pes9gtfB`tvWGgQ1X=sm8(Eo7K~}a6&vNlB z0A#vT^I3;ZSp!@<*`;Xr8|-W>Sn`LnToI2@q~jZ~m&tT5EBH3n6_K9L9({v75*xOY zSsPiWP7#j>4%4Z;kk0sZYkp-w|BScu1K_rFWPf|bC?t~c?)4@)i6;4u# z*zk{8|BkWYn>T^6nRvo1mN&5dGFCZD-v8bpA~!tR1cgZvgXzM^XO#=dufJuo$(I#; zj%dUvHT7$=YSn+XA3G0n zn%O;k;%>v)J$kS0)GuF~k+GvGLmh$LOZB%<1?{F&@!yz_O$fxm)>e zq46}Ab+G%%X=_hftVheBst@1FT4b57Oc^a>J)4)X6g-KRsNq;O;~O*Pv2l~3u3GTr 
zvVsN6SRgBmDj4y2Y*>($-m(^ReScUvOkh2DL#Q6Ng+{RgR`v>*P`iXz1n}Emj25tA z7JtGVQp3^`rG27p!Qz8OFj(*nW?fpSVaAsT0a6KAI>lk?><_7Rad133ZwDJG0LWtw zSqas7dP)ZBZR~Z%gt>N-ea{l(!LZO%pJ;%$X=UR5efJ+cIcO<)2~WTtR|IiQtE z@@xoV?K1K}x+A+X_yhpKCm~@t?BT1}3zA0*gPyrff|d@s3uWlh=HbwVW?4&{8d?&i zHV!+9+j!TbO@ZW{W?55QXS;bxs)+a#=7*&qA2YO6aNnh=3|3Too0nmEA7TD1rY+%R zc`|cr+pyE2tqE34?LgtYtR3LxczMh$;o1&ffu~>!3uv3{8CpY&Zs4gX#F>||I1lNrxor3%0z=(Gh3!8%}ZB^7xr)n#G|&__5l|Bq)# zg1s28a+4*93MJ!;lAH@C>;IMxIJ%UyS^^LebwafV6haijgM)JQ{ zI;kCF`}qKA*g0Z5Xn_bIRbctRq%Uwrf*l%TyZz=SHolV)5ijJksp2hGu|jALr*Ana0%?;{?1#_0di?XH$?ySEFelcv1>`os zwxlV($G+@&Sb6q^l{V~W@at?&p)Q86ayj5>@|h}m7F!kCrqCX68t&JmFA{ukVXQAi zR#_N<2KM-4pn{r+W_TDt6SeWNxN%4{1lteV7d8jB1AR)gW8Pgjw(*J3cZeZ|SRH%= z0<=wmm182Pyu0e=*)J3>yN?pH(3J23&?S{I8Zzx+MGjA#5o+JlId*`V5WiwWSvVvg zcn})7)kb4T-el*3J^tmidTSc)K@5W}> zF&Ay2(TeJ|b7pnB*{C86%vmABR)aUh1jTXwo5&kM%Hl9=&US-R71k0gk6YeiUeq!? z9CW}*g|oQ;dnNEc_*`N;Qt+40ic+!niA;dNfiBCtPo6>I9K2!|c|WUA$ToA{^j`}w zPyoi(e-v=V-X-0Wm;77)gvf-UMU9rhe$<~l8u z$BI%D<+GP+`xD~&BaWK9)T4IXz1DKJe!pUK`Wv=>*Y7K{kYO=kq0|Bc`2I$tXVg#O zDk-V@9^HwzaVnCu5C0j}(R9g`JA$;2FtOAcup^P`l93 zeF(WM!d=1J^A7g!!<<-74^p>9mGpn=`2U_e@-bJ%r-3-<)9GzEPXEjKG zRFg~*vpl#{ivaiF&j_}h6w9WdK3z7T06cpmocX_&Ek6#Pzhq8yso(xG%c(zqFe@ZB zm5@R;*9i!&G;1kYE7n?KId3ar2v+dzrpR>S#shPi+q_X_=FXl5Qe0+4hZPqdEt_EsKNj5pS;!(2(|AQ(~8evmBKMqlmz}c zE9F5&%6K58N!D38ot6J6Th7$fmHi%T&J8mT$3uq1TnXkoMPdh&28dA7v4BVBQZkM5 zY~|GUFPA*VQQJhjwm`)Q)t1CRKGfEY-wOY%ZOS;dV*)6gKosWu&vcWQHP;YONPqR!qKjslTCpdlXz42; zPaeHTaom4vP7Z0yHkYhGxd&8@$onZ*XG+Ub!3~je2Cw=5NdA@CpKogkm-j7W98bJB zW3rV0Cujw<%9M06wj)}ei{i)C@(Qx4bKC{^0I;7xSc_I7MZSz<^jA`3rZd8LJj3#* zj-|+r{5>^0RaOt(Pr?1v;E7b(*ij*Uv)!QZ?U>rOb4RlX<&tJmzlw5)lOTM`Bk_b| z@Hcm~50Su7hxXuj<$mAQ6DrBuj){A-)A3bRUWO!9*UvI!W=P%e*WWKajg2P#HG)k! 
z#1a*&%W{R4dn{9i{BqapG^8M8|KT{3rsBFS6~zmaR<11wMvYxQ2>A^PBY;}_ zSk6$J>r1J+@4IK(5M5smadfQ44b7>nrZs_k`HbMMCUU24pAXJ#BdgdS;jif}znTp< z6tmT#HfWuUV3D?RD7>nv=N^+Mw1-%f>e*SQ1z+hb;~fK{sN3zHQo*j05MOU}mGOT2 z2yt8Qi&s^Np^~&`dk&Q&1NPmaQy)%N-{eVBNllm}zjh#SyS2~vE1+G$NGRCiRhi`| z`sE(kbW*)IPtv6F)jS#NI9H$iQSw*RAx9W5T?g-^f`SSplE=MkXokq}*$t1;3IV7d8$X({Z+5 z9PnOFN~m0^Qs2t48X!c2^S+hi3|Koj>(_2o{E~bbas{WMI|6`GxZlB0-#*dsRY*K|5k_3#(zLQw#SSNL#)`rTr*qnppA;7y$a z=QV=Qr;7y?EcB2g1qznX@>GX+~H z*oq*JZ=>j^;p7*}L;eputeI`Ji3OP`tIhXN@Hu6&mx6sdS!y1joDX_fLw?AE9sXgu z{6gnn>imd@HRr(b`-&h0;X?NqKW_39{A&tun%s;|lihTh+>B0>^P1r-nV;cj4Svqx z=jrYRgI_fGC5m1)_;((Dg_2z1-_v_PQ0yuNKT?99=-$s11EQjv|3VLbrPyyCex3iW z^FIv!r@^7y20gs~{AD8NrfoH@^S=##%ag}%^E(DFKwk?65(PN&1{@x(!G|>vnju{D z3y8KbaxwBE0s*Hc#{_&w8>kZks*?!!(1|hnQAm%lgg0FHgx`Z8;1Llb(j%foG^GQ+ zVlz>cp2pCFSTj!)6LE%skGs_(-oyo`+)EfD0sSo!4UuFBY!aG@Qb36_1a=8-GWc?f zWV%_-Bg%^k6r|9-R0`5OqM}Hr0O%aeL}dzKpWaNsIK7#u>JimMrbkp4*pD?6HQ{0J z6QS!OE1azm*ps;h+|PU>>flms{aIG(g+$FfqPb{6$y-v;ifYpuLqW9hklRNO+bY^o zjt}XgJzaF5i-!#X^}J>RsuRt`qlV~2FFZ!1c1C}TE*{ZUbi>dS-5o8vp4a4D0d3ND zrTmXt;iHC)9Nl�f&g5!Jfa#KegCdy6B~g-oVn)MJ{gZqA$Kt4%eW3OrX2EmgJ8H zSWcu2{`9AOQ7gV%7yWe6Ul#*tREmMR7^I8Ax`1}Z%HW*8Wh&>T)#18WUHs4cqJlr% z!rqBzsqJ^<_k5^ow^TL>UN4Zk;0fgMthg$uUPPB%ieVB5Hb8~(6@i#2U^PAx2#+F9OhUb-cv?K8>bb1A$Y-UP4AXbGZxzpp2~tcE z&#U@rfuur%WSV7m_Qi&D&`dvA&fx|k=$e6c{S4YO)#3t=<=bC@;Ky$JY` z(7P_@w#uj%-Bv`c{!%O!uvRa}TS&1~yd}lk6ug6M2q%-UArNcIn@O=851{9nql*<% zyen3!`b*uV)or)cP+KKJI3ADcmgQb8#rtB78l+n#jkV$fDb}H5Uem2qUD*hfN$PA1 zf4T1BL-CPB8|0(Ar3ezuA4{=bY>;B3_(T_*q}VLB=whoB+XP(aZl@HVNwGuh#Hf;D zmw?0JJz|R#p95V~?4>mO=-EIi_QSPbTf-_|~GP0;uHm^#S&edk>ad4r;GDaTo4zfxFlZWtejdK1$5{Mog#by9eskV?Z@!3 zQd|}br1(x;k>Yz`dWs(~V#QVHmWUte@+S&@7T2Wsg|h#Z9{ff{Ugyv00)y@kqAicc z&!6HiDQ-}zo8oU>z)|lly0}fl>W(OYGWgKk(dnc5rjMGPQi;G*rFcm~LJMIDx!i$| zXH9A@ElkCGtOmv07{{8f8Bz<^Ji2B|JhUVohO5P!BA`j;v6hB$Z?LW3n&@*u(k9g6 z1S`3y0Hy~HVv5G$0PMaK66HSr!c*EdtVa`uN%;c*;cuB`>_e(_AbDHq>>!ab->0wg9OC&DRh1-VmB0UZQ|*i 
zkYSQs??G^S5S1?NdKa?SgV=pFb4|fVPN&v=Tmxa(oe6>PLZq(_5_Ih&952=2HKG3u zKkG#xaXSd{1NO%D*2~G5AZDCbYhxirn?l-_LLP&3opjRc=#Z;HTn4#VHn|g+1P5jK zR~DyhWg$*QG>M=Tfr_4Vn_i)Z*GKPdO_g?|u?MNdx>f1YiLJ97z3}tSJum3c&KiO6DKr+G!z#wEaW=)UqHFdZN^&!fr%I{uYH+uZ_`1!x6h-uPZcknqEt zk_rxu`eKL@3UT8U>&0pvLTTy%4+d$Fx{f`5H9JP**JZy4yi|`V;aQSqBIQoCF6VW` zkO+gowHgrL4%#wtla5 z)$ncSvdgGCV_WZLyY+6iTY+dnE$IVrBtV-{d^7eq?{B}`hK=L{>}Goo)DEW3}D;N@e8>SH4uleb#sfBUxvUdsNDp^_Da6UqPND=G; z)Gw8+kx`?e7jSo?!yz}#`D$Ec>+$a6_~YoUNSxx2hXfN2kI;A%Z1Rd>sHE8Bfm6I& zwd%BHfGjr%orrgxfds!P07~I^sMVh0Jc{JmE?N&%n+(wLG*;c|8JKyodZ}Y{XEJ|I zMODF_DY(<43hq3QJM-zz3y55y$W**lt*TYCEd>84XF9a0$c+&>wRZmdSyW`#vSXnV zlftg!cLgTT?tq7!jlJ-z|8!oxcKg@GpMfms8F$Fx=4A*5wl=Zi)L&IC5~#SVS(6XvgPegFF`XR`hmII@hffe>ehqfg zWKaU5w9py$u`_geMrUW$+H`Mc^=>=sV}1^x8?N}^q=&3BuGm$myw1+sfX@f*$OV+8 zvx{ouBi3*6mqJcHMzPUk^8!RU+t~+ndVd6%4Yt4~wK0eU4*SZsU}MP5M_1M$m;u;kJsf11LAdOChc^mn4GEA<&Oophx@>|}rz!l|QHET~~5tVp8!XG_` z`y7uT?4pF(Qly00QnU&fk?~3J3<5)r7^n%ORBR|9#k?RnPL(BXbtF#=&gx|S0^irc zXM0-RxJLR{7kXKpRQ=x8Tb&eH)Zy^)H68x3ZHPah{E?pA9GxkA?8xnP2j}l5x=fIT z3Bq}{rwtPIKZJg7|4%1iLoTd7sp|Hz$j#HZKGu_tG3s;qyFOEY53+I!ZRvUpx5}#* z2V0kHaoNCg)kdiaL#(cN>G%-qiepeWqR6t2U=(45=zP+Zj>aUbgGV9j^N+@1YDO?- zwAIOhhbYzPr6X$mQx?HP9C*rF=D_&8+#>SL{+LbrtDMf>SoP*qi;TbXr&`eg0sm+F z=UU@!kyi6Jt#~!?O>2WA(c9J~PNmMXsvvETdDa;Rapc9gCvKS_#tb1;TssarDt&0h zs_bKkg@6ZMUTs;95y4dd=l+m$i*}v8F)DG5O8MJH=I2@snD^!ca%F}|Y zS8_x^L0g@J7kBY?^z0#e25#IUAFmdFI2;RnN8_HEoK8k^@l-}v`t3$RcM5vkbuE9N zg`5i(a(TSJZ68P34KfH2ZXp4}fup6Oh8qM1XYr8~jG|yP-5vv|Hvm}@935nbF7o3F z@2$2|`C3$o8su5N2KjHTPoCu+uLSh=({vlBE>H1i=@QB~c~9}@Ojt=gZvua9Dh1Q% z@pKb<7SBU^XR=mgD)BP?&ZH-8C{qaW`tw&PHk8c7T>Ltc+Mj6+*H2T;QQ!zzd_=;sr;Zp!nvvZu))7DIHu~URGT9P zKWaca2T9oLwy{S%KLJq|UH9^wanN45ydD)Cw#9l+xLN>uFc|--RZjR}m3SHu zdpbXZCP!-rAO6g`!1-B~wbNRNnf&XW)?t`BtlDKkjqfAQ;(8AoGQ8)=Tm`h>sNmt< z)_Pat^K1v%b)eV0DQ&OvU_8!3_#NVIPpQ*f=%jpp|ai;xKpL#P6uX z7esUs>|_r~oS}p775rDUixir0sl!VnN>l}REJYYScMF}CA28g82-gL0|85CviNVqX 
zF;tSQP?D@r7XiZ8i_s#4m%Nii#fwOK6vcm)BHA%J0qQ7#yQPSMTZ%DIwxTlLG(62)9Y#|(y;t1M;f%^5kY-^iXr5*-*=Nt~!!df2M2C5w{k z+rw79c(P;>>zg<#cf^?7p}liMj)N1WsD#ZOi)@uU zwBP9d52TQyG772pr4_4XAkz6ut3^T;Xg?+P8PKBkQT9Cpnx6GSvGS}U^yaZtxc;p<`=^h&-I7(9Lj>7$U&~ zZKV3xctHYy5AHELYwV!Kldu)pjlyEGkt}JO&QqENWdrJOniXvb?M!nnMfu z8}MG4q6yURKfuoY65b-Ma)+qP$k!?1J5)E%dG+$$S1%*sUc9h?K>-Z%2-`vF6=*(T z_lAF{7o2USLmwaiZb18nbad(B`|_1;lPUn+&*rWhP-D48>Rng=0`?Jnhd57eKqbr8 z-@RJET9Zj--Tijfg}EcW&VJO%+Z$y>nmp#e(SUwW@l3z3gxjpF(_RtTFp#jQ z7)(BD^cAZUtZUDzLszWg8m776&sVGnQk`>}YpGo@d6kX7FKaI|ipNGoB$MzbfB`LN zVpmmvu&&o37)dhteHz{bG!#;bjlA#07#4nq)z&#=Heo!Q$7gdt_~bw~K3kfwRPI&l zz#u0JCZUMOujnj{tD|K7-@+SsYMr!W;_&LD*;&23g*C&yXlZ7}P+ zayG^`<^gg85M1!1#d$j~eQ4Ovw+1V}3pqW)nsL9~{he52d|gl%Lenn_H9cZHfHQp> zPK%%n7=hC3u->6+wr1@ChkWfPt4LLxt3~4<#8O4@k3?X%P7=!{8`UPc)zu16MG8KD%}Oe4mN~3Ls9D~G z)vD7h3s|eW&2sf_vs|U~1-n_ULe4_QBtA@1&X|7%I1W+j#xGWJYL*iC_dv5?R(POU z3KQ7Pf<;X=|JAy3cVtWsjSR9FziE$*0KA3#ZI6t4=&=V!hMkI&0+pQ#vHO|@gYoXj zs8%>Kuu=u*|7PX!gi^dTRfw14We|hl4Y9Hq4;t%)EzCt#>vtRx->o@qN$yLd z0)QKck(jOuBwIAe(ELiHl??IVYuojx1H^ZR#ZrTb2sE$&z}t#H#ZI}y!DjU5fBcgJT{0ze ztNQzfbr*`&x@{$?CvRF`*;)a)AMHB21WmXE*gCdw=Xgw_subZ3Q>%%kTllv{tmU%5 zt+yPgoi$&_|1}P>xp6)MLX1(9+}`5Ke*(%tA)fD@JJxsy`sk(4Z?{wljHv_%5(+#b z{55#JBz_v|^=^&k$Ajsmz4e4fcz+|xddmkFl=WWIpe*)s1#h!R$6jv@zM-{P=|8}F zzApGv1#bw=a?`|@6I;Oss8y^3QHk8c)Xf^+qN;U8yNJ#ey=`J`7WUzHmf;|%sTZm* z4s=bk*4}7UteJPS{rK4qi$C(3wPh?;GOTlAvKvY z_==i1)heL|boLVaJh`)XXoRgR()eb@r>{UFdj$?)y~OX=80vl6#~Z5+6TMpr7*N=_)TQ5u3oVniEya(}k1OQ_cuLqnODAMuz6f|(xTkvtaAD%1mF; zNk3gQCzX$S6aZMs!&{nITmHdv6FQBD^dl*FIGmNT5d-o7z)m1HC=y5CvHUR#Ivd1x z$AafZ{@rxqxRdB0r3X-9pae+KUeE*Ny{W7|9{5nf8F#-B%6)$VoyTeRK*E42L##`0xzD)$J1LA=-w0b6pVN# zpGd(|RAN)ci~CirnWu%ye$6g)??O(BAwH~0$xL#OL$9xRB{p&`R(xU=&}`w;(m z2o{}!ne-Gqxrfi9GT}D4KL-fWt41L|Zh#x-Z}7PWf0LSL9wnVmL@j{wB~@yX$@=ic zI$uJ+OR){6honVgkfO>!zT6;I+{53ctn*e<2zlz$#E%c;tEoxfr^jn(TZI3xh1cRXzM8$~0f+K@YqM)(b}f27MsiSx>(x4vhM<{1bYziE@WN%|z0tnMgPa z6Ztj|2Ld`^{y*iPQIR|7dM7o&E(&&2X>BP0-(HG;u9H&DY!0NN{nWw-bfBR_A?@s3 
zZ&Xme>;2lwI)vefx&j;>LjfWlo%C!5p^NQd(}UbF^QmYYv;Pbew2S@yS$M67~2+dt8s%L*ZCnysyULUg7JTl0oVCS$-m{Nh_5C69LdjdNHs68 zWsWn<+S|N-1AIF%@0V4FZQcsTcl?Uv-}4^`!l8(ox6QlL3si<58R$ApocS+~sB=vb zcTsIUSTb4_y`>{h+1L&`|6TGw_@9!4DJMQUegMBo)csAtEedY)E2zKZcX$D@ z-kOAsP?*DH6Mt>vHh{AhmW1gLj+giI>R_*dAte0#Yw*~Tv~j>y6T2KuK@kD4hZqs7 zMr`-yRsaISdJ(4!C_TjU^SUTuv&=SIEIDjuq$ma11wlQ``OG`2L|G}4MLEv2#B^|? zhSYv{2QB}F9rvWXk)J}2qjTZn6Wa=+6?s<{8B$acRVC^1NEj$BlcI*Gsd9FBi$!F? zk%_29L2Xe-in=02oqd-lqlN0R`Q&$?h}yivTfS*Uku61ihBjXy~wUP!eb- z9#Vnhz8b-WJH1PxH?B72`j!SS?eX^G97uWJ`9`S^%E}tS8+*M?A~=Mqr@U!KEEogq zd5ftrr@U1FYP8~%H@#$BXgln}&M#oceL61=2Ojn%?fBHqQ{JkT{_p9K0ql7L_({M& z)$O#m*#9Q?xu?A)LCmJp-bA36oI34|Nxr|VM4f{mP(!5XJPDU{AfBP~k}Cd;w?y;< zi812{>c?Qt8Dv+!3|4+j?9eMH3t3ng?AAu{WW?NHRdRVb#K-|mFQ7TUan@TI0+STA z|E%{APgM)fd6RjX+Ih~~M6ZYqCT;v8&wGE*s|*Ma9Xj6d>I8^{FdU&i4(?<(c5@6S zH}GnMU&LUQw#42bPJ@=f5^N9+5K#?mt_B9o;p%%~YO0I_S>j@l9x>WbB0ceLPI_=y zntilck@zK?=0;1@|2E6En>t?2zTi#LNpBOMpxSuB`yQ{M23+*kOsEOr3GHjU@GMl4 zO6kPEzcUe0~X=vh!$5o{?#nl2h6JGVEJ8Z z;|o(cr(%5f@Ie#4_hwh*xvEEuZ=Y6|`BZwWuUfEwtgnr;G1*Z24sg0B`fCU7ct z)XBKX-dibOz*gc5gY>Tfa9qY0tvscDv_*_5?K@)FyPpvse!Tl9AR*9SS?4q{=X6X| zUg1#yfd&l#kRAE0?!yjHaFBvSI{9zzL#J?VI)!tyBOb_@zM^}_D0bWf9{dDdlIb(N z4|y;ikk7aDdy0b720P<{4CAc9&QZGa2D?BnT{PGwlvVaY$hxkx-wpC=oyz{C$A3}mh7m$T%pl*^ zP?>{UYmfrjB+MVdiQgvg)(~%LK+I!5BOn2Qn@mKY5vqIW3>yIza2sB(u^TYMF$|;! 
zA^_~^z4R)+OuvUU4fzV9G?CD$M?EdI)MvZi3pfKQZ2jj5zF`U_EMk zyv`t3LErHNgD28&61CU~eBlQ9yB@+zqfBb%AvT(#!IRNlyqr!@ihZcf`T$Rn)D`I6 zG{+(mpAy#E=0k_(j%+x%=crM+fc>k4b9l&WN9T^jYBG3OKh-gn#|P`B`__>MCMZB= z1=A|~phbvHKxJ?(FjhoYsMxB$=ZlpGt|W{PVSk49Yy5ccT&y3%h9c8UwV|pn%|+jm zI{UY`Mli9OuO%;LJH^hB94?6@{AB_LuokZ^c^!OLt0zYlQwuYF84++xB*`rizPhf7 zwqI;@KGRn^su9dV8Sg5gJkt;x6I+eXKF`#eL$k#`%EbEaS3|1%WPD4>;j2hOL9Z?L~X3$tLh#~@nOIc3r|%^HGNg;4W~+t;DCD!H0cQ`d!NM7 z!xH=C4$dWAKxk|>lzbF#_Q2D=cIV>hXmWxyL`19QHGLJ_V;OY!#;M~qeGN4N3B0xk zTCA8Uyb*BXZ6B0{RQo3Kr)Ho1AuTv5%ZGhrCDp94Z?;we(9@?H`(lHCH}-83`lpyf zDU&VDd>?8%*iJRJxi1?C6uU8N8sNgWH20;~{hX#yh!6(jml*AV7^lQ&55e!y>|*%- zkFw|g$!M4M?`oBcWhB=DYF>0YriAL=0-qSB)9gU0;ItOLMvkI=Ov#f^wl2q5>vD63 z;w*G)OJ9WYpNVnbo6w+9Fp+%^*E-=Kmn5?!c*7LqA^|Pt63jRB#FfAn`I|HfoCzcx&Z`UX9!(*T&><-oNWwLo!5cgbA9O5t(+!=T8}vL} zr@146%C@5m>A@UML4>lZ$z+=tx@RGKJF`Yw)yB062(^OH%u4~A{7^P0}@KnIABB%&= z@bp{B0|?y82G5|!guO}xR-=h8lkQaakUAgHlV!lW81B?IY!yAihXb2el_trm>~~D) zSv;Gb*QX~9h`xq&uMrsD5WhV z)dCVLa_LCHqc*>5*;uT^?IJ~2z_6wnyt`o^F54{bSZejL#OJzr9}`P=UkX}KYVbJy z9d}|ld>o)AnE@lXp`EV+=L77CP%Uyrh6PKv_Z@L13?xp7Wcz>EJq6}?Ff^*{Jy~5H z`rSnz_0{&{Iq!uh*nE#0Ibd|IS{!tz2B$viTgbJc>>V3$F8w(+1)Md`8sXlI50V_3 zSGMl;Kjt9^pq1R0LDM940rYL*yv%`VMG1x9Dg*c_94ksX&n9?Qq~R>dNPq&u^Vs|RW69U6lihqd{E|xR?yJ^jgX9}2H(0Moz8SMFi)%P+ z%-}wWLx+tnoO6fa5)*807UtJ2{B;ROWN6QAR0Z(jPWufYf&q2k9W(6?Sk(Q|-M1!s zC*LJ;-ZJ0uQAQdy@J~kbtlHYis-T{#6sQ}_>FFEn;`@SNV{+Ymaj?{2-}*2OF2B|3 z5xxPzRwI44w9;pw(WJ9;1vI(p?5GayCm5E8?t%cEq+L*h$NHLtUj#rK-MFMSj`cML z>&(ab((8R^&&pSD+T_g2jqpqVy@t)voa|W0KT`Rkf9`7c|M1N8b4YZK5qc#i+gU!} zKlIi}CK#hCPRA^KV4Tma@ z`t228zmTZ2HO_8nSl5F|&QI8McgeP@D1{E}{+>nPcjA$nuA&HLHC-l|9cc z78NL}_U!XTsqyoCpV;q&2glrj2T}H_K6_RqoGOK0@v82Je3n`^({{!4+w>Cx33KGc_cBa9iiVrFAe#2< zqu=&*bF`ICJh$S^4_FO<2)PNd(_cFW*S;Y+aru?LA6@qLewSj;W_5)-PyCftNuB&f zsB)HSvBpQHXNNpIQuuT-6$*$5@Kz#H7g%S)1lFHwFc~8G1YO1$BGwRCV!}imB`a=- zc>2W>BSeBOus00|A6+C-P?CaD;H|djP^>fsW!&`+(U5|@Ij+ivvpgY^b(j#bc<0UX zbiaa5rbJ239XgtaSkk$u=)q%4Dhf!FquLyFsvZ-AEhLFA`j~=hET>s 
zbukRiFsZ5fis8B#p|RKJOftks@B?BL5kJ}#W2l+NP&18%``tV-jwpN_Q8Augn?Nz> zLk$;C(&a=7Ach?-AcpNLCQ&U&U|q!=Do9afUiZKXXfD0_CI$0M zSpT3c`-%nhU?ByIXau}Tqi?Y;mU!5F0TrqF;w@^Rw{`K3A(mlZB$gXug@-K`Jt_IS zx>$+dJh4g_?@{X29<)03tHfGeeBfcL#X7q9&?72|k31q@1P$>q{jN8}2KwD- zh)?KulRLZZB)cAl*lgIgJ-XQDVVmi@`jpJk?9XDmM|>uBP(?EyjqT6AHhE4Dt zO{V}GX`zKv;6bx$*hNDoi@~H>y5`k2U$F8=zM=jUs5kj_Er5PUkK=3AwFrD7x`x?q zEIwj$xu*y$w(Ng?Rc*I#D<2&EW4Esp=idi& zKKHqx3ZNd|=lj52RBAC=tg`m|lKjQAI9)3)wRo+B)DpBrRd>H{otC7PRM+?W@|%>> zO6yt~sg>1|p*#!OofZvetXer;^GmI~RzdQgwUmEu{eZJGXF`vaL`u?rDdp%2YqFXDq2-4yPEp$pfAOrsa4mt8j|nUvZPi^t8I&g zU0NM1QaQeeXq9uwmsqZ@1REbOwR&1fiJd)S*;+|mt1q<%T0=CU)Ea4xIgGY@z7$9- zK^*U=Qfop%Qwo|<(42x6T1(p`E>dfyfm?l;KSEV%Bek|#4yi{c>+Mj4_K>E|yvxgl zNv#8qe~^C)11PYfYuQ)?NQjcYn>?e7zLf_vI_-WwQf@D zPO%;o^wcWo8W{A-T5qZKp@MU@hEnUR^}|q*T7PYT)COvUq&ArMfjU=%+`hv`Lg8!7 z=;32VC-xpZY*en)h5$US;TOK5#!zjT)P`##Y=^Yr+DI)$YNNCi^~@K(>Y1ac%|}z2 zV|X8oDyfat#!2mQj(5gu6LjqfsXeJpl-g5Tp428$Z}!%b)vR+7C9U4L(w^3;OYIr$ zSyed$a@_1MeO_%cMoqgfeF@rg+7xxB2KK;}=3$3&Xq6A3)o8-!RsDEZqB`^?w%OQt z_SU8<-o;lQMkd-c+LTvJL2SCzUZkvN5Lqykds%~D1uL=d$lTo4xkHAH9FN_rHcM-x zdK~dZ#k``m!P8oaLwY`b@9AtjeesCzL-SP{u&+sNj`oIXdDK@uZ?5*HuFaF$d{omu z?E$F^bZw#37SR}5tSzB|xD=urjJ>yLL=}1yf|V4kqBeXF z+LqdC>hrTu*3{m|Jt$m43}b6-hIbg*v-cpdD~f;ROW4>gtAdKwEGv?)^CsC^=}P1iJs^9EjmZK1+7dR9cg>0hM#MDrM6GoAH;FRZ}3IzoK!5o=zBr%gF&2BbSt8h zK|6N!YS$fKITu~1PwzmcX2*WJ<4bj7r3!rIOdPU74xNhywYqwwz;{P$!6IJk=x_Ym zB7bS~7>~yubP4o?j=y%$Upe{&2TCWnEXOp+zR$l_(cGWd?8^eM2atE-sByVV=kMXb zi_Gf2q@zDPBSrrId{*fWYoznlI^T@5!dtA2j$@~Z++S2f%PYnG+cbQ%s~vvF^WZ2zQ0J>BH2;w3 z%ltdkN@vM90{R}pBopTbKd9T9KRE-VB=lM=(+E<{ZBzXoWcojHTGzi()tM z9`qhrc{p=?%J9c(7&&UL;eROdHh&eV1sy;lL}$ZnuE6k5hnl}sGs69`d9}owtdp3> zx{LX&zgWOVizRHlSPJnYK^hyt2Py)%VX6>7e4GkFHz+1_uOXBL?Mfo`1QJx@7cnTz zB~iIBAr;iH7$IY^LhnV53&)U?>bgqv_$%vB;Gu!8ay|Z6&`S-`M?UBmT!EUX3(9s0 z=p||Ti|2;&g`ga}KsWN0tg$e7*$47%hg8?mOEgyEnX7i+W zjZejyQ#-aotC=0EDe;;1x;tFU=aE|QpWOQRhJ8P553`c?9yPN#RNKHc(t(lm`X6@e zNLqX}>#hA|a9~zu&IvH!cpj}@kMR@l`A&@gU>TcN{pCWhZ#xu&&#__-+kL3B-=!AT 
z^%p^^{JQ?}(L5nose}I#*FJ#T!PrOqg*JM12yS6 zqQlW-@+|ackl|2KcpUMcwe^a+Z*S3G9iQSK4XRo{@89m6zlHC3>xznb$)@$Cm;8z0 zHtSt!^?+Mnz}YMPHKN?>tF2Gj_JQ8l{oh)=XYlIVelE0k0abQwnLjCR5RR8#gh+NC z@W+-wJ9{n52H1Li$io{1t1kEFJI8t_0vl3uAS}oUQ~V}O3jM466+lg`RsM$@=|En$ zI^rfwk#0ipeN+pHC-PSLi>U)^?an;8)_=?K{mI{JTSuMw%n#>_Ds6}VN1N`lBhEK* zq#L%#zys$>ukZ3(s?ARO?Jhh03xOS@Zte5e4LL_4gYDY*KI*8}`~55IhWhdMlV7Pj z`~76ulySg6#j(wL{=J&;opIDp8eBJy`gdA(fp;zydA0(&fc{FUBu}8Is?^%+RzIHi z`;qX@d4H-SCmG-~ivQs)Yw~LlmFKY;*x|Z5P#h;ep=vz#i{uJv$ycL-GC)<`ZT9&G#|U zg`TWCcmiKq7~SP!0*Q`e_Li}M`f6ru;IduiT}$UYr1FXd$iQ<;u|N;U)N15!FO>Tb zlREu{DtE9%Kv!)_1jy%jw-SMT2M+1U(IdLN0JoMesHCJoIv%x43Ji|r+k%_30xt>8 z4d}79bpiv_Cv^fZ>OH_Y4q&^29qR^WI^bB*WlzshKi9KM$C1c-M@TmD(W1p#Vl1?b zAfM-P%AXzZt7gpt#Z{fg0rKDnW;BgKa8QvA*BK0`G&bCX0`w?~ji%TblZ|ELD0p0F z<8=lZfyp2vFxivz81|1(L6XBJ(MibDI(tTElXV7(Kooo4U@yQc9D`6Gf=#1xro#^$ z{m!7oFNL#qY3yZsIFka%A|lu<*aw1pW3%DRg+Uk)!62PTv#kUT_BtWdy+M!XQUInu zi9ty#4dPR_KxYeewkTMvNnpC>!PR0NYCmw@@_zHc4ER;9(;~0|=7z7RZG*#OmA_?R zar_bq-SoF5Tz9iV(rSOlK!RG+D)3O1byz9WD^sF;3W5^)@RjLahQwJ29(+u>R%4kG zwh*16^MeA(Y{%TdHtOUmZ4=H0yR{CqirymGR??XIl!EOPbYtCBPER;)yVsWkHsbU2 zw+Z|O`Et9jtW>pSgC%FznpIOxXd9S@UFwsa1G9s#=L810V+Ya(4lVJp&U%0%9mm%G z`gV!=@0Q7|2^o)u=C2C zPu2X60n)p?*)gy^-2SwlXzlKH1PjZNGHT`@0ecksZ15|j(s~8nj^mYrD<%f23vCLZ zs?X;Il6*4(e)&E$h=V{v{t6)436lc#gMB6i;2G?sdU8sje^Fbc%3DCgttG=TFG~Yj zK|^2_6if;HWb2H++qL;xTcE6Z%m}2Z>M|g}@|dnTb6a|Gw%x0}A~a zK|Jy~Q|R13b29p2o$BWz)rnMJl?Qb>gV$dP3~)ZM%q?ka8(=nW5T^X?piOqBL4>K! 
z{1lT~IBRTHfkqBU+>pt;PAbp*z(Xji>-<2p^VxT7U`<@`2<9}Ew=ht%bcT4|!t`#X~P)cP+I~J|G9<3+ZAdy}Jrq zYWjVjZbP=Ul&>|RDDVNG=lF*dd<0)e^ejlvKBn0EaK3?Wq}V4EY%+1=0clu(Z_!D; zFu=Fz{8PG_x1BsneWpXd5EJ)K%6At|G4MGOp?hGX4f92$cN7dW$S^RCQta2sHZYSf zGzh&blMDni?->YY@*|KthtRv~@niH7L|aQaL|aSAA=FY1(biHxc;#{E22ug1J;1hx z`r;Y8Y91`+X949!H_uZQFVO9a^v)%UU8XF)qf|SoO|KB0pz3>ih8kt^9}J*-u}8>6 z5n`_5{3nCpy^8Z|2Ee`W`zs(;>3JSxT#s;Ap6A*qU*yILnHA@MQt+2fa9;zc1xE3G zl;$SjzUcRs2^GQ*bbiMKv~U3e0n;fk4QT@EgOG786)qd26QWq9!fug9T!b^bjZnXo1Jh}Gyvl~M-9Ia5wDhR57gon)#>elR5$5>h;$YBEYPrIcitJs zf1)xB7YQpXLsYTt2&)pl5so8xNJLg1Dw3iG8S5hvP9}6wONu(8E;J%-9Z1mttpN>4 z(Z~_UiN>M{od^S?t*K}RibQid>rD!W4&?Ks1KEmvm8EH|=>=#%YHgqfnG_RVTLpIn zd`6CFCq<5UNFCS_$TZsvfYR<3O*pHkMyNnNz_;!e9i(`e&#_%vIg1Mfepra0uK~fR z6dlE*QglM@$RSR>x-(F;;$xySmWrGl2@!QyDZ1fp(H-@)7bMX`^pw0bFC#@S(TcMo z>d^avB4I>DZ*_fV;IW3!incg#Ow4JMB}Jc5JO}YyigOR8>?=h-(H{b*d)le3dk!aR z2S_nc3{tap1&S9REQb7x0_#u+Z^dw1+0__9DybtyrrK8D6{)sP4HSh5>nI|NL(T>j zd6l#~P$UtsXt+};q+FUK#u9+msW_mf`Xu4okp9YJph3sq9_3J>A z2KPJa>8}Gdfktx&S@=Oi0jyOuG)F0@R37{~5ceol^9mnvQTPbhMlPGGQY`EadCkG* z=79#d#i0BJ+s!C)qhdFMEknXr>yR4OG1%anz*|Dw063K^--4DEoU7r|5lQOm_ zz1SrCU4utKJ_OB1r#FHpzHu@I0GlIHoA1xj1NcD2+wOI;5;CBn9b0q@cG#q(IUa zA_cK9ySIk3Kmki4Ggu5{XkqQ{MP&!UShf0>r?${mb#!jWE-J#el;>}Pm=0P>HBDx2hFJkF<8EhhAl~AyTPAG{7 z)Q!}6gs5g0!7HN(C*xQe6qE zUAz6DLSDtz>-^PL@T5yd>eN+ecHvdXb~Xw&umD2i@D>Y1_q*BrPq{#!qyWw2bTpM2 zN=)!-!W-(8s^}D~Lcu@I2Ud!_NMOZ!<=sh){-wc-waOzrM2*epD%zlQ*FEvi7&J&^taywHtqzNP1Cm;fC}$n*uz%G z7CMUUu=Dfro_WC4vR}ru`Yo{LvQfw-%!2faWc#?^CdR{#LoJwyF@e`r9Ga@xAo^V> zvQs*I7W`acFT--BL8z0@LAJNe?&Nc!Zx!k6j>jB~7*SoZH|Nwf^erbc+|I^sW>Zv09q3^3swF3;#x?dtV{9SE1C7*t0B%opag>crjEW@tT?f0+p86 zjD{*|d}U}TON&M*Im?r?8<;K<@rMvJBG9e!f1yjE@FAd*#61I7Y@)otm!6An$Sq1) zBmBJ~>@BONe7(%yEGP?N3$t8I>m9nU5=o`WYAyC7)Dg`m>fR4cFVzZzG|C4JtTpan z&H!Up82qr_TePZz}%BRdk{JSF@`);VtoEEv_M{p@tPHS4>4ItK_W2L zi;@0&4?_jP0c+!UDuZF8JmlFGSQ%^`#3Z_tvl>6pi!`ALJ!>0S&ZL>o*2dGtny8WvuQGsz_wxEfLsFAWSfOP3Snx! 
zbe>qn@0)|)7x=`ZijsK&oUMtVf67MWJE~>qQ&JUyk@piS{=I6~>o-Ebg%B+5;OWvk8#cMD+Ex%*jYRN5%5IJd$q z9u+4Q7##suBEwJ~7(Ga+Zfi}9u*M>kimnljjgIVc%m0S@4LV0@@)V>Fu20oKwVh+a zh;VAl<;d3z)C36$)*=WCfSdmadhPFv(O^pmvw< z;4#lb+gjDvf-o^V+M4IOXp?l&CL-Ez6is~%Z?~}4C%g=G&~a{=cY6_M~lj|%;11qX`7_b%y9=JBM<^E#GT3(&{N()SzCZ{2j?3k_HS%!iNYe_iNazY z^Ji$4=*^g0P($i2#2X=)L5HpSp=SJ+%$=3^^glzZX}cB+y!IG=YdFpj+5QSuW$_{F zimm1c{|Zfp;3?nVp{-QE0ZApJuJX}ldwab7!EE289+BSjOZi%Sn$u24q;ELwf2$iD z)+c8-@f+cG5?z(fW8bRcW)-W^ng3qc-WqS?qwRUrJ)SMi-Yp7qVbpgi%8?L=x%kQ` zNX3>FvC~%4rXuz{E=6Eor&#mT{6ckm1H283w`Z#R8}T9L4JW{1P6&MET8ko4qfG>7 zP5Vi87a%EXUEvLF8QaiS71ZMku%TVs9$2c3^u1XSg}YEZ67I=l!Civr2XB6(%kOl# zPnQQWnQ?!_o7c97r_qa-Rx;L(SHhnbSZey)f_MMWU;D6sf|Q7+6%%)J1&9vfEbh}tNNE{ zZ&1Jr#o;89*%TKYCep(!uBl+kB(Nzb`YdFv-6H0JTt>_Xkqqu4VOdR#vfz>*mXomT zP|dW9g-IAw73!7vN)f^-N@2xJioGLQENOxoC95N*#iC)sNW1VCjI;w4QVh*RJ8L9X zMBzZA5|_$!iL;1R#HunGXE&2ZWeYLhBqms(BbrEvkYv=9Nujd6SQ`%D#X44qFi<*m zsdVaFu_4_+CZ)<0u@OBqmSMl0rL`xm$`rAgRctO|u9rfpmF>kA6s;xQp#zu|WA0Xn zCa~~mD?_^yUu&-^ICdb-vW~LYiHwxpv$BEsBrf=565fcNDL5G(9xoM?gyCHsd|C`YD= zlVx#=8HOUJ%HlLan2u!~J&oYa%#W@ySuv0QpoJ(JyM|W@zVHQ6%L2Xi8KpK-H zHH}FNtfeotkg0UKKp<=uSDM6C7IC$>hG=Edo6k++T8gsHETrud*VAnox@@qZ=Ant$ zNZdqdUzkWoawK#lVE|qEJ|xK_#qDN1?x1S^Qihf!J0tEgla?ff!JdIx)012vLYRDp zG$jv;hp5C4Q;LpIdZ8n17LS@pM>4Irc-$nOFo|DL^pkWsWfjcSGDeD!DW^a^5|1sY z5TPDfUOY>2&zZ#YW;}djg@&Y3tQSqvBEuC&)!eSF)R$tQ#irrcBC`X7RR3gfzLT_zOMUHH-H|;w$2B)a)Th zE(T5`-nWPk#D}u@NIf@lroKH6M&SY)*sBXq{Iwd`{YJyDviOuo6>)^~u;%tULNHIE zpJ#l1bNhbS%@m)JAXI#Tf8PHidk>+S|ND>Zy~L1OviK4oKnFtKk;Om!jau2yiILO` z(B@$=T^9dDQ-^a#^l7w1v_FBhh~c+OnDg*!{js(FOz@a^$K7@tztsy$V>8>@#|yVO zthb#uU{1W9Uu}nk{LLfV+uI6%`$x97W19Gne?|wpN5nyxqfOzDXC-to`H4>U>c-bL z@n7)`HznC4TmNMf-{MR>*=0+a!|&lXOpQQ0uv7ldO_WfYkJMmNybwbAeKjO$sJ$$WX~(lkAdX z{;I(ReKyGnO-ad37mwt%Nj@pehBFqu_|qi&;3DCqav3h=#o4z`k=^OrLtv^crE4!b z%p&EJpamLXlOiP-PhM*aD^7>%TgpXK; za9;REz%Fp&M>c2KYzGrHdv)*Hm;6lpX_Mk4oP#VXRnu&$V*Z6wgF-J^s!j@-2@4KL zo=7QCiX~;t5U8j%8JL~N9w=Fqc}8<8|itLF-vu& 
zdQj5DBiZ?jBuz~lDQTj0*`y{E1{F<|fY8e(HAg=Bbg>r@UXl3I%@ckJ4&pnypkaT+ zpLel0jBg>eq^fRZgJxzM=wnLlY{FSOf^(QInRGJ8Plsww(Bl@mY`{foPZiz)#7B3v zN99eII?7Tf=xa*Jq^j9PRi4i0 z%k!zCWQq18_$6+W7D|h3(qai(j7xx#(Zk*{Y^fy5(lVR0T+y{8y;9P(L|WJIcRlQJ zj!aql+$OC>`lWS>k|iQG>}fBRePxD(opjlS52E*=E_YKg`>8zkfOt?Zdz5uA>4)wkoyu4q*UMh7 z`T^;nO*$m?P?RfW>8MRQM&EFLMD`hbl6}S@k-djRcJH4$6q=UDiQ)-0fnO0%AZ25V zO*%;*IR%;zdfDq#IZYINwX&^LP(RmbMfUDS%B){YXKd10>AWm`W0NjO=ZTA4q~2eI zuPk6MWRj&zHtAdGJF-y$_2&?~(|@$LeYg)RlgbnA+hI8>h(Dfa&+8AGWPdAKx{!oh zB<99lQ|zyBAm2LGUd-LY5nM+WQcMU!U&TbeIA-vT{f(xAkt#YU$TB@VI3fRCmX+hbGQ7M9V_}P3hHM9h{_;Vx8U--XGE*4&2J1(m$!iFuM8OJv zg&$;Jy5`?H-Tp+?0b4oKUYcK?X`h)!*|kFSUs%bmg;m79M!GNo8LtbqH=IjEVKl); zVqc*gW_@c|0~}Kf#~Ry!Q5LH`6j(XP^_mG&ahJ+mbYPLmLJJ&~Zw8z0Mqq^EZYC|2 zh0OxYQXpq^hh-qGT+?u?uJbIy8tlZ(LxF^0l{X*jHv=R#(`gL{bxkTj5>=>4;Yf-~ zWFZQd3adK{e8&pN&`MX%DrP|}Y*ET7F6lMg291fDYU04f48v*}9;eS~jD09AH&^Lx$jm8f(Cqfa% zljniAlTP0^^X#S5+QKdybtuwnZ3KxNwJ;Mlu<|P{Ojb%tYX>N^?2L8DP7`Y+>=h0m zO?g>$t>eLkr*IIh&Zsm4TKKDB=toL=-OONqOUla7a~>D+mRTXALi>+GIx*t$dGqZ{ zOJskio%S6wYlah{6QC1J%}3F8Zj0uDD3~qr4xid*x|MdR1VhO=%nEOr%ADryr}pL& zhECr6GyDD4aE^u&Z;A3dg<3E|cr!&fEqsmiL&l$7_PERb{zCQp3Rx(_Dgui$uUT&p z#}KWRX%(Kaz`jr-e|yEaJt}Q7j=NHk)&Z*eE(@BlRo_E)tqGE##|eaO5D{wmFsEhryQw5q4uU!kbymUyeADqQOY9vb7{TI!OR;RY&g(6;H&nFd7c8|m zkfAkCWx=2Cal81NrFNh7mcrLe(3J1RBbV7r$ahd%qVNlExy;_8aUP)!l(RaE^@IU1 zP6N$vI2IzEp1*|s2|g^@%1c%7JAi#GCE@oTt16xQP^7e^@XZ9$G8O|#T?-Znm)pZ3 zfhoS+zSs(rd#5(oH!D4G_(qItnH%jHiZti@y8#b*)lFDSC`WbU{l_-hXQ_I&X~F$B zZG_G+K>_}}FqGG}-4mesyG@Y@&E95j8Ora52@Y#RWW}g(xM;uOm_1I_ACrD=)v7SK za$(IxPV!j{c4bIdGD(2$M^PO5B6}yHuwH<>;wAxy{Y#pJQYJE9APP7s8cOC1LWQz) zDMy#`bb$ebP@$qMK*9lAB=k^)s3@eWTF4*?y^FU{8-tf0`mIUU^J>9$Pa5g@;Nx{* zmWE>Zgt``(+^A0k8pr~~90kaLflp{`CG#V60Xu6>CLAE(h#}MXF+xi#or{uTxBw4r z>C%qgw5JQ?8lgf*D-@tQQ7V#PzkuFAl#x$Jv9h!hWK*CkY~)*5WuZI8?P0;rb5DxV zOBQ-lq&^g*FWo_)Q2=K8ke&gu0GUQ@p$L5>)d~gm#uTM7y&G(WV$=Y->p-Efn=ph5 zaHvVMqu*S>8QoBr@Dmc!U`ik5fpepw!YGVrCNkaEhD`Uhf$2VkVAp&+1*Xfw1PeI!#Q^*|3Mutgqh)J(M 
zwKS)}n$Wm~4v1NV#iTShj+Exc3CrlR9HXy^ROUtqt0-TqiPRdn+``cT6Fgr6WgSkq zz-FIGaeB$4e2&lfioYeJzwbA8oIoA&ujKLfzhvKOGzQSzvNV65Bt-GKKiUWKvOn4j z?!IbI7V}g>%aBRc4wJx!%4r#lW^I(?2?PCU*X<2OOefF&Z2u>4I%)-tO$plKKYhdA zMiAEX$XoU(iS#|m5`!6RN-6XEg&EIU9m1ed*R zu>i%9xrYQ-$(94_qzoB4NEUY5gk1vkPWMBRl*UdN;pMC3&eH$y2<8928FB@F3}2!K4E06xhJu&nw6B zlCZAD;|0206fU7SY~Z8cWv{4mt<4GF6Ut@E;}0t2N!e(3xnKVoNS$&#IU z;Tp`m(aA0KB;1`}?1?B6nCiPpL~aSU=_7Ywdcc@x!5@;*XCu`a@_S;iL2n*+zX&j> za1TWbMXr3lUxnXf)pq~SWbB9*%#maZ;sIZG*Iv|~!LY=8C>&A@`rBamU^=Zg3$>sG zLCNb0rTHmcp5deq3;^|@(zdZ>3`R5brh-tSFQ8sjiiqP_*B=z=6)tQo>XGaVk|8{2 zdmC)$P6l3E#oV9`+W>Gx@GoV}CAje(!kI0U%{OFq@SUt0928*dUlBIbIWmZij5bjg zo-4NfapDXVphjq=BFV>ZNx{YtY+W1_ZLmiuLJ=w$j;!9^vlr*Rezi9=hCqXoMerxT zSt9&D{%SV|@D*WXf#{LF0CxuA80*lDfu;N@kL;2mEflIFu+jt0jJAmD6pc?$#jil^ zN_$5bGca|YIfTt(vxl%bct*i$p@4U04xW?&!()&j!f zY++h^w&*=vG!qRsG8cCV;VxwhHB@ZnWefpTfoe#@hF?BVR=&TSuve(q0`;x`qO2m6 z)e1@)Lecd!oa{^@ftiH!Io_jc|4U&Vg|5&32x|MkC>sf7Q#J~E_J6;znaxZa0=rRy zq8X-^{d+mtP84=zQvh4|FNK|ivJ3CgE40w}2>2;o;XUA)+5dNT*&}~#iOu}KKH^N_K|~y?qT*_%&{GzA|_>|Ob(?8Y^)hr z9F)qNvND*pS7gboDpb4^u_Ueov-U@UL33_UgaX)c{F=!_6n%{!YA!^k9lTQ^xg*V#I^1v^2-p2b` zKKw>BAxq+syIE8TKg%Cy*v|?Ovct~`#swgxP@K`1X55R_D_bO#_u(#DK*nO?WS_j7 z6G2Vxf5BllQXH5^J-LEi3A?_+&rK!ud>lVUn8}RvUAaXpug_GXs6yOc-p?xR zW)=71TpL9S+|Mevv&Z>amBSQXbrHi?Ve8RniL>Usu+R%R&Q{cCHSA&4d?slxt4`^N z2l*gC0?r^6_1OX_FwJCmFEkjTH4wUvgpVdZL*Fjl= zQeEu>14jB2){OHKjYHV5y9d*ZK%HQ{hm#r?C=rtAxj+oQyupZq{HibNVcMSwl#J z@Pb}7YO^lu!n&-@#rHB8Nn6fhs3i6?5ucXqO?I;YHDEVuYHx<8-ONITBz>W@pCh=g zhUOz zv;`gUOYpv6Ut-1p!`*TK+UK^rRzT0J4K1{B3s3f0*vp#wD0li26b5m*li$_8N~ z+XPw07A#J-3Oga{`3^4ser7vh`!?-M9E98>yoHt6{OFF<`gdcw5yBcWI8DQ1Gzy*T z9PY@*?moD=n~V;$1xiof;GG3Yd7|hfnOG;G0M<-rk-xfF*(m-!@EP(Kav1`bgUF`h zzyqJJ22IV2iaJqi_H7!r!?CIs3u?X74GpdM; z4eV>^l8M4E>;}Al1RzhJvRmvnIwmb!NcNy6>KA-M{TS{vYNTbL(zu6ZB*jd-1x+lY zqUh0$K?$uc={o$bIHiCR1N324eTT=SMidPT{h{wzysGbjhq7i{B7WYG{@W9}`x|FJ zaSRxuuP{Xo?lH;0NIT+jEJS_KG$CX6m;>PoW+w(Pm@Gl=*=>z+G@%Z-m`&f$nxkQT 
z6lcJ5i#@ER`qQe7&rqChb9y8mTAyV3=|{!FPc~NT9IO%obq(tF;1<;S1AGBm=g6Bz zw@T}86K5=e)~;c$%euLaHPY}6^p<)Hi=#UYJs>-kMxU{GQD1<)?fcB41iC3Wn!;?d zj+G!C5CeDsNmigQ1lDQ-TMcGuXPFnRDgx~(8ZD_J+E8`0l-g((jnQ^Gp)vGCGx!9R zJ|9hB0jhr|D*hm<{1PhtCsg`P2!N>F_CS051mZwaM>e7x6;xVW0O*f~uH8j^{vf_X zcSg{NQSeay%hCI&X_Cz2w$=*i?k^w;$~$U%q-S&$W?QeJ6;eARd8kVGmmI>sfUp|# zAH-Dg?*aceyv;`{HJy~^^b{UL&?5d}1t37Fi01bAHA{d*kA}5>0hp;GIt_-Th#Y|5 z9^iBBnv^)jOt@*^q{JT90X_lj1=BjJtcIFRr<2USpC!=%(+R^(=XK0VkGM^aL;az7 z_zYADl2Qrk(0L!!A7Ipb2>I_LWa%-={wV}-FIZQ!Q7TjFPE@k$&`EdiP`XqsJ-~F* zCL_54ECtV#K{yFLP5{?3C^-%yznkb7&%%q;m7!hDXdbO*Od>V`4Jn~V@l(u#Je2~& z*O)0iAy@Vk;;Fk?7jV)zBVKlmlX2}9XF>+MpJe-r;Ww@OZr0-n>q+eVBwLR+z3jbr zvp#627(RUF-H>!6QoqGaHmqX?$Vv7oem0^VI7xjr#4Nx!_;QLi#a|D?!fuiRD z0q)_2AlR97uq?qT7_pcLgqs#S#II@L@V#6Rs;U*>?4&0AnKTokgziECrRmU?#CH%a z1mHLePUAz7mE1;==sTx5!HTH~Lk7NzJ2OJ5mGltE*srAASuy63h^+?_L9QjDg?J$l zZOsVzLS-Q+4YxApE@hz-KN%ek9Ss{6A=Y+^x>_9w(TSBmQM@LTsq7HE*ZwiW8DI!g zhT>~L77FBlM}>U~6;;SW@#WyAg}_MT{~x+?78L$+2>-8iVf0r^aoYcqyX+(bTL5ur zB>r!Of$5)?MUDFZ16!qD!-32@g@2eK++7Sp7%Vf@4vxJi1L}jZv1sPQDg?L;i&#)G zDUYEW+AF4!#asb-x*3wu%6YpidqJTKABUf`E<;hzb&9`-5F}uL-9LGh*#QRN3^$+!1kaW$*eW>rqnwmEQh6y z6})IX#M08n3O*pB5yI;M9FkHa#9bat!3=0}!y);dgGQJif`372gv8i!bODCm;l`4* zN}-heOn~ejj2)_8s8JD}B0?<_Oyvv;#VRNh%$zOYZetd+Quv`KyiCJm)cfwDEDB# z;}>DQ!hG}yu81{5P$Mng1SKBU7epPy6;a1zhp6KT)YBC56I*N(Xi-@Y$9h^|7Az?P zmRkECA-gS6^KH=o+hU&I3Hu-^sQJ#UrH~A1U^mo!cgS%2D#bxtZY?qS#NiCkqcKVy z#;DqYm^moW90kE@VM|~j)}TT(iVyV_qd$Lqu;!|>*KLduM3AJ}&4-E(2c*|iMF-rE z)a3619qT08KUyE;D8w%XITk9?>$b7YyYoT8j;cW266{Ejlq1B|CKWi<2OO=BYIZop z;V1;@_M0Z|3jTf$M+Nl=@YcNI#Aj}3M7vchm@6HQV1C8pAZ5pU9>)u}@`1+rW^Cp+ zOFC+(;_>SKbs;hn2@yahN}_rRj$S%z>KaijJ)U-(Y1+ z5TROaA79QP^G%hM&+e_{_{l1~@-L``N2xV)@wK%b)%b6<9p6g_doSXL>NrX&rWXzu zDv_27rvU`T`O)ie1RT=DVdl>oJ1o59-*8fNFkXh1tG`@b$KYU9&VMQ+fp2N8l+?r4 zjwb5i<^i71WuDm40R#8`ksTc;R8`H4Rk7plqWbPKHD{?raa*#(#>)rDFn>UHHhxv8 zy8g8u;R8E6;7Q59r?Vqn^R*jSqy$gw2^*b8w~xnq 
z%pu$#F9(K{VFz$%l?{|xDhUY&@~LT#e8q^V442sm6B|i(3r1Nm!-TL}RE!tUZ<nfkV}V?^@5TNa%qu_7nCI91tk@`1*%_T z90XE5QBWW5QAD?K{8zs!WNmxaKRF`R8}k&@FgEvEBeQccU%*MWxU-4$FuP1 zuuvc{Yf86aDey5_2Nq>yxDf#yGD4a=lsj$VGNh0SiVK@7DP zmdkibXRCvUeBv-fuT#tp*dT#|9fNq5pdNR`_Fo_P`L6Q6{g@hadmK zk>P+*CB(;xGEBb=n0`gDe|ewHj=cUEn;nByedjYXS5N!c!Pv(RO&ytNW$bYTeb{>w zw>gG8l*XR7bPYI+KkgvKXX^>aU#jBs<&P%@tOw&;FU*f>3ccq`FfE&K(hhZ81L0ICYX*l) zglS>TO$^&|P-&(M^p4G}6~#f%Yhz+pwx>%6nHGjgGG;H#R<~y7Iv>?;-l}$! zn(ZL6$7Vp$$K}79M$SMap4(;wwiizd#IgEk*(aiXyT8JJTcA%V=lfR1HTbv^nB1(cQtc9H7cWA zpHn|HYmdEWf&zT|BN%i}-C;HJRM|;`^f=ksF-$@H<@q03KO$uW1=Zp3NMWaiTk|_} ztU}!^?8LZ}-?>xi%4_e9w(>XmoisvKDB$d^TGCqi?o|42C{gS#nZsWKNQu_WFO+Zw zN@awx{+lJ7=`ib$S@B;cp}bWn&alN(L58my#qaf$QqBVgc&wRwJ=nq9lymM9D*FS= zJ1ZN^aWZOQX6FZ^QO$Vvgpnxg&NcX+sNV{+H1H;VE3O z{;LHa8gy4jz=FS?4UHGmt4cPZq0q=Cz*Cw{XiCA&D9R!RtL)8f!bd_2@|^)AI4dX& zo7al3EgXb_c555h>7m`Mc0zj_j!nX5K!|djUs0&6HSlAM4mQ|b?8HM`*`j&>IOq1} zN#s5wNl1qK43=TTnPX7iMU=bJ-G2J?QFhD*o1BQ+gZ8!iAX${QzR*q;`TX5qmVG#f3S*EGzeq(TmgCs zKgK(s^ZCmS4*%B)PVAQM;w|erYiW#ZGaBp`FtV+DQ$6QEVJi=>@65J=wHIOg5Vqgn zufFrOfb#Fz&^ggl{^Ug) zJ4Xsdd1hnhTnUmNUa5&QufJUr=VC#)%l~NVydl+Lq5N_)XQDr>xiei9F7c%;oOM-p z3}Lc}iFO18%B6(P{?{#>2?A1DrB!xHp*bf)EhiEu;%2r&E`H{3S~=T!RC79`5H=cX zjWIB1G8Sjs$N5uRJ1SC0Y(JA`uXCV-ER&87qGIp$W8trB+d8hWyerb`yQRq+| zgTV4QX7?v>Wa%rMQ#}a|rK%Fu$I|O3>?e`_QYZ=p`g#ywMWa|og3lJk{W$F2(SoQC z>j5Dm{q=#cuqW6D4Ij5~2Rn;+OQ$7OYBO`F%`BfoZam1TM0yhqg-GulBG7$;@SaX+ zN>G*KV>>vD348d44$c}@b&2*3bc`r=5X(^IWHsZ_ZE{*na$|M)U{t z*%O_=0QTAn@JsQoD1M}(Xy75D?>+Y?yDT`K{;SM>qZ#b4W{6WEmbh;se;5Je6D5&7 zvO=`@gf0+;Kcgx0a|?;V%_M`hKqUK@1z3O61y*hn$rZ|D2J1GD$rZ{A_Lkn^jMSeD z_H7Y&dQgJI zD?hy|K$n8>g#ufX@Ir#E1JxZ0eq(4f`V}6OOr%*9PfA6JR04kruv$|BhklS`kSR%* zg3vOe-&lA>f{PR)s=7#N#2x}uC~p!fPH;=46H=t);;wvZLw>x{DL3qDV;Mp>nOHsS zjv`U;kpx#L*dUsZ5H^>FucgHF=Yr|O{tEGX8?Dn6eorosh9c1zn!xmhMXrb-9L^oC zuzQ;@N*Jjq*i^!~*)cW&-i$C^!%Z4p#?vJoYa-}qLXiim@RMNO1}ewHZ4f9;u?cj- z^zbTYVjNUNW(YIs?JS!xo7O^O;Q$3orX6&L5p1q7280t7Pf_!P`ShhvZNg`;AD6hs 
zSdb^Jc5X5)5V&I0nuo4&=C>_YV!+n*5@D&T=#sL=S+MR{VHu`+@G8>1U$-ITS7bSS za4?~SO<0M;*ktp8x$6%YmeQ|xx5yzw2d6-pMZt}WMDxo8U%;EC6@5!A=0`@JD2l zog&4QtyH$#s4lkiA2OY>Rt(}hs32im%pdx>liOWaXp;Fo5{n7dRa_ylg5A=Ce&=6O zd)Nu`?{OybH+!6w0GkXt1$>&>!gH zJw8;1?DnWEI}0hx*+BI;DwuC5Pul0q4_=?X&v`yjS(BXEZs|LxAY*e1^3)F5eD^FY zYUMid`}DNa;}Z(_zqsvmN(S)Bs{F)V=U869!=HK2 zIaf8CxngglFL<@b@O@UsU$?Ytqu?+8)LBDa@CnArMFL90CUa@}av^@}sWafiac1YA zPPw#_&Yye!T<{Sjy^l=lP?GVd)6SnpyYhgtEynf4q>N{>J>Wa~UU42-)@9}QpPFf! zj4wY@!w<}vRE<}ObJfM$iE*w3RRolrJ|&f(jd#%~Xh?7+g)6lFcyrOh`*d(+pHJq8 zJGd?@A%|)h;oGF6t3L35?da-kQ{LXKR?o{D_jXZU=k<2QsT1l|Z&q~q1Vi8_M%Br3 zLV(Z3Q;oi$-17AebxlzR!SjD#-Siq${nugYiX6yJ{tdQ0E!j>D*>(QwDAypUSZ-!p z=&}`>k!+g<1nd%|KUn*uWCqaN_^59TV zcj1EuxV+LC+<@;z7CuY5bR*bE8*9fJkcr78m@Q%FZP>L1JH0@ci|i5?xU_XE*P}o>@Me$lCpjE5E+@ZxJ2=kYbE<2KfTNC2r@6YR^GvKf$=hZyi^i<5G(_NVT0Y&C zB1R5Gha}mKx7bnUTg(EaFCcCfiXCA-{Oo37{P|2*!TMe-DrrFpJV>)Tq3hPcy{b1v zVRiA;5+E^bQwfwOT;uVg7Jg~@O1pX>7Y@sit~?HtMlP3#$vGKNcJ2E`ZE09d9GvXyzT1U?^^bf8S5phld-(A&yw3- z!xy;vsNFk3vd!SLmbpqpAbNP2Yn{>$g3||O@BzzRWTJWFa@RyvRD5Ijn}KanKyAhM z7M8bGxB_^v>3!mT*SOk?3Z2h}Ry)Pde@?xU2M!CU<8QRqRmY%6x2=<1ODDlN>?FQo zqpK(cCogf%mBFy&ct}Dq-Jd|0iBPg;pU~Z8nN1<@cvINQrw#ZXtQ5%yycn z1D9sIWrlOgDd2+{z3c7w?Yo z*sZSnkzmS`gtHR_a@1f6Ds zWa_3++o8)j8#Le$>pWaVlZuWF`V!wL-SRtD(?)7J&_nIeUZySxg&X+yqGv4YuPE+S z=>ce0*iSM{7U7{5Jz%iBDb&SN9eTP&V#pcy(8`;6`PxtdysLHs4+ z#c;gX`TmO$coDS!J&cif(d+#eL)j>PY`?3vLW777#|!&`_g;*^i!SfK7>O463WSRkX}lunZB5kXQ&!6sjTJ)jymlv^w6Q**_3V5=5!8qw0rTCK%`U8FPF6^n8{o9_onyM`7%pCc`J;(v?X?C;~@$5yj@S$fof6Z5}MZxgD{jgR@!7>_f$X>)G4(QQ(TYQmMKhcAE^UJz;KSq*p|J~AV??@qdC0n~Djeh6o4CW&4?VKGVPI0lT;g?~Q1|8Tl#IDn6qp3{Rg$w~ z+*pUJh$r&8i>sy7s{Ebry`dPuLbaW-vThS^7UZ^lSm@>QyTd}2xtlp=_v?rt2v~zu z@qB7ocO;TIx2!uVNI?kjZ}@31CU<*HWL@9KBip!rACTK!!@Xa%q@Q}PO!OjTe36uc z?B%Z;Q0Z}OfEME*$}zP!0DzIsb{9`%#6uC>@~gaZPjMfLEfiy`xs9o@81yR)O)q)z&Fb_;5~1{J(U zmq(p6<`pn*L|Djq&}x-8DfZtGD}PJ|*#Wq~MeM{8Tq>Oa@GI zKT{1n&*a{hNlu(C!nZ67pI_6|%8i);gJ8?>Mu zgI3CB0SQ;IR{*QtGV6#v0J3!6z{Ha1x3h^Q({Bn)HnT1!*44zi5ryvXXvTWdrI#g4 
z-Ja`f!6XdZbp1_i0AvKpj;8<69Cu04fnj4Xb}o<*nf1ltVf^zqbKTXALzNY_;@dZ` z{LzrEgL|jq)o{gbyN!)vqoK=6Q@3#>8IFK*4Mrx6^cd%D&}E&(SIu)blAzwtpUrdU zjh(_q*dUpQ^G~Bn3IjyRkJK|HYzEfvY?k66a!o}`HRMWM^Zc*LpIW+S` zkA|4`H#CwRZmk6zdFQZm*HSn3kNt&~x*Hpn$xQFN&X3NcM$hxK)ozl;?_KTgZ4vyu zz-D)It7_sD`zWltf8u7hStY-7=lGvjVLrG@Q2Ri$EQgPu+oH6qYg^oFRIFKbTXdX& zhCTru^f?ex_q#)RTiD6-9ivn`ai$t{zmPjBaW3_#z)_G*Txr)-gn2SJB>}8Pv-OIZ`>GU z6rUFUHgDY~mFf4rX*_iUyPE_pdCFe5@3Ur}ynID~C!Ak&<_3;a9)E z#_0B1+jz@x`U>$%tqjC04K~jk^{`$_bkf;K6j!8)m#%_di$XB+G?y=&=Zk~PuZ+K; z+mm18K}R1v`4JubM~#E0PBI#Kdj#B* z06+f+49sbRVnITHnru=v@ z&vB&WxTw}dvznfe4=EdJc)n6O&DN;7-|b0IPu-czF<}qeMm|v>VCrn2hAMQH~Oskl+f~Aef0H%mQ`N zYE|jdDodq1N241$0nCSv7!$dIj-M#`DlBl;xwRdOis+O8G-h4Os4bpz{%dX;l=&^6@Mm zkC^6J56|qU@#9hT14;m|B=BN^R~jqzGX5FUJ^R(R5}P{zHE%M@LyRqBmS?h6NmJvL zH}O2*A`iJ8=)B0&SzR!`oqs2B8ESr+m5*OR>~5*2AP@qUc^;mTjzMqdWlwLVz@wPD zY;SP$`W~ck53khATVC3Su+(1O@Ukv&vjN?QmTJ~e5S?gJs7^*x9iZm?(R5M$gJ(Wjpfh4IqLl zI+8QH0y}bk#z_x15BAy>?B`!(WbHu-64cOP#|&4n$Bzn}|IO4;{!xD|hP&nuua{%D zo&Zw7oWIQUgi%@y7|9u?72}DE3nw%}$rF&3i|oJ5^b#9^T~WbQ3Tj5=_i)=h00njK zm_jGyxQCA&;8lfmdmEQe;d3dp@ChaK*%>eY^r6?G{+G|UXLA2C)32y8_|}iT1(mcE`D6No`cM}osG5t33W^z{ z5a(&byumzjDy+{J8>Xe``s-a$i$R-!q!8JEndxWeyhi^_S%!)gvHtBiVnM z>Bp91 zeHYx}SMekFp_N7KL9L!0t~Hfv&MTq(=5W-~>isHG#&sc#cm4y(sWQS_TxoP8hR(^y z2aW*Jf~)T#5sy}~SK#no7L2w;P(3@D_{~gT@qq9v^O7UI4uzfX_v0!c9izL;>%h!4bRIMfxf!tl~D1Yl@Rmf zmgMLQ-jn0OmH(!e5gu7>x;P>(cilFE!fRQps;ZqqV_45F%)& zL#o}D=vC&)f1suOx>&@~?S5LK7ryR#HkzzS=a~_jDt03G{_K`z(E-D31t{0(ks5t%U2@ zS8m{;IHnBHGs9^-b)2`FlC8ndN>qQ0fq_6jb?JADh(_?6w?rdv+h6M|PTp`m&dJeP zxepSt_`Zst^hgZhjnlM5r&T)pZZ}#hLGhd{Qj5*=SSc2s`WVb=9q^SF`A?5apYfw< zXpwndsNvgQp@oHv*D@7we%G(N@-s%Dr&oq2j`vnkial*SioJ9merbZ&`=4UpkhCBP z##R}Dp1pQgV#M&-dEt8JNgk}+8%)r$J@HZ${}ah4(6b101bwCm@0ad%DkWNWc$@V; zwmQc@=1;!ic8tL{EoGrIuPDr5d(}&^dkMI);L5(#hDNUC&e|>BOsNZBBboiZDi?Zzt`QeD>>N6 zlQ*G222Rvcef_)fMPtxw2=t9H{d&6)>7#EGzLyui z1BcpMCuun;@b{wer;s3m{5javLJ$9<;4fHXF!E-fXvs13?X)C&uCH$iCx2DRm#7Q! 
zo3rs3Hq7I^i=-3in_K1kDt^`@n41WhtTphz#($mnZzP={N|&5eo;cYXt5n&0@0Am( z>NAbe>z?tslfC&BtUlZG3aYbAIs=;fmzjQ5!E*CaFQgFu4?eBnr}zBl$PP^V2=pZ} z=ue{R=5OG0p4XqE;Xl|PwI&@*mLMjVJW8%>0KYT^B^_im*!kNjUYo-5&sDQ7Ee$?H zpifUxesPMovcdqGaQ{mlIu#6HO=Thi8QODjweTtiGjBXq`*P6cdIQ}K7^(vl(tUYe z6*W@A>mVy%nT=okxS>{KQ~-e<|6vmq|43Pbl?QyL(Z3bqw<%AahD1+psnPq%Zs#kfY4qMpBEu|9`=k2W*_N-I<@NCU)3oL|JnPlfiRe@W z`a=3mLp|s+1oJA>HPViYA3xg;W=s&1O&PzL={u5C(m^SPK;C_(hCkeKZR}{^66kyT zhAt}pv##(S^*Eb;yX_$_YXX-*KchK+m8}eN_ChdqP4< z$fh>BTG8qE&w}+4=(}tD2o-KszH+O8_iOU?qRmUGqcIo}vU z`G8qk(RP`dXt;rDBPgh=3XEx}pxE&`-yoFHxx`EQ92D+fs0yzB9e> z$`1L4A{V&7f;XS-Eu{3Nh3Q)>{*I3j==pCx-fXrvjBlOojZwbWsI8~<-ySd&Km}c@ z+I&UyzfO;UPp+jBsvx?g6hM^c=F-L2wg_hI`TaIiH=%;5%zp40< zj|`>wwg+f{kr^6;J9F*z;ax~IK_lHqhVbSY-VRYD@_74r(&E6HP_+l>Yk6Ks0N=h6 z2Y(Vm1M>3Uv*+;fWz5~9QSJnlbyT0X;M+d)mg6;dXe;R5oyxvcRp>uN9oSAq{d%s3 zx^8;R=9vb@2=r9HOjc22=V_?sLxrCDFoY23v$deRiu$Z)fD>kG1B^U&QFdovvbfUO;~&5#y?dOG}eeN_DTz5#B&=ueG);>j~_7NA53 z^mCEmfrMX)TR+uEyBGT!{SGt&eNwW9s;J51KsxJVq(5gim^Gx&$pJn{-2nPbw;8QQ zO5gz&9{U9%5pz{~WQXg2?BNlgp&OO@EC(t01IoKU>hU-))?);E%6~7WNbQxjeP`9# zwDDL#5a_YeRuNWRUOULA%4MeYcyx6jcrSq-wf6=U_2B}oDm_Pf-0F|16+r`CDc3km zsC9VZV4s?omKD8g`8bZecpg(wRsQx#fQhd>s8ztFi;Ise!{nYo&pofcBmAqIe~H2;(4(F$&F3zGQ`O`~*dbW9P)lX&)U^YRkkt_+=~C&y$%!2NXO)d5a~oXp z{c^6K&_W6HrSMt3oJe_i^fIlk?#BMroF7_*{9SMOUUGB>m($igtM5UlLSf!YD0M489mzmyT z8{sD?MKQa>_+OyYKza=yh@_7|j`z8@x`OpGq|l=q_%4CISlb^aT4)iUYxAcAZ%!^b z07gQfA8tQAuA*L?Aw$-_ODn5=wQtOsg2WNT<|LA8c;h*KbRAfJ*R>kz<*?|nnP`&) zaXQqr^MqQCN3YYGd}4_cmz)I|BhU-AMt`fK&RM5XJ=DH{6q- z%^!aDHw;MxdiksIEn!vReb;LQ+wO`P(hkH3^cB^hJYTiR8?&G!>gjwoI@$k%Gyyl1wiC>n1Iuy;5V&o_9dw0#HkbU#_K!AKM!E zk2Yy7a`lCc!F!^yxdqS#_~Q#ADo)0`QVIVpofxb2}za;$F-29lWPfgH`Bt=fB(&heTrgwk9 z2W|FN*evtL+q@nn z&Bns##$>Mz^ew zS5Bk835w>{J9yCPWGzC%PC0;2-L8FMUE8(|FQ9u9=ri(TiV`6|zuDJp;`g>|^NfS- z-oD|&4iwFwJ>N%|iZNX&yItjA$To=(4yWcKKqn3>HYJVYX3VRz5#tYmzWVBYoEyQ& z-|W!RX!F@8yhLUQ^zE+NL?uF5W!j_=FVl0ddlDoe1bTtU_(^7d`wA*%{B*OK@A**is<^O5fW<2 
zA<8JEL^yCPXxv6Dvj~dozHsMfEkdk_MU5Lnypr5;M?RUdU^i0x!(BCeLH>Xs{`6O~ zkvH0{%~;Pp9TUZq2}ypS5lJm*4C0ZGP%@jdu{!>-@BlwbSVtefj}`LL9L$S8)^c>g zux-_eU})$9^ds<@m$|XR_|sQN0j*7yY?*?j3zzaSMxbw}49jl%I)LvuU^Dr*d(Z?qqwMx7Gd;$Zvh2&$hnzKH()Rz&{t)_J{oRhelX&v z`@P{xgwva&{Ww=cXKOwvxFY@EkI>k`!(GdEMMe~b2s8Ou-a#YY;P$SUK>e_QHLemwNklX?h_O;lCqXl&O=!3c1 z_Dn6pIdbkuQUWEDsecdnvKV?FK}TH)w%D#k$T(-gfv)`)1E2Y&#mk>bJ~e91-9!SYce=q8S!`}SLr=s4pE#n3*K^q zbZIMwIkx+M@lr>Tn3cb12m_CLBb4mjiE94maAb|3rH)wO69uBrEUmStIBA;F5{@;b$`f4@01XbM^~fqq;%hX_XmYe{Z9?zQD7 ziDuc;2}@gJnoCef_qFAsmGRVp(4TB~+-p~mhfW^ael?f?0W@oJ{xZ`m1_F6|L936i z1+vnB1ClF3FXgz`ZE%Moa8P&BJ#gT>{J$IDlO9VC6RkY7metFvp3r!7$o!!0yx9p9 zWMmB`tPr1#Fr^?1?|k8yi$WyOkDwK5D!BQ0MqO(#|NVs4ORgVXxH1|86hZ!+1k+l^ zQ(uXYmsk2q!!4NP^DIOAAkcHeaf7u)KK%qHN@fqWx_JAOT9WcDJQi0G1wx?bjsr&K z#tP>dqmg&oE>$v;S-E!oL3tP>(3ktaW3*Vo6RalQ@}$N-Y4cdxy2#2)m$X4uDEsSI z{hwC(3i%??=kJ$n+?BhrL;5@DC?78VZsP|ay?UM0e)Pw!kFg6bitp(9@Z?fVhFFxc$8?`dsB zEH<2dH3~5Z^s(|7_}p{Y5ll7N3iF0vYxrxfJ~Q+OE0xERV8W@{FiC%4Y~ zFXl-E`c~enxQc)AYj}XnQ7x%=GjF+aXnq9xImMSH75w?{5?#6ai5*|O`ZXOLh@h4( zdlky41!E`o5q|5Tm0XSWU%0 zlxVZ@&gV4z;RBkLK7mo1KtGo}RD?!nzco zmr`e`G5FIa;AD~CXzMfQ+velKA)zMF&#r%Kr+k5LKCf+8AMJYYQEDipssMfEHtD0H zrgB>#Z#uwc$)!JFTC3(;-MJU!WIg#vq#5YNbQVCNZ0zK;~osom@;n%bA zyMKT1Cr@6?rU6(*=lo@+A6Tg3Z(C~1mz$nRyXq%q@1YmZ!KO|by_@jkl!}}BsMm_F zm}U~_vs&OQm1=SwA(XfMS!>688k^4Ykc()?-{H73>V>DSgypO-q*7_;UwvA8FCRqV z9nQTM9^BR@^TQW4xzMglqrH)4oJ|1eMM%>xd>E<7C9S;XC4>hI2L~h2uP4Q;??uXJ zXouZ5L{eJNJC841y!Z=G`PS?FPYY`ExH?Zjz6tbwh~1?a#dt(QsEIe&p|zk-Jl~d4 zMDz7NEUP)c6TYg)wEg(CJn9jN4;kD~@G<#=*U7h43Cz#`_|~hQ4f%@K=)r@(Ly7q- z=7d+u)v`I#@r#I4Xov**eE(iK2S18`|D9G&HNGBoBg6?SUI2YNH#So7vzi9x$=$E) ze|O7IqtFcr3hCNOmt?|s@lM?W?R?tz*`=`i)QN)NQUn+ua{e;YBYF`&ri6n6Blx55 zwaV^ur~HuLz!nG^0Z9BqIgQAcmtC_+^wg>xD>mh^&vgFv6FRd8=lxmr36xmvtc%hl1N9m8*+gb5;b zR2wbip;v&vWqF{?Z~mZ_^33qjZ>&fGL0KJsuZ@JCsHAmT$eEDrZE1ZfPV6MCFn;rj zH-Mkt6KLYS=W5C+5%*m`mqW=C6ws0WoDaH!&5aXcdUAy${M{84RoI_I z)6JWO+QWFAAGP+?{r|LeEnrnuSr|~R3f{}Tz`ggJiy$B(@(Rt;B#}|XRM6{;sFfn> 
zWLcVMHj|?zXk%*mN@Zo3lA=aF%Cd4SAC!}(X=r9XP(jPY2jUCV6lSe`_C5RV^9|p( z1Ltw37>4&#)ek|rmTK3Yt zj>S0BlXQ@Zc$Qme1wqDTo}|n<6KW=4QzDToa?CKUF#cIf2y{QqztHD?a$yb5G$irT z7i=$bg^ImMw)oCz{)K?M!z%GbJ-w;Y6?Z~TU7YUGl3+wo}j=Hig4U{to?yOq2NBNw1O6Al~5Mp zhsh_7FTn|sL^f}meisU1tbgrza{HSv4*e@d}enaiRKB4+z{Gt(v>VQ?gB6VX}!m7gMtg#tk)t7a=F>s zn{`q)nf3o-Nh#D(*w9cG76VDY@osk-65R$g+^KW&DGqXor(z zNFOU+idg__A2EcUwCzar+)b73@SirY?1sY3O;(q%(nQ$_r+YP@uFjPJk}tIY<85K z^HREY0gu1*PF zYo{+OQLGa8tu6X%E)MJ@^4OY%FTWYK?M0+z&A7t*;FMc);~@767Wr+J0}e-^0Y8xb zDZav|%o$TBkC+?b<3kc9CAp@Jw-%+Dbzghzxgu!9X#GA-h5V~{F(NYE)usVEpqx;)(Y{w-1B-H4?5h@OT8fI(9+7H7|&o<>A!^$I(XNq|v z2_FmZ9OYxk$0Pe~^$h6p)V6N64UY6eYp`H}%?y>-_>iDhhJRxA$2*NEvO?fIp%4mb zwY<#UZ*X^h9tRu}Sz+u#pwP+%l8e&w@>%+>(;yY4_S?@N^+BR^w;))SM(f6dJ^0cvQO}+0&5R$M3y9CN-`MT&pEx6>TdAP z*_6<0l*BZ<5US+gFF+{V{Ji*9oxLvnj8 zmEF#b1l5+6!nv_=B>U)l`nD6ExTB~nnUZ%4DMHHwg4*G&JBq?uBKfmO!|{uxqXdm@ z6#~8?A?;vw5IT%FFojPWh|3%VB#Aq&!o!8pBphXf{Cedv_DS+^u>nhk zMBdR^78e3bx`nu)s-B<02$vVe3Ywe5vE%>)G8_0%}8vGgF8ZOCYac^pxfK zp{E8@)6qY|qv%Ncbc3Qj#3u(BP}_i=W?eeu-BluV*B6>Ob_~5;WZ_ssBA4SRH{rXo zuE)GztiFq}+{~fBI-uh7kbvde`Ed=o!GIeSTt7?;WmtL3YEBV? 
z19)|iGX#Ge`bgx|)L!Na%n3r(L%yCBeiDppmW-?33g|V#0)ra)j7yyREpgBwk(brO z>$%qAMkSsVkYM6Bj`*M46JOBC`5~Z*kK+;>zo&QN@6y6r9~j+CwHK{M%6;VK*N*Sl z$R`o=OFHxL)jH|`eP zRQ@Jd(ZHt=Vg`>zyzuZaX(xUmcP&ex!fI((-GFDS!}i5Rt5!+S`?`H728h-;jN z_kQk$M~d7qxbEaR75uoy3HFb2FC}Em)h^W&YXrG2*7-6hR`}KHog5DJ^cUCb9U5CU-;_RyvAY=~++c@wbLXydBK&M4)>atDGcW zs`@KKG(1wc+Qb;iXm#`bF_Ym#^>8a}_E$9)3L@C;hTVchF51Imyzn!|+bz%*ovO1$ z2>xB8QqDxKWK!c9RIu+FXMv7W_4tE`k5|i|U`aKW2l6AbN^;M}!)YW>)$W87Tk1M2 z2NGOK=|81~p`hUzAL_&wJboflEku)qM79O*aJ>6Fhjs$M(l01hjlKUH7m3FBJw^ysXsxN5`AKK-Y(Fsf9;Z*HmDSyCiQNgYTd<+(2j z>cF@5_7=o2sTCzrN-n~gk3`;Za+VXmAB$B`Od1QMX+NQ2h-*@dP$Gtu5-{xqL7nh& z4R%ACqgd2}`_UM6VJ+e9Ktz!6f(K8;j>sC!M7FLrHEH-1XLBp;_TUBOqp>gCGr^4j z-pv{1W2Zpu0oDNeYkPP(P)%ZQ1|`ZzDMK|-7of&MRiLV^W^~R+tkx$bL1Vb>c^35xp2jIx$dh<)tYIAC2b3{zQTz z%Usu>K$v6ZW+w4JLWvnapO{MFIJ_YPB&=V>ttX;cLcYseT0k!R82ms_ug z$FW05);}a#LaE;!YtZpgP~z`IB367s%?we&OlUN|V*H&@7^-#=cjt(o5C+>-N3&Oh zoG_Xw8j+o_1FPG7BqW3_6n5T;+aa_j5}btk!bort8i!fGFrgI8&!qmFa&+e|FhbxSYUA(js^X3-xu6%)aEe!Yo`_O!D>Iqsj-dp+QRi8vBKze zlZN_wkA#Zep`!vJt&N)E*RGcSlINFuhkhqO$^-lgU9Qyh_YG65X7QEe!3s>K9RL6T diff --git a/settings/repository/net.sf/picard-1.91.1453.xml b/settings/repository/net.sf/picard-1.91.1453.xml new file mode 100644 index 000000000..5d1bf41e8 --- /dev/null +++ b/settings/repository/net.sf/picard-1.91.1453.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.90.1442.xml b/settings/repository/net.sf/sam-1.90.1442.xml deleted file mode 100644 index 918ea6ff0..000000000 --- a/settings/repository/net.sf/sam-1.90.1442.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.90.1442.jar b/settings/repository/net.sf/sam-1.91.1453.jar similarity index 85% rename from settings/repository/net.sf/sam-1.90.1442.jar rename to settings/repository/net.sf/sam-1.91.1453.jar index f6e003657d94954a8a3443cf744e175f3db08351..d25dc0f66ea469b443a1a4519d028c1af8765551 100644 GIT binary patch delta 36363 zcma&P2Y405_Xd2-LI6b+=@%Ff5D)~VNC|K$ 
z(gd+nY*=Y_MFr8HiU{9(b_1gS@A;lDdS_>6XJ=-ooH^&6cZR<{Ds|-NQlafz=^j6k zMfCi*txH48Ms6Orwc%TlZ@svWh~!sPwaxIwF74B}VYP~x{j#$v49Ln(E!%Ed zpC)!jSZ31dFz3~{W5+8cq!T5it69x;DNWJ*iUUXHPMGlK-NVQ5tvURZpL)}uKI%oc zP5R4EI^B_*zx{CTnsm=kCgXqhlaC33tcj!h*u!2w8*G|vne6jZ1pR5TA6E_$$F?}s z+j+$D;WWOA~fGU!jaDQ0qtpDJ-`2&ZvzlS}w1n@jq+6qh!+jKs( z(#qu4Cbu!UZQ#O)qJg(FnsU2>s0ybwKc2KVxr2%OFDtFmJRR}zfmzutxs1A$pk)=@ z$u7eFIwqYl=`HChiIpcXcPdz1r3Cxt+cbv8GF1)a8dbeEjpNRN|5a_xU6xfV8#tC0 zlVH=Y^u0|#(oZILwYeM3F}b_VJ-Da35UHoA{4`%e;QeYPtzO*Q=3d-qS@ooppKR{S z{Uo*iHa$hiWEc&%>1kSEb1p4Vdu%-|(7Sr;B1>tRO*arszep}OX`Q-R#T%!8N8hV) zm$_)5{#Tw*NoRn`9c>=SgB1VijZ*bfIl>9gGkLJhLwKkf+un%r+w>8AY}326NOgFJ z<4TT0syvL5_c$J5^GMFKc@&Sfc?>Nwd92Ojc)aSs9JQ=+i1YOm0(>$i5K+Uc`nO@< zsBt_IHEZ)Eo^0|Io2N=q-={@3Eu=+E5yNvQ=T@3DaoEU84TnwalsD1lN9cQ#r`bGR zvU-$fnB2+cnVfI)ES_!i99n1dV{-dA&$ampapv)pHjShq>f|zxlCc@MeloORbj=X6 z5#?Czv$HKkp8;FzHERsocQ21`TvnZQyEyvO0nG+GazLCI8)Ho)hlW)w{uov=viK=J`1t_Kx6hlC&kbU@YzVL61|A%KB`P@X`7R5O=?bV_I0QUZKoZWiONte z?Zi_rvRe9}u9nhEPFA7xIPId{sL3Lf?_{r2Ng73a@YF+l@s5oGAyxxKD?l`9iArSt zP4On}vCI(>x+$s9%*j?l{mr<=c^ITa9reiQ3dKoKO6=~O^Coj@H+^+eE_2TuzgJncfq zoaza|gmBQQ9-9WEN{-^QLovR$ITaN~chNf1Kf+Oo7n~~da~{Rx?I8%GeBO1x_TW42 z)E>ci0zULZ_)fy7Qy6b_YScqTd5!aJUMk5?(<^ieouMkmnbfUGHFz?erdepACOn*8 zMbUknf^IJz1oEEd(`)dBa8-JQUWX4OuK|swH{cEBrYQHD^cFH(KqcsHdIu$x#)?5v z+;1zux4A_rKqX4$-lZacQ^(}N5NS4PuzmQQ70)@e+&!v55&wOXjYm$* z`>k6^yMrR%rb=Gx9=iMgOHzTvHVOF;rR=z z;qzcXy+@zHClX*ceGZ>N)zO76(WyZH_Nj4a<9zYHcz=RF0g*4$6_j-Xden@-s`lIT zrW6@y(xFa%fS;$8yoDwqv*zeO6)BIdVuYI%M>FXfeSt)YRzLWz)lLVOqOC?jHX}_P zVo+p)2Lt6P`U*wyKIk+O>nH{|rUvI(0iH>XB$08K;?U&9(ML@B`VPUfKcVDpioJ~_ zzd;m@zQqp{Um&I9nr6$L?@WVO55y(f;K_ry|5327xxYjN6`)YhbSevG|F7PXga z2Z`HJe4WI>TQ+r;xLw5QYEn0ox-*50YA|u)_+bMkP0X9X6y91bs^E=KWs`l4RsPFb zxXRRwDr(kHt;qdSMg$IYJ!0bJ=6F9avs)kTq0&z3@ddwi>&8Y;8^W%);+@{;*gjN4 zrnxMe`ciM3`cZ!yQ$mhfJkgt>c4Trw;B=3aqyaS0##EGN(*PQ5L!b_^X($b|X*ktY z;}$#fyIVa4<$AuUm8*arcn@7lH%YuECWQyyijckX$C7Ozsn1M3n{oo{`ZO}DA*?#p 
z2z=A0Q)*4Bg}W5QYlsS#9uStkk38k^J4DeM1-kc5suo7YA>iaMjB3Nj=rnbxZsCh9 zag`n(Eiay-aJd|U!cKt~`=)k|E_^owF}$Qj)n`n%_0S8M5)q_Pef%0YzJ}-DwWFcP~N>U>FW zjH=&3FQWQ1bF8P119})vq?(&bkE)OK>T7CTYb4nIOhn-FP%*H69NJKGtf7jFKAWnz zr#C6ke@d7N&+?hpeUk?^+Z89y@VymY2y`FW3h!+mxykEX{M!2a|4mvV*~A%RtjV+ zxyo|_7wX0b$}D}XZZBJUrA_519q-t*i~@*ni#lMVV`tiwCC*B!W>dEKa>Un%y4#p3 zE2{Igw2}dPS+b#Q>Ox)B(z;fuK<8!E^i@zP7cEPNO1Z`vZh?pyae6lzs7^kn6)OXU z(WGZ>sEHeF$U`{KiL*)4-YiZxkqp%W9o7~L#4e66m>J0Sz=+zTk|W!)M&+HJ+ZlZp zc?K@4fk?S~pvkHsMSEa!U}}Ts(3vCQ?Mb}?CsxhWd*j+*bv+#=FL-QqD=ya;Bh`b^ zuo+@O-YG$AsXxLrY7g@&7e1ZFLA~r7`2CsW;scl???9~ytq&D~*_V`b50V@cbX&~M z4N%;HfvRij=Rk@$MH}o4sv=0NGTOIF{W!GL2pR1E9$dxuexe0lTvH!i@%uG9J=_yVxKP~6p;_Cw^ zaCbv$zR7uhisZp24>5VDAKx$-9Z;MA50fY8*8zsaU(D#-0Ujm3(LNr-V@)219>={c z9xvAk7EhGxBtK7P^y(-^FOK9#{5*}(sRR6|+`@XAX>q=v(s`D}v*C$k^y2_OX7S@D zqaz3S3BMRne#+vfOrGzjDgtF>@zdgi6U7Tccp)z^d6C77{Zva*S|SN8HF=rI0Ws{C z`>6r1FnOh)8nZHamBp(~e#TEtd5xc%^IDVFdGqhdsKlS2TJf{;Y`w`FEZ*p+_B_i+ zYxp^FHc6p2i|={yZIRr!K7cDsenHTJ7cJfUID1Vl z@KYZ57H6Ny`~CSeRG#PY0YA;)pveaX`h# zpXTug7Jn!$dr=bqNSu$w`NZN+P5w-td@jx3Cczsh9jlWhW zt8%I+|j8G8?;Ha`A@!S@@<>{ zLV|q9P4#b+@7jFNP4g35BfceI$WZ+#N2s(=y{%7Ux@yzc^i5#P;Z9Kw>~h`KG)+e- zG!IiLwP=D?3fRX{&1*v?l&^Cnr|LS%ThVK4hGweB`es~U=aFQ~a&ppqfwVE1h<;S_ z3)+zyDD(0ND@3y;B=iB$0`%ml7A8)(D`vlaxl~F7SF*K8I9!g?ZLJ7=P*lIOwJ5&D zRMU~F^=De@e3q}MlS9$Kh@4tnA57A>*p0dIbEmcdiwc=W`8*SH3jtsOrR!09_D-n3)Sl#H7 zT*}r;YH&(RO@A+$lnK0jtg%s6OJ-fzf|jx5peGE4&eWjg9o4dJEk~}H(6o1hUyE?Wv!AvGhSDwg-&L;gH3iC&Fz(3&LqCW$R zKYUidO@9S4FBUcLAnb3t8)$X0rHAf8n_PWya|+YPPHG)kV;wINbpV761=#U<1HC_r zuVcW%C9m@d7ffKwKr=M=UT&C-haQ>uG^YrR)dKdhKd|?s)n)JJ(zGxaDYKXK$cTu- z$RU_`0c{2*e0(i56etA)6A0lj`d?tcCuPHbfACcrLmm{Y{v=c@AIU{v(Vn9r90i|` z4gilUijo7Ar3A-kaZ9Se(Hs-F@L6JhEXSePWw|}aJKvYhO*sKy@4*~YhZ8voCF;*{ zoQ$Ui7efhw5f!G80((@-+<^RdDAnZDdr+oK`E16bKq@kZ}Al|XS6yk3~U3>%Sc<_Nx(-~tXFz55_P0v zKFV$Snt*yBtpgh_zcZkYQ^gID%^9Fs(!r*3UHC+co5b~;qK=}F!lxF%@A^&^NX99o z-6g>31{&44hwlaS`i|7K<3c;LxB)i|oV$`8(g-+|+&AVX0sCrp`2G8)+ziD!uKHg0 
z#so@Vje8KWIkyNrakX-^6A^itXzE?SVl8gPtug5SNwN*JQ~TOKNw(wmPLll&L4`w- z+$d1*T5@3ZwG=m3v{NS}r~$fv?OLs}S80F!c&Khy`3sNrZyoXM!CIH^76pcdUm7f! z1a+W>9tn-H>lg0=7!Q=YaZ+zc5rH3X)X=xns)ABCflDq87qb zC;BPgFtcylRN`>XMrhmNygkVYef^2|E+{B^ z)eu@^9=A$#LN~W69iV#e^doM&?+67UN6Aal6J>Id72Wd&oJ+Rp2 zS5Do0m1ES$N1)YIigK#x^kFt!tG#JU!~ZEd)2AL4o~fZ}j9WldL~WSnv9fTj={~T(I;-&epiQ>hUqJ z*XvTa`s-t^qR*$@!AhU-?{2NFy8h)hRq-<{J)rGSzFD#BRD!mE64hHN>b0HYPp!1 zmj-XALf3H0u(9#5-l}d1ts=@Ws)V-Oc0$MWD3zytnOY`7Yi4SDbf;wPdcX3`TM0z+ zOLR)s{}OxuQ!-V&vUb}F0rLAQoM6c+TCs3Gry4cZh81@17LcngAuU^Bv8V%eqE6~~ zW34y8r;?gzqYC5nLY&@+(+4*2AQ-iS)!HW7S}ssEnre*-<4i)F$pA;Ez@(iD3}9Mt zaZ~N4;d>V0q7c+nPqon!!(s%G%5Xrb8XBqHZM2GcRROP8$IR0L(Nf*WC{ioj;WdDq zk3nQcLqI%^FpUNx)P@RS-JxPwN;3rSdIH!U3<^B?f5*RSGN}e_wbY2{!h|Ie0l?I_ zw%TZ?uWU3s<#$9^?&wjuo6wb|GF8M?HdS~#O$0-7JFT z^csFQQ_Yy6f!3gwZqfz>Q)X)GG^hLgeQ!{mDiFc)OLe->m04OW-u`2jcG}>!>gYUe znO>DbRmUf_a{6RSSJR%<_Bijpf8}t~GAN|{WbTnAS9PzB&%hAVL@W<|N-O6Uu|by) zKX@0>-ZfNY6UdReOSJ@bZn0CNpD)(lcg6Jfvph^4TB)_fyB?*rb{Q4BCX_gI1jRdI ziM)tkxtt?atB(aY46b}eJ8C(VyZc(DY}IR%Rt2H>#LF(`*!0tHVqQcOz35T(j$`OY zbk)PuiLJ1ic5HF-KD$gl$qqaNk@X~foa~T>dl9tbH3OuQ4d6~wN1F5eW z{~21rpATq_IW`0;VIw~jPB_hk6c8e$fcRj`gWAz#KCG+{wbq40WEZ;JZlJb%FzfDx zQdAHe@}bsMM_RkWNFN~5hlq3$Xx2yQ{vQWZKh{3id8@knnN}w%29JqLVwtHF zmEh8tipvCBey-)S&n_HjOVqoUvqJo*m$gp$&AAb^7X6ZBKKBdp57?!cH!#=6Xp2-Bbd zfy)DRHOyU1`zhXF{2%L#PuL`gI~OrNL`+fSI;9_=R<$Ei5{IG?p=T&mE(b{6y{tu+ z6x14pYQxa=JbM58(NK1If-k}oRq2YBmRt?`R0rui5AS=gPX6Qx}3fRB z!PVC)ep{31Tb1^gmX)8QnLZ#ZnF6n5iIeT8C7dH+mBpz7-B{>4PDnKgsV)xkUCcF2 zu4Qsm+&8s^r?NtLDl5QK$%orgysgA( zE$_m&*kPww+}6*~SLF$oY*sM1C|Iyr!JUCGa~JV-6{njx-Q}$w;_E36@W#5_+vGkL z_w`ezk95zmw~J==Aj|N@(^c)DbQnGK#x3{$AIJE z0FMnh#{nK2G7$BK0zEe1CKgY!c(TP)ES_rdBl0%Be=$!p1%hnHk6H{!y&=z(Kzs<` zPoQgE@_DvAo#O|O<8jGlt|cJkXr5>BlV%|b*_WU8VbOJ=pZf43KYV~57Xx?XrMyhu z2JF~Ja%(5p<8TLiG+Bvvm7j+3YKi@fDd^*930dqwke0xX*V!SsR^?rQtId|Yju zt5pru+?f!}_UPU8h!bcR=lyXepEUUuwc6Byse)5dR%MMe$_93uCYI;)%-}PYp29@{ z=AJV7G?HcV4VwT+P0fxq)6{jJelK*56V|Cgo2J7?=Bb;zyd?vh>c$6ah3LIG;Z>V< 
z@EKcdQE^OLa0{DW<<|si8_ut*q)`1r#2Yrh$!`@BhCZQf>S}=zr6R)g;^p7wcO;Fo zHowj1Y%C97v*`;y4~M?AArkQPXTD(byL3)PdW|yb@i4u#@gAMC`F*uJOmCL)0e@)n zMVmiju#tYE&u#vM%h~*?+(NFIe8T3>5TAbI&t(-O+2%_^MmuTqW#FFh&ubH zCtfWK*DF|8`I-$Iy_|X_Tpv^73#NoFqsQkB95ZC}uxWXNQU{|ZQU{M2KPq=(O@NPA z`AZiC%=e|I?clHEdjw#$`CBQ-cd+aW5iI_me^9{)eQ3sYn}4KlB_{Y=Kl2Tn0ePSe zY`!T)-NCsKQp^;-O!+8U{s0paXiq}~DeXj&0HIqZ}TP5Td< z6F3oQsA^qAFBN~=y31&w1_JD zGEP^Us^~Sjyt-6H&j`;D3C|U{qI0dJ;;ZWEju4)#CvR7+s_LS8&8e#UT-EuJKMx&g zf#D{t7Zd1z5(7;#dUlZ5{=waY z^n@6GR=qV%Zx&SyUJeQ0yxxcPCfqvs z!xnv$=_G!-p#Ry5ST?N~?pno}hqZ|RYES21(#QMILmC~_I~S@xS->T-0X5{HsFkrS zT_w2fpx)6n*`uR#r)0x+%m&CT<-O6w7oC59`IkQRW1(_^kJ1QII*k^E&|+E=BE&dx zuyVN~1l6$8Bqdh};WUC+PNQcmf>=(YwHB?j=vj-_TeQKXjaWIM=PcS}(Pr^N3U8sU z7QJB6ixy$E2b>W&RS6<^J8hK|Ac8>}GYQK*KH4izffNS>k^L4y?gr_gA12HpaA|@+ z9@GEQ%snWLNk@V|pVVhCzZ{%!N-xTMH2CCcGW#W>arSMCy2{8=uNOG0ML=vsf`Uj)Z5B?MsHK;WgCnr32oFCPGj-4 z2k-ExO=m?QoTm#ywS3Q{_ibVJh}bG)<mmrW%@SvuyZ8kGpU0^!Qk>@| zweeHE0ung)sow1oSOI0ac*?Nemts|iuAUCM;T@|scJ&-Xb@lJ&+3Q$x(cL{2^tIjv z!3Nzu9lYEm80h19+keS-4`dNvu3Sem&bb9+~aH2IZNxzE5VzVVGG zLbYuMOYcNK%d~T#?+dmIrklO;TTcgdcDUz`^KQ%Eq8hD+{E=VIgLf^}e-y|cyY44| z;>VBlta0V)^tr8%sFx>r1jG5?1W!X(Vt%&wV6Lh^$s@Sc^hq9%YrXDz`q1tun1=Gp zc9IX&&o|VfQF@d*KE)$a?42o|XxBtsf4$w$>ic|8U4)jK<@wv|lyGW#mCL`BA(CI= z*DYCzuj})K2V5t<;-k52`Dm*U;pYkqey*_KpJ3bMP(M(9TOx#s6E04KIFTk70iDTx z7G(+ZfG6rul*H z7so!rUaVd*S%e9QWRaOd9PJHR&lsAR^p3W6ap}uFR3iCXPJsh%o4%>b}wpA^z zCfDlXz>vrg=6eR$@^Nk05pZBfWN`!RcIDO2{(M8pZZEk z`|=>kG*2=e9Ku7yFc`waLU=fj5ND(*oP;4f+8>G($4Fc;5G>XHNl#0k$xoZY6j&$> z`{3axJ%2MV3Lc#ADW!$t2^iyxg(1UBg5NLjTxYJRE-vys#p8m57JHhqAGXU}cB$Urg}2)Lg7{t( z-!|SZtJDeLyMkc=@zh2Ey3`ehC zKnra?3%#1pJ32I9fF?RIZxl8mcq&fF9feA?`8_DovWTt5bkNh)o#mdC$PY04bj3WR zXI0FUl71+3c|dNh^++hR7x^QZtj=Oql5Hvhn?IIvej<#7Wuj|;YxC!{Q8Y#svD#~e zZs1GMKWU>gjj7pl%;?~W6`t`$ntUn~jAhaZlfJ@Yqe-9Qf8;H0&A8ubI6VI!QEJ3N zfPWpF@`C3e^QS6vna*x2w{2~(+R_t_!;hVfO9GUfAY+CMD*?Lo;1A&H&qEcJjr=C 
zu#G|kENfv;#V43d>i~q^hs9awi69|?2ru6U;`6Wc8AB|fVj)d*Y*W>K$y40X@`_x#zNtFoZ*@yF-grjxIZIC=_B@NCd?3&aUsojU26u4CazHF(973{A5^ zu+J-=JdIbWQ>Q)kTpSGFsnJMEh5AqqIz!~Eo)x|&2$Jf_QoCODlmd!W3NZ73 zD`8g8!4ntC%7Thn1$T%E7HB_IMKa*4{s_~s4nukO^W zdg)(P3&Y`CmGU*r&Xs;|(cr-=p7riJ!+`<3PWAf&&7Kon`-P{5J5TQU@=7W7`465- zoU=9by2s~ClT}*0&=i0N$uHHN$ov29G1abC4`b3Te)BAK*Nz6QHL;$4vf+>xfInM! zRip;oeO9=)#DC3-pF_Q^T#Mm@?;rmlD-z8c>Bi2?_`k6iM|pGHHn}~$^S)5@htMiw zUkI3Nl=2QK1)yPcOK+RPCDR4yI}5>!TogRh(%aJDdn&Sn_hvmT_+Su^gy}a5T(r@c z)5lOv9t#H8IBLWbu<>gmz@SOMR3`&Zor3k;seoV}p@Te)j`8$hR!8r3jk~B~UA#NM z3%1oeUA*ynKP(n~)y2D*`K+4M&D+^61E0Zc{2JBc*MskO^Oka(v(_uUV}%)8-Pyeu zr(T-pHC2zE-jhxXe)vjNqlyirA3s-D5^Fz1rL^@GRsDNOlLUwM@>a8*HW|<{;go`0GVr_7v`zw^^`TzYo3ugL+_0ZY5s;}; zw8^B+me@}bw8aujDT3BXj2Fy8ODaafx0_;5CD2ZbUb1MHRcJ`1NZb^$rV41E`1Z@I z2TTf@Vn`hU4duNNMS`n`dh5f0`iF)VOdswos(BFWC@5#Am2Z?c0+5!VSi$zAywyEo zd%?^*t{%z2%v_;?*9xrMA1D3E3|<-MZJ_B;`&G$_-ZCMehv!gbvA=4n$SX#i8a>e) zW1O^UI>_YnCwhkf{`zI2w~j94a$&s+cPp1qse_ZehoeCuH|aD0E1F}2rSO`KeJgVw z;4LtDj{d_T3S2)?)-6KB6z}7Dlo%||TqPbzk`B~SZJp+Ap?8AkmUueDQ*XMrzAg;g zInxmWx~n=n-CIrXK|PiAXkoFMKkA)j^)ZnK1f9A%%NrF)xoWEKAN8i{{VCT8QK>V$ zWptERwVdH?5Hbk%nuxDFto5y&;hh^g1UDM!?-Ddjm7VEr5H%d)m&tuF5)VLtR*%l~ z*3+>IQ5{2!tZ@*Et`HoLWdcFx{WK9ik2p*)v|K*32{rOy*;}M{)N4Vn{vel!~AM*;JRQECO$p~k( zU%&TV?Uf-!D??CHX3tTw9f&ZmQ4>=sb4v9Dt_gS zb@ymLVjrHP+Wh1dt99N_-p|}U2M3oN*`P-J>J?_jv%h+mxO3@;cJ<8@F%0CF?X0<` z{K83U^j#;-NAG$kyEcC4{sHgZ#(aBw#XUn($OhlS!m@o0*yn}BnQA~5s$&q<#2&6% zAf?xauMXgqy1_j@;}KUmINGf7)JV*ikzUXr5>#raVX2}?-d z0e?YZwPgEx3Vq{)#`>2JgaJ5>=`Tw*uzRqTU6u&^?gn-b8%|cp;9j8zdE$=+=E)k||ZLi5ifD2-~6GvHGRIbq$$H+BSvWx>J zXmUKL=JXBBd5aUR!p-X~=o^cRS+aHg2Trv(&En#+mLn#-RcOEi)waZZZ^7j(P6sL7 z{qxR(*5dq07UjjsPz|Dt((Nnwg@I>rro~wnXIq?Oa%EwPyUT>5r4WUC*};UVB}+rK zv5nmnlYWvstY^vE;8<>8IxB;YZTwc0F`dEUZWe7kYc_TtsO!Ze%a6jmkn#u?w3An_rGdP;>EY3j9jqgKUcHiIDtQk*S(IB+U$ zZp-a#?5BGMs&j@sZEp*rYI6tfs4^3bSnl*-p(Ij`NieEJH|Neacj2xkH^x?V*{|N6 zHn>YPR}zdg6PQL%rZ_dWm{GLYBV+ThP}1DN6>RRqeNAjx=l+~)^8g;GY9<<`twFMj 
zdoT}CLlcb}`cM!KHYXab!iU>D0-MQ+8`!jsN2$AsMl}n|jW&?AVn(_#-PtRy))g~$@=TYI-hPm2 zsP+APek=pzt8pntji^~Rh--6X|M=rJHk{6tPf3Y~e(^zy(Ttyv<*g`{nQG)fVgl!l z2~JBjDn|FcB=q0Q_b{9=e{DADU6U?3Bx94VU|P6}Zlhu6;|yFAFwJ72+4cJ}OSW=zt5 zpsB&M=0>t*CPGWUlMY`s2g&H9EToQw4~N+M#T-bQ6F7M`Z&8lw^&IwB@`St&@w zj+$zagpC0*w}Fm!C)b$kq*M6ixTv(@pz)pv36J}C@vclA{Ch0alUP*N5f2cfiB8a+}XC$eZ{phOI)SiJ@D3qvyb0f-em4EKlm7d<3j?#Va@tdcoLg>h^ zqlo$k$%mC}GIe6ZNrMOHjn5m@Vzg@ah8dHSLpY7Xq*6XOfcCIQU3_K5DJM?4Ng3Ey z2)^{hp~EJW%mj(uS!$J1M4CqJVWWpltgAZQwxaLn6COyL6|L$IFum&41g_$VL9Xb| zR&=P{V)W>|@r~rjkh}>hGuJA5|D7=V;j*bae^`|hu#!%Ug|he|c@rBxg1wk6M!SV? zpfY02VgP?Dq7HF-fxGrA1#+_H77q#-rj^W6$yL1ZV93e(uv2vs_FA4%F)f6!cfm&> z@`q3*8&g@V>@9jOV3CSlD-G` z8t@$1dMP?=S&(tc;hl8RW{{$U*%+JHROtAMg0?1FuZ@{4)_EF7@lN{rMD6ehHAiuR zBM9MTLm@NAI3{<41XU35NK|D>q#}w{$;rq?7UBvs0{sv1z(5nsBDt{iJyCX<@~rgo zQgVQV_e%~!IQ;o#oRliNkscJ;0wyG@7J@3J6c*mXhq;Bfu%J~O32WfON^&Kkdk}sa zs|CqJgYvlD?9M>rJ=2&laeQ9xs6hWy)v}C{d83C+9EvVg zwp>dfoD)V26QHwfOSv<~=S`fPi+RK^!L6N`>e&@uY>asZQoTu@QC%&}Glmu|LG^9W zByku^G8MC>Mj!6s9UdNlEk=)>G_jLY02@_~Etx5lD!!67Wl#khoXqMr`UOT!6{>2B z92BRfjkQx`(g+*i9`5sM{$Oa64@oZ>GlI-&|6gM_zhqXea##WSzvF*$?y!-$14icI z8=_FbLa2<#e+lS6?q*^OeFI;LClT_oxC$*l)wxPfk`5~yEK9;Ig}bTfM>2#;r=!QF zC&{oYzhAW+Vua>nMCeHzBx66BZeK2qh%P<57P@3@jNv*MEOk+odPuB3rUyb+PI4Hh z<#30DF;WHe6rU7K24D$i8BwyYkJvKMFlLHaLGt~R4-`Uq&=BQokCna|*-%p99H4#hRUyb_o;+mO~zAvB50~9iM z`C3O>m?;OyZkXN!aw9EwKb)P&!jqEQFg@)6g{DYGx|5N+3fl^TqctQ@8`MWzRC@>1 zMn`1S$tgfOYA*&7Dicq1JTaX?F7=lA;PW!`A(4yYy9DEetSRmQoU#!`z);|IE8??1dH1#nZVrc zCK$Gn!x6E~%1H`K06n6rpMjGZOQqY2A+*gf#LX5C@~%4|Vcz1#Rks8cl^H ztc~&$+N6%t6UCPSpN@Bnq94n_7ln}W9jpTW>#H%m3ky>fX-F#QWaz#JDDt6B>r}@` zG|4EZ2>3`uxGg9hU%thgJ_z0Uzr$tPLvMQvi}cS&XgJ(Q+PEpEDtAhX zh9nU5#)~U9Z~a^KTmQzNAa{>2T0phzIl>qg?qD7H{pU5Ri5iS7QtG1I7@ z4=vOG!LDDq%N1vUA5`=~;>#1?VDSwR-%#=4Xn@p*j|PzQ0mi`eqH*Gk_X7o+C}DCI z0N9z-Pj04&GgX{N{FF3`rimT=s7W&bR9OV8I9W)kvvB}GaQ}4UV@`ngF`NMd8ayVR zCQ}FIjg^z~Cg6A}^}-C}xPP8W9i7*oR1;?!?elO5MsX@(L*l}5w)Q_(c&JxdDlLb4 
z&9~8%@GTHu8S#}D#dD#J9Rye~U1AHT#s&zr9C_L}8mN(+0Wcp_78|EVt+qiluCEra zHA*MhSOr7OwS+v^(Q+Gyp8y`7XTzRvslrxT@#;WbtE}3QZ?y7kz@cTX5`C_VFI+v| z3oCd{XBh{qlHKk>u6l7akO>w7qkrLO4-6Pr0Mtc;3(%SRlrdTF1<&fIjN#l{dFC4} zQu;Xkq3^%jy-2Y|+rI}4{=Kb^uLPYTVg}~sb59#SWezk1B-%6Is03S1U0rFs?3fV; z-(5UqP$Z6|igey^bcAQS`XU^41zqGUHmFNWX451ee3KvQCs{N_f~Jb|h*j8_*r4rz zjy{@Ic#=aHO|s-LhcJ5FlJgwG=QvnGYzm`A7A>}DiJY^t)S_kG zut8i}Vuf5+TBIb-DvMTI^o*b-;5MbuT0iO^5XL%MFSi>UdQ-YXZ}Q`mgXc}!BHw@* z0YZP-YSIhAS63NBITYR(P1wSts1X2N|pf&3QM}FLT&`J5u~U?HqI6r=P;XIrlVl0 z*;uhWAw4L&bdXL$na_aMRfoySfF3Vi8-n;K3wHFP!-i7ttTt0>yh5j;gAEY-1TlhO zh+w5k#EsBsCAXIs^VL1>|1=t?V&kd zPvrd;_p4XO8QHeA1E!e?9s;`NP?Lt41Z@z1CeU#xB^?X< zYC1$4@>By~fZ;n@ZF#}y6g|dSc6>Mn#DfZZ(Wp~=ywiIp{JZxKd#Te`XD~C%PYx}K zQnOw(($Ql78_deq*C)abY{xn~Gzu#z& zTx%UL{^uI{vj_h!$!!al4>P!aI$0t&Jtg?_*_)31n@%Kz^jn;3H&cXlGsU6rTSDJ2 zkKC|}xbA&;ZVJfch}lq_M&dLUr-?XC{oIV3V7C1V{H&2|w@**a{DRP>~6r3S~@(veggg7H53`G74;OoE{ zBhFZH#)>oC#735T$zINg^iB5kUyf5$OX5=W1d)43qMtPPZIFzvY}LV(~1KXZv{$ zKjy zz6G&^r>f*j#w1P+uDoQF;Ru}Lh`HD1r-UaCnXT?d=t=6k%f{>?^Z99;=Q9q#TqqBh z^CI==6(cbxoVM5;3RqK?jBK352WlVbduhMJE3$bNuU3CvF$Q`-DSA}(ziL#dwFas1T3%=Kv%FqNL>s{$;!P$ar;GH1 zjZ@qpy511l%zVimV%T&DMZCpDO@F>>MCtQ+tMXnmW*YPP1qS!I?=>S?ZMkO5ih0rI z8T1L%h>m%=gHrRxj~_E0A*%KlM&JBxz_2M2=VQ7CU)gf^pKoW#(=T|ZP2WoquE=>8 zpVDVGzl2*3@GhHo!@)`cs6=~Mfv6-83g|~rmFQ=i_tQ-ffCxnNAiW@br9;@20ICw9 zbcxQ1oWsU|&i$p4fCb*lUmA0LM?qU6oJ4XwkXtiaohT0EV0|w$BKYH%Mr1-6&ppUl z-I<`ka{^QW$E0uL8o|LiJ#c}4uveA3XVg|D?_p;c);&!%^PZ6kYRme2Ml1E~JtKAn zo8wI!K!C(nI@`ZjEHP8f5EdPJW#FgxV^+LlrgG?tpUl?2FwEqbZ9t!H#JG*1M{#R` z(Xoonp7BL6^sspFZ$WZXmTJF1)hI7c{lnf`Q^ zI@aFFETn^Z(`7$4zV=g}hFIE?AJ~tPX0lq`-Squ;R%p=8Z0wfqF>S~>wXnY_+ZE6D zH&3~o&CA)Fuc_5TP2s|SJk%`W?%00x$JntS0>k~#kPVWFI9|%DE)NG1S!}pNFwYon z=6kue+A+qQY6zqSagbMxwQG6V@nJh4)XZ$7^gg{U0+|sKdJSZ zW{!+;s%k_?dUJJQiSvy`mYNkh@3Ph&S2;g zK3I)VB!-J&08}fQMp$yfSTup~IGVnRPAwbyS09(h70HsoF2OKaq-LV*K>=Rrn?DQyT zgRb4+Fd1XSbXbW|gI$Y?R++Blu*Ny~S1o+T%ney%gN4;X?1|Z`LoUwRE39^3Fky{( 
zzymRUVZ9k0TmP<@c_!>(pJT={j27Zt_4j)7)!3TQhUI8h;bs9zhQU*XS^_x#V1rpJ zRP0s_)|v;VeG`?u(JYhAA84keObnhdiz)&5;5JiVWD2Ey>?%w2xotrB*c?FuPv zEnxY#gxT2&dTyKG)Sc!V?qY7{=<4-M^mJ2~wdfdie@&@~nt|<>OR5ux%%jeF zM*0sqyPBdppdzu^e2!Ecx^xO-}| ztNqnG9bzHf5Z+s&>YwW?Qh4NEu)_ythPz|dI1%D%_dk>|s`Q zP8qSm`k$HeT=LT!(INTj+$9;b?(-|TW~{n#$t)6FbJ-m4^2I-TbMt~HQG4=htkQbH z{2cooN}l=!1Yf^vPVpf$=DQVme#WG?SJwjsSI;eI>epsGYPjatW^xMu7VPe|Zn2KT zHB?*EN{c!VP5(V8_ZMjtEYvYvmBt3AnO2u@-k{FMSWOG}>;^Fp90ZU15cWeJR;6OC z&hGOHYHO?&Ul{2)BAq~_lZf<6@La4lp23D_7;g=7$$?ZcX-cpC+ zt!datQ#rxvR~QEeO{4zGYfKF^4ZD9m!7T|^Zx8ni>M2%LU9X5vT)VhcN}r=m4)!l@ z^>Dx1_kor3ELIVojc~tudRfa>^#=Nk2Xp{B}eqb;->oA+fS~SjjfM`q- zIAfAnz7uGwB{;@3nr_jfIHiqdSPqNBvxMr_K3t;x* zph76BWrcrOR*o%PilgLd42)m7V(VoxEf*hHY1xhm>}>DBO0!98OlMyY6l{7H%{~j# z_i<-~k0ti&bHUe3TVwQy=S(4~p+r&wWTtBUXf;v`)2+I^M!lVGy%x9G7N#vMW6;*b z=-eiX)(#lJ+sj)ShHLq%&&yk7jXkuNsieA69@BD(46CaS-kKWowGnGz8xWXlYITOy z$q3ru>uguQWLO=nLv+}d^PF89c}NATSm@D0BiN>su!NUYu<9C{TuT?nqQ|M9Dp)Oy z(^8aIgLNueUm3;wdGA5+Ad8+bL3=s>_lB`|o2(FlXfea6##OAovQT7Kg2i)JOhX z`0aVL?)STpr~I;I(50*P6LqKpRV}gfeynQkaYtw3t#b>oQ%iPhQ9U=}sP ziT$&NwcM2;Pp)rYd9Mk&t0_yiiE377#L8@DMW~OOIB9;}#M_2yg1N`7smO!d|7 zewG}N@OnS%9asB#cJ{K)YTXd4oqWv@tBp%9`f&M`=-iS-xh2*5p(YAB*cba?2yp`; zhQBV(8{)hv&RgQVEzUdQ08?sB=fp7M`fw!Zyf_!cc~_kGuun^l4?Snn2QrXw{m`O| zvHcYMS$dX$)d|p8zjONY=x$)7F`P=Y=iaFSHkOQ2!e>PdP>rF zK2hf--RmLf9Y2cmlV3KwiX(g;lWzHe+n%%NSGoS?2mJa!ODH@Qgtvo#xj|CC?fmN# z8A9Ia@wY{H{YVyOCdOeype6FJPSjyAB0PT3S#>;QP*Dwe2u<5!Y~k?BHN+&GG->;B z^h}uK^O3{|Hywfww%e!+qpT91Cja=f(`yLv$7k&gp-Kz{{~PO$oc(4&$cwQ3uHGq-cHOYnT` zQEPrwF`H9pnaQA?q;YXMo%DH_5>`s75+;|lxfBhxxipt?w)xteE?Z6FZ7vT4l{09G zP)VDrZ8NOm7H|ZcD{-cJe}+{*G>bEB7|Pi&;xL^7n@Zrys`gB)n7$c2gFZ8@sh+9= zmTqifl|gTg)bWp31eONCUF~c|syg|WS-pmhGk$6@+4s#S*;sAM<{liQFJTpjjj1?s z)Yw78#)BASGyY)KPLE+@HduEFzfeTyZw}FvR6M*C`FXCpKDz(5Gf-|RfsKpDc9=ub1x4^1pzJz|fop!163#}2M zJ0VLTk|DQy(QdVAp;ZYB7ar)rB1QH(F$>fe3o-lcR9zS0?R_{*bt~;xs}@aZbP{*S zng2@sB)#H3a%h_WdUT3TyN`x0u~K54(%=K#?~#M-<-1-}&n~fAIwB`)PTbGx#u5u> 
zra4Gjo2Awd&Vw`W+&ir5FSEKpqO4qI4YeHQD{p^;;C#pf`85?;l%gUwStaj_vi@tV z1+G=M;gJ&6Tn&q;{X-G!Z*(MA_l=maoTZL^oz@k*49)qnTDsM$3sdHvA6h3S(H5X~ z!%^ahUM`F1PVh4l@?c8ES>|%TUYren+DOld4?U!Xu#%p)1XGR_Ou2=#Z#D#A$Trz4 z3+#BO+`MGcE@6D{mb1Y3i~bG+U>CV_`yg=5kE$j>d+4BF{puA?eqkN)=yO9cUse;KC!7Qb(15L zv263HvpZhh+iATNcFD#`l24m-#g-!s)RC90Bm<(Gv6g~UHj~`$fL8XW*))W{66fo} z!^^+HjxWNFDl8<3ZT6jNzRT*!->W^FttQqGfo-nSk0N7|V59xyoN1u!-B!19I4t!S z>L^Lw66aT(a6pr7>@XT{W0Mhle~2>_J-& zk5m_yS`~s-_E=y$&QtvkS;Y)!dteT!#|~M2b(BwidB|F>!+uoL4_g~Uu;hft)xarO zqRJexlFg+a9V*8%)#`|qX$G7@wOq|PVr4sHC++<2wbjcDUBKXm30 z^~6Q!%+08JaN9*|yt{3r%IwGP{RxxiPoPlFTt60K_{dIYEosnq)?cnkvuEh(Ix`V# zrXgHx9Fz8)Wvcl%Ea755d&8RLQsB~lw<?$5fx+EdlYj|899)wc@XBmq)(> zn%*nXg$7`}&sLZJauT`qmv!1bD6ZENJx;0Pkv`cjdppwC$SuGl4LWAbgutD7-*}9N zcC@^#aitgvw~EXI!}@F0!W>6Ea46; z`GqOC+79ckx5IG8x&w5EHt~-q_{V1cv4z1~IS$?zle2@qzQtH;i1RhK2D*#Q{)8Ra zACqVQVIsBe$`tSUHT&}dMY!|3W4X%%V&7s5#K)(=F!TN4on+(omWj6TBr1Y2}PB1Q?xRH z7d``WD{35f))Q`GkqL8xia5D5j{o5QtxQ1Q%4-}@)TA{YbvC*lO$EYvyv>u&soDF*(+i4$~f@&3`g6N@e{$zQ_v zETA(l_NPHGZWj8F;r?WtJeh?_##ngI_KEA9$n)ZSb=UbKjrsVOna?+;6y*Kj>)MA` zR+tdZCdK;}(2M=W@J$@*a*gSC$bmf#*a?B853$7*-xiG<$>NGM-6N2dme6d4j2fP9hgk$xO; zFNz-_QY6&e8Um>h3)+1aL%YgLMCj^xjW?G^;8r~ge|>Iq&b(gKC0~SO$mZ?{W?Pyk z68>>Mkf}5~U)(8?<^l<3cbO*={^io<^5tEc_m>FVC=twa^N+8U_y~A*J&iwnXZPjs z*t@^$f$x~ug`o$#VU~xHR9+w|WH#}o6SS=W*;KR|p=39gDPe7;HObdc&<%du??L@G ziT+!wlgGPt0&OvK9imSpy!r`76BUa7G9&q-fITSN?CgZ!Ol^gtRHlr%Iec-C?AzWvVb z17&!pp$|I&3{7@+?fcJT)K-jz*$G3%9TLi-vC!1^vFmMx7k-YThS!p+pqAS4HaoJXiH<(19Aeae^o(~k3k0N!V#U@(v#7f^Xt>umsWis-iEWqNqV?DADQ0im0@aVNcFbuqi`dS>b4~P@0V~oa0p{-2&Ss$!6>9+LEQS-r|D9u z7%o$URZ_19AaD-{zKQol!aFy>;GB(tUbKG$RJK-*5}_RaBuhNi!*k)i&=jC|n*up> zdZY-+?6t&FEo?QljS%?~WMop^=HM{pj})V1tgMnc5n0S`s1?#*b_QtIzL+#fq=x9g zNRcb?MnBap{cC^a;X6{-Q6fd+SxWT+(tuqLEBv26ukkrTP(u`NL{|B9f_Wn0r(Ot9 z+gOoKE8hsD(J6S4B|%E9UK%Aj#cbN60ViIE2ye?CB;*?KyK82tUa{+Bak6GSZN0B;Rp@hH?v;do@oEdI(1mNEzvt;GjeTj z-7R=%;Du=3SUnyz0(fqR)UxOnPmsfFf7?LMNZz@PaqfEdi?!n`{zO^4rGcLK-5@ss 
z=KPHD>6BOt(ZpHfbW`@Gr+ViO9nS1i_~Qp+66mEU-bmWH_LLl>g7M-GN~m#kocX=?b2BKOMMNieU+hGWM_}puhXRp zQ6$Y{@vi_{BX-XCW395Gx>9tNP)iZaivIM23K|s$3(~PlU7T1M>>5Un?CkELf0Oc* z)YLBJwvuE?hgoqoVg$uW18?nV=>(lLMk&WFNx$Bw@KN2IARV8eZ8M@du5CH3>?s{G zI%fu_M*P zCsAEF0?nPGJsGJOx87#p&LcMZKw4}fJru>WI?Ml`U^dn${5F9tsivTcrs zZ_n~A+43WdVvAe4(wBO8C>YcueFu(K6#qxDhE!Mu7j0h2XDku`Hk4Yv~ijU$-$nDMi-2ep-JgS2W>urHmPCE}_53hlu=y#x=X(ZKOL5{~(W@t~Rv za}LIxNj$v+cO{5%vRIq5FD~kXUGh`=5RSrRNUU`>!HT{4kitS09PQ6N+FCxSP>5ca zoxSS$o7P+KXAeLjM>J?p<{X2YCPR)&PuY}z2xa!edql`9voX&e3sTPe;Dlw(79CW5 z^$s?;v#s;M?m7f*v2(AW9QiY%jKpOQtC`qN8l7nQ^=Z8NhCwm1%p^8DBsr5`aHmqnkwLBDbQJvn#Uj zk9K4H7#XVJCcC+`b+ob~N>q-eq~+RF$x?2a15^K5qd@y-AnLE?Xj3h|vK4jye8y); z`ZA5h8mi1ONmrg*i|}4);P;(re;tZ|*Z(nbIT=GwcJ>tL5rpw8R z{63~MQ}$eZ-^@o92&Co=X*9E#1IST!_L2%1Z8E$A&2cGax!LfWU3;gtC_EU%Nfe5H zpT%jlvSQ(H6<$(YXU)@D!OT@^oCi}MepS8CKM11w9=wT}KShY~CZ>mr&{wgS`(V{5Ht7q$AZ`FGqGTXdo8Y>md z$NS2Q>OF|8nKDwtU3Hs*)=v~vvRQcrMdF#edJS_Mc1~7(uJGWoLgNNYBNA`XSgnt$ z_(I`9RSkKpR<&X!eHu!uJ4YzE?DjG^-j#T1D$EI+q*zEs6w^;MKV-Y=1LfvJA9eGxL`%X{Q|$@BBJ zQ<&fP6_(7{jj+*5Y3X!Op>X9ldzn^&hC9bTctdm ztB+5o5leLX8j_&f{%l#t_&)U10&NeIg?wnH*cDp#411cPnauuxvg#7Op~xJfB^Zve z8*Skqf94i^b-ms>v)Dxze(G)ypKDgnm}Ep>*CV-0p-Zge&n2)=Zb=W1U<8tpy}$A2 zmzAf>+v5|>M>2UMkC&t1axEkLrx@RvJ~{%tSfIg^bzzc6i0T^jaHO#T z1KLJEZC@_B-8P_Q)<*ze&(0nZy&!#LGQFTPiRIcv!v}GvJiZ{ukjdnbkX)mewThV` zusfQdrvqFdLFr=ydluA{b@bp-k_k@HHz$YT|3Y P*L+LMQGJUsd-DAcH_8Yo|#~Mp3nQon@{G>o$Gq;(YuFl6#wVr;-PI@=pH|jMfB?66^lYk zg`OO|qW=)FPoqDQxE#bN8RbRNq79D)4%d@*H7J{U(R_DT4m7*v6Yf#R;=G+~f#9#d4&4NBJq7qy0313!!|D zky2vwFgA-@_^BGliLbfI@fIieDUG8oPLw1`65dQt!;r~mviEMmpL`V-C&lDcKV{NC z@{wk8x}VB(kq|D*#Y`^lr))0a=aO8?hmGx6IE+-8rFHQw~A>5D~ ziPKmb)kMOYim#bC&HYriJfrsm*eg!5I4R20(?>;BtXY*~wg5wVkL{%>N@BlTz2DvnIWT zF2R!@S!pcuWBD^H7iHh`HjSiFOjQCi{275yt3+_8z_ltZxpQE8Oeux+6q{(%?{v+k z8+6m;E;e_ir%di-m*AL*Jp>Gc*;S9h@|R7x3O zaz~qU`SHN$lhJBdceVlxvn(fSpvi-59?U~j)^07>{=_*>6PkPNRJMrdE@hU)i`bxn{}5=@|g6DNi$7) 
z7DH^(ER$y6re-G1A(o*H1afk83~b44;j{%F`Ak_faKkOSe8M*m`VWc7q&5HC1G=I2~WpyL>daH2fkd2jUKJK>BT z!FLiqtW@}3g-@q2Uh9mghYIm3r*B>=#?RAhbP9{13Qu&#)}-qE1f8bon4v~I1Pwon z!z`5+It#?l#j%B6M|mM!g(lJ)@a5T@LnG)-ctg1{+I^1RLS=KQD7{VZpoKD7v1p3h zx4r0_+myWpEB;@Kxj;v&z?ya+!dvk`#byvKsGlRIDtS(CcXP`psn$ePzJRbTV zeT;YyUBsrI*1DPwpq+oPbuX?f(~KU`AI>yusNz?;gfN|==_|Snp9dS~efkVO+3CCJ z3Va4t!}_^OrvjbYrp3P-?@RC{_!Iq!Nc=f{fwqpt%9$9L-*$^$pCSS^+tton#xK)& zRMZqeq#WhamsqMM#nUuY^EC=2fU|orafP#prCBTJ8*Bh+)-q=~d$1Yi(6?CP-unwf zQXRu4=7PArh?CJsCgWd<$FyP@m?`w#Z7PC~#A1I_+}|kjdnD252Ru0c0txL`HCgKP zF&&^sfQ^B*mrg&y=K-*&=)gLisvuTiod@|EIIvE@dE@&{Z%o$%|FlohaeN1oJ2VWr zMNgq&njE}=p&e@LICcYD@x~b(__0I7z_gBrSCbPzP@`kRkUvl|WBUHYIWwnYPxCLQ zjeiGz>R2O85Z!98o>{Dbrz)SiFvN^iY3H;!)no_*yQJZMjAkQ~fd6RJ$OnkgBm|?4 zlMC2SJOTNMqm~lVN}Sf>v@xlzN$vdDQ;jTYFYgYLwxjqS69>6$>Lg*E#pz;FSChIi zg$&ObH*U<}e&fgGj%5mOnScGUsho(o=7@F@HD~fS%pEl*w}0PpxdT+>&w8w?5N|{W zUg=ztyXT+pJc^ATHua=l4iXdBn`+9LR?((D)XN5N2PV^>Dyzt5Rz_fT*P6)#C|3a7 zKpQ8^ARC}@uuVf~sGRTh)b3ay6&Q_#{M2r*Yo#lglyw)2MmMP}mYWIC4zGmD;>*UU z%4v<-0ueo@>6NH*U{TKoW_5(spqhbCdv-{vMYZvjigfiboHECD1|UuuJo_mo(7IQ0 zWt)lsNXef~b>L$xin>VdW^0PKtl((Lc7{UbwVz@GYkQ@&jxNZViFhw*k@Xk{W)4+G zh9m@OR3A?R$JY=$P4ID`jF_;hhVKE@B@5z4-+u}R!h2T*LDQu7am@n7RKMTWmid9O zel-!+ykA$t3A;Aee>|{tP<4cTJ*d9s5JUSL9m!NPx_OfWYn~4abR8OYkF-jAFrgx4&sLqN(0+O9shmiT4fHt@7nnMx zFjB1=v%=%#EI$3mC4pKK8YArK34cX9tGVa3aTQ<15%g-f$~s}hqNYk%$1nS+1P$_2 zNg6D^A>s`6W0tYNN6<+593{?Z!4SrXZ>*jNx*xInq-X+NdVGk%&$&x&uBIJ2e7IpRDg zWj=4xT$ARRG+(v+QVRr<=MCir`LpNM(_#_65aA{*GJ*QiVx(l6sZRC*<$7~(`9P5_ zQGt>3%Ln=`T*FVRc7JIJf#gL`*TIIUM%8U9O&KV_rT{HLW?Rm08*8|NODu0;4ub!5WR1!=3;Qu7L;C_BsQx<2 z4%ApauuwN_PNr6f2ihD7Z+Geu*tdL|-V^UCE9&a#Y5tQdT5##!K!ZHkEE@nVBu`OV z4Uka*+omlDtbXw6G#dD0@4&T{g}HygzbXZ_3<#82Rj*<$pona;f!I=nQ&q!!4x`HT z;xVPeWa~Y;#V+<-UNZ3FD$uF+icU!zin(~wc=Zmw~e`uSCU&EiuQpO)`47N0fwbw5qwH$wPLK4nY}25e)jH>mH3wE8)%(ro^Re=+&C&38~B|Lc}|*VM??*e&xDThsVAuszaMc&Oe)Kgv;o zI)~a=I(=aSd;Bi2^iYS0>x8ynw>6LEMLRS@ojj{WtIOlGm_XWLkI%H}8vTS0I$%|u 
zAMY*awKYrgskLu&d|>tA!j|8uO$!N}JY0!w;cTP?3LP14g=%3^V)z5D3Lx@PEkc|~ z7x;d7q-1IoXW3db94^C|wpIu}V5L9US`7aNOeAn?e!5DlXsNcArlqS3Q}x(Dg+1ZOc$03q{kl5N7ou*Qv;0U{ zL@R1*#k5pOSTa!OSV{dat$1M2u{tp&xTLL>(BPDkF}@}>lnxv|*3ih%Qkk*>?;J}H zlsKN@16%JWTg%i+OX(TbQLU`4mD9=xoSN+&oG8LS zs6|hJ^>*XBRw)1Ci66?IhuaPdR;mUiD=@|Gx)%U+Ii?U*bM!z7(70UV1+XL^rgyZO;KtTwxIs&ae zn4@DI1ulM2!^BXd&f_X9-GkBpu)O{={Sr9#dC|bk595IE{}y=Z!~D$OL1_tE*a`I3 zYk0qf$48#Xh>j-x84B`BP9{P7f2F?z(>{7JwI@wG(AL z6lJo{tq!bd;t;k2Yd>C5>Rv643u^gq+c*^M2}6JTUHl?69Aq8?ZwuiF`XkWplTzV- z-tVNwfC%}sKMB>!L~|jKJ@3*$j)Bid`@wySMa#i&1vg847B{D|9EY8E;!;vx0w=Y_5nt`=}LH0>Wlsy*A>?s1*buO0L3H1MRL9%D@z3 zQcbRAg1v5X4U=mU)x-B+=!CQQxEA>ZJ_iVog#XY>%k`o+IaA7 zq-nz`q#)J;XS=@BWf|U7(*C6u7)(r}c^CZyWA$wrTgL@I#Nr0rFmUW@Ha7~C`@C=< z?DN#{MzSdo)0CT`=;P|8k{~ByK93K%7tx$s1jc_}DaQFmEhe`F1B8*K6}QH2a|-*S zusXlqG9@F*9*WOi7Y*n0egz%4qf^0%?bxgfzKC}tFe{ItPz_x9{a@59^(F1Amq1JS zOa9tpJ&F}J{-?>m*9#HJlkQSIqlS9|5l1}B>iCR6?JrH>i;cc~53KS){LPbk9SRRz zx>-Fx0jD3&3zYb|c1W1BFSmn|-4Ph`^9KGt5cf;2UV?mqykFAuH~cbA=eGk1e>|oa zAuBNYkHUe)f8>V=ffvkP()Q(eqL<@c>~qcYW?u zZEWxvroX{)YEmrDxTA+S6iMqvb1B0qph3UnU(~1s27VmeoxlTKqOfM4si#T;XvkAs zh9^n2t?M&Yi&Q6j*HqShd_?su#jT@bv2F>=$O4!s3hX_DMyXdz0lN|c_g!st12w-i zR%>?f_0oJZ!69eOA0mEJCmJy5-r%1NIEo`>Tyn=*r;He|!SGaYD_zryqg2yIPNkh1 zu@&PCVBVGkcc+2yoEEO~e)GhtKE2tm=5}*Zt?b6r!<|%5(|~=OlZiGbtGmB?V^!PX zx=&e`bm{+NLwIYllPaTDbVf5!?ak1`$rvi63JWXOe^}qKWADiygY{-{izFxYD@zW2 z85&9y8mjtSLF%3BI0ePMvyQ_<9cp^j`4Y#*V9?}AQ0ZTDtg2hzXWSP@2BUTX*>?t^ z)r*7gsQHJu7793bhz*z0fA`NFyVb=LfDG9||4F{=afasa0gYb&7sDb?h7_Bi!d~Yv zm3qeMblEeU8P2tX-9CieL|;iI)Yl(zvtaiBI7{P>YQRN)Nv}a+%J_sU2kU;qzq8YV za|`xAu4-R$CTGGWUSTO3a8KHP&BaSNFPE@^%g6~#g6nj+pN*bD@ zYFE}M^?bK%twI2Mgs@X^zUJ&0M z@jVdV69nu4b-as5y?ZFvp?@UIS`~Mblt|q8qS`)I ztAo&MW3|0Lr;20qGMlPbrfQ;NaALjIFW7vV_JZbgqwiZegR`LNkY%|LqwX`B|G&Gu z=nU4aL_8VZn(9w4;T2zpDC?*0Nx&@5CC~1!>`K zaN_m^SAL{*()ggd^|98bAi;-7@DURH54@z0gPkvGmvvsRDqq%WherV&A}$7@Q*kQF zC4w_AYkBOm3UFs(Ci3WgYDJ_b4U!hRTw^q{AUtUsa_HETA`ruw3EbGr 
zt*J1#0b8amwc>V8lSEjCk_(#TrRHiE>WJv%NTEQ~To>e%I|aYJilK1;;_WHxvebw# zv}Tyo17B#(L!8jK2|M;F|8))A-e9Ne+Ebr@6Po$ER>teh|4)-=O}btK z`>=*H|AAdk`2$DSKrEs8d|+5)3)iBeIF+Kp^%S)iC#1TB)DWkpB&}sK zsFgZ?pwH0w2<3Vbm?KVoKP{DYw3Hi~+{hA@j~H$u-%Tx1_*g0mA4{R{p;B&ZM&43x zC7-P&D|`zaB@c_+`nesqmly~=mNL{iAo5tskam<{NIRBt7je3Z(@k=97hex?z{RV> zy-eVeYlzyl4E-i+EOQnPmxUZxqk5_&KwHx7Um3`FK7r@KaA-=!Xvsy#?Ur zae$Xdb};jLN^Na~qc_Ci=$X7iyes|K4yz>fY7;y?8X+MI9KN0<%)HSuFoQX7t;OrW z(1jR-*YgHRvQd05o5IbT#IN{xGdOs>73@0%ZI3r7i?J z53{$|;(ZqHm$n8i;ovRcgFaGx$mGK&A2Io;$;Z^Due4&i$;Vagf3*ZYp+^6}3BghS zYJ0tWQt?f#N+5H4VsN}i@2p3>ib-)Yk2U!XwTLq?Jnv zUb6Hwjskb|w8>{oKEdQ0JQmz3bv)fnSLOZsUGoKJ9!>?zhv*wQ@vKeT_;s5#*2OVx zVF1{4n%}s`rfSA-s;;5>1>>B}Z}HozLYQ8+%sc$9WIS*4JAAqV z`j=orE{f0_rhY5!6n>-4KS%{Xa+!Oq9lpjtsb3@XL7CTWzCqtfN~kQ|;-76{0bm4e z{zaOwjeiyAHyapcl;R2AH1$fPUX%Y&Uq$LA_0nMDWu>L6vQc^kunK!c>4n2i+YC;j z&54`@#-N%Lr58`QZSoyq491vz7oBhzgPLY*x;h)BSMvb+ed;pxN=EC6mhk^H1KnyC ztrv+jrP(~yh7|lV*nPB8MZT#e2KsC_f~%wTs_7Ww4=U-ULP~)j#6}1}X=!Cv)*I+D zFs&-UeoVAY0%W28VH|JNlg@a6(*R=6PJg9y{p8i%5 z{xvvyo!&#&`%#zR`Hgx{4rXoA*O{S0NA9Q|vpECgVTQW8L$8D$#_ZI`_&`_BKA?9j zKtY7^g1;Ql+q;DClA@0-t`tio51Kv16R)Np)k4+eW4Z|GFZ&_eo9}~S z0pM*qEewI~(&7-2^NF+6Bo%_8SZ>k^c>|oL698;Ft+ogNHl1Fy=p~ERShUunbtbKc zWPvtVw9%rM#S3`)3T?J%i$z;4f;411?GOjBbUSS>FC_q&?xsB^K^)?vz2fYX=0Hgz zXb~VNG)(;96&yC{NU+Lr{U0qnAFVOLwOQ%5gaYjI<&Gy2}m_Pqqi!5%m#l@LL0P((@=bEq13O{l zk5(U?*K>ShfU62cHCCluK9R?Z)ej%&;n-<^e<1Kl7(Y#4m{JUE zJ3R-XYcA?FU1Ful-W9ET%qQwGUwAUm0`G6#W68u`(mPM&qQMc5c}lVlg+jHwlc$~f zsgvg@k5F4Xd-ga)S)VSRvcW}NJZ-(46a1p5=PeJ?b_(w9=PAv>&-#1Tx%5_@GJmvH zvxj(U0p4N~pa%$@GFx4k=!sDizw^{m=|erY9YCf1b-Nz+hwKrmD4j`B<@RV%s@E`& z5V^yKc~-e_`1Lp1Ra4>PJi^LqHqKL@9hkXkQ^y(_V4SA89HE;YJyM+*@8o@FyvO6} zbWChs$yT3C@H9f+v?o0=E%iwB<`N1j=l?379M2Hh94i4yO=mEKL zg-njoDZG9?Q8JGW5sthmdgRe=G=zjnqKFO+>u6D~j^-4pE!E^SAi_yfPr4r@3*JTi zV38LEYuysv@pLX>aY=cXl3Y+AH$;Ivn#+ijX^O&k2A4BMdpuLL$1_EHJX1)EOd%~Y zg|x_o#x+;*Q$bA3T`(}gyj zDXQd+IbLFcZ5ryYS)TMfe6;aH6FPtsU;oR8i=vqeJ zbp*U`y*d~0ROAf~>X`xs6y9hQ_8FZISca 
zyhqaI^D8FrwRs=!x9LL(4)Ot;4~n4wkj;lB5M&gWt5P@j1A_(2?S8ey2do{#`-QF90#;_xU~Gz2kC+L(Sh) zZfxJ-7+T=xKp%i6*Hg`xdeS35!r|2k$C#c~u7Wi3f1;oseIH)}x@DHO`D38%a{l3j z6S2(_!Qs)=Px%t23K%>A)&7ju-bYYz(5>Msz}`i32Zy2>9qx+>MlAD;iE8wX98*xs zIbqV*EE+kV;vcOr=^L&Q{|60$=l>%~^?1PYZ-ZxF@f=_v{L{C1V$E;3L_g$mK78MB zCg48CRUO&lSq%|w*R7tyF+V~DUBkIqpRVCdfD$n{q(7Rmd4naeY6;FqO}EishAsK72)lyZqqtD6ulWMESiJ^~OjPhDKB$glXXT5)cfJ zbN&}HpHQ=2_3PG5n6ozvSs#fNxu6o|moRhiytECdEVjSfutu9>gl=#mS?ElJB-91U$ zr_LLra;|yGbM~gu*E}viIHn}ERug{%bjl8H|IM?=@05BWG5)i3NNJr%c&@RkMJHd_ zeF9_OWN%TQlk(Rye|C)riX)H1osCrst9T>Av;HF<`pV}`^f;8q-~0QwY6i|;GsWG_ zI294&E&87(2Rnp$Te# zaOixEz;Qp4YVas(z++)eHI9bE24Fr<03&819pFj9&F#I{G;XJsJ?7n(R1S-ZU`*fv z{1-@Hh+hHwax<91TY`N%c^5F)Ah$YuJG#~L8Jx&xgB`ngi@Wo^-dC%EXsuSme2XEV zdUhPTeNSC=or3{&u64OD)N`q_t*%Z0H+oGsZ-`2L!W*WJ(4IShy3|&3wM~d^JXnnpUzBN8<&b2136a8pVVZ{lY zs^YZKq?awB!XoGui#E&qB}uWxETG3?C49RnR9PbJv}l(_yR8CpELGA1u=%mZ_lpm@ zX*L}&EjkFLevnS?@t!c)TaUrj5i22hy80*@t*YgDbF#)b&{620u`~|Z#Wx;40mptw@mk>sbMuP{*poC- zZ9%re|IOwmnS{zFqdC@eZ|(cd(PTPQ`{~}v^{2vu!lekagd!{{M>@tcPiRpL=X8^1 z6i}#6sONsD2?9{;o}N+RGrY}A&~+%MuDUfJqw?}o-gus+&du-^R^h*Tfoi$|)>B^5 z(=cT)8)di8VhYQ_?;NC)$mc$bGbP;ymnkycs;D#MfFLdh{} zyirIqbB*_pqTF6hJ>}gM4owXJvcr%}9HBCNRMk7}jR|%=joxyentR5(rD7FWP!N9( zIQa#H7+(TTenpk|YpTWHK;89w@Wy|HLd`Xr%-3lS7^92%W^nXbZ*RuQe&Ka*GxucI zVC^90t31vfMV_JPnP*)^B-z>e8EDfpR~*>0RP+pxm<` zytr*A#OOO?95Ly#ZoX)>?_F;s5}ke5yF11ij48o}wbk^my&{D_^0hb4Jp?08-JGL7 zz3vsNHRgu*lGjOV^ObAruV2yE>|o|^-i1D=no;l9sW%SmeVit6H&$K!*IOD1{CB-i zxSY8|KW;z$J2V@92aFYEiL15&sbduA)6^c2SdP2|G#9c-(Pe=6{H)LDS)j&J6^w*x z*gQ3Woz#SOcP;p82OIc}iJEh825+?ti^Rc+qi`|~PL!L3f^OSJw0lQuo{4Bm-O!`%T{cmUjOzNP|>?kKs$A?M3Tl5D^dL-RnAdRKy$@GgjfBPv9`h7kK z=mizE#1uT0zV`tH-SIKQf%4cLON_xiFbii7te0J5aDy$UDpYW{z=JyRMWRd%Q*lWL z68|3#aV*6rai}E({!bikiLz9r2wsG)&%>B03=bIC?k z3&Kd7$M9ISyC2~Bm1JY5J`Q%;iwYZ;^(T0O8l7TP)L~$tHl-NNtw}uDhJ8UF^;e3K zVN7)_vsLL-V>?fGwZ9LW5e6h-OqEpn9F9?U6O2>{W}dS7X?{j5w!xKshG#LwtKA7k zeBiksL)C~hqXy50U@Tg;vaMcEGfMIt;TFdRjdY`2OwTK#j(hbk_8d;M4Y0R#)K)*2 
z&@XTne2Jx{!Lo2F-ibJPA)0pn_Hq;2XZWx@s?(?RkTI2<5x>mb(eVbLFUd;?xnRu1VNDUdoY=`Z}5C0cj%$%&wt$VE?4LVo z+~AQT#+uYpy*}K?QIpcl6csnZ=oXa>R2`Z~(Kf(ioQ)YvQX_YpX=RcL5`cJ0u%Y=> z*v2_u!p5;*$_6c*DNZ?CNK2QiU!k~JL;b$VsGzEQe6gxuqMq0uf{h5YRg{tvaB@Tg z)quVxR)&Qy)CYfrROze@)Z@HSJKn;)Mx}65@bv^r5cI8>Iy}lq$lFVLa*&LDWbQ_w z2B0m=46_H>fCn56AB>X1KvGBJs45D2ISnx`!CeeZDuOQ*g_t-LFFt9C=2Td$pu&2n zPB2F+qfC?)B;SL208{f2Q4uFgC8uCl-^03JuvJhn%Xk$Om+chqlIvK={_QBiZMI9U zS9MCdN3Od~WEpCtlK%HC=zn@f>OS)1f#}y$_fts5Ve)pB4?Nm`2gFgu!D;QqX&nmE zAq{+)G?eAGH~oHl(`CNVPQmL$mpTIv=C`2cw#Y97Au2iMezI$2blp3UJpR6DhocbV zcEeSs&B4Uh$J91>sG>pz6^`A0+>On+Mx?I^ZO!h$9|+&2LbC{8sQXOwVy;m&o3hqA>K zWRn(QoTKYK+8`Jsq8YLP9_II;J>ib7xU=rvLc2Uv=)JZuCbmRDhdPyqM|$MN@Ou8c z{CYl=+x!3MiGXj7YCcfG7BKt2kZ$8txABH(Eo~TY40hQ{dy5=Bl>^3GPWZAhRxETi z;#7EFUySNG(=Z``1;L(9eSD~|?|u6*`RXT5e_RGbu*yp#sO}7)f#Mq^K3Jlri6v^< z!yE~T1wm_5u z(^1gekjRNWH2~&Z@j0M#p7=^i81@&;Q*B%_un;z?w%ku*!2J1d&czngW+{JS7;u;7OwhN3! z3}bNYeRpTq!6XW$J}_WB0k%PRbv0lN;hw6+5~G=Vf2|Se&=&kn!XmYOi6N?ve=ae~ z+dwr+tu>A~hOXp%Ut@h^yF zpxh2o>;w7xG{ze0Wnw8jQ{K;7G)vyI#Q|3qW9S#^7DHu;z&?x?TC~Wb#bQezuxN>> ziP17h?d84Pq7{;6rA4bOf}vxo$m~-EwZ=$^vsOOAgINzzmLseAy^g5`un+XVcOiA33H*nhm-CVH=l{;B&i> zE=xwgiMSd7DGt(Etb(%0-5%&}?tNy^Ids9M&eX-m-WX}qTVSKWur3xjz}qwyqLE5j zRnTV}cYwSb+`P$n!4IUh$sVIj_Arrqhnm#Hgg^}IYmiBUfsCLX(h{&_d|)6wI7);+ zvtbX=u2JgSJw}I^(GHvY;S^&aly8%7)Gjg(*)$se0L+LWI$zuP#~Gac@>CR53NWRx z`YhiN68l=d@w)D;+9ox}Pf|yMMk|E+4*+R%u)5om>;4jpmc_E_Gu|k!&QCD@D+dTJ zJZi-AJ#?~ygT8I*aX9vGG)A5Z>LmN()y?xpAr*hzxM!uW>Kr$|c4~erXWAiE=7iA} zHBUKV{O+Qgv7elNT}^t!5c&C$H;jG}PJ4cuJ$*}QoN}eTf!%{*Rm59{p;~`x{9lSk zeLgXu*(p%=<&7UT9G^)vKGVq|#+>T-6^C#>>8A-Cf!H(gg;uI67xF~v-Fhvj?Bjn=*5d=>V0&;=~f+xAv zW#ThWs74q}NvRdY$&$}(@l_OGCFer1qPYDGA5|oORq^3ov2r30o&X{ePEC_*SwcmY zL2a;!5jxTmCDREYB9XFzI1R;VBu-;-V7m+RBoNLMM9p*pw~#tpim#QQTRYc>S&$J^ z7`L}XLR_0m6_Drz%Dkh+j}?3hq25W-b#?+=%Dsye)K#2rCU>_);j|5xvY>Oyyt{mM6#fAggvY4&ad8GpMjTlaMA>u#%*oN-q2de^XSjro5Fg~oB=>QJVP_W)w58vVO%aAUR(dx453F__Z;MX9W*`ZR}^Bt;T$7eCLTX0qc8x 
zXN>oxV~GSm_zq-W#2g!}65RXsoM=1(f~##WS;?y855|nB=XtKp&ogd0n=g?|c!4_f zgOOA*f?l>^g1E^BQ*)M0n}vfh%jSiQqwX1+X^U>O&5I#L1^e-7o0rg!Dl);FRLJI~ zyv)W`M*AFDx6Lb{s8;nyqrU+$_f$YrfT(zN;7236=4uqiFEB10dx_VGM0*{C*}TEz z*`j~+qd-1D#WzHJ`sAnCUwP2|5gAA#<1!CFFee z4I?tKs8{ZT055ti7;7fOGSXzA2j9gvfODH5SNzUUt9@pIdeUc>1n*^y&#VRU>SdqV zf_8OTrGR~0CNJmY|f)43N zA!fG(q4P8@^Z>34U|V@S=Ofk3Hd_KfF0;*am)%+X^JOd4@ffp9f#E}^ShJeffyw%F zi}hd$B6%viF!@9zbf^AgpPU3&lFWl89C%#li%)v0HT6xwOMj?u`s1C@a%C2Oqi%O| zs%zHWyyX_U|Ic%O)(0C|9+*-Kpzl)!!scBmrtiOEyu7|pwy?RM4L?}1j8r?^{Z+m@DJ^%!SL zM}~|u^SoS5{V>sd%8|p5oMe_&YbKfNoPNAsyMpMgfvJIJhdoIkR(r?be$EvHK6MH!Bv-!(BEu7W@ob za9Dw8a4Jxi4f1iOgM;bM!h}z-sggF$5_})<&LrH0_`GvtjdQ1>NmFgy%jP2gqJJ3; zEOCi**NmfQ6Du^pa%?J}Lj;(#%El!tFF4mesy8;6eM4Tb3D#s1`n1YgD7QMUHN%3P zHk$iAW$T$#TP(}5^XtHCf(HOpE$3g*zd+LJ14nBB)|Idr2n0a&E#~QHM_K~bcb{QMl>fRHdUy&AL8Rrf7Wlq(y4YRBdGDVE?#v#+^sz%Rhl6Zawmhuc2@oEdM zsH!{kVROhr$IPc);mTa^wD;9ZubCp^{`fWXduQL}4Ebl4dgGMY1jR<4Hk*byp>5}` z`(5>Y-)xG|E$^GJggBvl)UEoe+ZBMv?BLQXW~Slnrti*$Jo7IY68~yq=#`|d4mVBJ zXC91t?|dy=%F(G-)7~~9KPiX`SN}}Ww*(u0Yo2wr6@NS!n)MIL_(yYhBoap5+vj33 zr$+TOqL$6^#s#l@Z;o*Xunq!2Is_Ng4a1>3 z90cdc;F(D4v2b3klGClm?mXc@({P@p!isEsy4BH*E35uUw-TaVgR$dCZ~_TVLQ(KF zH4!<3!;4s@n7>lXidq9)9TZ6Ee2~(W<(trSxTPYCf%*NRnp@23T@a7!LoqgGG^QL) z3u?u!9v|NTbqU#y3!&;tc6%U@ww7R%Mu%i0LjJ43DSR3IE!IfVvTjh=L8F}|C zhv}>jn*o3&TL4Qbx%euZMq4z-y3cQb=CH_=pY(zKFwqh&1CS+)rV5XCn&mJVKsSQL zAOTN<;Dxp#J?q|UDy7X4=Q+PX28-rda%a^70LoIdP@F}$ehPO4(2aQ zCi#@*=&Jj10qH7}R-5#KBS|VlFX1eh4q!U-Azk%gsVr-x9=+b-*P}(Vz&vnn$6BB^ zRkCJaR?_~o8mp0&tr_tf9cc+DV?o}w*tj8Kn=Q)h%Bo^z`gYo|58RDgpVWdXR(y1` z5u?VB>o9IiE^ZMKG>gHel?XMxiq**g4GgZY8vQyK3Mn9iK`W-JIu)%{l~~m(ZyXR@ zZJTOS)oN!Qa>?MME}?v?s+AH7Vpx>ePl{s1Np+*DRma$1QZ);!1B?dnTu15bp*7(omYpa|e1Bh9R0_mpNjHMEw5IFO{| zfMeZZLIAxLxwtJ!r7ZxaHLsl&aqn<$(AL`PDm|}!uJ;q_ZfC0vHh-Hg7)=KU2noG9 z|2me4JPq8MoHB0ovSRO5_g1cT)5RDrQY0q`>mH7?L{i1STn)>#J|zfu>J=de=`a>AX;6Tn~-} zT@dFzao!i_1Cu_Ki{;M4)B%g|N$&U*(KQjeUxMI;&U<4BX 
zISlpa3yZ!Ck()35^bLJ0A>WzwJ!r~e^n*`?&Qk7AAy^&P#kt{^i+sfS*(?w?m!iU4YZ^mAYwMF%agGaKc+v{!eiQeOgINHT@!*Il)@qIjF>#d<`XtPPBs4h~H_d`w zWwM&^tX0b>EaqS-u6ohsXRYU=Q*BPeb$Xm`a}h2ochkMBUN~eWm9|0J7Q@vnHskg{ zn{f-d&80a5cPokJbE3_exKNMF(n3eINscHrYuIvaIg@>l zUTQpeSgv#F74ATmuyTV(mK!&G)PTXzz^-fK>Wmy)>~>rN>gG{gmk^>n;rpMnx`o8A z|4Y<*q6B-IDb5Dm@a*(@%u=gk_$I7S!D>WaASHab)Y|CXf{(2#Z<#ev$6d+l`(;*F z-k};OtERaV>vZ{AqPdu3OyWvP6>!sBZ@#{X!!~&{(clLJ9eYqSz)E# z4+UU#L(?vLO!eWz831nGC`V(c&B}+$g4v`SC3m{0xerIdvdE2^x*Y|+fv7mAapO!9yFZxh}evcCmD6)q1PdAEA4;TA79e;&xUX7y1Q|U3m&vcmQ#xI={~f zQ^|WQ(LBKc*<23NX1LH5&T?^9IA&j7H+YqV+&@-vFRgs95ofI*w=}L79~MNj2a1Q0 zqD0ut(LM~JEwt5-Nd)6~hkSrjyi4>Bcgt0-djRa3QND!l4H1n)ae^lF4e6jCBFaPJ zz$)yp=rtaZ97ioWX3_EckkK!PqDils^qNVhz#F1NK3x5L#t-4=SpiJ1i>&1h@x3X| zIdR^S3g0&A9kaDS+8V;YKS37++%1zz(BNiL?R&Vl&kcEB8t{QQ`^9<7$l3YFJO7LG zgK4+UG4}%WHDT{1H}(hT>eJUL43(F4+RI<+e-cYlx^quGCb|9I&QGUJ-W6TvIqIUx39zam)5u zaexK@;wtN))voY2HjeLa<$Bs&*TO*T%YatdP>=gTy>!rO?*Ry|r+B+n5BI?hlw0_r zRwPg*#dpKGs!fz2-rWz(W!fRDR_UMRmVUHjAi*kjApIu44Df^v-FB#p{3*V_#2Jku zU|CS#8c2WJ1a%_0^0O$^Ht*mnIaCcP4=xH*Y402Ha!|V~FgW+H1;w1%YX56iiUAxD zciXE^UbA}XXt1hz%37i$r+V*{wKfDYB1Bh(bbOJTcG@azF81iaJpyXoX{&;{#Mw|w z)y30Rwu2n(YWr(KrUwVC$0f%vpS66d_8CX-w#6B%oNIpigBCVS&3Mxi)1?z{S}(gS zjNOgGue_N?B+otGz$8Th}=79)~iqpk*AcPAH`KY!{Pa~HhiaC-MMKCf>l*wvQH?k$C7;wTqCkS z>+YGY))esxDR`-fZ@P>2_i3Dzvkel;ZQ&{{*;ho(E#Zp-tKzsE)&p@JrEnL@&=5Z~ z`oUiqD)-(F6&e6P93hS{7fc#!$z``td6J+faM>-`vXVPYB#tl_1d9L+1VKp#I>bP< zNWP*87)F$H;jKkaNi9!@2tO41AFk-DaDnIx27Mv2bj6t`&U|qeNMeu$VU7&U6pElU z4FuD`q@}^0C4F-_dKqM4j{Pc{h<^aH6~W(2`QB%90^k~?i|)qzA;TAw0pTL#i!X{y5zZRn7A})}XxGaXw9XkEudek52f0rfUtwdTPzUQpT`F4LjPyl?+92yU zp#tX^#m{6^SJ%t<;zI%dfIo;3sUk>+=uBUx;gSxHWlU2X)Iu&eVlwQ!mLmra9+CDq zc&g*aumbp?EaA`91-TXJ#2g>y~0O&bSz6)pr)xibX%H8_I$ zz-NHhLOADrcVw*ln$+)(+>G8I|0xx2gR0HS{F@R>NsB~HQJQ2yC;E0}UyayYWrpsp^Y%Qaf%F@ zI!)zv@I^bLBDaLAyqaiZ<6(%&3_Lq5Ox3MbP|hRwg+EeG?J-DQ|3ON*GyD&rPx4fJ zBxUvlq%0MvH_cRKS`>`qgWJT(Q8>7lVZ)3`&m(nFb)#0ntjUeeDz!G+r&b{*EwFID 
zPxYzoi%OOC_TX-3Tw!*v5WF|cn5P!i_GLSh_Q>^rk0c8J5=AByTm%@EDzoz7zP|sK zSW9KqDJW4c;>*LXkVh15?)_mIM5;w~eTnLYIt4v_a62DCKj*>Uh8dqlNz!z6vyQKX zGZoJ5d?|I&)$R#yR74?2S9;ei=-s7=os~QIu0uCYo=4Zo_|i_( z#^NJsH#+do73v%%PB$p85yY%4;Wkibtm0x|Y^aA{H@ zpq6z71C6k?NKD}|F@axu5ss-7scMQuGMvJ@#ltaJ{48bs8w~vY53;M6^GMWa&ouM% zVZ6rIbdDRB`Y$QpJQJfV-841S*bOHayR*4>o=2jodl@%(RLtnny9}lki#(19nzqgv^Dn?pip68x6j9fG z8$E@J84)nY^FT2=M%9zH-0mI)-3_=Lrp^*kJZd7i z`wD|S74uQLc{gg5jtbF2cZ&@pM!R~d+;;#gSN+sI7WxmMVf;)W31YXKhp!^23qpO7?S)Vj$<2<-$9-q7fSJATPTjW@1%>8D@OCfjK<*uP z3S4H+O~QSg^3UJ3`bdGaduYd3^OfD2%FSu_z=xKO59TgVU1%C3r+C3+$SXxJ zJw7=s3XUvL%@dK4AA<&!49kA}8w>;rSiZIG84ui`=E*aN(kK~6OJ$lNE~d)n`-;M1 zp=5ekJQO^p9F7@}>Pv7czh7hvCBUWvw1FBW?jtYuPsLoMnR7@!9Uhe^Co%|;jQ%%5 zZmB3>2WK{R>JOFKG=uiY=o7Mk#{v;4B@7T%%uRAAr*6#dH*3r`^rQHFG$VsLf=UAh1-3|Ef*6w zA`?3cCKVvW)cGLIs`Wq~+5l}g+%a$npS-MKkhJJV(d#-lYXzUp+=7^tA{ADdwk2&5 zK5LFODAC;Qn5zD}ViCn{YX%qR-JQ^=6xuzw1d!B4hCJ3HMwh|(T&5y_kVu+^dx0jU zRkq8M2+@nx5QyyMp%*{BVi{));iElLp|z#jBiUZ+nmrLvw^$jd31OVGmHMnx6M15M z@Y_PAN!$ig&_F8YLRoS`8QJqDb``Tk`OFxb_P}Z6#WPU2L=>ePSdjO{s3Qp2ID z3}1pH8GsK`t+Nd!`k%t*0~K@pi~d7OL!tdMye-vJRS=-;f5@pocP#nDu8)Pf3N@dU z@{cKZ^l=0eJ@gd}{OJqN!E#JGK3$=9)1r_22 zfV8D3)b)sjGP#P!YY+kGp3wVp1>ldxB*TatZxkF`iU(P0%BBIpTWxOeTcShc*F5eW z1o6vMB+zsKPNm$zwo)p5&yxQ#lncNx>Yb6cd?Z}5(qOv zTMt@6QX}j<{E)}LPQrJ(*})o8N~er`pN`*@%zF+$oKrE~^6n3$>FuQ$e@9j-SKu_? 
zX_e>-cU!3*fv+!lJ+Kw)*=Qr-*O-b6M@T*aeqAXNS=7k8HAKCCl%hHfH)XkMU#H(X z_YeR5Fhr^I-T*J`KqdAkPRDIojz;%f4-q=?DbB>}gM;8E%ltyC`=TDd>oX&JkfNp?rmkusW63c^^mid*M>QcjEZa7dzChPVRLc zA#UJae*uwMK(L3&J}d}Qy)tW zgSOQogGH?-=&yd9QiDxv;p%GfO_nm!1w&MVfzvFN0X@lK5ztlbi-EE#<<*pbQ-ga6 zDw7SR{#t4}0}7wPsh-!Q2Fk)=_~-}O#Sba>ICxwAYq+|xBW3})oCB(eT*r|tJhf@ zjnJ$B!`2oln+Wai`7pD89lnU870HhJL z6s5M};lu~%o?|PO1-*9qfO94M^%851*0MTrz$sgJ#RspXgeSHR>KI751akSZ#SF@w zUX4NT!2rnHAhKc0kpW9#J%!E?9c)Ki&lOu?2&lFx&dQbg80vg8fWt|4C>NAfe- z{Ry!ffy<4G>B(hNq!nF7h;v)GLp|yl%iFU7t^8G)tWT>^>&*c48XewVN}{O@?S;KH zD#|mI_ZKzT)GjO>5DzF7?CX=F@iOdVPo`GH+Oo~szxxs9qkS7cRNok-(p(CAL*f^PVy6tzH| zQT&dFl{B2wd0XD?ww2mc{(@xz4+0bJjjB5 z;`iBl6=F3k(y4TE6ueoFZrXDPp==10ly^Y`-JDyMgOf?VkcVo3+bp~BZ!qv;0mN+) zB`l?pZO)GfKb%c%jgc@n`%MGG+b(ax8`4rD>z6+l5zB8PxIF{tX7uV?A>}Q|TgZ0U zz`Ger6>CD282J{xP0a!!-sXE;E-L0Ht>X4g*udnyd*jnUrD*Q7jyaQH15jZY&MoWbT80!7H-x-UdT zz|8GxstLP*loLcd;n-s)bxQe`zaY$#erykZZYdei2E!=t`?gZ7J~I-Y;Q{U&shIC- z<3ERtMiu=vzQK7Cs^}xBWO%VrB=M%=WTQxN54lf$;XWZtc)qrQC-{RK-G_>~x0$ci z!l7pX-f?;svWFY!2xDBYKD7H{B_F{ieDKb6rAe5H88|vBqYceB;^6WQF^NMSV<=*p z@Mcd;qW?{U_cKKllr^bVG@8QM%Ka26kNAqnp=I;E*o_N6+@E#rE_AtWjKkB9>;G)T Bb + + diff --git a/settings/repository/org.broad/tribble-1.90.1442.jar b/settings/repository/org.broad/tribble-1.91.1453.jar similarity index 82% rename from settings/repository/org.broad/tribble-1.90.1442.jar rename to settings/repository/org.broad/tribble-1.91.1453.jar index 75b4c2fc5b4e143c6eaf8ae07f8a9373ab16db93..aad68d8ddf69bdf20fe565eea5189dbfe78d9869 100644 GIT binary patch delta 17807 zcmaib2YeMp*Y`QQH`!Y@q>w-oN)kc{Ed&T9gc>^1Lhl_!iu9^T5*2Au7Z?G7fMDT4 zKv6Ch1O!B-`v`*4R1|3nq6i4&{hz%zfveB=`))F`yEA9boSB_DZO(345w>SVSV&fD z-Qgy361_h5jYT2lLpF?kqw$V)``#T&L_-%f?pL*Tm8$)!S4$aKt$KX<_TvT&8aX7s z$>?!oMvogXVc6(V6++x0?ng-SfaFA+E|CpNtLa&$x&9G z7FndqppgT{kN@)Fho7Lkv~5ecc~B97PB{Pd%8d@9jSdwylA{+JRXJ^2HDi;*#^v0f zwQ^wyk)#yCwb+PK=YQ6Nt*tE$0G2Zshd2dIsobi{+8t95We;L3U)dXtGIm<8B-cjS zvE)0N4L5pGcDi+}ViS}_R_e)iCmGKU8@(4)Ns6-x&T<&m%E}>}X1!H;e~933KXCu2 
zUX93WtxAruvK*$BmxB6jn^QjUA%3@J+G%Ta)%qxVyJ}+}^&e`*vrnLrq$EMC#)G8LiC`BHGfcy)ju;_ow=+Rv(C?u zk&myFU&N;DP9;ap^WjA2t?ZWR*=cmpP1R|*K_lFx(MT6nqo)lT<-)^gH;tjOZVIAt zQas*`Y=S`(4Vom+lif6hrn|4^5-zc1KgKChnPA&SCqpX}- z4XngApYdzzdIxS`m2dkZuU8FI_3}X`)uj#Aw{1&>y-sf!^rlH~(MBo!k7cwQpR|dZ z81%MDn`w&_ZZ)YXy<^b3Cbgt(CcQ`Rn>5r~(=L{`TL}$2Sf2JRV!gD(qz~vrlRlEH zE`4lLC+f^p(^@nl(dz$fr1e_+EOVzpubK1-?UIU{o@DE&QN+sF5n(mYs-f?uJu2r9 zEi%-kPie17UFkEybim5Xs>`2S{BxALaaAjBlmCUqnb<>hBdia1RNmIV!xo*t+!og5 zt`^zxE-^L7s&(Mn0tsYTrMfpTy5PPmb+ZO`-_y4{HNg|nB(m;&7dZYgw$}CE!*20KK0$ zfbBHiy!qqyJ9Q#San_YwEHK|vz{EaY}>FAuWR~St>F{Q>Suxv@qEnZd4T9_RN zfUViz2H5~N^tT&2ies@MW!eBYydl=<86jXg4_%Z=FT1HR&2!Ow+4Bo&k(4c#%#v)0 zWJ^7?jFjAA(`U-2&!iRdxY9$f(ki)I?c44Kt#wlb&2!Q^xxVJ4^#Za%9$t6S8}jg` zJiO)3rf}Nmq5se($=)_-v#^{kl5KU?Q_z8f%r`NKVYQL=WcAHFAX~A zq(g4Xpu^JbM@~8-4K~Ukj~aB$pyLL8wJm&RmhQ&Ge+~NDpcB@pIolj3F=#7kZdcxH zO`AL1@hSSSdao_5%% zpqEU_q7Ei@_+%4|8`%@o+MtsreMe_Z?8j!-+UKLK1r1B7FZ%1T*4%j+)y~p6CeASF zJY6v9EPaos6qA0Cryr$NH@axhPbQ6|r%f6~v(%!aS_ySyj%GyvOqWdhg)SL%*`zCU z)u3x8U8i5I@OHJWq4RrI{7stQFu_FHoAja(bW^bWPPYvD!=yjyHYUNOzvzxNwt5oS z-Ceq;*1oNmv1%`fasDmn&(eLc-cEWqdSGo{(2DO_cNQe-qiD3X%iCaE?}b-_YCM?_ zRuOE_lLien=qY9cEJm}RpQ7P-?ZO|!s15Ek8i9KZ#d>C0KO4)LWlMWQETV=0(c9*L zLExt2s5Kx5qO@3H-5@;q>e`}i2%d_PmJf#M>ufxs=Lz-w9Zf_tya?;CIu$Yn@)ltXm4B9)^Ff%e<+G$Q-=mUR#AEbo@(Qtri_Wd|K^!J4pgl#Vf5Kdr22 zPVitTSUSMf!^(6QwW_>Y%a|FKDX-cZ@oLAqvuO?ns?l5s2%=c@o{9eZ;JyN`UQB&D zR{0m?3gpt0_B}D#y7OuWu#hKKwT-YM3dDw-A;w9rj`O40%WUF4y~!mO&%5;v6;C1n{c zg_2^>D^6PBrbN+B%F?S&S|xp~mP}L=H{QltC?jr4p=S+R@1|7RV9@J^t#WMJv-L-| zy5~h=h5m=Bv+Cf~Bdud!N2rDMIK>Lz)`DNOdTjg9y$QtFy}fNEv}wO>^m}Ev^kzde zlZLd_pm$7qm$n)7o=NZ1b`!H+M|JoQ$3~gpIGs$qi5*OOm7&kr+6nekvK6$wB7bC6 z-Cm{)bd=7tQy@PvX%{szg`+WPL!U9|Em1iD~17)cP z?K2^Tp`mo7P6lnV*0w8VmGn+^9uWN}gFd%bd1IYlh{E(G9mKY5>^Kyt^t9*0Dl=J+ zk{7=1!yONtQG<-TSW==}$lh#^G?<21hxRP5|0J{#*aOrZ%d9c3a`zN90)*TR!Sey5X32}9AvSHgaQ3tEpb!9i){KIN@P|SwK2a|&Avh{U(d}i%B zCg#Oo$D)ebQvK<%UC&PkebZIKVMz6@w;fjANw?MLqKMyi7jt|vI&Ax#+h4*Klay*3 
z99wS}w>*~wDCW{3w~fBYt`EYk>o=v#YQH!2Su{!I|MR!?T)r3?ALVEJwJ?I&_WYt z4zk4pVM(@xrm6OIxT5z~G~W)2Ymkri7-UdAgX)WzY9Rc-fk6$yZ>v+P%8KJiHLoOx ztCTp7_A~-3V!J9+s_Iv7L^UxZvfO21H-WVx*Jc3=x^a5u#iDaVi6q%Zv%*`BQkUZx z3(Wgh9M222m&MFfu4yleQ$;J@az_zKs;m;4aVa&w8k_&H*}aacY)08@v+}_CaVokQ zHv_)D&A4)ejpn%bLe{%bvn2V^tnSICTJz^)nyUA!Zv38a1I4wU^1YhhhZ_US@jiSu z*p?BMK5Ja(i(sNJf^8Y0wsvEu+Bt+FG6vfNcrcg`2HOKTID%{2^mw@J1mzyd?Kn*x z8qJfv$42s7K2`Vcx>bHxxuf|Bbb_i#-ER5gIId;49DgVLgi09C*k;}#<2m1DmtG8W zFH$3)<2ES$;5lyRL)E&~^ad2FR({Mi z^|^G#d-P*A9Q>yGW;eGf9t%lGT!VtRHidCIRpmNVn=`z%_V6r?^VP1se9LbB{yt97 zU*r+q6Z?3Qj{xiM(l=(|JxNNmIYCyg=26oQ*aXZuz;k@>sNL68T9rJ^bzEsw%%GwM zMOq!w6TP{IIoxN+^m6pu`DH0EGqvU@H_T4srf$fIEiURnTP1@zz0D1dZkN0#CGShN zT{5p^(7ii|?%hFj?+&7Scc70w*bYA3+ey2n`5w1e%A$??v~LG}=A;8s^SP70kV?_N zVGERyo;uSZL$vVHVg{5J?V>atb;0gE=Aq;C6*OoO!Q)2v;K&Mo`3 zZI;>2WH);ZHchO$A|{7&nEK=lm(au6qt2Y+*ysrMFohJfY|_3uEON^L!>#i?Fr!Ly>cn86N{i*qz|5Vf|YRzannWV3ilLsSv5Mx z)w!~Ir>hp5ZgL7&F*p^29X?>vfYgZ-hK)>ZJ77%NW}_yIn`&}Zt|&qXeO2S?aN@~O zY8YH|*(QEUrJUzZ8MU~!$?06j6MhtI!UP58QQvp5OqEU9_6kz_7|jX*}wD z*z$nQ*Rl^<0xAt)YXLSJ#W55d`GBIJbc62u)EUmU)O%F!9w2+7TBG$yScGcHHJ(^} zH98mlvAW%*27)Wn8}z1%zs@n)t0@PSzM{9#yP$_fcc02Y`+WjByC35wV0_!gy&29m zpZ}^eGj(YcL%neAjjIDH`XDHp^|1Gs2fMIyZ_=%QT1KA@z1)8tWh(BDv(B{lT9 zm>pUTfNXGR4;;1xdRKd@x7z5^TA=iIm-b~5Tg>in<1KYNp6GVGmD4g>Rc!zjFwjTO zP|kXZ7vp0$?W9i(+GPkU-Xm_1Alj>wag2mJ?lZ(2;-;IN(Od% z-k=M@wn5zYa{WQdf0Vn6Vx|2gk3W0p5`ni}7OU)vpb+pu`YRlE(BK%n277M5HaOP1v!qr{ca+44 zy}!q4XEnEGfZ@?)gG;N^WwhhLV1aSqXj>DtMqxg_XVO4$K4eMD%4%s*P>Ie^&xb`^<-Es%+hzT>f6L(a}$y%n~nY&mw7M6sI3Woyf$ft#}!n${Z-J>;l&sUPUNDO(f<1XZvQgcr>FgY+RsD# z`Dy>Nf4nnwI!#M2?B7$^zlWZNZ&&)qJ5^O{Xu0(Yh-_L2)YK1zLqMBc9S)KhNWR5j z7t=6Ooj3^?ELx(j*U(lpS&A7Dv!Gn6$(g6C?q98{K_`H(YBP)F@snw-d44=@tnODz9Bb!T3^ySI0k~j z4Bms}m}pD1^^hMEfjpiL5Dt$(I=H2nfw;=un>b1mp>mrxVqhBn{(!7_1A_~p%d`oy zA_(V7zT*^M$rhBz&{|XQLkclyYb%Jjckqoih4ctc8Uvf#UXd@RYkfT1(Y+L49jj~U+FMl$sGVR_Ce%)_$KspfAmYCvz%`s^ ziOBp=?X9CFMIXnQ+fX8AJJ6%IEJLej%bWw3r>)tFb7V=j`$8=op%qtCGc>U;XJ%;c 
zI&HxdbFIcKmDxmtRpwpTM0+~KZoQ;(jl7?W5J^h6C6?X#SVztEPlK~V2d%M(tEeTt zwFR~oQm&7dq)+09-Y5EKSA9lV(|WT{sQUf2=IG?b{+dN~+0zHjQ*Q#dM>l=-OZyyaSiYVe{>eVZ4x#1KtmHFlA9F|Hl_y9AFwICf+?y!vW* z`8BY#I%1#pqdDF&i?!aYF{vY#Hc_ugZqPMaNMo1kC5p=H=ksjn8kSFZ&uv=%D-O|1niw2Ce$dLK##V=I$B zcG6CH5D_ol)!p*2N3u^PgVD8*_DlAeWDxG3i@^m~pJa!m?64uiJ%Wx(cFYBU<8lWP z|6lsrO(*E2WU#9C(Wz{YoLNW)xqd?AI?O7`&P#Ryc844OPMB5uM4~5%L{AWjo*?#B z0$mE;bp!7NKY45XTeK2-MK)QZIVKIK z{wC|}u+5NQ8fdU#ijp8k5i$=vCj}=h!JRn-Vw8(W7D`W;)Skl(4mUZ1iy9nhGRz*> zJn(eE?13im5QdIeK_wVMsuY)o{UugTPgpt9YXThKu;UUAYv~zq@0)~^T)(8Ahe|+Yz^g^!bQ~7ja*g@-=;+e7XwAqUEI+n)%tB(TJf@+B#vWr zSPmKV$lkUsrPkimqV)D$K^-5Xmsbl9Yel^E-qSWh)~ZV%X)|^4c8vWPy3267>0SG= z_Bv;e$;Zov7#L&FEC>nQ@B=FCz;Q_x+hPz0vS{#fPQmaU5N!uDgKkYdD^_})f*hMF z?V*Gf6r^vew3`yzR(y}RGEyGZ#jOjst~{!WTQ_dqd6b0PAlwFRr!vc_cnBVycyxY1 zi5jWnpJ-LndcslA4IQ8hIaENmQ8<>FhcZN2pwuLX;k-{RKx-qh%tfBo*rh$m3)RY9 zTB%Zt;L&D3r1pSJ@cQZZAn{lxU7gsaHPM&gro?XCz#LNpc55?xEP`k>C3!&Zy zLmJc+KPF@f4r~hvY7RS00g(f?$8y~1b~nYy@qj`xiPIlY6@ymj@V<+!q{$mqnR~Pb zI>e-U9ygxVc6+zHW_56nHoDkad#p09AkxFQ6m}U_gv$C6Y_mGB$J`WuOLXys%?SkWe=yIS@@yJ<_<)H_$+Rtp|#Vx%5^s3o#py1rhgBI-`Q z)|%7aZ>sN%vaxpj*$=bya40y(vmx zDzEqQb+h}8zYeL(m2}Y$s#ewq2iYR=e);;ZpRPo7TCJ_7*MUgPlQW3BE@bet4HTp9 zzIfLL(nB|mpnOy?8O&~SvL+8Y>e!*j%X;7-4tB%hbGq2YZn^VFW=a-fa1l4Oh@BTr5p?Y@djNwj1!lEq3^Lb8(55WRt?LvQ6cH`V2MG~)y? 
zbOA_2I0Kh;Axkp2ob-?kGn30pNd;*vwwwsVd}LhN$tf^=s|!H`xr$_|f~~4#)eNpK z_|v4P8j{tNtd^A3mb-L2JWCqYF*w6bO}VZV*K^^nzGMv~YY1&$RDQ}wm$BT)$(eF( z?Bpg+aTdpNb~B;8xgcsGcP$Otfx|j(C6qtmhLzSv9@`q+PK@gI0+=OP2Nx!yqmw(i z8TOgXN>_usNlkaja0WGydrFgD2KNR9i=RN2gIza}`xxBU5ZesIc~7S4f9N;}J6Ky^ z7Y6@ef4piqf8bIrU!kkDwtS3QmaY#g5{Wf1bjXBOqb5vZ%2eUOTC%LrNEH_B2vS*f z^j@wErl7GChm4zQmFSY8{-~o5ft<}YwTix_oR^``cC-UADy6ReFbX~qiPOM=6oi+- z156&sgH)q>ddH-}CJ*5!4IXOpQ#{Pz;UAA5ELWZB2fGUo?3Jzhnov zC7L`_sGG&JO`gMZO@3J(=ix{hk2o~jOZ&t&_tPO0IY4@Yo->Q#J@{58=QYVzA>&27 z7(F&?*R($BX&)xLt~@}-)|CcOPF@t)mGr6{*x_!WXf2C*iNQ-vUdGDSa@3emUxy_z zyv6*ANuSXH38^_{@^W6G!W-&!J@8Px3T=m1@oGDiP-QpN&ERk72NT?QQA7RnFg~Pu zX6Yq^4@o44TW!e#GhULVx6|=jy+38?!8#vR*E;FREsnwLa`8tHbjJ~6Y4AP>kF~gS z@)8^{euY}NOSKf77A^%tk-PU%2IW#q6b%}p$J7$&gzDK@Psu*Xj8$7SA1+atg9tOk zQHNay)d+D!JM$rs11li+r2>%7d>9mgmA+Od|6i@p@KC(V6FLYuY?#FV(Wl@|BzIwr zs?bGG;FGF#7d@sTngx@qunyD%C5GVZ_|P2+ayl_q%)WPi7d=Ji)9T~ydNux5{n}kG z755#4uY)?l$~tYkm7Q7}xQ@@lL=V-P(K%JIhhEBa9+PQ8;@6`Ks$&m5nZH-h^w3L| z_#V+1m{81@&<>M(r#*Slx4u{J^w3KH?r;x1CR^aJhyWKX;XHmo1FuOy3oG++Iu7MR zkf09#kJX}W<$DzLSU>-xdC31l&8&v>)MIq4EtT6-@8S88^6<)Cbdi2iKlRl2XCclC z473z%&tI@Y7+#eSdxdBq5Fw3pMMenap>475f(fYt~<;2nx+4DY8* z#;@vLFTH1cLA#>8)4y+>qr8 z$Uo>$^>=SQt*Bk#hn4l~q}wX9k6z#S3xyipQ8W7J8MYTdFc$oZt33zwSaqe3?i25+ zeUIEw_xtDwhW6I#t2gqU$>#QGVX5gub@6{68mjNrZ9I##_Zt2UWk^!84^Ph1x?7d+ zt4mnXMmHr>Zx^)8K9V8&pqxY>$e9%l6kB(YlLm|5c!*?Ax@agph2Sv>f=0x`aJeFm zx-5-!($n%VO0v;*9CZ*PlkK0>k8?^mbyf*|of5!=XalD|e5tI& zQX|v=fm(8X9=;t!E*bE~U3f|_r#6h#?-(d~2^6aJPwTH6xdzRI?V~!6(!W!~`|F`< z;V?ZSHkD?Za3EJP;U%jqA;jGbG38*7Ajq#+Y4Nho2D5D0EE9fE+Y5`BOoJ9nWWo}) zcC=p3X&==odPN-_ttX@{rR65vYH%>E6q^r>bCqBy(q#0g2}4FrNNF=<)X)h}nc_x6 z(31%cw$Ap(W*y-mWzYIf!%?)F@auLaoLbmX!2_pG7&5-T*pVBszf!=kI}R8%bck&~ zVvIuen`-VDJx=dRZ>jeG#X7JtO*{+H6gp6^?u{O+9}Ftm8jGb3k{ElzNNuT|x;;s6 z7u6mvcn+vS1f=RQSsW^L1j+S`=tAD z62K5+1q5ZVc70_TctS9A6jtF^B|2i)2tE^?8my z+}5jWcWCjU%E;B_VCVT#&oY-c7s~lWSl4|;P#Elvp{ZHRo>dSh0kB#Qql%&n7 zpR%>-4N8ypX~@NAAFnqLvo}xWf2WsLhc2*7{pNbyU6uc;zQE^^+TM2ZpXbZt69l9R 
zzAi&6YX6EK`R{6dax6Dctv}FLmnaQZL*Uv)V8ccr6ekvXP)*o-y=eOO5B06S*os>D zlVa751A0S@@Xi6fwP6#Sd|>u4)%vI|aTxQC>LYyW|H8$+uPysZ%n5JiSGw0{k8~Tg zegCqrb%`)(^0of9!*0K&{lz`%&M7?urD>=2UB33SJ|FX{y7!&Ub!wc!d$et-#B+Kb zds_Ee{ipqbmm#U1s&-CKQ1j2}NkH-5IsHzMj*n{8mWz6N;Ab`JkBfRu8&Tz-^b*DQ zVaTJ$4Nc8z%~~W(2rx*J0Ij~L%=p<~IdI(Q0fYYu(ioM6zlg%SFuI;uge|TaS@|D0 z13$wlY(TU=w*jm0O9;k0wxfu(MGI0Te%2%6#YBAcEe_#hgh#*ixrc%anncQX4C?1g zdW1cf@_~+8{j*+7JL)L}kuP*2;sm37ma_<~SyF{Z2tmX~6?RGY1>VVbIx6cDa6F$Z zLs8Q%>7{LgU7|hlwRgSY^R)eeUQ%0YvJh&npVx zab2G~VrW5=5PWXon|*t_VKttdne)u$?>#I7y_`44H zxFVqIxnpg-kAG|7Th9SsR|GUUq~5u%x3yoGd|iJ$>8f}1G2>850cgPU;V%V-(Lg(Po3cdj53L9|`Heo`0<$392oai$Dgjps-cA zT5>~=u({-0H}n$P-(Lzo37%EwZusAo1mGru^-2ol1gQrLpsg_nV^zJIx@oTx3C~qo zH!=FWV+9Z@=cXQOkESqC7n>n)U;g=H0kl7KH(0g5<)BUDy14p#Lp`QNId^ed0O02i0ggL~9|ZlGxW7SjUp86aeG8KKFXIz$d8&ISAU5_hgSq zqGDHp7D<8he4@hc>J1CmtM-;s3AgY;MEpC!YW7|KMw6)7iXcr=t$^;%`@q%i4Txm9 zbk{$efOy&f_&zIa^a5X0HM-}Y@PP2w0QmZ9>^;4@y-XzbH3I}l3gi*NhkZ!p{Ay{s zpFtJ|!h$Hw#0-T2_upeC~r&L^1&BG%0#Ja1zSi$#?3u&^ zi5L4Ehx3x;JI#XM`rPF%AAHVfe1BN>fgk=+B-uaplWoyH_%RRk()J5q^Z-lvW*xh9 zS%l{L*D?+WEhy+1*Y&woGYVRTtMU*1T}y<)2&_m+fwDfO$s=$H|9muO0r3X`UEItn zXcMI_42EL-7y7YzvmIm*o{uR@>Hi3(d%m9xi8h!F!6d0k0LswmHn_SmLrYW#^7Tl2 zVFkn?1i)W<;W2oGBm^&Q;P9!O0Z{}2ZAvdIASo8{1U_LM5F-!(ePgY!hxk@FUO~)2 zu&VOVzh@tV!_iv7PgvVQmBWrQ|9V@6fdyFcflS2tru5Rn9&Ny}SOHdC*XO>zU!W>K zTJLhSrK)I-V%qYvzN+gdAa9PI1(~vE9toYbSEW8uj=3rY8dHG>T><@__BE1Et4pah z4*$$PI_vu9`tA3?*9nfl)N+S^Pud4x3w6-A>|p^75o*^%kcePE-^-F?u#X~%B&7$C zke^@BCSKJjKxrS4wQ~SvSd>D5f23MiS`ShEi|M|8?P-+rvTML4DJ1}BT8yueeH76* zMmN+wjLlxSa-OD&1OfB#*g~L^k5okaByEh;gM-^W+emO65N)@&&(w}dearHX)x%Bi zGneeswbWqL->CFh{pesvj7@amIUCN}@L%{_pZi)hA3R1{R2>a=_yl1g0B&%K>i@|S z+`r6ll(T8F-GfyOM=`#KvRKv6aFnn+6A$4YNKi?E+;G}Z=`5zuNtI;Fr~uz#L0`D8 z&t0n6>8NER4e-+h0G*iXLpsLkpDf!Sv(SmjsFmxhTDX;qt5Z(@rVQ}R1fcq1t`A-u zHL9-54=;|HVqhtfst3ULQa*UwE0c8w>}_R1qe8z8Oy)qLwdd8x8o`z0oAChOO+Y7$ zRlM8bv(RlnOn)~gNzOsIA@kgRB;t`71}P~iP&;U?j=LT8?U@t@&HJEAQs4rn 
zuY5?eztzK3H;;d#iSI^@^`KMwy0G3<7rxbl)g&w!dokF)oFf8s=adg9-{UB0_a_dW zE9gs7;7r{6&IfvIqwZ3DO#gI0Hh6?$6#QiEl|Sc$Z+%X8RSfXlG!AeJ6|}*1eePg? UqnYp$shAK)Sn#_Su`ucX0J|aEAOHXW delta 17865 zcmaic2YeJo`~UOoUb2_F4WyF-BmqKAAcWo%ARtmh6Odj+dPh(oi6T|%0)uoAm8u}% z>5B>oyjUStM7m<531Xpz{JziLCE@V>fBr{ic6a8PXP%jzdD=Y7p;aMot_lh6)K+)6 zh@3=+hrPTcxN`7^!(MJa^-Ngo2qGG>qZ&8MBce8}p=%{Xn(Vr~^+ zmsvA^d)qn}*a2lJ#tw&FmfvRkS?i)Z9cAUrrNMTYHf>`s>%Ex9D7zD51&0d0u`R}h z{H+m5s-QwMIZ`eBMGv#qCK~|ztkP}GZd$$8_xr8SsRbb%FtwT<4^>X$Q0SD9c^H0s3|u6ol?o*5g`v zYi#|kHuc9}?{U~lY|tKU#xy7>Q9=g3PX$;%gcAJ_YK=>cwhlH76Rg>3^qGs&XoNu{ zU8K>dAgW2D4H^@Khp{diN8?=-Kog{Rq6^t1gC-mFj66?q(NucYMJ7GxA~!uJWz*a= zoo2{gwv^=00s(pv`YG3hbdYtlY?+oY$d zjp9GGDBU2BH7=}+71%Me)I0R9N&lh!CcP(FBYNMYuGG!CKBl3yrei1b1A{i3bbt;r z)lma}*6Ua)omy2qL?1GR4j(XiK#h?DMh~sgX7t1%Lx+qvu@sM(^aLFfOdnafL!zwo z28mW)CkKD5LUoR`c9nKodGVooBl^V3IvFXieNRf(X`Mga6V&aF7SZJnF}1>icHr6? zNr;K+(bVXU`zNS}^<Eo)eM=qd_#5!VqeY4iYJto&rkU5B6y*tXK0k z!+=bq;Zz&fs^~5mYo`|Oqv&~>iEiR?U5IN1tm@@pQbz&(Ut&LA(|p0itOeHEUTN4M zdA$zk_6AA#s&Q&-46e0iWeljI?q1eQJ3o2wpAUdLZO`StZM<7Arpm+;R-*x((Q@H{ zD{ODK@2bY$wQ>i^b~`cXHQg>-yzG~n*7TumPUg3QK=A za?dj(mU!zf9+*1H3K=att>5UnUL5;hs<6dcJFXYXZjU?Xg?s)^$6u{;6SGj3I%$Vy zbB(>9e#J=!I0jN>FJl_pKSZsb&cZbkrc^R)lD?1c-P-Dy-4kV7v%e1r7OrrrdfVOn^42VpcMYB!nL|G<1LsLw!Rb-D-TZ2xT zbcW7aDGL_XJV)o5xS>fG=%PvI=m$KdnDnDOU81wX8-6nAXOl+JXp_d!Ty_4qmJn;w zFZ8QPztOJ-T{h_oT{Y;MN!RIjHMWfwxiqny)n{R^syC$lO%waKqe(AH|9=RYTlA+v zf0^_*{ewZ9bery2OPW^F@6tW%VABd#jYXxL_XYns$_L}i3J^38RLOgsY~?K~!}qM~ zi^}U`XskMst3_I47nW94f7gSpJJll9`FosV6}*~i%@23)Ik9+D01vmCtSD7_xB-4D z`i1bR5Nd}zjYi@g1GC1i=qKo`FIOy&gup@#0irj!5f%z2b%Hfb#U)PwYhf@LqHxj* zz@jvRo-$}6u|boX_)tzpV@$VoSbY~f70g_t1mw@sbJmuXH7ide<^qu5Z8?oJ=DSo{ z3www%Ooq(kZZO1=2u|Z2*)TcBp7SN?` zvV9PpnWF24LxA(zKSKF12E~;hdyg-0@fG% zvfS^P)RVfH;5dzJwWEUJRSZ6mq5~!!q;ykQ7n3#=GrUbC|6c3-rgXg_9kKT;Bz=N) z{%s>1N=i>UYC`5gE9gvJ4B9DbNVGK}EXx|`sp$Mj6px1Vv9;I}>-ksa-w^^|qs(92DRVy8M#-UCao zIj(XyjM4!icf+y1WqU`55k_P|}=AK;VzEmVa1YA5CMd{U7!Z8jI~B`+U4r 
z;c!y)DhTxyEZ)uJ^jo~vfW!4|`F6F4v|0_+8zf4}>Ja4c>~J{3JLPgL~Fd6D6^O0c*@_1qbtc7|q!){j;fZt4*}Lm}f* zj%I_5ZohieC$NS-ajBbEFw1#)4lD0+xb@&?k-g?GpSkS*M?CY_FV=;d(tm|NT6)cs zV`^ruHMT$ufHw-V4IACMZ{q)6ZxXFHRo*e(MyJ}|(xbfHdL|jX(UgIymWLt{t)7xWTW5Mwvfh&QaZ+Eo_LHo?WCJ7{=%PV1Sh9p6^8A!! zLnV7!vSE^epNq-isTs+Cb1-h}fKK~wl&V>Zf60cNNuyd2XXRmxm{2pvVbAoQSj_}~ zpCEPhz`9MEM3a5;u@X%+g;AR{ji#d=AZG~U8D)ay=9g$Rmaz)xp%?433YO-0P`k;{mCQK;QeLa8HU^CKQ#j9&goU5FfoQV!P zWO8zty=G^9@Lb$m(8}HlC_Q}!U?|Dzy)pcZ=io@*>Q#$t zj=b@c8aIX?M@v*i=yuDm#&cb}>jH3bZ&>zt<$-K z7gbdCWw%v9HfNwTBZp^uH|y(l_PSKw0vkibLS7$KxC_f$i*o|idBxt1{^3vMSlv_% z#oyCtB`13|bl&tw{`_RTETn2G@pFz-V_xQ9wPBS#);CshgG6qie%;R-z|l*n`R{QZ z&yn{y&;b+m(}UbDJPMi&acv6VdKAL-DU};|svqJx8sAf%!+gtbw(|%ldcHiule{BI zS>A4h>UfOnf|gmwcwT`0x<-B8Zbd^dwT3oRv#J06u3kd*+oBn8C`1_)Zcv2nQ1DFu zoI|~QZsSWen;b1ebhM1>Um1)^27UZIUf1U*7+wU<5E>6e2H4ooLoh`o%?cvPF^pO>BoXx*rTdcQ zOZGsr0)xq522&h1aDZ;Gt`5a&G3tRsYp)J|!ByDdx%>s^I2-{?Z>f+(t&ZAtnoFs5 z-(cDGJHtp@8bZyzMWoCDi!`NhsY@eCYpBa<6*lR`(%q=~~u*62EZ z$PgQCQ*LH*I`Dz*G&jLzsxcVF#RRRI`s5D|#|v($Zr;IQ7k|r@Jhd)xognU_hW*B$ z#+4IKEX^SoWLIxUtjRFhpNEv%Ld#W`%iOb^1vi>)vn+Mor}_phzYD2v5?&po%SdpQ zed_pSzNS}&8}WlHoRobE`sQfh>Y@mAFesiRwl#k+v)g=&pnMZ(%s%dhgJQtQ#VsF-Q6r=LKD6J&0ott#;v$7HXE z4#2*mZRlOl!vMTb<)Gs~4!z!oaVHwSVdH+2y5lMirmn0}xAYi#64&0iI-t-Gf-c$^ zJAGAP5ccUG^yi}n)w!pBN#BGSkW`-3yVp1wycNC08+3|TQLjK- z+b~FrwbJLgJkMR{l3ssQPTkW1T`;09EDl?>>n6v8kLTUwZeFib(>-6DQYp6>ZYR%k zw>U4v=I-B>JlF0X)bD!^c(J1G<)T(PG?A#UIkbGQv;6LwzyDU-gS6Hty%nT=;`I=( zSd?4+P6E-L1iQn8rW~dg=Iepkg_;0EvwKq9A@95B13F;PK||Q|hvH8Opd&gNd02%G zI%>2lvWx;leIDVc1R;Cw(PuVV@sGEWVRLX!M*gW$6ws^D}lXT z7Q63?pb)o*LDvmo&1rN)tWxmjRJth?{vmfD?iM`R_32N^z_IHK25{`g!m(3?W2Xqm zP7#jXi|)B$(125aK>2QZKm|@LI%eFm1{Mtnb~rh}Y4ckb2eHdV?bt2#P~U&$U_o?3 z?n*ehBye#Eh7iS}P7af&a3@DNInv-L1FRhW?eZbfOhIi%4<0h5>(sGB436*rUX*O9$#w9^b!nX_+-rnY0)KsOVC|kzNj3ga zi&g8Av^wz(eH_%}rrbmbvRP&Z$Ty){wt)NFW{=^j?=3C~Ni>jK3jU3%(;~gL8d_0n z?9P2I*8U0NnW{>fRu*2KS?cjLt!^Z25-=Zen$Lw*f;SACWxiUTrqzot 
ztaACPpufXUG7@|%h!&vzmH*v-0ovEC_5W$V5ba;8^*{T^yHY1>Y4wZxcNg{Vh7WFu z-9O%?s$N^mZCprX%OaqbJ|KcQBjoCE!NfpRSzs5_FjAd32_skrnRTVMwyMPn%z&6c zb;Qk#tH6{cy#UR0gQmm7W6&z`+&zDvoCeLj3+UB=5obgM5MdpukLqaMs}z!wSVT&q zIH+xUqiva_V+OHGJgw_$6F7Sl3_lkfXP?k3@H7O11?+{KnPkhq&5$;efINYY5l)5B z8O{;KfW%eqw$e6qFL&E9Fb)6ygk;%*!3D$Dx)YKm0Fu*tI^r$ag%TNBTMB$Y!3OPa z1A+Kf0sK)I6o$A5YBWmizqcIl5E7*$zb~g{FSrYE<`VcYoHPbDmmEN;=j&^I+#Yl< z1z5-Ga(!)U^+IYV+LQ^k6Ya5hr#OK49>l$dBPtQF`_-WaT2l0v7;`%+&(24roO;ke zYivuMso&(KLz9IzYkNUd|Atxx$d(ryYHvAhQ_Feag=p2Rg$C2jv#^CW%6k|al|K5P zA4=d%5~;q|EIK$qi+$7p?Uw70g%)>10;(ULsv`)HTFcFtc_zIj(Q>#wx} z@>%^gtBl=cZ)c^ADshI^9i>xdXj8q`&wqn5%je-3G%wUz@?x|~$kwW;^qIDN>@ZU+ zAFOe{ny^H>6!$oe1c-9sy`71J##s;zvvKA$2TIXg&*Y_AZ)gMe)z_9bNv}vjs@*az z(KBwD=J6gC=hgV~>nU)^Nb(JAL$(&Xe3v7z=uF8oZH4wt5U*36H-Iyhz$ziwK(W-> zvvz~l$BT#W9ergs3_nSAy%UtzRSQx#HftqS(@i$n?KWw118i&byVtdb_NmV6T;gg= z7;7z6{1&Y>tg~uCQ2F*t216@@-gnXm@*pBy{IiGT;X}y|O9q4MDE(KmW0FC5e=Nop z+=P;SE@j6J5#C{xC)pQ40QgewAi}?dsE>2?yBGk1Fd8?JGXvNqSR@)2nJG z>onh_;nd$`2M5?Dh(QAlcA6soO?FFW!X}f0nQpKkAQZWzWFhppNgX-V;4m?QA`Ff+ z8D>j~N;5=HS&oNADAo-GPonf#4i0lJuP)Aj;jrOtEn0Qjq6IY; zPZww!Km&a;coJZtup@&z-8KwN@W@w8t{@V6jg)PmjrK{n%G#qf7Tsfy*0od;R}^P6 zdaHyCsv%?@3@9~VDThNWcI2v_(7oCPXeH{8{n~6@JR8&A(`tHlzo)&*jpi2Mor0T> zHE0fay+Lz1pUOILq*C2B6NHzG+Ae1rjIBXn#hSfMdZ;BIXz{5Hs_vzrWTXvL9k(vr zx;9XG+y>w_;B6}RGDYIai6`gVR9^k?ftFgU2kiYPfJ}VQIAH+lNEqA;ZKGifN=c zh#S~gYWzWMcAce=Vg@DQlwt`q(LgZnIubkq_D?wDE+nWG46KzvFn6hx=UT?#@b4TTaXt@?z{-(?o|+?<6kzKLY4N*gH_)IAGOm?9NhRcHg|rYS)gC4%6yawMLCfxd zH&x^EH5?eIH}kcdw&WdlY*nX3i19?*O2jEpn7W`l0@b+(wl^=#pppiKs4@R=l*dt^ z1$g^!H>20><)M08PTSW07Y(HQLCD}!8z??C;q0&u*hM2) zLj}V;a=2i>1;|67)ERn$EFX4qkc(aH4ie5ScO@h%DOrfYp)Lw%Y#SJDau+UH1dK>n zZh}N?H^CSUzKCN>Z(p%e5+_+{$;wDpRr^+=#AZtpNCfI68R@>k@g1@fxR8O+{ zk~NUBhH}@)4zH3%jSX(%qL$oLikk)DE?u$=$(lo%7mc0@3Z%A$lUt(ySk82ED<`*> zvd4t-HiGDJxoc~PQ#gg&3FYlwaPW7O$4&-kiOt+u0J})mH3$>Y&B@(eFtmEetn@S( zr&0qMr&0sCw`6^!NneBefr6!vBg?_I9O&f!2E%zd&~v$#en-dI%cXkyCZm_ZgYcTw 
zg8KS=2Rw+SR7?YXOGFb44~Urp#!VVBekx8vE4Q&~?u}A6O6&2;(NG`4Nh-(Gs(K6d zG}LD~ve2b!(nx<03CD(IK|UV zp268B=WwogXK|!cQI9R@JKb8!&zoSBx%LsRN=Vk@B1~}Y9n$axM56IblWx)>YxlEp zJ<;Kd5J1%@4jg7m$c-s>i^;Qiw#jpNZlU+mFAR<638nM-C6gEMLX#KC<6?xS;SqK| zJow_wOia?N44R1Jx}^-0e?D6#os~IyMY6TXco{E8*O?t!!rY!OjRG3U1B5VKr}KG* zDe({{?7TH{euuloay++`mBA}bewkO<2X<;)Ne${wDZMmCxs+G?j>09>hS%^~mDo&g zL==R;?Bv-V7GsTS~yCz6fthWJ>+g5 z%D`>3@}ez6ESXv&eXE9d)l;%hicg=y3*ZceY4<)(HxTe6WzdhlL(EVCB)ESCG$^T% zprrzs5B`jZhA~rGz*jn z)M843vWVQzZc~8MiLqk#J?pyZDLS7~pY_md@>%tuhh8TB94u1@bQGNT4sd6C>M_{@heZUqK#BYD0UCHs0$NmAfMarKBZ35V_WtU1gcnT(st)m!hCP}nZqx9+5!L0XnT zK|%UM&Frn$iM)kbmtP=U6^m?D>M#0R9q+BDxo^pG1>`?;TeAT@-3`Pc84GH?(EFU@ZKVQdzlrth(Dr_a2Z&RXN!}nSJ#JklL;K>gnF2*m=hW zH&Y9T>f+2kGgLq9b%Fob^CrV0A~?uHPU&UT;!(Ow)$glI*zyh+RieH@&`JAA)?dPb z2Y~+**79I6YKJ%_@}Po59#o*GgAjuvK?f49+}A0=&IpQ=n$b=gBM)OG8|RYnf&j!2 z+drwF=#&tr3N+azfnf$sm4_P7x^U$9oKwP_5SoWDC#M8FSD+jhTn5j>IV0DZh;T9_ zdU>|G@w8r1?Hj4zHc&DL6soZw>he;SZ?#dEFa6vXjv4YAO}4Xny^A;Z0FKpM&|hOW5y>`HpPc# z!n9bWPL0tMYbgRxTtll2T5G}+1`fSmaFu8=24TgcC#JL;GJ5F5r)`@L!JERMH`~6| zgNCDAZ4p5oO$nGngzvzq6NgNID^}6#;?e}e?u_u{AtszoTQN+bdmB@%%9)_Y!XSKt zwyObOVKK;i^qd~6hXsbW!#Zh?B*tDK5@HQkJfnAr>;y+T`_*JoXEo>N%)AAT2jmk5TErbLmH&W{Y$6-d@eSQEJ=u72$}IR7X0is8Sc`rmFFhtvWY(Nw4g- z75Bpr?rvA3lrG^Ndz2pS4G~_JUE5rM`Cgzx&*)`U`WYMm7p$?H2Cvl@c^y@!w#N53 zU71Ky6?;Hd&TIdQB)Paweq|Tg8Jl`j*mY)u8;J#-Z19ut)qskJ|}dE&scv#AK{I#_-WwiXZL|hNh#hL%{if$ z2$?w9U-9>J`BL|=ZSd^K{qw?h_;9y7yiUv1uXS-+cKcf2={+V3T6X9aGg8lO=X_(GO#oHRkm%nI_LBTHt$*A$9VWdcE>?NenIo(hj&gn_O^WSs& z?EoDg&Zt9|^!omv&Zxkj^g1@Sra$R%r9Q&+j2S<)MyIx{Md*0jaIi|~*H~2YW%bfe zdQ5`Ea09I9Cq7H>FNCgTgt4`;Z-hGalWy9O@-awi+Q`u)(u(g?!iqNn)dMuMmy&#rqjvqQM`>TVi^0m)2~-Pp zhnE=nI%i2RktA%8qTlA+H)5oTmC!a+rTnai*`)h@w^IZwq|d5kcYv>W%G!|f=}r%< z0!gWUkRMdFAus%*SG6JiKIrj-9Gt8Ye?u@;=C68~Js$a-NA>#^;~AV{S6LxZ0XB7Z zwS3E?c1v}iRB!d-)KJy?vTnx98}j?C$FHNGRKjn1ZJQeTsOL*mN~-7wlwZ%Dk1F*e z?Dt~#D4+PKUB98n@CK-j#D^c1?CH006@Tm_XV}HR&ACrMWCZf17h(>I=w>YLBeX=JJWeZJ{x_%4Zhp_*EbZ7;SgcY;?3*dCj-j<%5ee@J~o;?T2USI2+HK 
z6*Q+RyMv=t-F#o)@53$gKIu;zi&1Cz)w8`@Cs`7KfWa=pKGYm z*FkE_Y}CYAyIzd6URoHYKDzG1RrK)&Tvi!=xN=_fwWwUYB_+mNW&I9(Kd&jmH~Jly zx{dGO?04c9@$H9i!JE=T-S}OvX!EC-8(={LRL%`O;a{&`!f=xy*ChGx>owA#zKvi% z5NOU%tc$P<^h=O#d9Wr){yVp^K*m{%j|W(tJ~P#0H+>ZQh4A`Oyfd$`g<4ez zT^!E)=nm5^0lKnk`>T(V;;3GNs#E;ioO|SFq1hMW8>lM(<7*)SzRkhOB>B^R`etDZ zwdD^z+@5I({hf~ak>tNWM-=#=OIRBU0*fgs4h(*jb*CC?S?GUlKkKEv_3_RB2|gMee2CHX4QWJ1{c8xdgEBf zppnh}AA*DbzwjsW`2)4#Z@rogB(be!!M-H<%h}rk6tB*W#4(C62YUfYylWUhCDkiL ziR~vZ}*49QwR522-*CYt()xYyQEm{Z{d9z3ji1gG9*YL!Fi6zkW+j5QGux z!V=6}>O?N7s^9ieApx}Q!1pAHLS6XVoZD;q!;NNebTMDE<7?l%jTe)jT?7=Ws^>uM zSo522rAfH$T96{ipPb_hiW)_$j(7C1qV>-2s+>D|C7aLNVYqMJ(WCfxDYV0FqtzXN z+tcS4k6TDJuA6g5tx~ynV4@U;=LV`|&8M(PIPDy;CQ0S}8f6@_8(IBx1Jo;beSH+i z**?l`T%UXUK-g`pO-^xeE#aKG__sNC&IvE59cnvYb9l88i7r!P?_mnpTvIjgV|BcK z&$mG&YD^uHs_dKIs+`kor~=g6>rA4?j)%a@kK`|RV^!*Xy^TG45=5qQ@9R-|xQsRqVUN5xV&PFK|I-UL{PI*svJL4Mb>N3NVnq?=Py``(=iM*3zz_U~ z%RBapB>@$J1z<^z?l6v{@p^@u^(x1%O7`y+3-aNYtU5uxAXF& zKHPv0Q`e8;P2}J8v2gp8EM0R%={aW~hWh`yF#3$zL$HQUi2H0~>GgncJg4(;pO)VIRYJ z=UEQh_D8^WL#ppL&p)eBhoiN<<>d5D^>;X;w0|IeO-F>BzamM0KTl)qWaRsx}M*0oFBf+6}<3rf}lPMaCkYUoXYitu#x1?qH0tw zf-j{ORo0yp$=W^|Tkh#u+SA8Q;Hj r$FfEA%8HLt8%#%v4dmx?Dgu(V&{=>I#rITJRSR~61a7#5^-li>5+etA diff --git a/settings/repository/org.broad/tribble-1.90.1442.xml b/settings/repository/org.broad/tribble-1.91.1453.xml similarity index 76% rename from settings/repository/org.broad/tribble-1.90.1442.xml rename to settings/repository/org.broad/tribble-1.91.1453.xml index 01b944fe4..93c75edab 100644 --- a/settings/repository/org.broad/tribble-1.90.1442.xml +++ b/settings/repository/org.broad/tribble-1.91.1453.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/org.broadinstitute/variant-1.90.1446.jar b/settings/repository/org.broadinstitute/variant-1.91.1453.jar similarity index 80% rename from settings/repository/org.broadinstitute/variant-1.90.1446.jar rename to settings/repository/org.broadinstitute/variant-1.91.1453.jar index 
9ca3c4f38aa4910227f0f704183965f3be8f1431..d339781e93ad1d8ced991b7219c3cff8009b1828 100644 GIT binary patch delta 43000 zcmb@vcVJaT6E{4wdv4Cj%?W9=5FiNp{)*dAT9 z6(n+qw#?YNDz--K<{4XCAK3W9zUztT`cuT2> zUN5)SsN8yKHM-9nIePNgv^LXbPMA) zBe_{Agj@gq=cR$aqOxBpG^1~J=xERmeZ;X`->fU+l)G1b;$?bulBG*K;WzUW}5 zhk97$O4S}cdE~5FuU>xY2ujy4%GGtWmp8s)N2i%h)XX*`jw{I<7g`sU&h?9qMzyfL zEGwd)e*aW!qEoF+&ofbyrBtYCso28$>Xslq%9EmLD^C_@ip^7lWce}EY@Qw@+Yii; zpIdcfufgVcZ@viad4H4-cv@j9Cyk;KCF4sQ@tHU_B)Z;GL1{HU~k zv-q~e@K)X?&SN%j4^kW6;n0pC_2!)p?-GP|OWwyF-XqBG6@u)uxhP0o`3Zsdq|N)~ z8y08>Y(5yI9=y@!r=;qqgVdJ`57~S;NCTw)0lePkBSOAs1l&=f*R$e0C(iTI<`*RO zMVns=(r7*wb+j59j~_{Y8Dr(j6-{wht~mUP!>>AgT)@00&gnZU-nNeiP(x`MV&0&p$--kNi`R&+^YPe2#x9lZxz)}slyy!G_2xVY$ez#F;uVe%bXPYyYlaL z5}E+Si~f`9GEX(}nLH>oH2r3)G?N`#_3h|Tt17LzOwp1m13A5{D(9;5D%s@#N4Z>z z<6LgZtz1<>rMRl1suWs0zkH~4wS2B@@*mXA93bVYRFxK5TdkinibuODT~#rwwptZK zNg3@EtEy_Qs;V+vRh_%KDpOSnP0AP@Q^QqRD%({#s$A$~MsuB`YKF>Je=xe1ORuQf zOj)5BxfMe1S5M+PCbyQ85sJ%<=DH?-zEycma#S4WhB{={tb)qwsrs&Jpc=ZW5xwfF z>r`Wq%~ee#rm1RXHk`CGOxk=qKIp3Ef7SXDWRMb|rGx^C@na~$CGM)CSgXHQMiqC4E-br2X7*IQLK{XW0f*kIkE{BsexWamS5@T* z=Pcb?!1s660M(Ewx%Y(GV`rsJm^yA++JsqYQ$|i6H*MyWv16L1xoV(l=Bhzzu*=m@ zISLK2RlKW)s$rqS*;Qi8so|~~p{lrQq#7I=dAdvmH3}gv;BHJ-kNn=fT{Rk|)fiO} zTD&?bR6D11!vgMVtFf*cr>@6na@BY>!B!JpHAzjj)f87vRnu&h;Hv3rhO1_(SuPfm zJu)N&{pis2&q`zDl;iR)m!&;|)9lc*IoCOJ)LaSNU^b*gwN8QWMpxa0&Z76E`{t>e zZ8hIj3%E~cXw7n8-5jWixwFJoONA(v&D}M1a#6Kf_a|_V zqQfo!V!ehkLI?7u=&4jxR4xBy#X9sxn{p0)c8Owa`V4{cRjex%O}0(H+pKI%L&E&F zh*~3^DN#m#sD0b*Y=?eno0R3y9?a=zWU??{%VUNUl_KUC{JMjQXK^gbV4@V&YB!1t z<2iv8Wz*xFh=Q6L(I!q35Y(I==F;#5s5Kcb178$%qLo4)vZ)6x7Yafmcz{|7;eg_P zs!FHmBMNddDs@r4pH=~BpCTrc^hHXwIpqqCr)aM|T`D5T-Vf0w>S}W(n=Ahd{WLJD z2>J@>&*D^0^IL;p`Yf)3bd1f=zz)-@RRa{|)fP?aB9#Nl0$0HyBgSv&#}1#D%S1+- zYEwn7K`)~j*HIE@adzmdj&%y9*%}MPOroa{H1uxgqH+xcd8$sCETy6-7tLBgMkS=MWAJY_d`h8~wWJq!|zD#w!ftX@*^D7ijR-|=E!xB+a z{nhrAG^Yhn0(Zv1I1aRb0sW&WlwYu}Pad~HJ&h@g+j2V)wF{Ny_S^wD6I%QRy@@Y; z+!3QoCcr3qpFW86Gsf>_8f0@Pn>*XwP;SrmSRcA&1{xOa>bYM#BEz4&c;fHS 
zq`qw-WAEv^t8#+GwSRo~A63w&@|z!;ajZpX$LvxOu|5?4NNVWZ?6Y3sjqPg93XMP3 zK2&pVSq$r^*8Wsf_l9+jmsa%ZzSE(hYulUdZJfB!;bV3rL)zjseulK4ZhI(nWceV} zoP5V7e*^GmQq0};VlaDSLQ{|BnPzQ^ix#Ed-5|z;`$31ZYXEl$iaoVxuG2Pn4Hpp<$b&L*ZlCh!kzyw7g9q>YeFLqjYQmYht>tXxQTQAco5hFl85=e zmm`>)oaK{31K#e1ygS}L;^#g6$L=RWGd}K#xaU6}p5&EJdv(D5q3m;Q5I6N)16>Tn39bBH;@F8IXLFXlI-TBuo597{J%YuzDl^O>^bYCaX)`t4w78a*kUUK!qZSPb>az!tBts}_>$xnudM)L%} zU%$+)wcNbnaz0AN$8z}u54{bUkH(t1l`*C3hv!u0xjxlDx%G|Ly4E1-TEp8@na_t< zrA&<~j9nFP3DOW*;&c!L4PR_IEmjT(VmOLz3D`k)I9h_i7>;3A9EhC|G~;ZJkD*eW zAc=`V8puh})SpWS;gk`lY*4s)3xj3m#OI1r4`Wky zRt>~yC^3y}zRu>xg;H-5LA)tAsl&~swiZEZF6H`jOG#-JO+z_PeEHFET07iEavV zNj?38U_1kD27?`J5=wFD@RwCNMcYHrJ2npuTbcZjwy~;JWNN%RS7nCZ$mVa@uI)F< z9IDAT@dR^aY#_z-ti_|PlK^abUa{7MyVd506~~z$>+&%@g{GQcDo2$yujO*3v>`4J z> zG?6B_^awrZLI!!z<|!^uMKn(n0Mo@cgH2wtRn~l4k1M#7XfhMmbip5I8=e}dID+T! zT$gVEP&z`-xO^kuJQ0Fv?YW}?dlE0jE@|S(xg+aL zm{zC#gvnziX_O>k$L`V;zRl%jJkG3pD3FpU;oDtaPE%dJgU2ydGI>LyDw%_8qsl_2 zACE@x3choVWoMd{2HdLR9KOq?x9Dw`@8)}4UP&iiUL}a!OKZ&B1{k+2yFk1jvR54m)nF1Y-#P;MQsb@>rqZ&H_AHL<7L z;POVGifv3ztV^eOlg*F1^bv11iyzcwLUns*m=TrP4j*jDqXL8b(-5o>QIt$0Xe5^D zQLoM9cqYb|tV;Z%BicUyToubX5R$QrY zBD!)4`bvggHVU=D_Gbs}ME1JW!W(dkvaq4rMY}yjX42!3DajjV{=k-)y@-((BvX`I z`Vy70X&=VcMS=iYbb*2xVUYp$gx6OPUL(UGMMl_@*L0kaZ9jztWiu_08+l|ag&=_{ zWk8oSv{8`thgk+40J4PG03C$n${reMYne!ePyc&FV-z2n1+7G6P@{UO63jC~@q!4& z%YowA>&QSSQ?~up(X-LN!IP((%lX`@ zuz0R_p%{9eUO)@`BhQQU5`2R^-!b@_V6XZzy@D`-ZM_Pg43qxYnCy(SwJcc|sWRw% zk*eDCT5p?P$Bz4rk~wh|lQ+>$@MOV+$Q){oUBYw|+lEs-Tp9pw61I@|cQL-?Q<3uV&c^p=Rvc6`(@2 z(#>^?>~6J=n5}KF_D?szw&6?k(u3ryBGFdOXZRS2Pq!vJInj*r+yDmK=0goWsfD?!15N1M(AMT z`q-m4fP$$J#N+FzCO4sm+>}~E36RGvs0+8Gew;`1I3G&z)>O!CJ&a`)?gIV8FCd*o z{lG7O_10tHHIaVv7GHSjcla1nWjOr-YAX!XzVsh&kuF|_{j@(z(*8sr3C$}|RC%ag zKymy}xG?s5%PgY@^m7_!7%jL;dJ(20=w z5=4T_zMKNGG(odu@+?TA{`+ab17urU$A=7~YTLVr zp!hvNg9>tvw9qXpRoqDBb1KzqY9(9yDY3!<8rD$+PYvZK9i|W6(dZhAt>WBnMoMSe!d}K?|#1FU_m}J8i`Ts=c|bs zb_HYKAIsG!3Tx;UbQpGF!jg#oqU6hz(9#QC0eJaQh!>a9b#lhSG!$Hw;+=o02x2cU 
zx)od%oE2NrEVHgF+vY%LInN8f)R{AEIr=kadvbG6NAO;cZ@afoB9hj9=ooxD3931Rm{EF{yhj6A-hAv^`Nf;+O*$irDi}oj5w#-_- z7Pig~<1i;XvT;Tk4oWZvduNV~5j}7RapNU2K^$zI%W#tTuyrmY6_=6ya~bTP5m(OP z^72h~xPrqe4p#&_g2vY7)bO$Ec??^>+SJdCT#cPoy9wMcylw)oW&R}GcoIh|J{4{~ zg*z!u3*R}7b5w;FT?~!^E)5iCkT`>>p9^lf)Vw`|J4dCv5XqOC^qJgT-wMx&ncO4> zM*&s2noGAMLJn@s(U~|%$YAqDKUF%(rAb^3o4HIE0`LPaXVSw4D&wbh2G=mhXK_I+ z#C{jD|2(O2wW&6ns}=5|Ew0FmHfOn<%{eX^S`8pLqNoYYEMf17>h6?ct55;W5tJoE zZP{c#01wEI{m2Hwp(o-ta2+6t>LDX8wYjd#xw7$3bHO!NyNr#$OZUSEaTBj| z{CGsg%#%zD%ysj*evF?KEt?uMXFeCkETR!!-j=l3T$#@mw9vfb0v@b|;PY^z;Sv1p z1w5z5ZJ5I{t(!~l;46!9ECrm4l!CoZg3gnwZl~pD>OyX*Z>KxV%0*nxytI(BVpd>x z(; z?Oi1mbcE+#^U)%}ypcjCd9mlY&-7o+U3j(Gw3wUXbY->qd@*;`=uT6231UEXGi3>P zM~=f1#uzadmhj}92ZhSuY%_(z529V&@VNrquZ=?xqr|LQ3i6=q%(qLqpS=vR7zC!> zEu3$!$F_%QidnfiP}Xd^h0`>8)4X&G7nI!uL7P3|L8guA2*{ASx0o8Y^6)xa(OeNl zt6+g0fv=)8Y&Jxc7Ggskb_r~kFMwPugUsx|6$*+?rsi#2S!0NquD5ajayvjrzd#h} z3eqi%@wePOeH*ul+2M7fMC>%t%Qz1aBaqJ6fNU{+mO&e|8~Y8}aT6UhdzNv#!pdNK z#P3rD{s3$Ihg6ME;RNAhs?DEJJ^oCj3h*8aoIi+_as=pS1MQXAv`AAL&^{`{SA0)+ zm^}%ng3!zVT)#$q`_igCse~iM^cCC@E78OiyrcpaKXZ07&(pFC zoV0~2>9O=;c-aW8QAfHKlk&kne^+-*l16CG(RmsYBn=@nF2 z^TvK}G5mNx4onJT_%7}iBPyRD-N-A&w@Q5XiW9o#M#GJKpUtb~!h~Gt2vRS(($S08 z+WdfAoOn>2ha9=mfrAWLp`h}4M1ZUpXM;E!#n~iPJQ_q#-x=i1eMK=_o7>pD)#hzM z8Y^1IvHX}L_dCWiE_aOO@s4-D!{OaApvU7jZg=oZ-s|u_NA7lv1}9Sl6+@?ZR-dHcL|U3l>u{A3ia7F>6NTbF}; zhYlHlE*XfyHVAv@!H^CIh3`DU>onhQa!&G2-HS?_QztoB523HZ!BhOP6MX{1Lgcb0 z=I1ZDKBU|Z5PLJe;>PWoKm@-NDM*93EZaKq{etnMuvN;0fGWzN@?cB9!Dm6(_BY9X z+V6f^c;i=mIEs6k5#OVJ%p4Q<1DDZ*)u?dQA9$DIN6gzlam%E#f`odIDyoO5x_X#u zsE5Lp&+=sU1f547?LM<#Hj(_cGCeD(hUTrGxpe3HJj@m`ghdQtdPkgh#epEgbRrrK zL=mPB#P?w|b)b_$>_Z@lV814DkVQ_>r|_AqbDUi$dA}9syC8i}KZx_AI6q0;S@A(6 zVLB(iU&IHo=?gWW^WhFpTL3z(i(KS)hEPD;ebZ~`ZalO#@Q2ON6l&B{9kxufZqzA{*MSC>1IYFYe=~Bc}@&)mGIEZ{FK6Z zpMmcG=L(Q2Y|e1Gx(E+*2q*vDsfWuoILl-x)ud;(%Q>PXt_3LpvkL2ci0+fi5OrPZ zBkOvu`1*+t2d}aUZ;`bgzJcOvh_cW^-05;7zRrbwJlLG8r)!z@d)Y;J4mY;BiOWs7 z84gxmZow^GZpHO+0^xEVmKJX9avN@IGX$V^s9jD-O{!MqHI)A5*`2D4(}{7%L)d2O 
zPj=b1a~PLAx^RJwizK)^ffC(C6zRb|!S_)~FPG&e3HQY!#{FG}qTdBuTv>d}Le3Y> zUacD9fN`k#<*c1%7JeU9Azal`UzILD`wHd(*2>v7SY+{+>19(nS+&rwV4Xdhph}yW z$*OkrtJuCVwn@j$W65e%i`S$iy-v&U6{XG__+9SlMBntbTr#E_V!!tIzfTrx48Ht{ z=~zL{1_M1@L2XgqLDL64lWLhsl~g~(K37S-Tf>Vz{^77r=Hd2AnC(yP)!={^+y0^d zoHsoRRA0nyDNqmiXJAto@80nT7P~*po@0SrGvay`Z|?4;Dk0O(Ug}TV+dC{=U32DJ%dy?BwZP#o~+GZnJ1U4QK?nXJYsBW`9^Fc=izvFKDOx#aK5vU+HqmH-W}>O z<_+fK6{=}mqR2M98Y}A>tg6PO->IJFx6CJZs);E+plvwH+KwIe4xGvC#NE4H0J1wg z>@IavDUVZpf8oFvb*dA|?{&f{YMD=ZII)Gr_Z;OuVqbbG8d~^c;=C-*D?vD~igO$q zE!ow08D5W(BlZ~4VTtpOP4C4(Z-2rTrV>XVII_h#NvFK6%}M$=2&IR=uQ@6E8t7}V zrI8$`gN1a4zINyvN7&2B2zzmaxttW{az>cTNntLdy#0;@es$~+p#EXhviyqwFOXYI&I;du_=5br!k0e7}-h8m<9M?13L z=_`Dt8M`*e2BDoOB{|~6!HMB$oEXBv;X5`xqqwvL%7}yg&L}PyO*qet@i|O;P6<-` z$Z=u!$Qj`o?~KslDmGV*rpB>yHh4zP277aL0h4KS4Mz?K2XS?WvmMS6xHZM8B@PbC z>R^XpdRDR8g;Uq5J^?G2X@c47=<;U$dez;0x=DR*OCFo+g^xd~eq#GQV2eY`@QYj2 zT&v=Ay4^o3!a>Ve7pE)NyCUejkR@k$oFHW)O~SFGxACw!7YvAS6okD77>-G(WHm5H zH>s3>3(4(S)AMiLO3H!}QJ_u7Ufppir}a)(J9fg9z$*F-?9tBU z*20+ZZ79pU4njd?)H6SW>b!%N5k33`6?NViHLgaU`Az0@UjyzoO zJ9N|{9%1uHS1w_=JVrVqkH<14nHdYCl2Toq0-^{+2*<3YF_-)goC}kv`=t(h$+`3tXH{a z!Q(2^+}vNqm~P3IW7@Q_+L_vCt;%70kID+P80B&4(cn#qGLpcY6J;>rt0G5q3P*I4 zz?>(0anqn5U>RgcpJ}E%sj_*Nx&KL(S~wf&vNMo_3oNAgith&OM@6`j1HrKKN@}>-gFj#3BgYUx)c88tIW;*i>62 zgGiZ%2dIn>$B$^VmdfTNUgrlI|0}V{L27E=I-rs}HtPnwnlt33YQ872#eQn}0LmiR ziUn(=7Psbk5F>A3PMGpR2^kiaH!NmSMU#6_H7xuH^4Yf--9KQD_&1HjA=W6a1LoX` z#_$kqPR7wVzKO2qbu^xzqzU{oP2}S=4g201UjK^L`XlH(meU>R-{TlwUjHJ0HZ~7; zk>(YY7wPkK-VZ&7%o6mK-}{B$0LQ^vq=2X7@UpFpbpH{GPrd@(0_3PwHjaVuz1OCY z=pTxspaGI7{4X9TsaG&oij)7}N)!T6x&N!2|MwXiUZeyJ*XG!?U%=>P;el{vo;JW& z=6MjE9DR^lb0H4Zgo(&Bit>_f@W|>b1A>t$!oorSH;6X;Ul11jPY7VY5eThNpD=|O zAW;~dbCBAq|AKLg2g5(`j47F@F*r5915j9aX9SuY%L_-zlP3AV)>7q2pq;`nO}H93 zO6~XL9HrQ<*)_w|!K)vW5V<4)KzyF%$$*&NH86r|VcS^`<4Aak&${wT(8-ISQwyhv zE5Kir$1gN-4d8~g1rh&)=uj_7RX)vI`b1y-KS<`YnnwW2Ka>|x8&FIdn=ef|NFB|Z zL#j$4@IMCDD^1#Z-yCO`3!PBnBNAl$c-2V8*ogxsiaX=z!-%=D#(;IyvZVi 
zBtOznVS=t)5RSvBswSs2K584`6U|wnH@0xnWP7;X2n5tIva`d~Ip+!LBEf@H;L*6N zAeU1Usd_8fHTP531Jo_TDk#4zB6=kD+)usMf{98_eNA$er>XbA#6E$;)OVl;;C?u) zkpjKT9-#hV8t}|skSYe(q?=%Kh>kQbA67Xf>s>|Y!l2Uw%1v=Dx&n6wWC-3^(w8@S z1n@b2CyI)>f@2rVYfSlVHZBzSb9h@s-&-8(PyB5-d%+^%XBMU8cJFk0`voXYlY`!Y zxjzu(#2f1bHNh}#YE~Rk$%W0jWnarRu@Dq9&JV6KOMg5*i=(6Gs3DXbjiCxFUfWW! z`gwc~T~sr|=R`>&d`^CvK#M3GPu4aWzkdGRUQacldUr!9Se)%~WRt%37ChHm@Nyzu zjVrh!v%sBcdOicZz1eat+c;J-mB8xN!C5l;~LzXNspzd=FM`ZTn!e#@EyDWOtSHqM}jUu9E1mnv7%D9}Hu zq?4n-J;9X$2%D4$pe`V{Fsj&WVYfBGUiZ_8{|(RQs<9>DVtsH@7~}m8PZtca{T~=g z{et4i{|(X~EB}QdF!G0nTt1P`e-UTHaxSe=)w|HIM^iKGuJ4SB#?JX9x5-QXr)qg! z@cduP0z}IKX3TR6H3~IH$z%g1iS=te9gw)FptiLmzGz!RxzJpF zXzQ3DwN!7#&@lxiX-)0M6sk))Rx*_mCr+IBXda*w#7UH3k~pO$2wi4@hPt7qhC;JI zL!nuqlf|hZPKsoyD85Rzt_;0`PL)8KIO&pBMVzV+0fUih~=< z=^9Fibe#=HL4lmEp@>LVP#~vkXd}`!v=QmLwm5afsVh#dIQ48@-_{L+G+BMU>+b4$=Yz%~EgO zMq*;cX)C$fiLX5%dElg%n(7YXbQA~onhSJi&|PT~zo^yd75Yz+pAaMpua7a)O)x#v;aPf_h zxRKHDjS^?HIAg>aE6zAcyq{NXF@6p)txWk=VA5Uh3#u9DS=4yiI1nZ?;|*&3*OlL3-P7;Bra3!_g}ojkBKu zT^I97-(~B&p;4FqzsJ!lgEU#Ma`e594%zxXXaM~Yu-egUB*{4X{z6Bub@T&{e$dem zIr?EouXFSxj$SW~+aQ29I(n0%A9eI*32hN)s}Ly7(c2vTSWs`*J0vpO(L3dv6V$u( z?x22L?-2+3v3=60-i|JE^b;~SLD{o1#-T&4r*S~jQvYIWOuIb& zn>fFV^9Qc{7wG>8P5!j?UqNo~7d-E1kmqkf=Ax}H1@&cp#kRoix*=WLFdO6oOGTre zpD{Qr#8|c!FmInzr8z46^*Plms^TTk#Dhs0xHeS7O#e#_Ht)vkbsS}8Cg|54%LN~@t=MqqME#%+K4)8{Fsp1UF8p&D-BR&) z;o9YOW8L-xe&6L2aMXTr!hFe92gE6*;#^flRdxAu{=yY|ZI{2|)1r)n$-xp{>hf)T zHyqW$Y){c+%EY@og{O+$z;rPgnC@B$R-&m|QP1Th^FT$NQgewyA9x(yA(` z<631kh|1TkH!l1eor-q&sF z0E=a=RarsHxu3&;Z$UA5qm|}b=@w3Zw!yaEs_I(RtPBb470Zdv1Fh;(O{SG7N2ljq zbps53`58XyS~YmKYh}UVXOV@Umwee0{9A&@g)(JSS(jgw;4yJtk}NsMq7qe{m=cV( zY9brIB8jg`Vl9DLTY$VP!8$_nkA$8dOO{W>`Ba?G#Q9RF{WX8%@^}2bI6t^z&F+dd zyUV}wZ!Z7Nf0(0{_2{N`tz6ftYt<98TEM(0)n4W+u2tV^;KD4(a;=6|BUeSK7*{2r zL#+e_+FfTg4%Mw%uFz^?HFd3KR&y6N;K{Dl0`#z2s#Mo%1+#8Au;s2IguTtmb5(sQ z-%K@ft$bhyUlZ4AjaH~;s)cK{5!`RH+DfpsYqb*->&l2j5WZ%7$+p_NsIY@;b(H9~ z{9Gs7>g1};7Ea!tvAVccfe=?rudS|DH`nSeP7iT@ 
z19eKeo*O$zr6N~8s%v85$T3|{)kCbI;gMA^i1l=g^tWeOwL<5TOL;OM=rN3McGWDw zWe!s7RMT^HU2C}6R!z^YaFCy4ifi4sL({YlW2a7=GPYpaXbj7`)`)PU44ur*NFhWN z(C=RzE7FYu)CRjN0|a+13~dj0S9?`K*Q zO|2Tbg`ULEnJG1Nx5jq~)VtL^uG*{iVY*tAUA0oJf^Ca6g(<ws~CfAzECq;E%&a$Rq?ujA%b*gZ+Y; zmuuYy;-Oi~%fc=B;e~Na9}ggv`<9u62h@=M{Wf4EFPKaVosV zx=ZHw-L5*Qo)VDvFqOXAvVsXyCygCbFku!ZlZ!Gz4S`TD*Q+j$4}Hq|MEcS4CO zs)sIOw1lttcJV}Ps>HV&s#e7h!0+!eC2cYQiJeF+Y24Mt@jGsgY;YpAh)J$+wU94n6ERGpUVUohizD;yZA7cqbOU zyQl`=O`Z837{9HAB6t;yvF^oTO30+P)mi!gA2eOs>N-Q80^ZuI+28Tq0%NeJQ2`8r z!1xcL3TZ(NioPr+yc>mPmna=ilz7C0XN%6m)vbi{qA69SB8@u&jKQ4E2W@pf4Yq9R zw$nW{ShIM_nzik8Q~d%$@3+%^H5juW-L^fV!JN(F_Ii{Ct2SS^*Q52z@Dy~=gEjcG zpYdo1-82S_8nu7`^oj{|)ZGicvt?C{f+~aJRCP*HnN&;FpzBl?HBmX(%hU#W{PT=j zI5s%Wuc2!z;Pw}uj=@$oj%xAiJlE^cmVPIfbcvsaqkNAXailMaA^e*lLS?iZ(Qh%3 zd#fXw^L8e?zu1el}=x7|*qMO~x80 z>G}Uin(8HC<&&f`=Amx7=0DO$c?5T%;55$c;JOS6?3ksm7-XT{ivt=^+hq*%0c?c-U#+ z)}v0MWL=t4bUA9G%TrsOOzm|Fb<&l*i7f|NO(<4r^Ho2cS&#zc#4I9TRfO*7B3nv?3d*kDYWAg|e zZGZ;^ar65yy`aK6Oz(A903XePE*uVnW2tsXUojkbLNW)nWrf^iz8bE3M(}(GM@R2c zf<6I##QV?~eE?ifdU%Q+xh5>61kb@Epmy7Q0k;8IbwvNAjnoy)#u0d6u2XU7tb{uI zp-#F>arA8?5sj^ZU*sJL;Rz;VjnadgboW>&OnOxh_!Lkpt$KnO{_OO-PGTk_#vhiw zP(L1OGWUmF8HGi0pJ^+xZ?u{vecFyWcN?^!k68wDGvp&B9hdGkV~ zuvk3BGz<)?I9IV0mQ1#DS>6}|huVnYn~CvRrtU@DBLy?kd#mah?Jy>jI7% zm1qk5DTPbs4JYWr`kpXnEhEdiohn+(DbKotI#?^Hn{_8muMN5^LHBRz-}`~4Bsd`_`O*|WY zT7~KZ*;&(cMlvvianlxgMoP^)PjNsGqO)vg_fFHdr;9)(Kh3-m;dbtI@VvbVb;RFe&S5id_Hj;sRIW-~;f*0KRU9 z{(AVeQ2ze`9#l|iMMl~W;jd=uXx80uJ?-aNIyrSlpfSx3G^6=}=Cml#f*uOAq|Jd= zv^~(8iUMs+>TI3Px0w9dIydH4Zx+YXx9T>tV75+aeydDak7ZP);AIe%-<0n6B#JHY zv;ciGV=s_{Z&I-^v<$7iT`f27&DPWE`CCowPfA!*wz@;DkhwQ0d`iy0nnqbq0) ze8J-OIS?$MJ}~>{=q{-OMM}hbWtHcXvbeb=N`-L6x%y^qcrV6%kk-=}6@o7YC;IQJ z`!Fly?X#n5wOWG>;7EF08OZw4xEJ=Nx?eqDUb{hO6becJ%Vj;3U^~n)>OnRUQB;-? zKl?+F#*mBG@Iyez&-GA5JZ&V?qr5F|Zh%5{Yz5=AM=nu;t=8G+Jnu#=<#+D)-O;E z*;pou-X;9#JiUq=-hy7VaNt^B?N*P&2OHHAT>mP;^)Ii*g6m&`tC=`oSB~2QK+rAu zdmj(jI$qBqhnSs}-e&7vi6nK-G2j247kBHNmU}7Q-xC?Yj{oL$MFN#G}{5I9TefuE^r;1`o! 
zs5>QX7JKx_q7B7{I)okLZH4+_K5VKj(wQ}nfX_k61}(ue7)SmLOCT%uGU4f^Ei#2= znMJ{)P)psgNVhK8+C8h5nim%78LUmC#X1wH4OJsg%Yo?vHtAs|BscM=n)$;OG`yh1|`G-#d#xeCGsPo zH^q5NoVOjZ1J0s%qhT@)+nX$UUmV!pWQpxf7M&F5lr82rAJZoi_%ukL(dW^)0EMIN zkLgRBzH)F|9)j1IXzG|kU*q8>`UYNlLww(g?|t!oXFge~lYJB6EV0MQiWmuJ(QkNm zF9zoE{|VyK>!0F-o%{J9=E~m^_)_x1NZ7N!$&y?1Ik-H~!1xEvEylNGs_ZVl~Sz?=$CAa9a7zf*NVmF+{7}1G9R<5L% z4nKN}E+2puFwTIEF4y-fs|>IT55H4GQ;s8sa^|u7_2^1?XA?#{F782$!MlYnSL8}A z-hIR=M;dH&yi3kK*6RMbRa}^7uYfJBq*p68&S9A3h@Ea`i9rr5Yhbv?)W!5%iyhbx zYjyLeH7*UIN991Jt(ozFPKht$a&57}886nkbvajTW|AN););5GZXh)+wPBgdo_S7h zS|&Lgdl%@-Y$OP1zHyFs6<(5B`nuNm2X&yP+=i#2@&a0_i`RWyyRcB(;o>gSdKVX8 zj@aA=Uv3MtA-vj(S4UfMyD;8RI_1{7+opSDgJRjV&?bcL#x;A-a}OkkI{IGBMi$#6^X@j?&X#MJ!|}vZ>SH*kl?#;j(^^yXnC=<-0Chu9u5r(% z2Tj6uT_#DMI0G<+89Mf%fp$b7n2 zUvJAjCv=u6*r$i`GiLifT`B#jcYO*PnPQUYS@i5IdTthO$gyWE`t3g5oL@2-Mc{ic zPyu2Y7bRadGmG?;ERRLetGMPa%QBSGnA2nbed8y!%Lm(x^MoFYn>tgU(C;hH-00ZG z!nF7%kWx&~u^c!ce$S5I;RL+iB=NXpk_2-W827_` zzb}`Kk>~Gh#$Ca9b|j{P#H7T?+!LoVZ1^QQO`LSeRV9ddhdU+lu(2zIn_=U*JL>6~ zv`8l0CrOnT?|QCoU9xWgnKBpfcrJR=0HDpFwvZY%Lfn@2f3I!MFiWxu{W#^JFN zk82@#uTFZ*5p=;ta3^HEG{+L(gf}15)dINP-l)n}Sj}L{ zH#6*HZsYrHd2Q|ie$eHI_+jsfyMnP+d9(aoJv$u6(Q19Zr}(O(ZLzNhO&ZIakW zO`rF4FU;!~J*&s~&5YL@Fw0%u%G*FQ&tAuQjInNP2a0FkgL-8L@4Ra1h*P6Pn$M*>_}V!*n>x=BBnHP0%5Nd_(FX9WR+?2&gjav z_-xv0Rw8gUx<(}W3N|ePo7UTSqQa()SR4|(Tloq+IU2*{G4t0MJs@>E?i0&TRowa% zo1FkI8Q+5Eb>yw8C3rw!+Sj_LMke$8*Sd+l3wAtsJiuV+O|88L_cP@u7mJ@T<|s%f zPvAPP!crrT^uf#}-lqMP;T6XG$T#|}0S966Aon;|5NubaFnovLW12*1*qAIs>E(zO z<(D6GBmyrE&roF5y(F(>ZNPF_KGK+HJm^Qwv2XRju1~?PKz^zL<{Ch7um^t@&=nr8 zDvisZPor*u?w{^e0}juZ%avdqj z^$v0g^{U98u(VC@*0AZ(%Ty1BnqF^j$$)wIiNC0(h~YlE@`MzgrZM$@!2PC|U|T7V zaA0_wRX^zbj6?KMq+(y67z8RGBgQZLiI??L^XCt`PZ`LqNRYP+1&Ry+jCnKSN8P$k zaZ(IQe}tssE|SaL0B`3Rr-xxFVE7c|1iveN3?=O^4<|{R zeJzF%h&TQN!}Dk~q;}KvXMM+{i)gC+_#UcnqZ3K(Ol|bSuq4VjYWbv9G z6zhqKHf=5w=}2a;=_0&DsI6x)UXO-r;S4-r>YUS~8$ktu)yak(p9qQy)lI?69?@84 zOVR89Ho$)U*t3_KVyZSZzg&b^?f#C ztexR=m-OGhXjF6L(eJbb-tVpFZL(e<$_hb9F&*}doW!0>yiRu%U8 
z=Fv@Q4b9~=OHAMUq+82ly{EB0uWc20T{H8QrGsNYsF-3x<>CU-{5qC&jXdKW+ST)U z^I8kb4X> z-oY}<<5`#~>pLb=PYn{NHA zF@RXq&l(c#RUM4Ib;jx-k^B}&)h*1T5R6*Kj<6Dt@rDtW33?gF9QfJ6m2Y{eg1TOl z@!WP?ZJaR4itSjRrbSaL$`fPv{2%d}NY!gl%|x+0@%C9$+mpKi*^A6J9+P*X67}HuW{%JAqWMeVC~I zWtWJ3XCe)jtt#G_8b-ritV#*;*Eg_H9YG^)8s+VhU0BSHv#F0u*VA|aa%qATfH|%U zbAu^nWPYH)EM119ly2j!4yM)&Ya0G`%BSP3XTn2fS~FEn71?BBV_C(P=Pav__GF~gMyMiQW2fW{8% zd^T3Wnouv+p!zUms|{)qEFQ{%%gfJH*n-vd_}EnBt%o`!u0E{8Fzn%L2(Bh(y^~-o z)(ER+Dq>|(@}V`RsFC2f^1y8Xe>;LjeI`%(HLVWSUjhs%#MG0au~1ler70~Ti}-x3 z6go@(D#bv2|Lgs^{oVSu~{OMflKs2s!m_^oz$h!=up^-a7 znfx{OF5f_H@-6@x^|ajG`W3}W@e%N%oTnk(--+~ClS^1Vr%c@Ety z>GK_0Am4=&iF@Nwv?v<8?Ii+WDPH`cTO{>X3En0(EW_U>qUGY;A*rKQi1*GQ>=to3 zd@atD*#mP4oBWm46+{KraaMqanPHqZiD{Qi0b!z|FT6H8i{90GuV)kmrH>0ymfTsiD1-fFpKZy+^%@UZ~2IwETfd?wt< zyuCdz70C%N1O|rJ?g%vT8=Z2~s^jL=?trkN4vz;G`>}0*%#Sv2?F-CAY@4FMDBsd- z&5DaF%&r3hCS37g;0M2XWk#JlW#W!_u?vp`D*Aa_muhlUs_bt{y%q?XCXaiqLgn~F zbAnR4CQo!SJB|f}DaO4VSZ8}}ZBn;rWpx}U$!|*`N&~ZQk`rsLyoocd`sVJfcKdMG zw*p`KU6XtL;j9giPUP2H#T%GK@8gNRw8>7CS$RT=gzrBQ*yQKV+OXxR%2X>{wkP*J=i$F!6kUxKranL;cWndOcC43e5 zQu(EVrF)pa&jgkrcH!57H~qE@?_O)%?+HZm>rI0O=B@7ossA25&zueXC&3f8KD={D z&w<^D26i)h2Z>ag3#sYy>_BwkKP5adyE-k;ubtto(eOdN@tFJ_jO`NGVe?LKR^BCn z-Qqkh&K`&N26-PBN#F^apOnA(wqF9_pqx~R^OS&vj?H4&!yJ}Bpl8V+&^yhAN5y?s zoae-O-sTsI|9W0!-sSMIAXVm<#d#%2l^Cz4p627W{KdQs`HOiO{D#AC#^AE&+cEqO zzbnprw)~mAn*4r{Klp$8`VROiitYd1Gs({6W)ng}LLj685d(xOh9VFk1Og-kq)6{Y zn)I##6cm9_B8(utC?E;~dc}e$2r4LIe=17n$5Rk2PkE^Pzi02}hK=v@&nGi?%Q@3$ z_RO4Sex=NR@eRD~ea+zCNLXiM-*^o|;;1%98CitdBLG&NV$f`40baX~$wpjI5UR@6pOwjxl@Cs0mAIT)q{6~6J*-be5~ zhv7pV`9&v=@;Li^9~{sX;GB657bnTQ%r(Vn?FH(}SNC&-^V+7=ouBpK;`fx)OHm(A ztgoVeKI0|xveojjnP=O*SYeir`qKcb*|D{o>=mu6DH^C~kfOnoa7KHOhH@gqBn|gL z$Y_M5k(~U4=#xSkq=G!S8fzCQX z(>R*0Xa>LQqG+a~S&C+()#zc)VzQ4Op+ZG-6wOsMPtl`_9t$(Sr1_W|w7_SEeMyf? 
zdP34de@LixMVLqY*Oax@OgbUyNz5b67DpNjuXitPtIyV9uDJe%pxGow(mHoA2tlb2e`MD9IuD*U$ z)YegzL^+*R!}R$J;;b-JJw+>FhY5W4K^3iZ7&}{3(V7zZv`woCwr_E~Vj7i;C;VCA zzMd1|G7wgoZV|k;;6w)Xw9PORUI^Ekkv6TP^)@t~H%h_@Y8Y*{X$u}hdIBCGx9a}! z*2IWy^sEgThKiiVc6~bDYLL2vK%m{C=Rw^0Bo#YK8fZzI1q5u`Lob!Ml7YL(y(Z** z_Sv-GoCAWXS~cKQv*~5psW(0>o5h-8&?lqaY>ujP*SuD6jOeFV9kmkueI@H!U)I{7 z*Xa$%X0UC-%pN+zrZ>5O2k9-=+IZWhclh=&9YOcl^e*2V<&qqu_iTEfUbX21aM=xT zQrOlM9p`r^=tG^JV5NkAL?_X)HhqjDv0}@;w3lA8=@geYBsV{&bKuOJKEYG-33H)M zpVDWx+0X26)93UB#)3^>(r$KU*e2;K23ib%GW0LbbcW8_bdCYfqebZgL%U|VfY!{Wi*(7R%Se1m6{}kJL-c>(qtX?O&N}sg{%;d} zg+<&5KhTdjv9jqZU9;&Yx-My%&1%|`ez93e+ou0G4ba@C-{}vVwX`MOvguFy3w($; zA9|)8n*ML(6p?84Dx}+JIfrEdYYA`J!sH$TKf^HV0yGDp=h4W5gs?@32$jOZ5aEv4 zCZsJC$|b^t&lcgr#-K;;A_5(V!hwf@caf;KD2H1SCCb~5!yGyzqNRYb{w5Di9HHe# zkh>*XT1~_@Q9-v!vNEh#Tf{+sU}}<8Gc}$*WW~bX|C6mCDxoTGCs{Fxl|_OrsyJ%i zwn(PkQl!|BkF1K;7u8tDuVZR;I%$hE^nkca)Ud_f=n#?4QB96&A*FuFR(x1(DKczP z2Q%`KWNS-AJyG8l4Y(N^f&u^b)sT!J!4d8sn_^{GFtva0Jyy0UX2MD3%|=$Lc!+H( z-)ki%zk-b!IH#boH{l3)Gmf;k;PiDXX6QB;;jDGiw#WuHi?`tzi`%YTaxDdLx)qHeSc!Mwb3+l^})7Q zdUCEbYyKC4ua&^Zsu}V(&MV*_S(-DW|1x5wIZInPXku6!xowb+$gxtR_`PAo0DSgs z?A)*u%CV9o16Z>q7&nZibFA?aGPT^bI;*|CqBfkO)TFff~8Zxjw0 zv&Xcrz11jmK0F;S(An*+HRj{``}S5?mAC-oB@tmG=Qz|~0k!;pWn zTwqN|wqV8r|1X?%Z1NxOU^NMJ+3k*2bMYkIqnmZYoG3DzfKq{lAeiNAgY@_+2IuPF zeV8fcQfNDYl7YCzavX!hDg``nuE13YGkS5(a|Nd`F=i}Id#>O}AkK_7SK>;d70Q{b zaHVh%-rm&6BMe<>7|gr)4;(=hp+kL~gN-bnMivg3PR_F`!Rb_!JgbgegKXh*N{`F4 z#>B3Jup|Ganc+?=5(I2F=%4beyTwKr%B-Ak>7>ng!zV`hW+JYHiG2}DXWjyLUtwQ) zGT-XlYAc?RxeYQu%pnQE*;1OBlvN2-7@7Cr12i6^U_wZk@e-t+dzY07`s&-(GSpV!6uGS)dN?K{G1>TCiQuRQ#tUO&PbfY*PIu)epP z4Nd>pN~cB+GYk%m^q^fZpWN|hkkAlqAwZ>md5ZB-7QNE(ioJxTrSXt-_5)Dx|Mcozkkkr!$vD^m~^(vIq8!|lm!AXyVU^M@Z@)@0Iv;!WFe^RCV`Ls8*OE<00UiXK+<2%a$R97%Iwf0H1!P?6^G)uVic zBd@_A#%KXO&Jh!0ik{>*i}=lAF3l1}MT(a4{W6Z0^TQPkQ|u#9JN#@F-)c!ugQA^I zIQAM$YxvPKe6?25x^P-g8#vnNGi`3{!L&)zW<^^VY%AYylk}|5SU}HlO}0zg;WHM} zPQH1buXZua3mm=ZGv?85E=3NfxJS`TiuNkn$FPO_xhGx@1MT8h1d|9h6-YV&lS9}p 
zAiCd2uhHv@-hkI~g46DUiVi{U6-i+Pp*!DJ^bTJhmQ1v~E9t1BV?JXCjphhD!5xHs z%ntg1%YB?7k>XQyf};<^=%Ye9$$j!MF6mUMS!e)%0ypRMse%m>+BeXZyl2KrXfX+_^D`d-l)esNa8`Gb1~8P2ix?#;C=sh*3>zFB2|g15-KUQfGxo&k;bXqRnyAC!pMJC);)m>E+wxsg4Y?5E=5f#YDrPs z|Is9?dZ?M<|7E(BDUxe)cZ)hw)Rm$h)+l^{AUFfk0xN)N%!RkdH2=8S)^rnQ8!kU= zy#U9=x03q#e}2Sz#*CSZnImC4hBix4A7d9wpnu~W3wlM>F#51Q=(h@SeiL+w98&f()FfVmM(bQYNB5qNU3_`B`ZuHdd&I;XEtl+TZ==E zKp-Gsbt3OQQ|WK!<9*g$l<8l+0H5m+?sc8{gtf{C(W9Tx*HSd{pLxQnOd+5uCq8E` zvP?6^Pm8SrGyGoLOl8~Jka=>XRb&g1$^OnAzaqM5i8UspF{B+tJsX0z&1`nEST8Y*$n;_Z*l*Y#tSv_L2MO$0sKupDKVQ#lYJ6P<7=E?GsjJ_J~9?}4@l9=7QICuhf)SwHAmj2+6t>-T3?YTML!#Y za08?m$i*LIL$|mYHkeR39|jUMTMW^&R#=-W4;8~~bBDRpHjBkDDTZ@RM%d8H%wy&D z>bg&{)i5fKYxtlTWsA`w&osdKTxr#4kei({{J~*kh9d7ewiqME+Au>f%!U*m8s((< zF>DOkV!W7Oi-}^AV>*}#-U<-ZY#|_pq{S2)>g6xkaO5%-n~Jvi*%(~5n8pn?UCgk> zOff1b(3AI+)fnVn`BvbyT^)(->W0x)MIR?M+ZGR_wCv5jDsCUKp+*OD#Dy?tEar&0 zwwNa#wZ&s7l$b9T=;kY}c`$_age?||C#6_qi^XDzKDpAW5?RERU&>nJkETdctq z^qRl5Vd`Y9UZ}16L>$HI_xNI+Sg+;N)-KRX`=7RY$8I#wpke#iVw2d63-sm|(X2Qs zy|Qk;+DZ@E%H2>f#f;U3tF5lF_t@B`poY(h?QpFscGzO4*;3Dmw-PZLTZ!kjT4Rk- zyTl8&co8qk*S1>d*Vb6sg)qGn*i)fDUcyzoE{HO)x7aK8*j9$)ogGQUT z@yutuM*|>Bl8LRvr__y=s14hUp$bqIN`z9`TgFHVF-L&Myagivp?clNR@*L?xep}w zLVAN>-5Vsg#xVs12|??!NjWGx!MJ<`Nbj*mc{hP|NC49Tet_12P@Ya7LT5S*X*ihV z(+y8qjo{zl!BbW)JSIbS57mOu;2Wo`RCre4d%`^*RoME+Q&tTbVN`bXpDTZ2O|2J& z;w!9}Y%UKL%m8bImG|Mrg{(*-*OsdmlE z6}iSL|LAKLn2F4p==Vv}%h!Q*ow4AC>{mWzPO&J%bOP|KZ=QXHCJDgzvg( zJt&qLKQAT69AY?GfpH^J1MyU4PvN(zOTUJ=Kvtej3w&im=JwmcC zf*BF=mw@!dhEES)y%K5|9KfXjwjOvw*3>7;J0j3Yg z!`0dM=?VGH(>L?wybveBx2I1&RWHmi>V>f*n!EL!ZZb+QI_ear_#D4NdsM8X2PUOGSs1Y_k2z9tFwe*~+V9gW^k<6NTV2C`FX5P}37Rsr~0lr-^ zL%*W(1DqQ)^tWFQgZ)ms9ICf(+7wJt4h^_n>gqGxF3IhsENp0iLeDTp zvtq~?j>bx3oH8B)H^vyR3>-D58xxf=Ng0!sF-0*eKiZfknUx=Dfc%bRetx7eQ!+n4 z+Q2b$I&<`+jYl}bz9)mDbmr}+8*`L_4Ntl;FC3=H9}9;xXV!vn<8k8&juuMeNo6bw z$7X&pln<1FjZLJnTp25{3pB>_b6B2!N*ODa!_WsiUqZ{^(baN@xkg`JE#D8Bh4=c+ zI;yJwi#5`ui0O_SttX|i7Qp)QT3KkW(<9c&p1$>1RqwMQv$j#cw@xlqo7mr3JELt0 
z6{CyR%b40*Y#cIe186lHOylQ}jtwzNX8zl3?G~xq2At>G>^GNBL!n$^uy#j|u}4Bh zzzkYXs<=VkUw)IZ7YCLp!zVs87To@9ps+n4|FR8oOQ`v!|DJ6UYK|=OlJ%@i7gvl0 zdgQb6;SqxeLICGB)>8=04#Oqn#G4VkdKn`T9(1lo89ngJl5{C( z=E9VqM7N2T9M>)o}B>j;JKiwdMQ=a`e$=3_Ec zIjhU|W5*-)?vt1%4gJ5JltWDiZO-OypJjs+nq5Ayjx0Te`P|U|!YTP~IWt*LIxCAr zO{~-*=VYpwZVcDe&&lJ?Z25zxT&oD0ibM0jin#QWte~5mmynBd*tGru=Ou*cdG%g< zpmT_R@PdR~o&TK+GA71JBkk}@x6%_0gF~}m8ZkQQ4`(5({f8VEhp(|-8>t>~6pItf zsYKDwXyodu!vOXQBc(wl_ zPAv^^pUa<4|9(9<`0&L9T|raj#k_<{N|j^E1_T z(~Qx3QrR!=u;(xrUdR~ZU;TN+}3)MJc8yeu%%}4g~@Ef6L zwor|_aGT&R_=j@3e^JKOR!4A+1M9j49Zjl$b6Tu@Mp+C<7~O$!39dHdYW@G<6H34# zw05;sMct~Us_7h+jcchA%mThoDlUMW-3u*MV4VH>`(M5E+bva`Syb}2oDR)WF;0(u zuz7nuosy;E=p^YI7bp?P74?`b6`jIGjcU|hHN!;UkR8k#4y@;PwMxLt6|c7{*DhP= zMPaXh@iiKULtPK%PZ1@Uk-AwcH>0Uv%>C{PQWczX{p-)q zZ#|!67#!+*zze#Ug5N4hJ!kiezd`nnbWulz=A}*PdEV_iy92^1cuDUeNus(T=YdIv~1U8`ab)Ys%FHYd4`<9NKwO+p;hS zno?Z%vZX(4CqV(SurXMIN#Cx5#tXhj<%K?%T!N09@+rAM*(Hq;U6sMk} zru_EI|8BLsV?prZ7mwZ1YP#0K(Yj?0TCMr15|AXlBF8QBh2LJ>-w!i{1B*kf)e2{9v!2V9qWb!TNe1wVTE zt);U8&%ui`<<|ht#}&+S`n7g$vFdL*bpAMCIe6h)-n=7^r8lKcYVYRJ!Ft@+7*r<* zFU|vhm*VIt?cI5qeD$uHdRcoFC%zM=uHR18)OFwnSbvQKoi4wtaQi3v8Uy4i7odKw zib>^Jd#--Q!Z>^mIMnep0`z4#v2D)4|{4*wQB$r{)pps|BlLbQW@J3tKR{Awxi0(D7z8`JrGxCWrNbct#i%c zrcNClQmZ&>hoviYa%&LuGzs=b(^7B@q1Z(Mc)h5|^y%3p2<&7s4ilBbeIA6w=Us%2 z@HnS0l^~S*pTvOhAS`*sEm6V4iaksnCP8>8Bu4Iej3{9i{BKZ7X5q zBMkDucdM*lDo_v`I0=)g1@&;~x4SY~IzQhXtL)wF6ndIN8xQ8R#5*t#*Qsa2L9}i+<2& z96m+`L1>=a-MZcZ4>xkK<~P!@VI#kLUSg+>pcZ3 z$C-R=F<$>&fOTuuJ9>E+)mEo`is}vM;wHjYh*nR1uvNRXU0cFeb6(Cv1A8n5svoX=vy1EU;Cg(4rQ8{=!@Zfi);LCo- zAxOskZWbPc^qwq2E|%i3je5Pc1Se>d{^_#o^NxStfz!>MSf$ng3~c#I|F+KU`KZuy zyQ(JodR-xPHojTTkY_dB&(MS%x|YJDFUIV55$bJ$$HRg9g{8x~yAvg-#tT(mZ|(Q^ zU94q(RAVL(oZe&=8(q{LOKIv2p(pfIE%e3i?mEosHLH5y)tkS%uX^@S6`azQYSxsk z4&8KNp=YE1u^zFS%{HvI9K8MB;V&1jc~2GNbbL_n<{z+KOg+A*>gufFr5ZP7Q(UIc z_Eh&fNd=W`{*hFUkCSSti=n9FGzhEN=ocTr@Lm-ecoo`9#RL^=l6$F&v>`h1W*Xi& 
z=?4{S{*nIKxB%#t640QI%|AfzN(g}V>FuVeT)N?%9-)`t+ke_RfpIC#mjlRbKX(d$vt@xV?W?ZD>iNBX<9^5|}OV85Q~76fV>iO#HG zeT?%U7&&+s=^FD~*oOwF%FZ}*v^16vK!v6+ao-JCg6+q`ZpwBBHY;qrfX^@oFX_Bb z*Bz*O1Xc;?Z!`_Vw;JIAPwC!05Q0O8&{jG>1(N!62D@!us@hR@b)ItAMS!+PfUa9Y%nz27qg#Z8KG-wmu98m{FK6*w1l#PI75 z1we}mF2KoyAxTbdP6{lMuis{XXDc}X?R3NtcO0+;en7LYY{T`z8LU&1rNdN$Q%#o3 z?*{^cL!PJ9Rce>QN`1Q)#PYer9W0OBz6a|lZ}ISL0PiHMcDXqXo4>8?B>AY{mZaWap>d8q1E#NgjFwuR)UBbic0A}JusXm%e5pAM@MMs+uLR+h(7Sa?2?A>hT*2OhgSSM9 z2TBlLN0A~Aprv*{=r%2D5>!SO9K2YC2TQOzg;v(1S)*Y5!8=h|-Qa&WLG+P3Q5|K3 z*GIYaVkHEeMDl55nUx^nJr^rww5sKFqenHt(*|iDyRcAIs1*1*K`}wUH5w!IxzF8q zC5j1wEsaMP!IRiN-DQj#zoahy7pL?RB%)jvc@9s zi1VdT>3aWIcYal4ZH12*VDzO@fK=UdoJx1{^(ZcQ3is}{QiNpv`8fBB_Ek`Ep)Bo} zVd^duRg&IVV8!X%lS7p=Eu4=r3t7Qry}_ZQCzlJIgGlLm;}|y+7QgyD7N*1yyt86c zmpkB>^|h34K3vm`+-)jyCDt}{||r@S6sl&8E8+;Wn3SW{n4bccq+lDaVw z#oZa9&rU;e!F!6)S(98+$HScRB=;MRbzS(Hx*8THCwt~my@>NK4&HrgetZCaPl}Az zuT62`OSxBNclzBj1Mr2DRf3Z=^RTjbuYus*p!RJa08N<N+?O>ujU`CfY56g0(q_XplPoUdR}nd9{n)+r9&9c6w$hOem)ZLlJBxqi~p zHK)2g?O}*{+D{J&Ab~#?XgUv%$O9U`nnCL@%P6pzc(?{0(9^p7G8`ukS08cOYw{`AwIN9JzkFSrB$|onzI_NME zmQ6>dKOE)v)%Bh*hqfMgc`*o| zsL#xF%kN=Bc;ILK5rlWR5|-{f%l#@c-{C5NIe3?czhae69RfEw*W;Dc%VNR$*##`8 v>6tO156!|VpIOm`au^eFv)yl>haur9OZ6%)0vHtGdfIFiWu0rG0P_C;;b~Ov delta 42717 zcmb@vcVJaT^FKVZdv4Cjy(c6D2qd(G7E)-T_k>;ogc5p3R1iW(5FvorL6Ix0prVM1 z3QAM13IYlW0xA{|J1X{uU6Ato%sDp!eV*t2{_~>QvvqcMc6WAWcW3XbXVaFQNsBA! zsjVQ9Llm05?yk7HalzT^I&jzfQm!GQYwqgMyK_!lu;cXcV`h%OqF#r&tx_A+Z&ttI zs3wi;jcwSdUXv!x>oscBt4U77#;JAlrcRwSKDB$%xcV7!!MNZh$T5KYW_2x}U+c>! 
zT#K7|-x1u}LAXQQ?5yv@4{|5Y32&WQF??rid^n_D_mXz+AN5)IQ`M7mw%5}QbQ#G$ z>F(({t!<*#wz>1LN;Atx1>(bNV$zUjS4>;m%X7h0e(;kHM4xmpr_(qye3S#l^~X&b zGh@b~3(xOH<8su^L3)^rVrc_U5ofB+(}HCAG1G0H5hU9W%#`F=;>;H3T5;wC;mo!9 zx*)}Kkz}1G&h-*KUz`Qv6ieztaTeKJyx8U&99|NnWWLenn}U2Z-{SDClJPbP+-~#I zAf<7U!^`A*hd6hNvs{wza(G27hxzUxoRu1VMh>rX*aWE|-{bIVo7dQUZ;%@CeG+xQ z&1-|ygdec^!5}r`VxZZK*V+7#q^@`PVZr2)AhqO2Lv%B5kk&sYzQ;rS1aA~)lg&>C zsUtt-(8eJ3=gkgp5rno%-fa$V7v!H7f;?mMjv)2oh`@W+=I7-5yg=J&^R6HjaPcEH zzaUlb4pJfSv3YNh221^edA-dq3i)0VaQlQ_`^9-#oL8jH2PE~N&94S&93P6=TMLcH zKS@8Vi7sB$6o2ue!$%x`&EeMt%p2ky73WQH-m;7N?I2C#cf>j7@Noh8u0S{;&U@0L z_k;Wae<(@v5AqNEV=Nc{#HWM&Gyf9eU->u5{(CG?`NQTv<@=X7XM(hX&)WRAAa^cE ztN6Ul7lO2!FM^&54PLDnMU@hCcLtSKn7d2n}Ulmb;8{$O-$NrtFq`cbc*-?Vp}ER z|LuD!4OT%1W8Q1!+uPGBH()Ney>FGHY?ZfFg>da^2RSMHW3}JxBs2kthuyC$nX6;1 z3gMIASmBh^53FfSh2gC&v&_aBfeNNiI#&!Aq|LJ`GTGrb-?=8-szxiWw0%~MA)J=1 zQe0J8RdLzjSeMIkqRV;Q!BtgNHCI(vso^(ARtj%CRVQ4pR%K2z{DEri0CQK>P&LC# zYYlY9^OdfurD~hIst2lsV`_IvtfSIhRY%ozRR$NhDpS=CkE}f|l;x^yRnJxRRZ93s z?KZl;Y7h?9xi_|I^*l?rVeWT?BMj)y-^8236mwo9=X1Jp`$q=Bd>& zDdDOaHPZ4`FPHmsUsv^3eOy(b;Oi$&e`(_YRcPL9q{;!-PStkR6>6lJR!vt(QR5NhKHQhdD#+{K-&I$lxVlR9375%AZ`y|oY<0D( zCa8%RNUpj@O|sQwR~4x#wwmgyX==Ky%DZZYn(3-pYPL(i&=%?OYT<-ovEh3@EFbT3 z3RiYHnYIW**9wt6?9=$>DNdL@N3v5;Fs)ZnUL3WN_ z#ACxZX18*tsl~3kK`k*a&I@E?WZmehn^cSKe`h~kmh-o7Y4;cF6;vy{F?aIzDtQOg zP>Va*^a)=iMcSs{Y*scCG2+e>rdi;BBy-F~iY5C}3BugR^5y}fL>c|zww<168>B6O zT$b9<4|JY1HKr5}Acw;^5HVn3!W=;HUC74XJvJH9{-Q7LM-x&1{-AS>Efo7-V*my-Tc^GaKni`Ic( zV_+OcD}F|I#89|J{@RR=+y(WtpbYNH-9Y1>l*HY+2XH1DORv)#_`=6M!x!@_C*<3V zfztagluiT#DF|ow>Rj_b>y3^PjQCnqUKmYyMX!!oP0(ESa7zI;mu5x3k}>7wPZVys zqIbvoGR6GtRWD`7up2Og3;$I@xbw8$V}=f<=aIE!3IwpK52jrpv5?Ur43|WkheQX# zP!E&KK%@3yC2;-GJsi_{1dkM=nV+-mq;Q8msd^L`()d2PS>w>L|E)Dwl)&&>qZsR6 z_-LQe@mG2IqqO3y!|e+?mYYCt14b$RAVN+IuPo>mdks$l#)>D0zbr^g>ja6Tb4)Cc zhD0|eCiWk~e1|_Pytyr0w{I8DF-Ow_O}9VXcZV)3g89XUJ-*sljY$3)g&Xy)A6{J; z2dR8R;gi)oY5v33-%kt=pE&?=TW8i{FRp!)vJ=Cj4s;3cc{etkJsbJMTUY!pAGRD}-VHtVI#>h(J+ 
zc{RVa{@q=Wuu1-Um)2Z-yi9oGE@|#_yVeH1!hc$?E=JQzP=EYjv`G^lyt+oX&zt>_ z_n|j;2R(&lSJ@FgMwBBOQO>;htxAh7YogOU|KX@|9=M<}--OdncSPLy($fsUJ^b4O%PTScLe}%)zGtMw$}_K7UPW&{ba-3~OtlsPGsfWt z;Rh~Ak*6-S3V220@BXTZIjtFG%mWTT6ZyvCi9Y?FwreJuVU8q4kW|fwS8wA@R#I7_ zq_Sr7I9m{pGiO`|AtH7hS1Rj~Y{%~hSI$I7$)Ajjbkm?I2h8wlU{N`dN!56^_L}2< zcIfjCb+PuDpVGK(agL}5!-TOy-c|C|BJE;Hv_e=*1UlK=IY`~Oiv+ql+$|`4*5;n_%@?}$5;(nu#(gAM;I&qQeFeoTeQfR* z1alll*Fvx>=Fc_N6Lp@a@@^MYBtnFdw7@LRhNay>sRR!dWJebL4m0izohj})e z=knEN(UpP9rdK^4XPp2*bNHgQD$=7q-w)1gzHZ0|^dy>WrdE$hG6x%R)zo3GutJ+h zxjdT3xIC7}p%Ry`;PEbB$yb?^TOda#^b7<|wK{fVbB}gvdSZ1`m#^jtE>C1gs8{nf zE={1TU0O?PTv|%{3(a%+dYD&NMpn`y;%x~bTN+jp4BOI@sr zM_pdVces2fop5=%)OHuGWJ>N(*tJ#atfKLArj8$neVWt`9lPWVm^*cRsvy0>0|PfwjGWo%yMvf+D}%1j(HdrX5_Gbc=HkUwF@OqW;F zcP_8tdtLgAFvIT?>fX=yxV)Ah5ES%Jy&EIT57z%=8h2Uv3WUdB7QPZ;PzZUGrBjI#DiL}M zVUcVxegLV+tUITZ(x-UfA%RmHnhIA$RDzH(ji%!pOEYLDX=L8qnpg4^)42^dDjq?L zQJLg!2h<87D$kPSx8Vyxfo?~^r6S1FGP=X} zs~L3X3|fvB+(j#prO0Hq<+S2U%^mMGKq?1$D2xWlzXZCQRsw=RE{|^%vYtV!W{{ad z_W&R!hX6#59DIdbPFXsgvcSG>6a0!U)iLohk~(xR-B*kZ0va+_38FyyegMU^ivn#B z1sX_6h+pt;h*K_u7!Zj66W{~%U=(2ED8SflWV*LwxAi3iXI=&__J4xA6Zoy8hk$dD zS>BFQFQej|%K*ej0Z7LMNX6wc%B`n|qi8gVq7ls8fn|^`F1YL8jl2FbU_p9>9tEHc z_!F(IC15>9kIPuZAKNCUJr~>3k{aZgXWMht;>qYc8I+kQ&=H!|6SNUo8&X?uC@RW; zuC|Gu1Tezjeu_5ZD+ACU@)ENJA;CVGV%+lQsfEZ0K9N2y&A)XrQ>rdA#&b-C2>8!sbuMUPT`%h7KY za{b{iWAsw%_JAmR=|zEV7UuDM@PZ3@+`hPk3-qKAy+r$f^C0BePcOqa)bqUpUrVUL z2cTlgWE)Pe!Y4yy5FJ9R{kA5Nb&jfm&Z583;r=!qfolI+>1?@#$?Iq**t}puWOjAn z0^kzYkt=(+Gy&XlP*&f>G`8?x4SEX<5F2My9WwoBl!^t*m)|3wb!`t9QR;fpdq@=q(i2+-!t5;!*bm@SP|TC)L$pX^FelJQUfX1Z zWD>ST;zYOp7;>LDMCYgipKVAE+HjsKVqXRS=i%>vH0UIKDh)CRJMkDGpV^r!54kij z3+OXcCjScPbNKwu3Q@_gfTEr&7dzX8<;-Xz6A0`W^HF)idwR|aD&qB zN~b>bmDi*+EGE(+8iT1QI%^5b$9vvfwXnDd5%{uMD`{T?J0V78vPB)22H$+1azT$z zRE$1Dl`qZPZ!RN3xso+Q@cWj&^QiEB7tS0h)5M#r&{!R=3Qf7%Wz!`YM!^JI@a7N2pNSdL0i5mllCfERIMycGKGPxx+=2nn!TT?r3OWm={Fo1Ju4(HKz z*h`qtojiPHb?%Ohjni12EgFE$j-S1C8dy!lPL(Xm@Y1jFF<8$i`VC9A?0gl{@806Q z2^e6s5L)hO5iu{8TMdqn)n46bqip>n} 
z4!ZWdjHW?4`!9AZF@K|qC?9m9L_P;m;k+-ufGjQ1ESWm<%hA9cG-wUk)&{clcB&rp z0u3Ibw^7&rL#%Doqrd6ao#W~a$q7p~to%?#_u)uDCH>&Jh`^JnJTvAj#?kZYQa<9d;6#_?d~6@O(sclMOm zn86=jy{8tD{53OUf>u3~`m2gDNAsPS;*u=`ZiVv}IuwgBc37Mv;=Cr#>p?hg*mTq( zoJ865mW1Dyly@9DCV}G);UMY+;S4I1-nZ$4AhvlAIrO1?KLYa$VqyKnp_3B+R05w# z&d(h>W!}4zbD$??(oYVZmh_(;`o*SSB?AtbGU<15{&46|`Tivi4w*0l#di)vPQYDo z2o z>o^K(pv8m1O!% z;DH(~iaaubS2KScX@3pJD*hnSeKL1foE8aB;Vi|km_5@tN#8(AOm2BjHy5XI&6pZ4 z-H3>~)47el37)H_bIVXomuqot7gF{T?_|gvn9j8{bavDCH7=hB-LE#ZzjPOJ>%BM< zp|z&`4Bm+h8Ro4SoF51A-^C!G=RyovWwK^+t>UL>g9~AIt<9M(XF+p>IK2u$pu*PU z`tXd9>Xu2d6)x13SuPFnGSqh=%-)L(AU?#khv*4xD&xxyfh4Mj6tTqSMsg_A7<=U| zcqQ~R?DUEzyUL{pB+t$yZtg;Yfm(`vUV#E_YbWiBoSMml0?BhOf~kRm)A2u(vcXNk z(9w#>fa^d}ji#8>^SEUN(PHGE3;AU*=AL9)VA@^JjYED?tSC3;`s=y4?jmn$i=MO? zGn6syYeJzLh#5Q$5Fp=`MRDnjn)7*>mQGkSp9grIaAH2s;+swH1zaU`OIe*O6+z=M zvlehWeJkB=b}ZoB(9$Rh(o0aQTO(zeXsKNo89$VJ?fSD7akaVu@m zxnC{fUK(9#nlDBSC~jsh#^&-WvtPm(ABJz>Nm=&^g~8XR3w7_a>6YI3Tm)4vk3$e6 z#N2ZO$bfD$Ki$9s?OPCw0buf%aGqX=xZARlOi`nCd^YdfB>^yiap9P^<#+7;DMxIsUVPwTfL$_TE`ud$^ zlU2T^bOXz1GXrkoK@~T7ohn@!Lpr4-@iz0)P24fGiAEr=E~3bc6u76 zh?+pjka4vGeqW7|v`-Zx-CbGb;EEmAsH-65=t$ zph1|JTRFel9*TK6Ct5G<(0- zI}S90S4`S%{6(gZho}RB&#V7jw6cGD=CMuHiR7=9zl_xH`rK1lU($%P*-l%3uB+|lMoZMk_eo;!;3n8T0D_X%;Xc6g)1n;d>JggN_^&6|VR zklW($R)@DqhV72rJ(H~yQ#td) zd)z2e<9+_XiG3SGS0uFNCgyW)tangX)9!O_(YZNP%cY1%3Pk3m7+>Q18U3fAFx7^1 zIu9i)LB{$OJ`4TnOQL?-Z+=>2=jZ%F4EHi~zvWF8y~(47QyDda64fY5QKPA;8WYL= zj-OO~ulex@ZWo^@Tu z5#To?fj>nYsbM~CW(|*AbA}TGTr;xZ0za<;^d=B7tyuk(0*S-sI=C>wbzRw1bfFp+ zxSYY6rlV3V`)0XvKJ7wS$aFEiAOnQyZWrgqja=wAD@6c+Z=m=Za}!yi9~C(Oz9Hgk zhO+buEp@p$<6grk8fN;`!7x8vsf^jYm0idOv%FITmvQiGb6XcopuNkv+{ES@-o+Z5 zJG!`o*V$$WNYG+kIi5A2;x>Tp4i99b3c3e8Q?;tvp{L9F-npz?>Oj^4kx}|`KlDs% zoBO*wfD2v5A*s!ST^_N(sjf8A;v@V5BPx}WS+^YJ}6H|Sug&&pRi z;AsIi&9r4_#ij%DWqQ@LNmgz7khv~db&5Ru%x=}~X@ONLid*0lNG$C>+{KJxIxSV-k>jL(+~s;EZhwG<_XSRbUQN0oOpcJo(* 
z9x%67RRa-wvZ{K!t{1!J&)4^won4gh-%wXI%s)oU*;MsMQc(fog;5PL*^Zn++?W$GCSNB(9%1Eh|$Z`L=6CD>N17x_+@HN8L!6c{`l&a%sSY6F=g&kopJg$9mhkW&WsavW}K)q z<3ybqN4Q)!j*i;CLgQt4D+CSjoe&+P_9*gt%}as@YudG#d#+x4FO$ZraEf>dgh5BNo#Y z-a%pryAfp^j+djd1PS17Tw;*&z($axyf_uaNfM``4PJsfia1ETf>Xo+Lm9_aVkwTR zhH(3(dXT!x0b*B9!^D$Cv!)~GhFKnea5&xOx{^5~*aUHzL2Q3y37FE8!(p6-V_O{d z3fxBG8Ttz z=5x6fj@P2h#Vo37H8BP2Rpo#S3GNm1;@`S`F$#m}P@rSCetmGI!{a`AOuC>C_(cbT zMBy_aaESp!3OWlD>Sl9y7h8|WX7w%1%XcB3!|BAmuJ88VjxEA88)197Dy%HyLk(`%K zm7bjDLg3zMs?JrJ=C!BPb#Z&?0qN|B;P!x7)B@*!vih{YPub>{%_`m8G?Xivo^Noz z$!evuA_ZI21*`2iPs+an!b_qIH8Af)8Eqaj$N2NU7-?YMqD?61Iu+oTGdx8z%6rawnza8k12;>oPUddP}x$ohU%R$>D`+;BBH ziRl|CkXW}QrIDYK^Pee={gfvEnbH&~O#r^xvsA%{<7aKYnv${-Tlj&N|4MB29JMw- zJ*$$twdq~&Icm!gnriu;#CAKV{Th@-Fqg@XENRWY%$<}sBr8H4o*^Y;!1BhxL`pFQ z )xA45p{5!&Ph8iU)DV>t&bIUj6$6x5UnbR`z&t9Tt<&Ck*Vex0W9`xs)N!bJ*vQsen$2cC&PD5~<>70dEt;%XU|^ zpP@b`U&LW5q^K1(WJ-MRwrQnoPLxDpYe=GOT2(S4YFxyiC`tbREO8N9G%}3)zhOsn z{@)p!o};oDmu;ZgpT#g`;e2psn1(x5Wp7r*AV`?uLO_3SAQ&YF3x|}XCCj z7G4;I2HJZJBKH!s)l@AS=%O$X%U%lXrLJ4D_EKE0%du~=d^v$u{ucuIZ1^ESk^}H0 zAJqNPw&e+?&rx^t!Y);#82BClQ|lS-+xIIPYAF<7pW; zb4q(|j@M+LLwBc`&_x`qV0vQGY_u_aG1I~}JsI84Er~@2iN)y(n7W@?HXfhyPMtTO z#r`OEdm#3pF6s8Yz)V)jmKf2k8GC1$OuaC=LNa;kl<@O5y)!T8gJA6?tnl4S?9bCJ zAk9?A-dGD9jh8G<)xncI?u9PtxwhexUr5 zTu+0drR$aJtyfvmzJ3NtkT>AYFbnsnx8T@i)s!$f5j{%S3xqB}lx}TD#g_vc`Cov&|9=33Sc=Mh z)&Oo%irA?C0_zhG)<3v!akPyuq3FI6IRb3>{{nUDKS5!abuY$~-?COfN~jc*iKFJx z!u>9%P{q>o!oNm&L?F zN&c9-<|hAhTY02a`1;KOqMHLI{E`}qeS#lfQn&iM5Q9g=I;Qxbf;tQtrv7a7Rvt0kK@Rm1^nvo||f5dJ(p^{>}*q3JPnP`rF?Cp7^f1Weo4wzve4S&h zG0ESlx>bUlua1dxJQmKo;+&A+dqH~2jQ9@gy+oZ9=TlpK=BUquSaMED_zQ8q6z3~P zE~ZvcUpwj>M|~?%-`NU#2WjdDNBt=9ev-)3;`|)sR_d3K`c?fV&hH`hhx${Tzifrw z$82>rsQy;xq|kXd#p*&xUBn@U<{;&0h03sZ<@}Aquvo;J@FkEUw!lY_o_UWk2Kv_oF=wzYU^e}nxx)$baQ-l3vpVCgA?#; z)Ttn9Yc0+FO=#5y9i!VyTsvt+d-3IplPAFr$frBnx>JzmD{M6P*PSH>x3&-HE|ROO z__`HKp9whWmsYyFI6cI{Mdo~+4?^o+l6pbndW(bG%&l~RIDMs0aEUoz_Xp)Y0Dead zkYa_BFc3YY2Z?X6I77r48q~w|aB)Tixj0{s4C+yOv}7D3zOj;LTr7N7h%;WCE5*4= 
zoU0{uf;bb!xkj8x(%#9oE^_o#TTheFbfL|RAZ=2o96eJ4vm8BJzSqhi>E`G;Qel}; zF+y`=QQ|s@pC|43&C%COV7{Xl2>ANqG{CHpUg)oj9lg-ei$GQ#Z|lWEgByg#D7Hi* zyEytrN8jY=o2A?>GUR`^^{ug7sBa6>n|}Llm$aphUIu7)h(8AtNZ)B|92C7T{ePFE zR|IL24maBX4INKflw6v5 za{6Ukzv2|@13`UIzbX-jq|o7@KB8ZX)vxO}#CJ4Szp396-`lo+$I-_f{jQ@=IQl(D zgG?U;xw<+fpg(l0HIrJp|)V4n%qKMw-LSQ&2^3tu?;OGkg@=&z-n-=O|) zZT+32zjyQxj{ecsKVf8JzMwEHid$)Hisoo+ini0g+8XmDSN|@~AL9Hex&9K8oU!%U zAb0W0{q1N_=e!_u!PXapQ1#iiz}|WzUB_A)U2R#hsOIOOiv8OP*j9{f*{gn5<=Kf; z_*wOfsd5ptv8`C3V_QMn3fYzmP__kQ<1>+kf2xTppqXj3SvOPFij+GGLrac}+;vXv zVO|{RdqM3=;GvN+@fv0polI7Op2@eG2NU#pcFfwc`n8aNF9UXCTk(+@iTYk0+;3Y6 zm_4>t*4+7u#s>PU9BUf3vZ|Pb3c8wU_=3)eJe{N;V*V~NsFH4>JDuQnT|N#+;b_oR z&+!FUJugnYnB>+}wOsy`KXds#e&6L&{Dqhqz{X%P-yj$DaoheT7~8&GSzl2B=EtS` zUpx()U{*P+yveVkXLqZ>l$1AV()dZ^XQYlBQ&co%X6o4SsYSCUO>)%|*p1@8!)&n< znCn_esupPG!s7XYYgMw6K`z%y;eD=E8Lf~rk?O8hmG^;|u2l{3Vhii4#{52r>{``T zsyTXGcQg-F)iD{V3a2g+eijv2xEnIZs^MBSEi~zIe!{kDyH*`5T>{(0s-njbtFF|O zVU>6J44)OV*lsTG9WE{@Uel8CC}0 z^7k&oUfvaZc~|V^UH+Z_Fn2Xnwanh}RwYxrx=wGEV>NcI9IJ_|76|n7g1|*ZuGQ3n znY2;?*J^IT=-O7Us}j-SRyhUQx3pTBqeHl2vDMmY<63R4c4BCr;#%!N8Y@?&x>la5 zBTj}45zzQ13x?TE1VCHW+_gFaQTST9RwpzFhTZL4tFtuYCaa4CJGxd^G3Bm~I0WHq z!xwC;n~Ms&yH*d0M*i-$)zeiQEbM6SwR*W$Z=rKnbc)r-DsZj7;`9@zzc>xyi1l{V zZhIv1sj-rIfO5>pYB7V%u2em(a-j@!V-3XSLu=O>WDUkf&-59tHH5!5-O_Yk+)!y> zA70>E!!UkVrRmvm!&NGB<@FZ)MwsLpx+rF(HHvA9xmZJ|n4+3GF7k8@ozHq2X2Y9> zxB=G?Rxxp-Vf>)+Ke(2j4bg6ld9#+DnX;4j;mFjpP3bzmXv*aA`BTQB;v8#i zWK?aP%+5HW0v;Q z6Rm5^ss5I<63jA>!h3J z+1B;0HQxgHimipFeNDTwIhdjEsk_Kp3>p^X+tv-PwZys+^v`y!o2;8%^_1G|TDO2! 
zXvMAOmQ39*rnPk&uE&}eI|P!=nN0ma;&!z|rsnOgwNz&DGX6p={*z@$feEd$?v$Cm z+*Lc(E`fTNxtOJs^-HRQshq7d^$IH-eE`9_+qG7L8dL7oDT%JN$}%qY`X{>9J=SVe z8U8paIdU{x$0aItKJrOBU7z&=s%{c;_2kH+T%E%Fh`BFM*Ufyy<_Bt4h;`0WS)8vw zdXDPgrUrK8u-Q_c#o|WsV>l}Zt1&<3>AOd7QHTdlIMCm3_baF_OC+pl=2j{ZVcC@sQ z&~{{fnx8TAhv;!h&gYWBWOLz|N0tDFcfQ!EX7=mYAioGC~y$=ax&J&LEc6%g(XM_2!yMmt=8!z&d zi$@eJPJVjvztfAnbgWyFp7)QUqrD`oW+ti6zmnu^ioG=sr7D>veRRe@iekw6&h0}wn-)zr__TK!@s7U~AA z%A?q1oP?w`*U7jlIiquk9qE0%Qy6dXmY?l+ImK3KfQ+JR~&#wZ+ z#AG2)RfX>oXA6M1Rd9c#B<)(H39TxSb)I1U3|x=O6-$1!hEJ(#7)kdI!jytgV?G$9 z3$)0h^#|)*@Z|7ijfZU|Kc&(T2X)>d`Q(ZM>;PDgBAl_3Bsng&C3 zM(s?M^-m~xo+rB$N<9xsec|l6A)l#R0NcPE9HP7a2L=iMz@VX5wvlNv)I$~vQE5+k z$a>3gk8NLxzrKdRL-*m4s@JMF`I|nZmJpSjz&B*TPO$ z^@UHN&hn}ssN+vxFwSVqB*ggRwm<5Zmz};DtFNzCQoOBS+#{AIy!XNiPd=F&#_5g4 zgE4pmnC%195H%E4RHD9Wm`C=?-l!VwNpKTON7Y0RflFhTNMbN{J?IHyVOZH#!K+ccHRo*?661wvE>fi)Bz-5UsMjK`s05 z$D`H9$rDq;VS(_AYP2^_*2xuELD(A|BCPqjt}V?~BCI8o?Oc$jh``ex!Wc=%=&jK3 z9Ik`{J7`cocA8TnW3SZbSYJog%$TcnBeUviJ)n3T>R^bqP1P0fDOgB>x1v`x&X8*Q zk`yOlDH0AYC-@Jk@xpTX3CdMhdL<_D0z7newe%XEgY!h<(h=>aO^A{)gW{aOso!~s z+IR>{MstnY$~i}4J!UMlEjWYce=^SE@~D!Hf#_^IXYQDwi#^FNVWLhhH_2LFD*08x zvFBt}WO65B9flO&%vMukV$M>^r8ztmz@XSg?whFFa)s#_8S>85aJ)z(O}`!nF-Odg z*XUXysCv<^tYWH8(hs-H#L7a}W>VHxs%~wkI@Z(F*LsHfTd+K{o}(q!Zo0+VLo2Mk zr3h80u~?igOwv^w&cgET51@(^a~>ip+I)^Mp*O&8p&k&717=VyODpX;SwG}8^Ymn$ zoW9(8&4VhIv3>D8-nHl!3+Ji@(E*f=%NR0ATNLS9$zXmpeJ1xef5~iH&OPGCA^$&wov8@*3f40@4i!iJ67*Pz~9lI25tHl?EkqC>jE~Us- z4KFTmDGsU`fZqV%_H_N_sLP@J{{uWI`&Rzw;?E=bGq4}n7_$|&&5fu`pfM!`a6K{5 zl)49+(SSg6ni*(K*9Y3r!a!TPFVK!23ACpt0v*iqnL3ScGTUbAMxmR%8Js|0t6R*E zGx4a#&5%EQg4Lwp1<;p|$_Vh}j}7p&1*y~H&XR*~xe~Dn_mk9Z>UPt4mY$O1>zq(= zO8Hc#TB?@GRHICFhsdQR+z*$^6*dRVm$M*-V1vNaoUMCS7XXqu!7HXbpA^HTD%m!O zESs(8anrjo5rXs}U7=RO7ot{lT&==1k(bf-Dx>ZphsMxmwHnGnEUs&Prq-zYOxroS zcClasu$<&!1M^=Fsr%VP7d2T*{OoHXwjmd<;`@M*pXcpUr-NAuYgsg3 zSFTVph}uR6kplufW46x6aUO83<-y-!-k7fkHj*_?XjD0xH3UjWFdQze?0(FBpcc!P zwHdeo;}zT1=FJ7VoOyHsHY>1yEuq#Ldu%4YSho#fS17s^tTKa&bxYqGB=7^J1b(En zz)xmqqy 
zQIk?fO!XUd&Fq)dy*Mo2M4##@5HG9a3p@CK9-3!tJn`5w4aGXG=my=tRStIzQV+T0 zo*|dqGw5}3-UwWb{*%yAao!Z?El13EGw7XII<{(wPUoW?c$JIAlA)`RNTbYWM0yO``Ey>^KS4a0Co zEN$zSn$5ueMeOi;uEFlAadB_=5jiO7WWHLXD<>qm+(0a3u9j=wjqtP?tXaxIB5VwU zT5c*e+~D0|_l#%y)6J5zrQCC`DA%Yb;PoxCodLt;eh-c!;k@DU>O2KyqVrLGMM}=UJTI!dJL=WO(`sOEe0pGi7 zqpn!?^5ln5aJ{L!Q5TiHROVrn5lU~^s7Kq60?FY(vdkvkCyL|-z`Jr29x&WsUfHBm zoyTV2z7@k5^~X)RJ#RGipVXarlbP|Po~WP1#pjbx>UCwGLJq&#n*nh!9(rBz6xu0w z>zKBgkDt;V@DRc}Q**N(8j$;8&zQxVbuRBP`w(Nxg|J>ouC+yv)Od2n+_*)r(a$5M z##TL=cbXfv>M8jAx)tfTn`H)U(^uKMQ3_pVUfQNd@LrR;T~|$e(W6*-q(mu($C6%} zLHlNS7k^~5johx=@Bwr0cJRWNDIYl*mP)Uhueal@|B%;pht2S(b&7YH=C6e{s+$E* z>+!f^bLwe*TzN)82iImdOlk>E+0xuuRW&px*Q;`-;lRt^7B~=uevgeSOiU#cbFxg# zLeDBCB-2uyM2E{c@+M&+%^*81gwhJ*mO|KFBgQvJ5u21$o6{t^ zhB!4PSFIqXS}`t-Byfs2>9#y(*OxP3;4YbPVWc`|OH92W*9Y5x(-3y*-b;3MIOxb@ zc6H=2yE@!7h$8`c&d$MGcHA=hkX<%6bL1_%>VnZ^Zj1LsrK0vW?UHaVurKDk5T+J{ z?~V|;xRW@Y9q!_A*H~of7Q|fYZgUT@#px+6s%3M&!@V5t?Z|s}ExABI^%X+&gHc)# zhYzslD3oW)apz+)4-(&Cn0!b>hKe&R#KWb+5t4k(NNMUQaYjo#cE!g8X(*2stZGSx zxKNWMkJ4S?@c1B&VqAR)hG*vlV>I__o-9t0I8$uz<$1IB z8C@e%`FUL{P-TS8)A9bi_X^=mbd48mhaO-$WYQpIn?471FtT=!R+?u;*1e>^(!trF z5@-ktL~t@*!_B+ag@U>ihr)QJj#M5JVViPY^tC#P=^{_o72E!Ub&FjXvM+J@M!w0G zv+A2+BQHm!Nhp7dyiRwUEho9RbFs@yd6{=m4W>OuPRL_SMKXY*b1_T37+I0u{A zqPKBAwUSo}7e!7yDd*BWUhUFN-ZS;`e%$^3({b|9UHIs!6!Z7ndPLlVyw1h0)OyMN zu)hv0eMjfSJR-4=n*HzSewfv-cvd@hvL&KBN^N9yQ9!F&FHHLBrlT>MXq7QEdzkGD&)&&Bt&tTkO? 
zpK)P`9I^RXkJ%&77xbmbiE~|k9t15hJPx|N6Ak2Dfa;xSA9+`|(=YID6L&%n3GI;| z7TD|Zi~N$Ac0$*S+sFG|ep#GXcz<|rvks9TPG}S2Ns${*>3(cIYQygSgD-S7TYNS> zw(3h=?NV&rXzWFZUjdsQv}ql#2S0?Bp{#e=Q-LSPVz4}6uKrRFuD%hXll-d*V~{!E zXR^<@1b1-deW}H;`2XZf-B%-%Y5bLLX+MQ+I#~Og8DHt5`de{fSN=7^5-5y02C|C0 z6{c`YRi4~a=kSg)MXJFoj5+>meRS~ic*IWjdTtlneJKpz3-B=v%u-?fE>Bchi>bAM!~W9zCAT{j1PejfmVE4QD09d@}Ls_`6c?iB7;CLAAm92;CaWVL5 zkSraodJQ(H8nrworo5<;(3+#ji_KNgFof_x8;Qm3~%7`8KqB=k7LtmJ_ySB&{HwMK?_NpPljf@L^?(YW!bnrHR3(8T0|4XQ9Q|H*Xi$n+sf18|C*{Db{^!!PUr=1g6V)Ze^7CORjZsScO8;5qZ~vOhe{ssSpl?QRDi2gV%~F!(0MRZl~Ezv;n{ua{T0 zrkhXB=-kNrzv~?TcU#iCtQhwN%73BFlpAz&GxcX3XGWe8C|>>u^3U=?Ep2!{%M3rS zaYcDmytOGZ`hxyjc{hpA1}Z)J5rBN;Be99CXz{I)Ny@74*EDX-^~D3Q2{6EtUx;bA zYLHXTd^FgxgC(yuQHGgvMo%@nD_ZR%8)K|T{O5;foJ>gi5x{@+f%h5W*q3fIZwXqb z{cL-xZ6CS;**19D;+y^R!Ra0G*5lg715_P4t=VR_B;p2+*S@Wh=gV1*dkB{KT73V(OX=|~|5-x>)cwLI)bpKdb~W>>TjOJMprqpzghD3m ztR7;Xye1~jeE18Fk|(5E=e_><PaR0^b4(q-; zZf+lq>q|M2??ziD=nb2jcGv3k2llG}Fn3;K4eOReQ)4NY^6*nC)FFsT+)CKl0CA?}Tx((E+9GQg zmoKzoF;2nGbHA zjlrHjjdOAFf4mF3)vIhOgz5-QlwdwQQA)rl*2QcsGFx9!^-MyCKy_1YnspY>x{Vob z?TNfL-I}hlYG1^}hB{fp#^yJ6qLd9;9ETaQO5g+*hbWGz<$cNSv(9-Z$36Kil_aj&i&Z(76t?OXR zPk#JDOfMNLt%AjiG^0HP5TALKL0?q@szdSpk0;}PnpB@pU;vyjKhL(tU~Kfi)_OJ; zS3lfE)@biYVag(_g1Kdp^|&`#s_%{c&9q!>!Mr|l|6=P4|3&u$4GteMw=J_KA@@iSKI7>G3=?z7^Gb7aG*0Zp|Dx5=U0%f>Tqlh34C zHqEx_T6t@3jze?hd!0D*aED3K=R34OzSwNCX`zhuMIzbVAOLVXyguD1sW(aRW~t$p zAQqk5#JOEkmx^y$a2TfZok5gaE*b9Nv_>G_E6#nA ze82eChOlxyD9$>`@{lB~#|mcCqmh04!IJqgv*%CiC6o24H6aeskK1@1%`=2E>t404 zWFt?mR0~f%S>FseWPM>F%aexBSarQO5NI)RL>CDY`jrHHQvceTugc zBzTyZ;htgYn3>qZfvgG*zLg83GW?3nv$Vs-2z}0`=Uov_T?jUCyay3>(;gQlN_}1G zBphR-*z}_a-iej1C%pEypI!#W@+Qjx7xE&~Faul+{-!YG!;jJ6YvadfAUGZgj~SO_ zo;hMQXts@BbMb@;GQUAbT?hk!_7*`q!P6ROoQsDwkan1kyLf`}glHe~TA;~G4fHl= z(gOX>$iJFwy4U3J2eXB1OUSSw;Z9^0*L)1_S#WD4y0)VcymwLY_^ z#p7-8RX9(xz_SN_ICr$Qi57qxT^1g7A0%0$WpZY}3MivsCcIwuQ#(vMy;!{P(4 zd2{;EYYSgA4HE)_+scH%c7F+7y(o2+DJUPf7O@A*2fp^7)q1y}{Ymp|a$pEz6H@~H 
z<2(qr7amJClhXrYP=(*V80hC+yMD@zrhax{G-B_}4vebnP2MhPHx3`&o=E=k1mhN_ z#Z``LmL=ez28(@RU~GFY zPnicE4`34s%VZ^!`edM5r1cX4Yy*kR@lE1=)^adf`5Wc~TfQ+MTb|+lfgzE~n*uHU zX1|?zTVFGLb3jF#*7zu@&ioQZ7r_|wbV17m&r zvMWD&Wt?g7T%ZUr);<^b&L>BLd3&q5Yq!Mi*b}G{@bLO&ed4lz;RV1cX8933-ya1G z3|IKbeNfnTqp5c=Al&z^gMqdFkNQ2Cb>g-^NtE;rqyG1 zm&hAO1E2XnO;M}O{0r+K(X6w~@@uT-rpk#xl5vlFjYv5jSZ{fl4Xg9e+-g|D-Trf-G#ZGK0bW48Q0Up9W9k5BM>j{KTl_W!HvIsmIEw)XCsWM*=+At4DQln@e% zbO=Zh5JC$*geFRnCLJLlAP5qof+C#&LzQ9#p9lhqR|OPB1r;fR4}F3H4^S)=QBion z|DC;;d)>VEUzo|>oik_7&dko8Io1AD+utzwNoAkX9GR~kBdlb9r#UWPCHqXYwJ#5W z{_L|FvH3u9AU5B5`<${bX#0Dg)jSxUuO%Y$SuO3KIQyA<>K7*el8;Re_zDkv*@pO~ zvHdGwUE>UPHLyV7E11+=w*TY|zJ~VMH?)0I+ptW#Y~O;t-er#0caG!ror4!`F#oXp z!f5F-K|K=D`V>V*Tm1+^m`bEHhw3{=QGCb8SJ4dP@Kp>~DduAZMZ~lNr;y-tXa+y< zIfCP%eU!);N@yzSqa;e^+od#>hB^W4120oqAAEgojW@0xN*!_AuRhs`uK)6|^*W-`4k_zJu4gVah>YfWu5 zVds4ibRuhuy#uxL*}2r-M;)jmrVn-Eg3g>}bMsw%9NP}P!qk+rf8Ynr>&BS7^JNdt z!0c^IVD_G)UQGUjih676%gFj^0)KbA{ex`&%oAtYZcUu(qy98NHrc;=y|cJw1x*7r z4bn7N(Iea|VDO%!p^ApVOC$|fG=kBOF|oNZ`&3MkKToTy2XgR6wl?4!ud6*|%OLvv-P2 zMBqw9OC&#SW2aJGx^-^R1D0m=b|9F{!=>e=zVFa3hj!DRprkU#?uy=bAbba<@6ld| z-lq>7+Q(l%q>nHr4t>mD_A^Nb=o1HG?zbKKjP|0f8c4MqI>=?8(;@S68CgF1Fde~Y zI&>72v0Y08=rina=nE!0qJ4H&7sp<2Z*b@s?lLRiq2u(W>v_+if6@s|0EfP!Z5$o3 zRk^Pjh~v!AzqpZa7>V6j(Mg9+(YGeQtjw(U9TRjKvQ#=lXC3-C1D-?I(s_msI@dzA zCJtSo?;ZL9$bTygiiu? ze8NTpf_R8vgJ(e!Y{A?I6r-1g5D|`u6jBL=$-*PCQD_J1PWg`TiD*YS0``mrXkK7? 
z)c|$&3@(esgNrzP6~#q-K$t;iM1m5Di0CnP{4n!KIoU^SgaN?^-)+3 z{XdqMS&8->e@1=T+7@H1GG=lEnP%2BkY&xD1~S?IbpyE!b`$>GM)EQEO2_W@LSuQ# zzrTrGN>CN-*GvvcfPOK!3au>2z^#gQ8~7#6@_*e-{xi}(;NR9-uCpNz>6#_0rb5Cp(p^rV77i>p*?D|1yI7@HU=v%akyE?~}SoOW_riDzsmgM(jC-)?L-!C534s$(2)@Uv+QaPO;C5^X!NI|oJ=$iU53hj4Ii^fUnOb%l1Q_N2wA6o~2w+!( z+6vx4)V~D|#~S|w9p!ztm}I4x#hv8+=2$2BpuND<=qyw60+u{J2yqu%7tu_NT?UMs ziy$J-P6qqtA{Z^d_mTUHTRjm-9JY3vtG`!2F9)?0XT7Fc(8VE+Wd>4xQybMOAwCe)-$Z9vE6q9o=pGvQv?X;az-yL5d(6j%l$n zGJDhJ92qzGD)uula1Ytchr{S2T$Rka7vm* z10vE*ou#rG9MAJXZMfuP-O}OmJLw*WpMGJ{kqJW(^Umatm94NbKddO-XD1U}GhpAv zStUi4eNgDVhri(Wv?~9t##wdFYH(JQvs#?h<_ue%NXp==x*B^g{Jod6`kXc3tRZKO z6!u?MGG#W zurbAUt0`aiS2O^7alRa=Xb?8xG+5IkiiT(!s7tF6@`<1G=-k<(NvnoU!T;JH=SwE(|_AaaouMS z*4GDx{C4b(+>k(*}BxvqGQkaAyyK*(q(*w28r9 z=I{Sd^oq}#MVt9aURAUO{5RUlUtZ&@Z49%WvmM~Z(d$e_2gdk@rk$GJ)Z}N_ytjBH z-i{({M>N3+0&IyD?eSSF=^oB{`RHAGPt#rmKBxB;eW2+>sJsFdW)Oz+BTXOkB{qHs z6n&!TQ%#@wtbbt7i|in0pL2GI=|0Smz_^@_aCS6`zR06vJSN9+NneJ2Is+I7y-jpN zv*VW}`dY*P0RF;fLK2%!^;E;&TG2Bf4|rC0|Q)C^doPl zF<+B{K41>hFA6?jth}Rqaaqw7A6=zixz;tFXut8--xdAAljXXmKQ;ZO>4v78+-uOS zbKjp0bW6d#2o!reXu6|?rP&|MSwR}7QaFUcWrP-ynw`O%g)zYuSKx8;wYNZtMm&Lo zM4Rl^m}8EZ$~`qrJn4w(VoXTG=c^^M5!7soFO{9;3`gXH8V&+q36J`eBc8^<*?Dk9 zES?cF9WhHh>xk#jx|l6+oX!YP_v=#mVB#Dx*Aer?d?gk*Vj)z{ExV3;@PI#(#r&u^ z3tNs@g4hPknfvX41p zwOHeb7jR{NW1n=yTCvU~E|*PlE+}id^Tm3x!7N@bU&ZFB`3l*)M4^2e#Po5*OJXA~ zFnXIrbF+1YYy|~X}q;yskL&T&KnlwQSN@jez4M|@x|uadiCf#06yh<)Ni9)gd| zJF8{Cn2*JNN5JV0Ps&eC#Wiwpd`IyaPOah~4;jYtb8*POVvVd?%HHSS_onPt!Zbc8 zOSg>TE%fiohDO|9K&)VM#U?Z``s15?;|6O&!C=t+&TFB9355%2vFY?}|G0y)Z$zIv zHukyjlw1Z{A+@oeu535A-?CE>hT$)SuINN?Z(;3*6&B&1&pH6^kw6XjL5C$?1f2gn zb^;}mL}?7YXAK7d!ysRN5&H>ksvMPVx<=WrV^cmG%mE*OATonNL$o~v+wRdI3RX?< z?H&Woxks(yK?G}n05cv@d)8o6J{c^4C~|5UbW!TWdQs4QI^G3D)h&?G^^TAD}Gy!3^fR3{OS!>T1|R08zj zRBENL#WMEjtS$DITLif@Pj=XuYL;1T*i%D5@QA=tIj~MHCb&d=ZYn2?r1RfZFCE# zi~xdlo26scG(-??wBJzeigVjeT?3a@gl4n3tG9!Y+#Z)y5 ztLa!1Hwnj@ay?)wP}iT+MLiXe?%YV3JJS5CyNjT54^=myb!+>; 
z-2ScKHt3g>`p_RUTL-JAK=adJwYQRe-Ml(mO)BHE2p};o4(+&fXy;Xgr{CuA_1oTB z;cxN0O0l7&-G7eioD>MnvJ)@uo3M}bL;K(@m|*QPdGl1N*)>X2w@ z#hIP+Rek@Z`D#1aOU=4PDk#hZD)-G}ZDKT=g*?jH7-fysRxSi7);Mi}OI+1@Oj{GQHBnoWGz;+Kt;vc7_{FTz zoI#3T%$llLiXU%1$?!PT#Iqbf-kQp|kyT}hepM@9TTk)dr=wYSGuoPEWj@Q{9A|U( zyt3wKYi=}7`}1IGpshuWc(Jx{n5kfmz?*j{B8uT-Bz zOhdUDS6inq+fEVwnXA+-J8p^_`)Up-R@NHSF$>nHJo^O``-1A_TZ^T*se^ZVy~%w+ zEzlb{Mp;{{O?WqA8mv_bH3}ULj|<=mjCVYuaYTCm?1}JOD9W1Aj${MPyK{ixy6jX2sx0N;QFO&@D#R`ED2ue77HOfMG zLUcc&&z`Y*N?yI@Lr<@W^xR+i-OtSBO{xRt3WSs&1mu$yR#*Hd3}vj6%DkUN>1JIc z5oeliQAvR6u|*{-_tieyHKEy6yqlaFxUY7>m4?O5wl~~2``tIxwZMyc`p)EB^Y+`S z9ct4%>RQ09=TA9Xgt_&xY76mboBhh~qKv#K-Vor=aSEU`nUz(9IkFH+W0?nB#MuW_ zqz)`u$vZBYK1Wm=AYOk&JrY?>gfVYjrlxQr}!_$ z>QVvErd?|6zHMSl>Xt##?TC{4EBA3DzJB_IX`iH7O>S+HUJw|oJ2RIT8*Hnh%w)Tn|JJ?wB@!#G%I2kpda1uzBYhd+hfJ-;dYawLY;LS4 zrn)w?@G2V%)#{pH57vjL;6`xD-v*k)?W`yLt(xe~LPR5a<<(}oQH&1~eY@@I(rn~E z$~0)M>$~izG0ksy#;4bKwu9J+?`^| zWP-`f(A6?=vj^WyHzRPfJS!2zOsf9#uRzZ!1L^LkFn=*aRxUVV$Cv|mq2K+m|Jj;Q zH{gVjW1&xB{xNc)>qHhSNEOo5Y7xXgddc{@O)&8}wGP1}1W*10l4~Fiv##M%a@&+y z_gzcN;#AuMc`YVLNu;^nLI-|aua`Kw!xV3+OHyiAw{&m3D(K%8?$KHXTfS4L!}puZ zSr#YnlRebaZFyTuUCNyXC!72{(i~-6-6x==sj1XTC%7fe?=3#kWVS-d@rhhg@ZO0i zv!<1fcPZYLvt?gClw>%y_fY&{o(p|@W|Z-_3{u=Pi*A`(t%2*+*=}j(K4oUN4&oYp z`oyBT(TK*5B;Q1fe?^3bw^QeNY z`=!})x0gq(8=k%!csN1(CG;uG&pL`4O-y#%;1ljH-mgPHXt!}|V z`N`5|Z`Qc+~+*H;j_W!5|<3T%_RkEZ!6QNeUO&eCEwqh0_>c; zEqpIb`3}08ncrR~x+C+3st{imV4S?5;5WC|&0V0Hn`cEJcsvJ?hbC6|Q<%Toq;}Bv zr*YT6`TK#T6Y!dG@=o7!+Pg$@=tM+qvN*9~>D^CZezDB~LfqJCMrZ4Iw}Ksts#tWToBpO- zp>36wlH3n)k}UKT<@OVEMBg zeXLv;ls}XbESF%1O}ty~#vxn4<#S+Q%JmnvhYO4EQ0~qzj!3o~#|utaOhcc-{AMOD zM|X9B!U4)SEPD7;nBTz6$kF}XcPkW_EEB)}VHiBwir(CEQxn?<;gRFI2j7%XSh311 zISPLY^FLq3@HIF_P)zlrV;**F&JpCppThi3=4e;l*QJF$oUb9v;?yM!16rCLaB+i= zmui-G3obY8`us7+vN*N&fVcZB0KPXhu?YF&;TSy;A|Fnfo;oRC1<21iNwMbf?!l#p zU8bK$Cv$4$fnV@#0Dj?VoUku;57HENre1WB|MFZ2e<^mY9vH}wFLn4+_b!=Fm_s7jM79sOquZM}nDtcXqJV$XUR0@>1IQw-9`cNrVlD`<}2BiK*QS 
z^L+82e=CO_i8Awh1*aU_kGuqgoV-N*>p!7Nj`<5nU7|x)CI1C~#xmm`)-_xcl2H!^ z!3T%zONyc|9~lOR#Yq5r$gJeQu>UhE3=V6P1h<<*<|Y3HUfJY7q`SKm!?tAqX#jBY zK4I;67pE!TTUT~_jEzmq-rnf3aV5jez0^C{p=@gsD1NhaxHzd#ka)H*fl>#|+lx+( zo8`jb>u!ozv%F7mIR7r`}ASMlcEhQs22d8u@<_a?fV4@3R{{KeaBwy!SJow@I;yK z(6l=Q9%~cN12i>P`vqSIwlsmz&*Id{0~Bd;AJ%ecBDf|Bhx?;{w&n-Q%Wj56%z~#Pq`41*6c;I5EPROH zH%KOiqz)BkYM{BJ#!CyjS~U*<|pC(`>jDi_>j_h9CUI$@O94w;MX>ej5SRM z2jRoo=0*GCm#YEz>o;Kc6oU8YoqOzHrHJISf52Yz*I06sGT zTF4>#S^NA#gk^Dh!~?%OAp{>|UPz2gH>ZXK2bGo6yW-vAl;x>Zu~ev1wA-of3gD`( zUmb#(@^$54`Ox6v;OefA8H%En)k8(`=J3$qAbYgfJxvW-HcaQZW9(62_rRnrF;|D_ z`lfuEEX}GIfr-N^>=2G|NUkEkHqqwFFg?&k>QQ9(Absx@Gj6yZ;X<(%J5;?m_DzxL zePAm?Wt+V>@nUxw5gdMxe!BqywmG#;$Vn8_c@j$J5iyx@7Ik*dI>kHqG zl;+Uz;Bw`vz2}WYPu|)S0Q!Y>+b@j_Hto@X_aI%kF97fA!XFqOe7#*A_(h|DZ^eEt z?__=-6)a*!coRDsMK2!aL96F#!PgB~2^QT)g|*;|u0WN(3{-aY;!lna(&^EM_t2T~ zuK+xB(_QuWz=7AJ5$}P2?{o2Y$}k0DPURkyTCZm>@l@F8?_g z1)Rou;4hvJ!KayTE<~o7MuUQHNLZb|=(5o2Qi!Njv!Qdak+4F2(M{fF%VgwYHx2Fj zV(uI0{y=!NjXXqkHg$4!Z+9AbG>beyzOIb4ZP~1WgC~p~KjnW8oQWBSU&SuN%++x^ z&2$_WoMxE zIB|UNw>B(EDSCRm?|cAs?T?VMd^0}Sm0`h2QTVx+1MsyT(`8*MSVW@YA%8=v>7n(b z8v)P(10y2M!pDNYkSsj8iZ`8;_rR7EW9%uqqRHu_63nd_$azBSDUWQ$)5xIWp-R=v z;mJYzS-`Rt>>*B#J@C7d0`N0>;TK|EvXW-o#Neo~{KedvfKhoL3{|G2$wXboU3^`M z%fk~<^hw)5(UGx{apskY!S=$km!gNt{@E`4vPnAKopdaEsROnWCoiL|zG*Q@w{@Fy zB{2&pp}Adsxq3CTcYrEpPEQIhz9E?mCWn{L5MhcxuJ3mTCM?AO^U2c(k1h`&ty34i zzE(dTd(xNz)74a@xqpj_4Zq+XuDFN7 zH(w7`(#$27=H<@8cY|5vS8?>?_PCegh`@ApG8O^7iN&Ea8ka`-$LN5$3EH z9p)}a1EAowr@L4^oODkwoX1+qSet*Z5>1tsow>UAyvpEp(4c2Rpc z>>kvgT@Rs7Fo*ucQ$|k - + From d3c9910af6606d67a59cec09a004d6d42f7d3a6c Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Wed, 8 May 2013 17:25:14 -0400 Subject: [PATCH 020/116] Cosmetic changes (comments and variable names) to GenotypeConcordance and ConcordanceMetrics to address reviewer comments. 
--- .../sting/gatk/walkers/variantutils/ConcordanceMetrics.java | 2 +- .../gatk/walkers/variantutils/GenotypeConcordance.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index 005acf27b..848261d73 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -168,7 +168,7 @@ public class ConcordanceMetrics { total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()]; total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()]; total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()]; - // NRD is by definition incorrec/total = 1.0-correct/total + // OGC is by definition correct/total // note: if there are no observations (so the ratio is NaN), set this to 100% return total == 0 ? 
1.0 : ( (double) correct)/( (double) total); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index dd9e822c8..10397d718 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -272,7 +272,7 @@ public class GenotypeConcordance extends RodWalker nrdEntry : metrics.getPerSampleNRD().entrySet() ) { concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue()); } - for ( Map.Entry nrdEntry : metrics.getPerSampleOGC().entrySet() ) { - concordanceSummary.set(nrdEntry.getKey(),"Overall_Genotype_Concordance",nrdEntry.getValue()); + for ( Map.Entry ogcEntry : metrics.getPerSampleOGC().entrySet() ) { + concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue()); } concordanceSummary.set("ALL_NRS_NRD","Sample","ALL"); concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS()); From fa8a47ceef0840fa23966f573c1adf63fdb33e0d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 18 Apr 2013 08:17:15 -0400 Subject: [PATCH 021/116] Replace DeBruijnAssembler with ReadThreadingAssembler Problem ------- The DeBruijn assembler was too slow. The cause of the slowness was the need to construct many kmer graphs (from max read length in the interval to 11 kmer, in increments of 6 bp). 
This need to build many kmer graphs was because the assembler (1) needed long kmers to assemble through regions where a shorter kmer was non-unique in the reference, as we couldn't split cycles in the reference (2) shorter kmers were needed to be sensitive to differences from the reference near the edge of reads, which would be lost often when there was a chain of kmers of longer length that started before and after the variant. Solution -------- The read threading assembler uses a fixed kmer, in this implementation by default two graphs with 10 and 25 kmers. The algorithm operates as follows: identify all non-unique kmers of size K among all reads and the reference for each sequence (ref and read): find a unique starting position of the sequence in the graph by matching to a unique kmer, or starting a new source node if none exists for each base in the sequence from the starting vertex kmer: look at the existing outgoing nodes of current vertex V. If the base in sequence matches the suffix of outgoing vertex N, read the sequence to N, and continue If no matching next vertex exists, find a unique vertex with kmer K. If one exists, merge the sequence into this vertex, and continue If a merge vertex cannot be found, create a new vertex (note this vertex may have a kmer identical to another in the graph, if it is not unique) and thread the sequence to this vertex, and continue This algorithm has a key property: it can robustly use a very short kmer without introducing cycles, as we will create paths through the graph through regions that aren't unique w.r.t. the sequence at the given kmer size. This allows us to assemble well with even very short kmers.
This commit includes many critical changes to the haplotype caller to make it fast, sensitive, and accurate on deep and shallow WGS and exomes, the key changes are highlighted below: -- The ReadThreading assembler keeps track of the maximum edge multiplicity per sample in the graph, so that we prune per sample, not across all samples. This change is essential to operate effectively when there are many deep samples (e.g., 100 exomes) -- A new pruning algorithm that will only prune linear paths where the maximum edge weight among all edges in the path is < pruningFactor. This makes pruning more robust when you have a long chain of bases that have high multiplicity at the start but only barely make it back into the main path in the graph. -- We now do a global SmithWaterman to compute the cigar of a Path, instead of the previous bubble-based SmithWaterman optimization. This change is essential for us to get good variants from our paths when the kmer size is small. It also ensures that we produce a cigar from a path that depends only on the sequence of bases in the path, unlike the previous approach which would depend on both the bases and the way the path was decomposed into vertices, which depended on the kmer size we used. -- Removed MergeHeadlessIncomingSources, which was introducing problems in the graphs in some cases, and just isn't the safest operation. Since we build a kmer graph of size 10, this operation is no longer necessary as it required a perfect match of 10 bp to merge anyway. -- The old DebruijnAssembler is still available with a command line option -- The number of paths we take forward from each assembly graph is now capped at a factor per sample, so that we allow 128 paths for a single sample up to 10 x nSamples as necessary. This is an essential change to make the system work well for large numbers of samples.
-- Add a global mismapping parameter to the HC likelihood calculation: The phredScaledGlobalReadMismappingRate reflects the average global mismapping rate of all reads, regardless of their mapping quality. This term affects the probability that a read originated from the reference haplotype, regardless of its edit distance from the reference, in that the read could have originated from the reference haplotype but from another location in the genome. Suppose a read has many mismatches from the reference, say 5, but has a very high mapping quality of 60. Without this parameter, the read would contribute 5 * Q30 evidence in favor of its 5-mismatch haplotype compared to reference, potentially enough to make a call off that single read for all of these events. With this parameter set to Q30, though, the maximum evidence that this (or any) read could contribute against the reference is Q30. -- Controllable via a command line argument, defaulting to Q60 rate. Results from 20:10-11 mb for branch are consistent with the previous behavior, but this does help in cases where you have rare, very divergent haplotypes -- Reduced ActiveRegionExtension from 200 bp to 100 bp, which is a performance win and the large extension is largely unnecessary with the short kmers used with the read threading assembler Infrastructure changes / improvements ------------------------------------- -- Refactored BaseGraph to take a subclass of BaseEdge, so that we can use a MultiSampleEdge in the ReadThreadingAssembler -- Refactored DeBruijnAssembler, moving common functionality into LocalAssemblyEngine, which now more directly manages the subclasses, requiring them to only implement an assemble() method that takes ref and reads and provides a List, which the LocalAssemblyEngine takes forward to compute haplotypes and other downstream operations. 
This allows us to have only a limited amount of code that differentiates the DeBruijn and ReadThreading assemblers -- Refactored active region trimming code into ActiveRegionTrimmer class -- Cleaned up the arguments in HaplotypeCaller, reorganizing them and making arguments @Hidden and @Advanced as appropriate. Renamed several arguments now that the read threading assembler is the default -- LocalAssemblyEngineUnitTest reads in the reference sequence from b37, and assembles with synthetic reads over intervals from 10-11 mb with only the reference sequence as well as artificial snps, deletions, and insertions. -- Misc. updates to Smith Waterman code. Added a generic interface called, not surprisingly, SmithWaterman, making it easier to have alternative implementations. -- Many, many more unit tests throughout the entire assembler, and in random utilities --- .../haplotypecaller/ActiveRegionTrimmer.java | 142 ++++ .../haplotypecaller/DeBruijnAssembler.java | 436 +----------- .../haplotypecaller/HaplotypeCaller.java | 357 ++++++---- .../walkers/haplotypecaller/KMerCounter.java | 18 +- .../gatk/walkers/haplotypecaller/Kmer.java | 8 + .../LikelihoodCalculationEngine.java | 59 +- .../haplotypecaller/LocalAssemblyEngine.java | 406 ++++++++++- .../haplotypecaller/graphs/BaseEdge.java | 61 +- .../haplotypecaller/graphs/BaseGraph.java | 233 ++++--- .../graphs/BaseGraphIterator.java | 6 +- .../haplotypecaller/graphs/BaseVertex.java | 16 + .../graphs/CommonSuffixSplitter.java | 4 +- .../haplotypecaller/graphs/DeBruijnGraph.java | 17 +- .../graphs/DeBruijnVertex.java | 2 +- .../haplotypecaller/graphs/GraphUtils.java | 48 +- .../haplotypecaller/graphs/KBestPaths.java | 28 +- .../graphs/LowWeightChainPruner.java | 170 +++++ .../graphs/MultiSampleEdge.java | 123 ++++ .../walkers/haplotypecaller/graphs/Path.java | 263 +++---- .../haplotypecaller/graphs/SeqGraph.java | 58 +- .../graphs/SharedSequenceMerger.java | 4 +- .../graphs/SharedVertexSequenceSplitter.java | 4 +- 
.../readthreading/MultiDeBruijnVertex.java | 118 ++++ .../readthreading/ReadThreadingAssembler.java | 162 +++++ .../readthreading/ReadThreadingGraph.java | 640 ++++++++++++++++++ .../readthreading/SequenceForKmers.java | 93 +++ .../DeBruijnAssemblerUnitTest.java | 53 -- ...lexAndSymbolicVariantsIntegrationTest.java | 6 +- .../HaplotypeCallerIntegrationTest.java | 18 +- .../KMerCounterCaseFixUnitTest.java | 103 +-- .../LocalAssemblyEngineUnitTest.java | 280 ++++++++ .../graphs/BaseEdgeUnitTest.java | 5 +- .../graphs/BaseGraphUnitTest.java | 2 +- .../graphs/CommonSuffixMergerUnitTest.java | 8 +- .../graphs/CommonSuffixSplitterUnitTest.java | 6 +- .../graphs/GraphUtilsUnitTest.java | 120 ++++ .../graphs/KBestPathsUnitTest.java | 147 ++-- .../graphs/LowWeightChainPrunerUnitTest.java | 163 +++++ .../graphs/MultiSampleEdgeUnitTest.java | 103 +++ .../haplotypecaller/graphs/PathUnitTest.java | 80 +++ .../graphs/SeqGraphUnitTest.java | 13 +- .../SharedVertexSequenceSplitterUnitTest.java | 8 +- .../ReadThreadingAssemblerUnitTest.java | 213 ++++++ .../ReadThreadingGraphUnitTest.java | 191 ++++++ .../SequenceForKmersUnitTest.java | 80 +++ .../traversals/TraverseActiveRegions.java | 2 +- .../org/broadinstitute/sting/utils/Utils.java | 41 ++ .../smithwaterman/SWPairwiseAlignment.java | 27 +- .../utils/smithwaterman/SmithWaterman.java | 56 ++ .../sting/utils/UtilsUnitTest.java | 27 + .../utils/clipping/ReadClipperUnitTest.java | 26 + 51 files changed, 4146 insertions(+), 1108 deletions(-) create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ActiveRegionTrimmer.java create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/MultiDeBruijnVertex.java create 
mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/SequenceForKmers.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtilsUnitTest.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPrunerUnitTest.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdgeUnitTest.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/PathUnitTest.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/SequenceForKmersUnitTest.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/smithwaterman/SmithWaterman.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ActiveRegionTrimmer.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ActiveRegionTrimmer.java new file mode 100644 index 000000000..063e3b218 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ActiveRegionTrimmer.java @@ -0,0 +1,142 @@ +/* +* By downloading the PROGRAM you agree to the following terms of 
use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. 
LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.variant.variantcontext.VariantContext; + +import java.util.LinkedList; +import java.util.List; +import java.util.TreeSet; + +/** + * Trim down an active region based on a set of variants found across the haplotypes within the region + * + * User: depristo + * Date: 4/27/13 + * Time: 2:10 PM + */ +class ActiveRegionTrimmer { + private final static Logger logger = Logger.getLogger(ActiveRegionTrimmer.class); + private final boolean logTrimming; + private final int snpPadding, nonSnpPadding, maxDistanceInExtensionForGenotyping; + private final GenomeLocParser parser; + + /** + * Create a new ActiveRegionTrimmer + * + * @param logTrimming should we log our trimming events? + * @param snpPadding how much bp context should we ensure around snps? + * @param nonSnpPadding how much bp context should we ensure around anything not a snp? 
+ * @param maxDistanceInExtensionForGenotyping the max extent we are will to go into the extended region of the + * origin active region in order to properly genotype events in the + * non-extended active region? + * @param parser a genome loc parser so we can create genome locs + */ + ActiveRegionTrimmer(boolean logTrimming, int snpPadding, int nonSnpPadding, int maxDistanceInExtensionForGenotyping, GenomeLocParser parser) { + if ( snpPadding < 0 ) throw new IllegalArgumentException("snpPadding must be >= 0 but got " + snpPadding); + if ( nonSnpPadding < 0 ) throw new IllegalArgumentException("nonSnpPadding must be >= 0 but got " + nonSnpPadding); + if ( maxDistanceInExtensionForGenotyping < 0 ) throw new IllegalArgumentException("maxDistanceInExtensionForGenotyping must be >= 0 but got " + maxDistanceInExtensionForGenotyping); + if ( parser == null ) throw new IllegalArgumentException("parser cannot be null"); + + this.logTrimming = logTrimming; + this.snpPadding = snpPadding; + this.nonSnpPadding = nonSnpPadding; + this.maxDistanceInExtensionForGenotyping = maxDistanceInExtensionForGenotyping; + this.parser = parser; + } + + /** + * Trim down the active region to a region large enough to properly genotype the events found within the active + * region span, excluding all variants that only occur within its extended span. + * + * This function merely creates the region, but it doesn't populate the reads back into the region. 
+ * + * @param region our full active region + * @param allVariantsWithinExtendedRegion all of the variants found in the entire region, sorted by their start position + * @return a new ActiveRegion trimmed down to just what's needed for genotyping, or null if we couldn't do this successfully + */ + public ActiveRegion trimRegion(final ActiveRegion region, final TreeSet allVariantsWithinExtendedRegion) { + if ( allVariantsWithinExtendedRegion.isEmpty() ) // no variants, so just return the current region + return null; + + final List withinActiveRegion = new LinkedList(); + int pad = snpPadding; + GenomeLoc trimLoc = null; + for ( final VariantContext vc : allVariantsWithinExtendedRegion ) { + final GenomeLoc vcLoc = parser.createGenomeLoc(vc); + if ( region.getLocation().overlapsP(vcLoc) ) { + if ( ! vc.isSNP() ) // if anything isn't a SNP use the bigger padding + pad = nonSnpPadding; + trimLoc = trimLoc == null ? vcLoc : trimLoc.endpointSpan(vcLoc); + withinActiveRegion.add(vc); + } + } + + // we don't actually have anything in the region after removing variants that don't overlap the region's full location + if ( trimLoc == null ) return null; + + final GenomeLoc maxSpan = parser.createPaddedGenomeLoc(region.getLocation(), maxDistanceInExtensionForGenotyping); + final GenomeLoc idealSpan = parser.createPaddedGenomeLoc(trimLoc, pad); + final GenomeLoc finalSpan = maxSpan.intersect(idealSpan); + + final ActiveRegion trimmedRegion = region.trim(finalSpan); + if ( logTrimming ) { + logger.info("events : " + withinActiveRegion); + logger.info("trimLoc : " + trimLoc); + logger.info("pad : " + pad); + logger.info("idealSpan : " + idealSpan); + logger.info("maxSpan : " + maxSpan); + logger.info("finalSpan : " + finalSpan); + logger.info("regionSpan : " + trimmedRegion.getExtendedLoc() + " size is " + trimmedRegion.getExtendedLoc().size()); + } + return trimmedRegion; + } +} diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 5a5946183..48972dfd5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -46,101 +46,53 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; -import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.SeqGraph; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment; -import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.haplotype.Haplotype; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.smithwaterman.SWParameterSet; -import org.broadinstitute.variant.variantcontext.Allele; -import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; -import java.util.*; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; /** - * Created by IntelliJ IDEA. 
+ * DeBruijn assembler for the HaplotypeCaller + * * User: ebanks, rpoplin * Date: Mar 14, 2011 */ - public class DeBruijnAssembler extends LocalAssemblyEngine { private final static Logger logger = Logger.getLogger(DeBruijnAssembler.class); - private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers - // TODO -- this number is very low, and limits our ability to explore low-frequency variants. It should // TODO -- be increased to a large number of eliminated altogether when moving to the bubble caller where // TODO -- we are no longer considering a combinatorial number of haplotypes as the number of bubbles increases - private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 25; + private final static int NUM_PATHS_PER_GRAPH = 25; + private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers private static final int GRAPH_KMER_STEP = 6; - private final boolean debug; - private final boolean debugGraphTransformations; private final int minKmer; - private final boolean allowCyclesInKmerGraphToGeneratePaths; - private final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms; - protected DeBruijnAssembler() { - this(false, -1, 11, false); + this(25, -1); } - public DeBruijnAssembler(final boolean debug, - final int debugGraphTransformations, - final int minKmer, - final boolean allowCyclesInKmerGraphToGeneratePaths) { - super(); - this.debug = debug; - this.debugGraphTransformations = debugGraphTransformations > 0; - this.onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms = debugGraphTransformations; + public DeBruijnAssembler(final int minKmer, final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms) { + super(NUM_PATHS_PER_GRAPH); this.minKmer = minKmer; - this.allowCyclesInKmerGraphToGeneratePaths = allowCyclesInKmerGraphToGeneratePaths; + this.onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms = 
onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms; } - /** - * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads - * @param activeRegion ActiveRegion object holding the reads which are to be used during assembly - * @param refHaplotype reference haplotype object - * @param fullReferenceWithPadding byte array holding the reference sequence with padding - * @param refLoc GenomeLoc object corresponding to the reference sequence with padding - * @param activeAllelesToGenotype the alleles to inject into the haplotypes during GGA mode - * @return a non-empty list of all the haplotypes that are produced during assembly - */ - @Ensures({"result.contains(refHaplotype)"}) - public List runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final List activeAllelesToGenotype ) { - if( activeRegion == null ) { throw new IllegalArgumentException("Assembly engine cannot be used with a null ActiveRegion."); } - if( refHaplotype == null ) { throw new IllegalArgumentException("Reference haplotype cannot be null."); } - if( fullReferenceWithPadding.length != refLoc.size() ) { throw new IllegalArgumentException("Reference bases and reference loc must be the same size."); } - if( pruneFactor < 0 ) { throw new IllegalArgumentException("Pruning factor cannot be negative"); } - - // create the graphs - final List graphs = createDeBruijnGraphs( activeRegion.getReads(), refHaplotype ); - - // print the graphs if the appropriate debug option has been turned on - if( graphWriter != null ) { - printGraphs(graphs); - } - - // find the best paths in the graphs and return them as haplotypes - return findBestPaths( graphs, refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); - } - - @Requires({"reads != null", "refHaplotype != null"}) - protected List createDeBruijnGraphs( 
final List reads, final Haplotype refHaplotype ) { + @Override + protected List assemble(final List reads, final Haplotype refHaplotype) { final List graphs = new LinkedList(); final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1; @@ -165,10 +117,9 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { " future subsystem will actually go and error correct the reads"); } - final SeqGraph seqGraph = toSeqGraph(graph); + final SeqGraph seqGraph = cleanupSeqGraph(graph.convertToSequenceGraph()); if ( seqGraph != null ) { // if the graph contains interesting variation from the reference - sanityCheckReferenceGraph(seqGraph, refHaplotype); graphs.add(seqGraph); if ( debugGraphTransformations ) // we only want to use one graph size @@ -181,69 +132,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { return graphs; } - private SeqGraph toSeqGraph(final DeBruijnGraph deBruijnGraph) { - final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor); - - // TODO -- we need to come up with a consistent pruning algorithm. The current pruning algorithm - // TODO -- works well but it doesn't differentiate between an isolated chain that doesn't connect - // TODO -- to anything from one that's actually has good support along the chain but just happens - // TODO -- to have a connection in the middle that has weight of < pruneFactor. 
Ultimately - // TODO -- the pruning algorithm really should be an error correction algorithm that knows more - // TODO -- about the structure of the data and can differentiate between an infrequent path but - // TODO -- without evidence against it (such as occurs when a region is hard to get any reads through) - // TODO -- from a error with lots of weight going along another similar path - // the very first thing we need to do is zip up the graph, or pruneGraph will be too aggressive - seqGraph.zipLinearChains(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.zipped.dot"), pruneFactor); - - // now go through and prune the graph, removing vertices no longer connected to the reference chain - // IMPORTANT: pruning must occur before we call simplifyGraph, as simplifyGraph adds 0 weight - // edges to maintain graph connectivity. - seqGraph.pruneGraph(pruneFactor); - seqGraph.removeVerticesNotConnectedToRefRegardlessOfEdgeDirection(); - - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.3.pruned.dot"), pruneFactor); - seqGraph.simplifyGraph(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.4.merged.dot"), pruneFactor); - - // The graph has degenerated in some way, so the reference source and/or sink cannot be id'd. Can - // happen in cases where for example the reference somehow manages to acquire a cycle, or - // where the entire assembly collapses back into the reference sequence. - if ( seqGraph.getReferenceSourceVertex() == null || seqGraph.getReferenceSinkVertex() == null ) - return null; - - seqGraph.removePathsNotConnectedToRef(); - seqGraph.simplifyGraph(); - if ( seqGraph.vertexSet().size() == 1 ) { - // we've perfectly assembled into a single reference haplotype, add a empty seq vertex to stop - // the code from blowing up. 
- // TODO -- ref properties should really be on the vertices, not the graph itself - final SeqVertex complete = seqGraph.vertexSet().iterator().next(); - final SeqVertex dummy = new SeqVertex(""); - seqGraph.addVertex(dummy); - seqGraph.addEdge(complete, dummy, new BaseEdge(true, 0)); - } - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.5.final.dot"), pruneFactor); - - return seqGraph; - } - - protected void sanityCheckReferenceGraph(final BaseGraph graph, final Haplotype refHaplotype) { - if( graph.getReferenceSourceVertex() == null ) { - throw new IllegalStateException("All reference graphs must have a reference source vertex."); - } - if( graph.getReferenceSinkVertex() == null ) { - throw new IllegalStateException("All reference graphs must have a reference sink vertex."); - } - if( !Arrays.equals(graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true), refHaplotype.getBases()) ) { - throw new IllegalStateException("Mismatch between the reference haplotype and the reference assembly graph path." 
+ - " graph = " + new String(graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true)) + - " haplotype = " + new String(refHaplotype.getBases()) - ); - } - } - @Requires({"reads != null", "kmerLength > 0", "refHaplotype != null"}) protected DeBruijnGraph createGraphFromSequences( final List reads, final int kmerLength, final Haplotype refHaplotype ) { final DeBruijnGraph graph = new DeBruijnGraph(kmerLength); @@ -344,290 +232,10 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { return true; } - protected void printGraphs(final List graphs) { - final int writeFirstGraphWithSizeSmallerThan = 50; - - graphWriter.println("digraph assemblyGraphs {"); - for( final SeqGraph graph : graphs ) { - if ( debugGraphTransformations && graph.getKmerSize() >= writeFirstGraphWithSizeSmallerThan ) { - logger.info("Skipping writing of graph with kmersize " + graph.getKmerSize()); - continue; - } - - graph.printGraph(graphWriter, false, pruneFactor); - - if ( debugGraphTransformations ) - break; - } - - graphWriter.println("}"); - } - - @Requires({"refWithPadding.length > refHaplotype.getBases().length", "refLoc.containsP(activeRegionWindow)"}) - @Ensures({"result.contains(refHaplotype)"}) - private List findBestPaths( final List graphs, final Haplotype refHaplotype, final byte[] refWithPadding, final GenomeLoc refLoc, final List activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) { - - // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes - // TODO -- this use of an array with contains lower may be a performance problem returning in an O(N^2) algorithm - final List returnHaplotypes = new ArrayList(); - refHaplotype.setAlignmentStartHapwrtRef(activeRegionWindow.getStart() - refLoc.getStart()); - final Cigar c = new Cigar(); - c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); - refHaplotype.setCigar(c); - returnHaplotypes.add( 
refHaplotype ); - - final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef(); - final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength(); - - // for GGA mode, add the desired allele into the haplotype - for( final VariantContext compVC : activeAllelesToGenotype ) { - for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()); - addHaplotypeForGGA( insertedRefHaplotype, refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true ); - } - } - - for( final SeqGraph graph : graphs ) { - final SeqVertex source = graph.getReferenceSourceVertex(); - final SeqVertex sink = graph.getReferenceSinkVertex(); - if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph); - - final KBestPaths pathFinder = new KBestPaths(allowCyclesInKmerGraphToGeneratePaths); - for ( final Path path : pathFinder.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH, source, sink) ) { -// logger.info("Found path " + path); - Haplotype h = new Haplotype( path.getBases() ); - if( !returnHaplotypes.contains(h) ) { - final Cigar cigar = path.calculateCigar(); - if( cigar.isEmpty() ) { - throw new IllegalStateException("Smith-Waterman alignment failure. 
Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength()); - } else if ( pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < 60 ) { // N cigar elements means that a bubble was too divergent from the reference so skip over this path - continue; - } else if( cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // SW failure - throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength()); - } - h.setCigar(cigar); - - // extend partial haplotypes which are anchored in the reference to include the full active region - h = extendPartialHaplotype(h, activeRegionStart, refWithPadding); - final Cigar leftAlignedCigar = leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(h.getCigar()), refWithPadding, h.getBases(), activeRegionStart, 0); - if( !returnHaplotypes.contains(h) ) { - h.setAlignmentStartHapwrtRef(activeRegionStart); - h.setCigar(leftAlignedCigar); - h.setScore(path.getScore()); - returnHaplotypes.add(h); - - if ( debug ) - logger.info("Adding haplotype " + h.getCigar() + " from debruijn graph with kmer " + graph.getKmerSize()); - - // for GGA mode, add the desired allele into the haplotype if it isn't already present - if( !activeAllelesToGenotype.isEmpty() ) { - final Map eventMap = GenotypingEngine.generateVCsFromAlignment( h, refWithPadding, refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place - for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present - final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); - - // This if statement used to additionally have: - // "|| !vcOnHaplotype.hasSameAllelesAs(compVC)" - 
// but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto - // a haplotype that already contains a 1bp insertion (so practically it is reference but - // falls into the bin for the 1bp deletion because we keep track of the artificial alleles). - if( vcOnHaplotype == null ) { - for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - addHaplotypeForGGA( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ); - } - } - } - } - } - } - } - } - - // add genome locs to the haplotypes - for ( final Haplotype h : returnHaplotypes ) h.setGenomeLocation(activeRegionWindow); - - if ( returnHaplotypes.size() < returnHaplotypes.size() ) - logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc); - - if( debug ) { - if( returnHaplotypes.size() > 1 ) { - logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against."); - } else { - logger.info("Found only the reference haplotype in the assembly graph."); - } - for( final Haplotype h : returnHaplotypes ) { - logger.info( h.toString() ); - logger.info( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() ); - } - } - - return returnHaplotypes; - } - - /** - * Extend partial haplotypes which are anchored in the reference to include the full active region - * @param haplotype the haplotype to extend - * @param activeRegionStart the place where the active region starts in the ref byte array - * @param refWithPadding the full reference byte array with padding which encompasses the active region - * @return a haplotype fully extended to encompass the active region - */ 
- @Requires({"haplotype != null", "activeRegionStart >= 0", "refWithPadding != null", "refWithPadding.length > 0"}) - @Ensures({"result != null", "result.getCigar() != null"}) - private Haplotype extendPartialHaplotype( final Haplotype haplotype, final int activeRegionStart, final byte[] refWithPadding ) { - final Cigar cigar = haplotype.getCigar(); - final Cigar newCigar = new Cigar(); - byte[] newHaplotypeBases = haplotype.getBases(); - int refPos = activeRegionStart; - int hapPos = 0; - for( int iii = 0; iii < cigar.getCigarElements().size(); iii++ ) { - final CigarElement ce = cigar.getCigarElement(iii); - switch (ce.getOperator()) { - case M: - refPos += ce.getLength(); - hapPos += ce.getLength(); - newCigar.add(ce); - break; - case I: - hapPos += ce.getLength(); - newCigar.add(ce); - break; - case D: - if( iii == 0 || iii == cigar.getCigarElements().size() - 1 ) { - newHaplotypeBases = ArrayUtils.addAll( Arrays.copyOfRange(newHaplotypeBases, 0, hapPos), - ArrayUtils.addAll(Arrays.copyOfRange(refWithPadding, refPos, refPos + ce.getLength()), - Arrays.copyOfRange(newHaplotypeBases, hapPos, newHaplotypeBases.length))); - hapPos += ce.getLength(); - refPos += ce.getLength(); - newCigar.add(new CigarElement(ce.getLength(), CigarOperator.M)); - } else { - refPos += ce.getLength(); - newCigar.add(ce); - } - break; - default: - throw new IllegalStateException("Unsupported cigar operator detected: " + ce.getOperator()); - } - } - final Haplotype returnHaplotype = new Haplotype(newHaplotypeBases, haplotype.isReference()); - returnHaplotype.setCigar( newCigar ); - return returnHaplotype; - } - - /** - * We use CigarOperator.N as the signal that an incomplete or too divergent bubble was found during bubble traversal - * @param c the cigar to test - * @return true if we should skip over this path - */ - @Requires("c != null") - private boolean pathIsTooDivergentFromReference( final Cigar c ) { - for( final CigarElement ce : c.getCigarElements() ) { - if( 
ce.getOperator().equals(CigarOperator.N) ) { - return true; - } - } - return false; - } - - /** - * Left align the given cigar sequentially. This is needed because AlignmentUtils doesn't accept cigars with more than one indel in them. - * This is a target of future work to incorporate and generalize into AlignmentUtils for use by others. - * @param cigar the cigar to left align - * @param refSeq the reference byte array - * @param readSeq the read byte array - * @param refIndex 0-based alignment start position on ref - * @param readIndex 0-based alignment start position on read - * @return the left-aligned cigar - */ - @Ensures({"cigar != null", "refSeq != null", "readSeq != null", "refIndex >= 0", "readIndex >= 0"}) - protected Cigar leftAlignCigarSequentially(final Cigar cigar, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) { - final Cigar cigarToReturn = new Cigar(); - Cigar cigarToAlign = new Cigar(); - for (int i = 0; i < cigar.numCigarElements(); i++) { - final CigarElement ce = cigar.getCigarElement(i); - if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) { - cigarToAlign.add(ce); - final Cigar leftAligned = AlignmentUtils.leftAlignSingleIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false); - for ( final CigarElement toAdd : leftAligned.getCigarElements() ) { cigarToReturn.add(toAdd); } - refIndex += cigarToAlign.getReferenceLength(); - readIndex += cigarToAlign.getReadLength(); - cigarToAlign = new Cigar(); - } else { - cigarToAlign.add(ce); - } - } - if( !cigarToAlign.isEmpty() ) { - for( final CigarElement toAdd : cigarToAlign.getCigarElements() ) { - cigarToReturn.add(toAdd); - } - } - - final Cigar result = AlignmentUtils.consolidateCigar(cigarToReturn); - if( result.getReferenceLength() != cigar.getReferenceLength() ) - throw new IllegalStateException("leftAlignCigarSequentially failed to produce a valid CIGAR. Reference lengths differ. 
Initial cigar " + cigar + " left aligned into " + result); - return result; - } - - /** - * Take a haplotype which was generated by injecting an allele into a string of bases and run SW against the reference to determine the variants on the haplotype. - * Unfortunately since this haplotype didn't come from the assembly graph you can't straightforwardly use the bubble traversal algorithm to get this information. - * This is a target for future work as we rewrite the HaplotypeCaller to be more bubble-caller based. - * @param haplotype the candidate haplotype - * @param ref the reference bases to align against - * @param haplotypeList the current list of haplotypes - * @param activeRegionStart the start of the active region in the reference byte array - * @param activeRegionStop the stop of the active region in the reference byte array - * @param FORCE_INCLUSION_FOR_GGA_MODE if true will include in the list even if it already exists - * @return true if the candidate haplotype was successfully incorporated into the haplotype list - */ - @Requires({"ref != null", "ref.length >= activeRegionStop - activeRegionStart"}) - private boolean addHaplotypeForGGA( final Haplotype haplotype, final byte[] ref, final List haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) { - if( haplotype == null ) { return false; } - - final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SWParameterSet.STANDARD_NGS ); - haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() ); - - if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 || swConsensus.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments - return false; - } - - haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0, true) ); - - final int hapStart = 
ReadUtils.getReadCoordinateForReferenceCoordinate(haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true); - int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true ); - if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) { - hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal - } - byte[] newHaplotypeBases; - // extend partial haplotypes to contain the full active region sequence - if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), - haplotype.getBases()), - ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) ); - } else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) ); - } else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) ); - } else { - newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop); - } - - final Haplotype h = new Haplotype( newHaplotypeBases ); - final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( 
ref, h.getBases(), SWParameterSet.STANDARD_NGS ); - - h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() ); - if ( haplotype.isArtificialHaplotype() ) { - h.setArtificialEvent(haplotype.getArtificialEvent()); - } - if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart || swConsensus2.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments - return false; - } - - h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0, true) ); - - if( FORCE_INCLUSION_FOR_GGA_MODE || !haplotypeList.contains(h) ) { - haplotypeList.add(h); - return true; - } else { - return false; - } + @Override + public String toString() { + return "DeBruijnAssembler{" + + "minKmer=" + minKmer + + '}'; } } \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 6ea543f25..33d1104bc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -68,6 +68,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; @@ -135,10 +136,14 @@ import java.util.*; 
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.LOCUS) @BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) -@ActiveRegionTraversalParameters(extension=200, maxRegion=300) +@ActiveRegionTraversalParameters(extension=100, maxRegion=300) @ReadFilters({HCMappingQualityFilter.class}) @Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=250) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { + // ----------------------------------------------------------------------------------------------- + // general haplotype caller arguments + // ----------------------------------------------------------------------------------------------- + /** * A raw, unfiltered, highly sensitive callset in VCF format. */ @@ -185,64 +190,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="bamWriterType", shortName="bamWriterType", doc="How should haplotypes be written to the BAM?", required = false) public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES; - /** - * The PairHMM implementation to use for genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime. - */ - @Advanced - @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false) - public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING; - - @Hidden - @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) - protected String keepRG = null; - - @Advanced - @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. 
Paths with <= X supporting kmers are pruned from the graph", required = false) - protected int MIN_PRUNE_FACTOR = 0; - - @Advanced - @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false) - protected int gcpHMM = 10; - - @Advanced - @Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false) - protected int maxNumHaplotypesInPopulation = 25; - - @Advanced - @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false) - protected int minKmer = 11; - - /** - * If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling - * when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the - * read can map, but if its pair is too divergent then it may be unmapped and placed next to its mate, taking - * the mates contig and alignment start. If this flag is provided the haplotype caller will see such reads, - * and may make use of them in assembly and calling, where possible. 
- */ - @Hidden - @Argument(fullName="includeUmappedReads", shortName="unmapped", doc="If provided, unmapped reads with chromosomal coordinates (i.e., those placed to their maps) will be included in the assembly and calling", required = false) - protected boolean includeUnmappedReads = false; - - @Advanced - @Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false) - protected boolean USE_ALLELES_TRIGGER = false; - - @Advanced - @Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "If specified, use the contamination-filtered read maps for the purposes of annotating variants", required=false) - protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false; - - @Hidden - @Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. Useful for benchmarking and scalability testing", required=false) - protected boolean justDetermineActiveRegions = false; - - @Hidden - @Argument(fullName="dontGenotype", shortName="dontGenotype", doc = "If specified, the HC will do any assembly but won't do calling. Useful for benchmarking and scalability testing", required=false) - protected boolean dontGenotype = false; - - @Hidden - @Argument(fullName="errorCorrectKmers", shortName="errorCorrectKmers", doc = "Use an exploratory algorithm to error correct the kmers used during assembly. May cause fundamental problems with the assembly graph itself", required=false) - protected boolean errorCorrectKmers = false; - /** * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. * dbSNP is not used in any way for the calculations themselves. 
@@ -282,10 +229,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false) protected List annotationsToExclude = new ArrayList(Arrays.asList(new String[]{"SpanningDeletions", "TandemRepeatAnnotator"})); - @Advanced - @Argument(fullName="mergeVariantsViaLD", shortName="mergeVariantsViaLD", doc="If specified, we will merge variants together into block substitutions that are in strong local LD", required = false) - protected boolean mergeVariantsViaLD = false; - /** * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups. */ @@ -295,13 +238,139 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @ArgumentCollection private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection(); + // ----------------------------------------------------------------------------------------------- + // arguments to control internal behavior of the debruijn assembler + // ----------------------------------------------------------------------------------------------- + + @Advanced + @Argument(fullName="useDebruijnAssembler", shortName="useDebruijnAssembler", doc="If specified, we will use the old DeBruijn assembler. 
Depreciated as of 2.6", required = false) + protected boolean useDebruijnAssembler = false; + + @Advanced + @Argument(fullName="minKmerForDebruijnAssembler", shortName="minKmerForDebruijnAssembler", doc="Minimum kmer length to use in the debruijn assembly graph", required = false) + protected int minKmerForDebruijnAssembler = 11; + + @Advanced + @Argument(fullName="onlyUseKmerSizeForDebruijnAssembler", shortName="onlyUseKmerSizeForDebruijnAssembler", doc="If specified, we will only build kmer graphs with this kmer size in the debruijn", required = false) + protected int onlyUseKmerSizeForDebruijnAssembler = -1; + + // ----------------------------------------------------------------------------------------------- + // arguments to control internal behavior of the read threading assembler + // ----------------------------------------------------------------------------------------------- + + @Advanced + @Argument(fullName="kmerSize", shortName="kmerSize", doc="Kmer size to use in the read threading assembler", required = false) + protected List kmerSizes = Arrays.asList(10, 25); + + /** + * Assembly graph can be quite complex, and could imply a very large number of possible haplotypes. Each haplotype + * considered requires N PairHMM evaluations if there are N reads across all samples. In order to control the + * run of the haplotype caller we only take maxPathsPerSample * nSample paths from the graph, in order of their + * weights, no matter how many paths are possible to generate from the graph. Putting this number too low + * will result in dropping true variation because paths that include the real variant are not even considered. 
+ */ + @Advanced + @Argument(fullName="maxPathsPerSample", shortName="maxPathsPerSample", doc="Max number of paths to consider for the read threading assembler per sample.", required = false) + protected int maxPathsPerSample = 10; + + /** + * The minimum number of paths to advance forward for genotyping, regardless of the + * number of samples + */ + private final static int MIN_PATHS_PER_GRAPH = 128; + + @Hidden + @Argument(fullName="dontRecoverDanglingTails", shortName="dontRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false) + protected boolean dontRecoverDanglingTails = false; + + // ----------------------------------------------------------------------------------------------- + // general advanced arguments to control haplotype caller behavior + // ----------------------------------------------------------------------------------------------- + + @Advanced + @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false) + protected int MIN_PRUNE_FACTOR = 2; + + @Advanced + @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false) + protected int gcpHMM = 10; + + /** + * If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling + * when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the + * read can map, but if its pair is too divergent then it may be unmapped and placed next to its mate, taking + * the mates contig and alignment start. If this flag is provided the haplotype caller will see such reads, + * and may make use of them in assembly and calling, where possible. 
+ */ + @Hidden + @Argument(fullName="includeUmappedReads", shortName="unmapped", doc="If provided, unmapped reads with chromosomal coordinates (i.e., those placed to their maps) will be included in the assembly and calling", required = false) + protected boolean includeUnmappedReads = false; + + @Advanced + @Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false) + protected boolean USE_ALLELES_TRIGGER = false; + + @Advanced + @Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "If specified, use the contamination-filtered read maps for the purposes of annotating variants", required=false) + protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false; + + /** + * The phredScaledGlobalReadMismappingRate reflects the average global mismapping rate of all reads, regardless of their + * mapping quality. This term affects the probability that a read originated from the reference haplotype, regardless of + * its edit distance from the reference, in that the read could have originated from the reference haplotype but + * from another location in the genome. Suppose a read has many mismatches from the reference, say like 5, but + * has a very high mapping quality of 60. Without this parameter, the read would contribute 5 * Q30 evidence + * in favor of its 5 mismatch haplotype compared to reference, potentially enough to make a call off that single + * read for all of these events. With this parameter set to Q30, though, the maximum evidence against the reference + * that this (and any) read could contribute against reference is Q30. 
+ * + * Set this term to any negative number to turn off the global mapping rate + */ + @Advanced + @Argument(fullName="phredScaledGlobalReadMismappingRate", shortName="globalMAPQ", doc="The global assumed mismapping rate for reads", required = false) + protected int phredScaledGlobalReadMismappingRate = 60; + + @Advanced + @Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false) + protected int maxNumHaplotypesInPopulation = 25; + + @Advanced + @Argument(fullName="mergeVariantsViaLD", shortName="mergeVariantsViaLD", doc="If specified, we will merge variants together into block substitutions that are in strong local LD", required = false) + protected boolean mergeVariantsViaLD = false; + + // ----------------------------------------------------------------------------------------------- + // arguments for debugging / developing the haplotype caller + // ----------------------------------------------------------------------------------------------- + /** + * The PairHMM implementation to use for genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime. 
+ */ + @Hidden + @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false) + public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING; + + @Hidden + @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) + protected String keepRG = null; + + @Hidden + @Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. Useful for benchmarking and scalability testing", required=false) + protected boolean justDetermineActiveRegions = false; + + @Hidden + @Argument(fullName="dontGenotype", shortName="dontGenotype", doc = "If specified, the HC will do any assembly but won't do calling. Useful for benchmarking and scalability testing", required=false) + protected boolean dontGenotype = false; + + @Hidden + @Argument(fullName="errorCorrectKmers", shortName="errorCorrectKmers", doc = "Use an exploratory algorithm to error correct the kmers used during assembly. 
May cause fundamental problems with the assembly graph itself", required=false) + protected boolean errorCorrectKmers = false; + @Advanced @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false) protected boolean DEBUG; - @Advanced + @Hidden @Argument(fullName="debugGraphTransformations", shortName="debugGraphTransformations", doc="If specified, we will write DOT formatted graph files out of the assembler for only this graph size", required = false) - protected int debugGraphTransformations = -1; + protected boolean debugGraphTransformations = false; @Hidden // TODO -- not currently useful @Argument(fullName="useLowQualityBasesForAssembly", shortName="useLowQualityBasesForAssembly", doc="If specified, we will include low quality bases when doing the assembly", required = false) @@ -311,10 +380,17 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="dontTrimActiveRegions", shortName="dontTrimActiveRegions", doc="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false) protected boolean dontTrimActiveRegions = false; + @Hidden + @Argument(fullName="dontUseSoftClippedBases", shortName="dontUseSoftClippedBases", doc="If specified, we will not analyze soft clipped bases in the reads", required = false) + protected boolean dontUseSoftClippedBases = false; + @Hidden @Argument(fullName="allowCyclesInKmerGraphToGeneratePaths", shortName="allowCyclesInKmerGraphToGeneratePaths", doc="If specified, we will allow cycles in the kmer graphs to generate paths with multiple copies of the path sequenece rather than just the shortest paths", required = false) protected boolean allowCyclesInKmerGraphToGeneratePaths = false; + // ----------------------------------------------------------------------------------------------- + // done with Haplotype 
caller parameters + // ----------------------------------------------------------------------------------------------- // the UG engines private UnifiedGenotyperEngine UG_engine = null; @@ -344,12 +420,17 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // the maximum extent into the full active region extension that we're willing to go in genotyping our events private final static int MAX_GENOTYPING_ACTIVE_REGION_EXTENSION = 25; + private ActiveRegionTrimmer trimmer = null; + private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an argument private final static int minReadsPerAlignmentStart = 5; // TODO -- should be an argument // bases with quality less than or equal to this value are trimmed off the tails of the reads private static final byte MIN_TAIL_QUALITY = 20; + // the minimum length of a read we'd consider using for genotyping + private final static int MIN_READ_LENGTH = 10; + private List samplesList = new ArrayList(); private final static double LOG_ONE_HALF = -Math.log10(2.0); private final static double LOG_ONE_THIRD = -Math.log10(3.0); @@ -373,6 +454,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // get all of the unique sample names Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); samplesList.addAll( samples ); + final int nSamples = samples.size(); // initialize the UnifiedGenotyper Engine which is used to call into the exact model final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); @@ -428,14 +510,36 @@ public class HaplotypeCaller extends ActiveRegionWalker implem throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e); } - // setup the assembler - assemblyEngine = new 
DeBruijnAssembler(DEBUG, debugGraphTransformations, minKmer, allowCyclesInKmerGraphToGeneratePaths); + // create and setup the assembler + final int maxAllowedPathsForReadThreadingAssembler = Math.max(maxPathsPerSample * nSamples, MIN_PATHS_PER_GRAPH); + assemblyEngine = useDebruijnAssembler + ? new DeBruijnAssembler(minKmerForDebruijnAssembler, onlyUseKmerSizeForDebruijnAssembler) + : new ReadThreadingAssembler(maxAllowedPathsForReadThreadingAssembler, kmerSizes); + assemblyEngine.setErrorCorrectKmers(errorCorrectKmers); assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR); + assemblyEngine.setDebug(DEBUG); + assemblyEngine.setDebugGraphTransformations(debugGraphTransformations); + assemblyEngine.setAllowCyclesInKmerGraphToGeneratePaths(allowCyclesInKmerGraphToGeneratePaths); + assemblyEngine.setRecoverDanglingTails(!dontRecoverDanglingTails); + if ( graphWriter != null ) assemblyEngine.setGraphWriter(graphWriter); if ( useLowQualityBasesForAssembly ) assemblyEngine.setMinBaseQualityToUseInAssembly((byte)1); - likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM ); + // setup the likelihood calculation engine + if ( phredScaledGlobalReadMismappingRate < 0 ) phredScaledGlobalReadMismappingRate = -1; + + // configure the global mismapping rate + final double log10GlobalReadMismappingRate; + if ( phredScaledGlobalReadMismappingRate < 0 ) { + log10GlobalReadMismappingRate = - Double.MAX_VALUE; + } else { + log10GlobalReadMismappingRate = QualityUtils.qualToErrorProbLog10(phredScaledGlobalReadMismappingRate); + logger.info("Using global mismapping rate of " + phredScaledGlobalReadMismappingRate + " => " + log10GlobalReadMismappingRate + " in log10 likelihood units"); + } + + // create our likelihood calculation engine + likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM, log10GlobalReadMismappingRate ); final MergeVariantsAcrossHaplotypes variantMerger = mergeVariantsViaLD ? 
new LDMerger(DEBUG, 10, 1) : new MergeVariantsAcrossHaplotypes(); @@ -443,6 +547,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem if ( bamWriter != null ) haplotypeBAMWriter = HaplotypeBAMWriter.create(bamWriterType, bamWriter, getToolkit().getSAMFileHeader()); + + trimmer = new ActiveRegionTrimmer(DEBUG, PADDING_AROUND_SNPS_FOR_CALLING, PADDING_AROUND_OTHERS_FOR_CALLING, + MAX_GENOTYPING_ACTIVE_REGION_EXTENSION, getToolkit().getGenomeLocParser()); } //--------------------------------------------------------------------------------------------------------------- @@ -564,7 +671,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final AssemblyResult assemblyResult = assembleReads(originalActiveRegion, activeAllelesToGenotype); // abort early if something is out of the acceptable range - if( assemblyResult.haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do! + if( ! assemblyResult.isVariationPresent() ) { return 1; } // only the reference haplotype remains so nothing else to do! 
if (dontGenotype) return 1; // user requested we not proceed // filter out reads from genotyping which fail mapping quality based criteria @@ -613,12 +720,18 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final ActiveRegion regionForGenotyping; final byte[] fullReferenceWithPadding; final GenomeLoc paddedReferenceLoc; + final boolean variationPresent; - private AssemblyResult(List haplotypes, ActiveRegion regionForGenotyping, byte[] fullReferenceWithPadding, GenomeLoc paddedReferenceLoc) { + private AssemblyResult(List haplotypes, ActiveRegion regionForGenotyping, byte[] fullReferenceWithPadding, GenomeLoc paddedReferenceLoc, boolean variationPresent) { this.haplotypes = haplotypes; this.regionForGenotyping = regionForGenotyping; this.fullReferenceWithPadding = fullReferenceWithPadding; this.paddedReferenceLoc = paddedReferenceLoc; + this.variationPresent = variationPresent; + } + + public boolean isVariationPresent() { + return variationPresent && haplotypes.size() > 1; } } @@ -644,63 +757,11 @@ public class HaplotypeCaller extends ActiveRegionWalker implem if ( ! 
dontTrimActiveRegions ) { return trimActiveRegion(activeRegion, haplotypes, fullReferenceWithPadding, paddedReferenceLoc); } else { - // we don't want to or cannot create a trimmed active region, so go ahead and use the old one - return new AssemblyResult(haplotypes, activeRegion, fullReferenceWithPadding, paddedReferenceLoc); + // we don't want to trim active regions, so go ahead and use the old one + return new AssemblyResult(haplotypes, activeRegion, fullReferenceWithPadding, paddedReferenceLoc, true); } } - /** - * Trim down the active region to just enough to properly genotype the events among the haplotypes - * - * This function merely creates the region, but it doesn't populate the reads back into the region - * - * @param region our full active region - * @param haplotypes the list of haplotypes we've created from assembly - * @param ref the reference bases over the full padded location - * @param refLoc the span of the reference bases - * @return a new ActiveRegion trimmed down to just what's needed for genotyping, or null if we couldn't do this successfully - */ - private ActiveRegion createTrimmedRegion(final ActiveRegion region, final List haplotypes, final byte[] ref, final GenomeLoc refLoc) { - EventMap.buildEventMapsForHaplotypes(haplotypes, ref, refLoc, DEBUG); - final TreeSet allContexts = EventMap.getAllVariantContexts(haplotypes); - final GenomeLocParser parser = getToolkit().getGenomeLocParser(); - - if ( allContexts.isEmpty() ) // no variants, so just return the current region - return null; - - final List withinActiveRegion = new LinkedList(); - int pad = PADDING_AROUND_SNPS_FOR_CALLING; - GenomeLoc trimLoc = null; - for ( final VariantContext vc : allContexts ) { - final GenomeLoc vcLoc = parser.createGenomeLoc(vc); - if ( region.getLocation().overlapsP(vcLoc) ) { - if ( ! vc.isSNP() ) // if anything isn't a SNP use the bigger padding - pad = PADDING_AROUND_OTHERS_FOR_CALLING; - trimLoc = trimLoc == null ? 
vcLoc : trimLoc.endpointSpan(vcLoc); - withinActiveRegion.add(vc); - } - } - - // we don't actually have anything in the region after removing variants that don't overlap the region's full location - if ( trimLoc == null ) return null; - - final GenomeLoc maxSpan = getToolkit().getGenomeLocParser().createPaddedGenomeLoc(region.getLocation(), MAX_GENOTYPING_ACTIVE_REGION_EXTENSION); - final GenomeLoc idealSpan = getToolkit().getGenomeLocParser().createPaddedGenomeLoc(trimLoc, pad); - final GenomeLoc finalSpan = maxSpan.intersect(idealSpan); - - final ActiveRegion trimmedRegion = region.trim(finalSpan); - if ( DEBUG ) { - logger.info("events : " + withinActiveRegion); - logger.info("trimLoc : " + trimLoc); - logger.info("pad : " + pad); - logger.info("idealSpan : " + idealSpan); - logger.info("maxSpan : " + maxSpan); - logger.info("finalSpan : " + finalSpan); - logger.info("regionSpan : " + trimmedRegion.getExtendedLoc() + " size is " + trimmedRegion.getExtendedLoc().size()); - } - return trimmedRegion; - } - /** * Trim down the active region to just enough to properly genotype the events among the haplotypes * @@ -709,17 +770,24 @@ public class HaplotypeCaller extends ActiveRegionWalker implem * @param fullReferenceWithPadding the reference bases over the full padded location * @param paddedReferenceLoc the span of the reference bases * @return an AssemblyResult containing the trimmed active region with all of the reads we should use - * trimmed down as well, and a revised set of haplotypes. If trimming failed this function - * may choose to use the originalActiveRegion without modification + * trimmed down as well, and a revised set of haplotypes. If trimming down the active region results + * in only the reference haplotype over the non-extended active region, returns null. 
*/ private AssemblyResult trimActiveRegion(final ActiveRegion originalActiveRegion, final List haplotypes, final byte[] fullReferenceWithPadding, final GenomeLoc paddedReferenceLoc) { - final ActiveRegion trimmedActiveRegion = createTrimmedRegion(originalActiveRegion, haplotypes, fullReferenceWithPadding, paddedReferenceLoc); + if ( DEBUG ) logger.info("Trimming active region " + originalActiveRegion + " with " + haplotypes.size() + " haplotypes"); - if ( trimmedActiveRegion == null ) - return new AssemblyResult(haplotypes, originalActiveRegion, fullReferenceWithPadding, paddedReferenceLoc); + EventMap.buildEventMapsForHaplotypes(haplotypes, fullReferenceWithPadding, paddedReferenceLoc, DEBUG); + final TreeSet allVariantsWithinFullActiveRegion = EventMap.getAllVariantContexts(haplotypes); + final ActiveRegion trimmedActiveRegion = trimmer.trimRegion(originalActiveRegion, allVariantsWithinFullActiveRegion); + + if ( trimmedActiveRegion == null ) { + // there were no variants found within the active region itself, so just return null + if ( DEBUG ) logger.info("No variation found within the active region, skipping the region :-)"); + return new AssemblyResult(haplotypes, originalActiveRegion, fullReferenceWithPadding, paddedReferenceLoc, false); + } // trim down the haplotypes final Set haplotypeSet = new HashSet(haplotypes.size()); @@ -738,8 +806,8 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // sort haplotypes to take full advantage of haplotype start offset optimizations in PairHMM Collections.sort( trimmedHaplotypes, new HaplotypeBaseComparator() ); + if ( DEBUG ) logger.info("Trimmed region to " + trimmedActiveRegion.getLocation() + " size " + trimmedActiveRegion.getLocation().size() + " reduced number of haplotypes from " + haplotypes.size() + " to only " + trimmedHaplotypes.size()); if ( DEBUG ) { - logger.info("Trimming haplotypes reduced number of haplotypes from " + haplotypes.size() + " to only " + trimmedHaplotypes.size()); for ( 
final Haplotype remaining: trimmedHaplotypes ) { logger.info(" Remains: " + remaining + " cigar " + remaining.getCigar()); } @@ -757,7 +825,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem trimmedActiveRegion.clearReads(); trimmedActiveRegion.addAll(ReadUtils.sortReadsByCoordinate(trimmedReads)); - return new AssemblyResult(trimmedHaplotypes, trimmedActiveRegion, fullReferenceWithPadding, paddedReferenceLoc); + return new AssemblyResult(trimmedHaplotypes, trimmedActiveRegion, fullReferenceWithPadding, paddedReferenceLoc, true); } /** @@ -821,15 +889,17 @@ public class HaplotypeCaller extends ActiveRegionWalker implem if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { GATKSAMRecord clippedRead = useLowQualityBasesForAssembly ? postAdapterRead : ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY ); - // revert soft clips so that we see the alignment start and end assuming the soft clips are all matches - // TODO -- WARNING -- still possibility that unclipping the soft clips will introduce bases that aren't - // TODO -- truly in the extended region, as the unclipped bases might actually include a deletion - // TODO -- w.r.t. the reference. 
What really needs to happen is that kmers that occur before the - // TODO -- reference haplotype start must be removed - clippedRead = ReadClipper.revertSoftClippedBases(clippedRead); - - // uncomment to remove hard clips from consideration at all - //clippedRead = ReadClipper.hardClipSoftClippedBases(clippedRead); + if ( dontUseSoftClippedBases ) { + // uncomment to remove hard clips from consideration at all + clippedRead = ReadClipper.hardClipSoftClippedBases(clippedRead); + } else { + // revert soft clips so that we see the alignment start and end assuming the soft clips are all matches + // TODO -- WARNING -- still possibility that unclipping the soft clips will introduce bases that aren't + // TODO -- truly in the extended region, as the unclipped bases might actually include a deletion + // TODO -- w.r.t. the reference. What really needs to happen is that kmers that occur before the + // TODO -- reference haplotype start must be removed + clippedRead = ReadClipper.revertSoftClippedBases(clippedRead); + } clippedRead = ReadClipper.hardClipToRegion( clippedRead, activeRegion.getExtendedLoc().getStart(), activeRegion.getExtendedLoc().getStop() ); if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) { @@ -843,13 +913,16 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { - final List readsToRemove = new ArrayList(); + final List readsToRemove = new ArrayList<>(); +// logger.info("Filtering non-passing regions: n incoming " + activeRegion.getReads().size()); for( final GATKSAMRecord rec : activeRegion.getReads() ) { - if( rec.getReadLength() < 10 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { + if( rec.getReadLength() < MIN_READ_LENGTH || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != 
null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); +// logger.info("\tremoving read " + rec + " len " + rec.getReadLength()); } } activeRegion.removeAll( readsToRemove ); +// logger.info("Filtered non-passing regions: n remaining " + activeRegion.getReads().size()); return readsToRemove; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KMerCounter.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KMerCounter.java index a7194f85f..aad8407dd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KMerCounter.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KMerCounter.java @@ -46,9 +46,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; +import java.util.*; /** * generic utility class that counts kmers @@ -97,6 +95,20 @@ public class KMerCounter { return countsByKMer.values(); } + /** + * Get kmers that have minCount or greater in this counter + * @param minCount only return kmers with count >= this value + * @return a non-null collection of kmers + */ + public Collection getKmersWithCountsAtLeast(final int minCount) { + final List result = new LinkedList(); + for ( final CountedKmer countedKmer : getCountedKmers() ) { + if ( countedKmer.count >= minCount ) + result.add(countedKmer.kmer); + } + return result; + } + /** * Remove all current counts, resetting the counter to an empty state */ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Kmer.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Kmer.java index 9b0e1ac0a..745d4de06 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Kmer.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Kmer.java @@ -149,6 +149,14 @@ public class Kmer 
{ return bases; } + /** + * Get a string representation of the bases of this kmer + * @return a non-null string + */ + public String baseString() { + return new String(bases()); + } + /** * The length of this kmer * @return an integer >= 0 diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 8697833a6..fbd9b29d5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -69,19 +69,33 @@ public class LikelihoodCalculationEngine { private static final double LOG_ONE_HALF = -Math.log10(2.0); private final byte constantGCP; + private final double log10globalReadMismappingRate; private final boolean DEBUG; private final PairHMM pairHMM; - private final int minReadLength = 20; /** * The expected rate of random sequencing errors for a read originating from its true haplotype. * * For example, if this is 0.01, then we'd expect 1 error per 100 bp. */ - private final double EXPECTED_ERROR_RATE_PER_BASE = 0.02; - - public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType ) { + private final static double EXPECTED_ERROR_RATE_PER_BASE = 0.02; + /** + * Create a new LikelihoodCalculationEngine using provided parameters and hmm to do its calculations + * + * @param constantGCP the gap continuation penalty to use with the PairHMM + * @param debug should we emit debugging information during the calculation? + * @param hmmType the type of the HMM to use + * @param log10globalReadMismappingRate the global mismapping probability, in log10(prob) units. 
A value of + * -3 means that the chance that a read doesn't actually belong at this + * location in the genome is 1 in 1000. The effect of this parameter is + * to cap the maximum likelihood difference between the reference haplotype + * and the best alternative haplotype by -3 log units. So if the best + * haplotype is at -10 and this parameter has a value of -3 then even if the + * reference haplotype gets a score of -100 from the pairhmm it will be + * assigned a likelihood of -13. + */ + public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType, final double log10globalReadMismappingRate ) { switch (hmmType) { case EXACT: pairHMM = new Log10PairHMM(true); @@ -98,6 +112,11 @@ public class LikelihoodCalculationEngine { this.constantGCP = constantGCP; DEBUG = debug; + this.log10globalReadMismappingRate = log10globalReadMismappingRate; + } + + public LikelihoodCalculationEngine() { + this((byte)10, false, PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3); } /** @@ -134,7 +153,6 @@ public class LikelihoodCalculationEngine { // Add likelihoods for each sample's reads to our stratifiedReadMap final Map stratifiedReadMap = new HashMap(); for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { - //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); } // evaluate the likelihood of the reads given those haplotypes final PerReadAlleleLikelihoodMap map = computeReadLikelihoods(haplotypes, sampleEntry.getValue()); @@ -152,17 +170,16 @@ public class LikelihoodCalculationEngine { private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List haplotypes, final List reads) { // first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time) final int numHaplotypes = haplotypes.size(); - final Map alleleVersions = new HashMap(numHaplotypes); + final Map 
alleleVersions = new HashMap<>(numHaplotypes); + Allele refAllele = null; for ( final Haplotype haplotype : haplotypes ) { - alleleVersions.put(haplotype, Allele.create(haplotype, true)); + final Allele allele = Allele.create(haplotype, true); + alleleVersions.put(haplotype, allele); + if ( haplotype.isReference() ) refAllele = allele; } final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap(); for( final GATKSAMRecord read : reads ) { - if ( read.getReadLength() < minReadLength ) - // don't consider any reads that have a read length < the minimum - continue; - final byte[] overallGCP = new byte[read.getReadLength()]; Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data? // NOTE -- must clone anything that gets modified here so we don't screw up future uses of the read @@ -177,14 +194,34 @@ public class LikelihoodCalculationEngine { readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); } + // keep track of the reference likelihood and the best non-ref likelihood + double refLog10l = Double.NEGATIVE_INFINITY; + double bestNonReflog10L = Double.NEGATIVE_INFINITY; + + // iterate over all haplotypes, calculating the likelihood of the read for each haplotype for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { final Haplotype haplotype = haplotypes.get(jjj); final boolean isFirstHaplotype = jjj == 0; final double log10l = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype); + if ( haplotype.isNonReference() ) + bestNonReflog10L = Math.max(bestNonReflog10L, log10l); + else + refLog10l = log10l; + perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), log10l); } + + // ensure that the reference haplotype is no worse than the best non-ref haplotype minus the global + // mismapping rate. 
This protects us from the case where the assembly has produced haplotypes + // that are very divergent from reference, but are supported by only one read. In effect + // we capping how badly scoring the reference can be for any read by the chance that the read + // itself just doesn't belong here + final double worstRefLog10Allowed = bestNonReflog10L + log10globalReadMismappingRate; + if ( refLog10l < (worstRefLog10Allowed) ) { + perReadAlleleLikelihoodMap.add(read, refAllele, worstRefLog10Allowed); + } } return perReadAlleleLikelihoodMap; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 4c0483ad6..20b005b40 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -46,28 +46,388 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.apache.commons.lang.ArrayUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment; +import org.broadinstitute.sting.utils.smithwaterman.SWParameterSet; +import org.broadinstitute.variant.variantcontext.Allele; import 
org.broadinstitute.variant.variantcontext.VariantContext; +import java.io.File; import java.io.PrintStream; -import java.util.List; +import java.util.*; /** - * Created by IntelliJ IDEA. + * Abstract base class for all HaplotypeCaller assemblers + * * User: ebanks * Date: Mar 14, 2011 */ public abstract class LocalAssemblyEngine { - public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 8; + private final static Logger logger = Logger.getLogger(LocalAssemblyEngine.class); + + public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 8; + private static final int MIN_HAPLOTYPE_REFERENCE_LENGTH = 30; + + protected final int numBestHaplotypesPerGraph; + + protected boolean debug = false; + protected boolean allowCyclesInKmerGraphToGeneratePaths = false; + protected boolean debugGraphTransformations = false; + protected boolean recoverDanglingTails = true; - protected PrintStream graphWriter = null; protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE; protected int pruneFactor = 2; protected boolean errorCorrectKmers = false; - protected LocalAssemblyEngine() { } + private PrintStream graphWriter = null; + + /** + * Create a new LocalAssemblyEngine with all default parameters, ready for use + * @param numBestHaplotypesPerGraph the number of haplotypes to generate for each assembled graph + */ + protected LocalAssemblyEngine(final int numBestHaplotypesPerGraph) { + if ( numBestHaplotypesPerGraph < 1 ) throw new IllegalArgumentException("numBestHaplotypesPerGraph should be >= 1 but got " + numBestHaplotypesPerGraph); + this.numBestHaplotypesPerGraph = numBestHaplotypesPerGraph; + } + + /** + * Main subclass function: given reads and a reference haplotype give us graphs to use for constructing + * non-reference haplotypes. 
+ * + * @param reads the reads we're going to assemble + * @param refHaplotype the reference haplotype + * @return a non-null list of reads + */ + protected abstract List assemble(List reads, Haplotype refHaplotype); + + /** + * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads + * @param activeRegion ActiveRegion object holding the reads which are to be used during assembly + * @param refHaplotype reference haplotype object + * @param fullReferenceWithPadding byte array holding the reference sequence with padding + * @param refLoc GenomeLoc object corresponding to the reference sequence with padding + * @param activeAllelesToGenotype the alleles to inject into the haplotypes during GGA mode + * @return a non-empty list of all the haplotypes that are produced during assembly + */ + public List runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, List activeAllelesToGenotype) { + if( activeRegion == null ) { throw new IllegalArgumentException("Assembly engine cannot be used with a null ActiveRegion."); } + if( refHaplotype == null ) { throw new IllegalArgumentException("Reference haplotype cannot be null."); } + if( fullReferenceWithPadding.length != refLoc.size() ) { throw new IllegalArgumentException("Reference bases and reference loc must be the same size."); } + if( pruneFactor < 0 ) { throw new IllegalArgumentException("Pruning factor cannot be negative"); } + + // create the graphs by calling our subclass assemble method + final List graphs = assemble(activeRegion.getReads(), refHaplotype); + + // do some QC on the graphs + for ( final SeqGraph graph : graphs ) { sanityCheckGraph(graph, refHaplotype); } + + // print the graphs if the appropriate debug option has been turned on + if ( graphWriter != null ) { printGraphs(graphs); } + + // find the best paths in the graphs and return them as haplotypes + return 
findBestPaths( graphs, refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); + } + + @Requires({"refWithPadding.length > refHaplotype.getBases().length", "refLoc.containsP(activeRegionWindow)"}) + @Ensures({"result.contains(refHaplotype)"}) + protected List findBestPaths(final List graphs, final Haplotype refHaplotype, final byte[] refWithPadding, final GenomeLoc refLoc, final List activeAllelesToGenotype, final GenomeLoc activeRegionWindow) { + // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes + final Set returnHaplotypes = new LinkedHashSet(); + refHaplotype.setAlignmentStartHapwrtRef(activeRegionWindow.getStart() - refLoc.getStart()); + final Cigar c = new Cigar(); + c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); + refHaplotype.setCigar(c); + returnHaplotypes.add( refHaplotype ); + + final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef(); + final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength(); + + // for GGA mode, add the desired allele into the haplotype + for( final VariantContext compVC : activeAllelesToGenotype ) { + for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { + final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()); + addHaplotypeForGGA( insertedRefHaplotype, refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true ); + } + } + + for( final SeqGraph graph : graphs ) { + final SeqVertex source = graph.getReferenceSourceVertex(); + final SeqVertex sink = graph.getReferenceSinkVertex(); + if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ 
graph); + + final KBestPaths pathFinder = new KBestPaths(allowCyclesInKmerGraphToGeneratePaths); + for ( final Path path : pathFinder.getKBestPaths(graph, numBestHaplotypesPerGraph, source, sink) ) { +// logger.info("Found path " + path); + Haplotype h = new Haplotype( path.getBases() ); + if( !returnHaplotypes.contains(h) ) { + final Cigar cigar = path.calculateCigar(refHaplotype.getBases()); + + if ( cigar == null ) { + // couldn't produce a meaningful alignment of haplotype to reference, fail quitely + continue; + } else if( cigar.isEmpty() ) { + throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength()); + } else if ( pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < MIN_HAPLOTYPE_REFERENCE_LENGTH ) { + // N cigar elements means that a bubble was too divergent from the reference so skip over this path + continue; + } else if( cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // SW failure + throw new IllegalStateException("Smith-Waterman alignment failure. 
Cigar = " + cigar + " with reference length " + + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength() + + " ref = " + refHaplotype + " path " + new String(path.getBases())); + } + + h.setCigar(cigar); + h.setAlignmentStartHapwrtRef(activeRegionStart); + h.setScore(path.getScore()); + returnHaplotypes.add(h); + + if ( debug ) + logger.info("Adding haplotype " + h.getCigar() + " from debruijn graph with kmer " + graph.getKmerSize()); + + // for GGA mode, add the desired allele into the haplotype if it isn't already present + if( !activeAllelesToGenotype.isEmpty() ) { + final Map eventMap = GenotypingEngine.generateVCsFromAlignment( h, refWithPadding, refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place + for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present + final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); + + // This if statement used to additionally have: + // "|| !vcOnHaplotype.hasSameAllelesAs(compVC)" + // but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto + // a haplotype that already contains a 1bp insertion (so practically it is reference but + // falls into the bin for the 1bp deletion because we keep track of the artificial alleles). 
+ if( vcOnHaplotype == null ) { + for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { + addHaplotypeForGGA( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ); + } + } + } + } + } + } + } + + // add genome locs to the haplotypes + for ( final Haplotype h : returnHaplotypes ) h.setGenomeLocation(activeRegionWindow); + + if ( returnHaplotypes.size() < returnHaplotypes.size() ) + logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc); + + if( debug ) { + if( returnHaplotypes.size() > 1 ) { + logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against."); + } else { + logger.info("Found only the reference haplotype in the assembly graph."); + } + for( final Haplotype h : returnHaplotypes ) { + logger.info( h.toString() ); + logger.info( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() + " ref " + h.isReference()); + } + } + + return new ArrayList(returnHaplotypes); + } + + /** + * We use CigarOperator.N as the signal that an incomplete or too divergent bubble was found during bubble traversal + * @param c the cigar to test + * @return true if we should skip over this path + */ + @Requires("c != null") + private boolean pathIsTooDivergentFromReference( final Cigar c ) { + for( final CigarElement ce : c.getCigarElements() ) { + if( ce.getOperator().equals(CigarOperator.N) ) { + return true; + } + } + return false; + } + + /** + * Take a haplotype which was generated by injecting an allele into a string of bases and run SW against the reference to determine the variants on the haplotype. 
+ * Unfortunately since this haplotype didn't come from the assembly graph you can't straightforwardly use the bubble traversal algorithm to get this information. + * This is a target for future work as we rewrite the HaplotypeCaller to be more bubble-caller based. + * @param haplotype the candidate haplotype + * @param ref the reference bases to align against + * @param haplotypeList the current list of haplotypes + * @param activeRegionStart the start of the active region in the reference byte array + * @param activeRegionStop the stop of the active region in the reference byte array + * @param FORCE_INCLUSION_FOR_GGA_MODE if true will include in the list even if it already exists + * @return true if the candidate haplotype was successfully incorporated into the haplotype list + */ + @Requires({"ref != null", "ref.length >= activeRegionStop - activeRegionStart"}) + private boolean addHaplotypeForGGA( final Haplotype haplotype, final byte[] ref, final Set haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) { + if( haplotype == null ) { return false; } + + final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SWParameterSet.STANDARD_NGS ); + haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() ); + + if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 || swConsensus.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments + return false; + } + + haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0, true) ); + + final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate(haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true); + int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( 
haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true ); + if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) { + hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal + } + byte[] newHaplotypeBases; + // extend partial haplotypes to contain the full active region sequence + if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { + newHaplotypeBases = ArrayUtils.addAll(ArrayUtils.addAll(ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), + haplotype.getBases()), + ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop)); + } else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { + newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) ); + } else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { + newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) ); + } else { + newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop); + } + + final Haplotype h = new Haplotype( newHaplotypeBases ); + final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SWParameterSet.STANDARD_NGS ); + + h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() ); + if ( haplotype.isArtificialHaplotype() ) { + h.setArtificialEvent(haplotype.getArtificialEvent()); + } + if( 
swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart || swConsensus2.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments + return false; + } + + h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0, true) ); + + if( FORCE_INCLUSION_FOR_GGA_MODE || !haplotypeList.contains(h) ) { + haplotypeList.add(h); + return true; + } else { + return false; + } + } + + protected SeqGraph cleanupSeqGraph(final SeqGraph seqGraph) { + if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor); + + // TODO -- we need to come up with a consistent pruning algorithm. The current pruning algorithm + // TODO -- works well but it doesn't differentiate between an isolated chain that doesn't connect + // TODO -- to anything from one that's actually has good support along the chain but just happens + // TODO -- to have a connection in the middle that has weight of < pruneFactor. Ultimately + // TODO -- the pruning algorithm really should be an error correction algorithm that knows more + // TODO -- about the structure of the data and can differentiate between an infrequent path but + // TODO -- without evidence against it (such as occurs when a region is hard to get any reads through) + // TODO -- from a error with lots of weight going along another similar path + // the very first thing we need to do is zip up the graph, or pruneGraph will be too aggressive + seqGraph.zipLinearChains(); + if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.zipped.dot"), pruneFactor); + + // now go through and prune the graph, removing vertices no longer connected to the reference chain + // IMPORTANT: pruning must occur before we call simplifyGraph, as simplifyGraph adds 0 weight + // edges to maintain graph connectivity. 
+ seqGraph.pruneGraph(pruneFactor); + seqGraph.removeVerticesNotConnectedToRefRegardlessOfEdgeDirection(); + + if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.3.pruned.dot"), pruneFactor); + seqGraph.simplifyGraph(); + if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.4.merged.dot"), pruneFactor); + + // The graph has degenerated in some way, so the reference source and/or sink cannot be id'd. Can + // happen in cases where for example the reference somehow manages to acquire a cycle, or + // where the entire assembly collapses back into the reference sequence. + if ( seqGraph.getReferenceSourceVertex() == null || seqGraph.getReferenceSinkVertex() == null ) + return null; + + seqGraph.removePathsNotConnectedToRef(); + seqGraph.simplifyGraph(); + if ( seqGraph.vertexSet().size() == 1 ) { + // we've perfectly assembled into a single reference haplotype, add a empty seq vertex to stop + // the code from blowing up. + // TODO -- ref properties should really be on the vertices, not the graph itself + final SeqVertex complete = seqGraph.vertexSet().iterator().next(); + final SeqVertex dummy = new SeqVertex(""); + seqGraph.addVertex(dummy); + seqGraph.addEdge(complete, dummy, new BaseEdge(true, 0)); + } + if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.5.final.dot"), pruneFactor); + + return seqGraph; + } + + /** + * Perform general QC on the graph to make sure something hasn't gone wrong during assembly + * @param graph the graph to check + * @param refHaplotype the reference haplotype + * @param + */ + private void sanityCheckGraph(final BaseGraph graph, final Haplotype refHaplotype) { + sanityCheckReferenceGraph(graph, refHaplotype); + } + + /** + * Make sure the reference sequence is properly represented in the provided graph + * + * @param graph the graph to check + * @param refHaplotype the reference haplotype + * @param + */ + private void sanityCheckReferenceGraph(final BaseGraph 
graph, final Haplotype refHaplotype) { + if( graph.getReferenceSourceVertex() == null ) { + throw new IllegalStateException("All reference graphs must have a reference source vertex."); + } + if( graph.getReferenceSinkVertex() == null ) { + throw new IllegalStateException("All reference graphs must have a reference sink vertex."); + } + if( !Arrays.equals(graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true), refHaplotype.getBases()) ) { + throw new IllegalStateException("Mismatch between the reference haplotype and the reference assembly graph path. for graph " + graph + + " graph = " + new String(graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true)) + + " haplotype = " + new String(refHaplotype.getBases()) + ); + } + } + + /** + * Print the generated graphs to the graphWriter + * @param graphs a non-null list of graphs to print out + */ + private void printGraphs(final List graphs) { + final int writeFirstGraphWithSizeSmallerThan = 50; + + graphWriter.println("digraph assemblyGraphs {"); + for( final SeqGraph graph : graphs ) { + if ( debugGraphTransformations && graph.getKmerSize() >= writeFirstGraphWithSizeSmallerThan ) { + logger.info("Skipping writing of graph with kmersize " + graph.getKmerSize()); + continue; + } + + graph.printGraph(graphWriter, false, pruneFactor); + + if ( debugGraphTransformations ) + break; + } + + graphWriter.println("}"); + } + + // ----------------------------------------------------------------------------------------------- + // + // getter / setter routines for generic assembler properties + // + // ----------------------------------------------------------------------------------------------- public int getPruneFactor() { return pruneFactor; @@ -85,10 +445,6 @@ public abstract class LocalAssemblyEngine { this.errorCorrectKmers = errorCorrectKmers; } - public PrintStream getGraphWriter() { - return graphWriter; - } - public void 
setGraphWriter(PrintStream graphWriter) { this.graphWriter = graphWriter; } @@ -101,5 +457,35 @@ public abstract class LocalAssemblyEngine { this.minBaseQualityToUseInAssembly = minBaseQualityToUseInAssembly; } - public abstract List runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, List activeAllelesToGenotype); + public boolean isDebug() { + return debug; + } + + public void setDebug(boolean debug) { + this.debug = debug; + } + + public boolean isAllowCyclesInKmerGraphToGeneratePaths() { + return allowCyclesInKmerGraphToGeneratePaths; + } + + public void setAllowCyclesInKmerGraphToGeneratePaths(boolean allowCyclesInKmerGraphToGeneratePaths) { + this.allowCyclesInKmerGraphToGeneratePaths = allowCyclesInKmerGraphToGeneratePaths; + } + + public boolean isDebugGraphTransformations() { + return debugGraphTransformations; + } + + public void setDebugGraphTransformations(boolean debugGraphTransformations) { + this.debugGraphTransformations = debugGraphTransformations; + } + + public boolean isRecoverDanglingTails() { + return recoverDanglingTails; + } + + public void setRecoverDanglingTails(boolean recoverDanglingTails) { + this.recoverDanglingTails = recoverDanglingTails; + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseEdge.java index be5a431c4..a6ef0d1c2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseEdge.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseEdge.java @@ -76,12 +76,10 @@ public class BaseEdge { } /** - * Copy constructor - * - * @param toCopy + * Create a new copy of this BaseEdge */ - public BaseEdge(final BaseEdge toCopy) { - this(toCopy.isRef(), toCopy.getMultiplicity()); + public BaseEdge copy() { + return new BaseEdge(isRef(), 
getMultiplicity()); } /** @@ -92,6 +90,34 @@ public class BaseEdge { return multiplicity; } + /** + * Get the DOT format label for this edge, to be displayed when printing this edge to a DOT file + * @return a non-null string + */ + public String getDotLabel() { + return Integer.toString(getMultiplicity()); + } + + /** + * Increase the multiplicity of this edge by incr + * @param incr the change in this multiplicity, must be >= 0 + */ + public void incMultiplicity(final int incr) { + if ( incr < 0 ) throw new IllegalArgumentException("incr must be >= 0 but got " + incr); + multiplicity += incr; + } + + /** + * A special assessor that returns the multiplicity that should be used by pruning algorithm + * + * Can be overloaded by subclasses + * + * @return the multiplicity value that should be used for pruning + */ + public int getPruningMultiplicity() { + return getMultiplicity(); + } + /** * Set the multiplicity of this edge to value * @param value an integer >= 0 @@ -117,23 +143,6 @@ public class BaseEdge { this.isRef = isRef; } - /** - * Does this and edge have the same source and target vertices in graph? - * - * @param graph the graph containing both this and edge - * @param edge our comparator edge - * @param - * @return true if we have the same source and target vertices - */ - public boolean hasSameSourceAndTarget(final BaseGraph graph, final BaseEdge edge) { - return (graph.getEdgeSource(this).equals(graph.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph.getEdgeTarget(edge))); - } - - // For use when comparing edges across graphs! 
- public boolean seqEquals( final BaseGraph graph, final BaseEdge edge, final BaseGraph graph2 ) { - return (graph.getEdgeSource(this).seqEquals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).seqEquals(graph2.getEdgeTarget(edge))); - } - /** * Sorts a collection of BaseEdges in decreasing order of weight, so that the most * heavily weighted is at the start of the list @@ -187,4 +196,12 @@ public class BaseEdge { if ( edge == null ) throw new IllegalArgumentException("edge cannot be null"); return new BaseEdge(isRef() || edge.isRef(), Math.max(getMultiplicity(), edge.getMultiplicity())); } + + @Override + public String toString() { + return "BaseEdge{" + + "multiplicity=" + multiplicity + + ", isRef=" + isRef + + '}'; + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java index 7ce57e2e7..8938af7c2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java @@ -66,34 +66,16 @@ import java.util.*; * Date: 2/6/13 */ @Invariant("!this.isAllowingMultipleEdges()") -public class BaseGraph extends DefaultDirectedGraph { +public class BaseGraph extends DefaultDirectedGraph { protected final static Logger logger = Logger.getLogger(BaseGraph.class); private final int kmerSize; - /** - * Construct an empty BaseGraph - */ - public BaseGraph() { - this(11); - } - - /** - * Edge factory that creates non-reference multiplicity 1 edges - * @param the new of our vertices - */ - private static class MyEdgeFactory implements EdgeFactory { - @Override - public BaseEdge createEdge(T sourceVertex, T targetVertex) { - return new BaseEdge(false, 1); - } - } - /** * Construct a DeBruijnGraph with kmerSize * @param kmerSize */ - public BaseGraph(final int kmerSize) { - super(new 
MyEdgeFactory()); + public BaseGraph(final int kmerSize, final EdgeFactory edgeFactory) { + super(edgeFactory); if ( kmerSize < 1 ) throw new IllegalArgumentException("kmerSize must be >= 1 but got " + kmerSize); this.kmerSize = kmerSize; @@ -111,7 +93,7 @@ public class BaseGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph getSources() { - final Set set = new LinkedHashSet(); - for ( final T v : vertexSet() ) + public Set getSources() { + final Set set = new LinkedHashSet(); + for ( final V v : vertexSet() ) if ( isSource(v) ) set.add(v); return set; @@ -153,9 +135,9 @@ public class BaseGraph extends DefaultDirectedGraph getSinks() { - final Set set = new LinkedHashSet(); - for ( final T v : vertexSet() ) + public Set getSinks() { + final Set set = new LinkedHashSet(); + for ( final V v : vertexSet() ) if ( isSink(v) ) set.add(v); return set; @@ -167,7 +149,7 @@ public class BaseGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph vertices) { - for ( final T v : vertices ) + public void addVertices(final Collection vertices) { + for ( final V v : vertices ) addVertex(v); } @@ -349,8 +341,12 @@ public class BaseGraph extends DefaultDirectedGraph extends DefaultDirectedGraph extends DefaultDirectedGraph outgoingVerticesOf(final T v) { - final Set s = new LinkedHashSet(); - for ( final BaseEdge e : outgoingEdgesOf(v) ) { + public Set outgoingVerticesOf(final V v) { + final Set s = new LinkedHashSet(); + for ( final E e : outgoingEdgesOf(v) ) { s.add(getEdgeTarget(e)); } return s; @@ -384,9 +380,9 @@ public class BaseGraph extends DefaultDirectedGraph v */ - public Set incomingVerticesOf(final T v) { - final Set s = new 
LinkedHashSet(); - for ( final BaseEdge e : incomingEdgesOf(v) ) { + public Set incomingVerticesOf(final V v) { + final Set s = new LinkedHashSet(); + for ( final E e : incomingEdgesOf(v) ) { s.add(getEdgeSource(e)); } return s; @@ -413,15 +409,16 @@ public class BaseGraph extends DefaultDirectedGraph " + getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() > 0 && edge.getMultiplicity() <= pruneFactor ? "style=dotted,color=grey," : "") + "label=\"" + edge.getMultiplicity() + "\"];"); + for( final E edge : edgeSet() ) { + graphWriter.println("\t" + getEdgeSource(edge).toString() + " -> " + getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() > 0 && edge.getMultiplicity() <= pruneFactor ? "style=dotted,color=grey," : "") + "label=\"" + edge.getDotLabel() + "\"];"); if( edge.isRef() ) { graphWriter.println("\t" + getEdgeSource(edge).toString() + " -> " + getEdgeTarget(edge).toString() + " [color=red];"); } } - for( final T v : vertexSet() ) { - graphWriter.println("\t" + v.toString() + " [label=\"" + new String(getAdditionalSequence(v)) + "\",shape=box]"); + for( final V v : vertexSet() ) { +// graphWriter.println("\t" + v.toString() + " [label=\"" + v + "\",shape=box]"); + graphWriter.println("\t" + v.toString() + " [label=\"" + new String(getAdditionalSequence(v)) + v.additionalInfo() + "\",shape=box]"); } if ( writeHeader ) @@ -439,10 +436,10 @@ public class BaseGraph extends DefaultDirectedGraph edgesToCheck = new HashSet(); + final Set edgesToCheck = new HashSet(); edgesToCheck.addAll(incomingEdgesOf(getReferenceSourceVertex())); while( !edgesToCheck.isEmpty() ) { - final BaseEdge e = edgesToCheck.iterator().next(); + final E e = edgesToCheck.iterator().next(); if( !e.isRef() ) { edgesToCheck.addAll( incomingEdgesOf(getEdgeSource(e)) ); removeEdge(e); @@ -452,7 +449,7 @@ public class BaseGraph extends DefaultDirectedGraph extends DefaultDirectedGraph edgesToRemove = new ArrayList(); - for( final BaseEdge e : edgeSet() ) { - if( 
e.getMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor + final List edgesToRemove = new ArrayList<>(); + for( final E e : edgeSet() ) { + if( e.getPruningMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor edgesToRemove.add(e); } } @@ -480,13 +477,25 @@ public class BaseGraph extends DefaultDirectedGraph pruner = new LowWeightChainPruner<>(pruneFactor); + pruner.pruneLowWeightChains(this); + } + /** * Remove all vertices in the graph that have in and out degree of 0 */ protected void removeSingletonOrphanVertices() { // Run through the graph and clean up singular orphaned nodes - final List verticesToRemove = new LinkedList(); - for( final T v : vertexSet() ) { + final List verticesToRemove = new LinkedList<>(); + for( final V v : vertexSet() ) { if( inDegreeOf(v) == 0 && outDegreeOf(v) == 0 ) { verticesToRemove.add(v); } @@ -499,11 +508,11 @@ public class BaseGraph extends DefaultDirectedGraph toRemove = new HashSet(vertexSet()); + final HashSet toRemove = new HashSet<>(vertexSet()); - final T refV = getReferenceSourceVertex(); + final V refV = getReferenceSourceVertex(); if ( refV != null ) { - for ( final T v : new BaseGraphIterator(this, refV, true, true) ) { + for ( final V v : new BaseGraphIterator<>(this, refV, true, true) ) { toRemove.remove(v); } } @@ -524,22 +533,31 @@ public class BaseGraph extends DefaultDirectedGraph onPathFromRefSource = new HashSet(vertexSet().size()); - for ( final T v : new BaseGraphIterator(this, getReferenceSourceVertex(), false, true) ) { + final Set onPathFromRefSource = new HashSet<>(vertexSet().size()); + for ( final V v : new BaseGraphIterator<>(this, getReferenceSourceVertex(), false, true) ) { onPathFromRefSource.add(v); } // get the set of vertices we can reach by going backward from the ref sink - final Set onPathFromRefSink = new HashSet(vertexSet().size()); - 
for ( final T v : new BaseGraphIterator(this, getReferenceSinkVertex(), true, false) ) { + final Set onPathFromRefSink = new HashSet<>(vertexSet().size()); + for ( final V v : new BaseGraphIterator<>(this, getReferenceSinkVertex(), true, false) ) { onPathFromRefSink.add(v); } // we want to remove anything that's not in both the sink and source sets - final Set verticesToRemove = new HashSet(vertexSet()); + final Set verticesToRemove = new HashSet<>(vertexSet()); onPathFromRefSource.retainAll(onPathFromRefSink); verticesToRemove.removeAll(onPathFromRefSource); removeAllVertices(verticesToRemove); + + // simple santity checks that this algorithm is working. + if ( getSinks().size() > 1 ) { + throw new IllegalStateException("Should have eliminated all but the reference sink, but found " + getSinks()); + } + + if ( getSources().size() > 1 ) { + throw new IllegalStateException("Should have eliminated all but the reference source, but found " + getSources()); + } } /** @@ -555,11 +573,11 @@ public class BaseGraph extends DefaultDirectedGraph the type of the nodes in those graphs * @return true if g1 and g2 are equals */ - public static boolean graphEquals(final BaseGraph g1, BaseGraph g2) { + public static boolean graphEquals(final BaseGraph g1, BaseGraph g2) { final Set vertices1 = g1.vertexSet(); final Set vertices2 = g2.vertexSet(); - final Set edges1 = g1.edgeSet(); - final Set edges2 = g2.edgeSet(); + final Set edges1 = g1.edgeSet(); + final Set edges2 = g2.edgeSet(); if ( vertices1.size() != vertices2.size() || edges1.size() != edges2.size() ) return false; @@ -571,29 +589,35 @@ public class BaseGraph extends DefaultDirectedGraph graph2 ) { + return (this.getEdgeSource(edge1).seqEquals(graph2.getEdgeSource(edge2))) && (this.getEdgeTarget(edge1).seqEquals(graph2.getEdgeTarget(edge2))); + } + + /** * Get the incoming edge of v. 
Requires that there be only one such edge or throws an error * @param v our vertex * @return the single incoming edge to v, or null if none exists */ - public BaseEdge incomingEdgeOf(final T v) { + public E incomingEdgeOf(final V v) { return getSingletonEdge(incomingEdgesOf(v)); } @@ -602,7 +626,7 @@ public class BaseGraph extends DefaultDirectedGraph extends DefaultDirectedGraph edges) { + private E getSingletonEdge(final Collection edges) { if ( edges.size() > 1 ) throw new IllegalArgumentException("Cannot get a single incoming edge for a vertex with multiple incoming edges " + edges); return edges.isEmpty() ? null : edges.iterator().next(); } @@ -625,12 +649,19 @@ public class BaseGraph extends DefaultDirectedGraph { +public final class DeBruijnGraph extends BaseGraph { + /** + * Edge factory that creates non-reference multiplicity 1 edges + */ + private static class MyEdgeFactory implements EdgeFactory { + @Override + public BaseEdge createEdge(DeBruijnVertex sourceVertex, DeBruijnVertex targetVertex) { + return new BaseEdge(false, 1); + } + } + /** * Create an empty DeBruijnGraph with default kmer size */ public DeBruijnGraph() { - super(); + this(11); } /** @@ -71,7 +82,7 @@ public final class DeBruijnGraph extends BaseGraph { * @param kmerSize kmer size, must be >= 1 */ public DeBruijnGraph(int kmerSize) { - super(kmerSize); + super(kmerSize, new MyEdgeFactory()); } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeBruijnVertex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeBruijnVertex.java index c240949d9..4d9441efe 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeBruijnVertex.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeBruijnVertex.java @@ -54,7 +54,7 @@ import com.google.java.contract.Ensures; * User: ebanks, mdepristo * Date: Mar 23, 2011 */ -public final class 
DeBruijnVertex extends BaseVertex { +public class DeBruijnVertex extends BaseVertex { private final static byte[][] sufficesAsByteArray = new byte[256][]; static { for ( int i = 0; i < sufficesAsByteArray.length; i++ ) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtils.java index 30c5be190..4aa6047a9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtils.java @@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.collections.PrimitivePair; import java.util.ArrayList; import java.util.Collection; @@ -60,7 +61,7 @@ import java.util.List; * Date: 3/25/13 * Time: 9:42 PM */ -final class GraphUtils { +final public class GraphUtils { private GraphUtils() {} /** @@ -135,4 +136,49 @@ final class GraphUtils { return min; } + /** + * Find the ending position of the longest uniquely matching + * run of bases of kmer in seq. + * + * for example, if seq = ACGT and kmer is NAC, this function returns 1,2 as we have the following + * match: + * + * 0123 + * .ACGT + * NAC.. 
+ * + * @param seq a non-null sequence of bytes + * @param kmer a non-null kmer + * @return the ending position and length where kmer matches uniquely in sequence, or null if no + * unique longest match can be found + */ + public static PrimitivePair.Int findLongestUniqueSuffixMatch(final byte[] seq, final byte[] kmer) { + int longestPos = -1; + int length = 0; + boolean foundDup = false; + + for ( int i = 0; i < seq.length; i++ ) { + final int matchSize = longestSuffixMatch(seq, kmer, i); + if ( matchSize > length ) { + longestPos = i; + length = matchSize; + foundDup = false; + } else if ( matchSize == length ) { + foundDup = true; + } + } + + return foundDup ? null : new PrimitivePair.Int(longestPos, length); + } + + private static int longestSuffixMatch(final byte[] seq, final byte[] kmer, final int seqStart) { + for ( int len = 1; len <= kmer.length; len++ ) { + final int seqI = seqStart - len + 1; + final int kmerI = kmer.length - len; + if ( seqI < 0 || seq[seqI] != kmer[kmerI] ) { + return len - 1; + } + } + return kmer.length; + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPaths.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPaths.java index 466148588..3ba85dd92 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPaths.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPaths.java @@ -59,7 +59,7 @@ import java.util.*; * User: ebanks, rpoplin, mdepristo * Date: Mar 23, 2011 */ -public class KBestPaths { +public class KBestPaths { private final boolean allowCycles; /** @@ -93,7 +93,7 @@ public class KBestPaths { /** * @see #getKBestPaths(BaseGraph, int) retriving the best 1000 paths */ - public List> getKBestPaths( final BaseGraph graph ) { + public List> getKBestPaths( final BaseGraph graph ) { return getKBestPaths(graph, 1000); } @@ -101,28 +101,28 @@ public class 
KBestPaths { * @see #getKBestPaths(BaseGraph, int, java.util.Set, java.util.Set) retriving the first 1000 paths * starting from all source vertices and ending with all sink vertices */ - public List> getKBestPaths( final BaseGraph graph, final int k ) { + public List> getKBestPaths( final BaseGraph graph, final int k ) { return getKBestPaths(graph, k, graph.getSources(), graph.getSinks()); } /** * @see #getKBestPaths(BaseGraph, int, java.util.Set, java.util.Set) with k=1000 */ - public List> getKBestPaths( final BaseGraph graph, final Set sources, final Set sinks ) { + public List> getKBestPaths( final BaseGraph graph, final Set sources, final Set sinks ) { return getKBestPaths(graph, 1000, sources, sinks); } /** * @see #getKBestPaths(BaseGraph, int, java.util.Set, java.util.Set) with k=1000 */ - public List> getKBestPaths( final BaseGraph graph, final T source, final T sink ) { + public List> getKBestPaths( final BaseGraph graph, final T source, final T sink ) { return getKBestPaths(graph, 1000, source, sink); } /** * @see #getKBestPaths(BaseGraph, int, java.util.Set, java.util.Set) with singleton source and sink sets */ - public List> getKBestPaths( final BaseGraph graph, final int k, final T source, final T sink ) { + public List> getKBestPaths( final BaseGraph graph, final int k, final T source, final T sink ) { return getKBestPaths(graph, k, Collections.singleton(source), Collections.singleton(sink)); } @@ -136,20 +136,20 @@ public class KBestPaths { * @return a list with at most k top-scoring paths from the graph */ @Ensures({"result != null", "result.size() <= k"}) - public List> getKBestPaths( final BaseGraph graph, final int k, final Set sources, final Set sinks ) { + public List> getKBestPaths( final BaseGraph graph, final int k, final Set sources, final Set sinks ) { if( graph == null ) { throw new IllegalArgumentException("Attempting to traverse a null graph."); } // a min max queue that will collect the best k paths - final MinMaxPriorityQueue> 
bestPaths = MinMaxPriorityQueue.orderedBy(new PathComparatorTotalScore()).maximumSize(k).create(); + final MinMaxPriorityQueue> bestPaths = MinMaxPriorityQueue.orderedBy(new PathComparatorTotalScore()).maximumSize(k).create(); // run a DFS for best paths for ( final T source : sources ) { - final Path startingPath = new Path(source, graph); + final Path startingPath = new Path(source, graph); findBestPaths(startingPath, sinks, bestPaths, new MyInt()); } // the MinMaxPriorityQueue iterator returns items in an arbitrary order, so we need to sort the final result - final List> toReturn = new ArrayList>(bestPaths); + final List> toReturn = new ArrayList>(bestPaths); Collections.sort(toReturn, new PathComparatorTotalScore()); return toReturn; } @@ -161,21 +161,21 @@ public class KBestPaths { * @param bestPaths a path to collect completed paths. * @param n used to limit the search by tracking the number of vertices visited across all paths */ - private void findBestPaths( final Path path, final Set sinks, final Collection> bestPaths, final MyInt n ) { + private void findBestPaths( final Path path, final Set sinks, final Collection> bestPaths, final MyInt n ) { if ( sinks.contains(path.getLastVertex())) { bestPaths.add(path); } else if( n.val > 10000 ) { // do nothing, just return, as we've done too much work already } else { // recursively run DFS - final ArrayList edgeArrayList = new ArrayList(path.getOutgoingEdgesOfLastVertex()); + final ArrayList edgeArrayList = new ArrayList(path.getOutgoingEdgesOfLastVertex()); Collections.sort(edgeArrayList, new BaseEdge.EdgeWeightComparator()); - for ( final BaseEdge edge : edgeArrayList ) { + for ( final E edge : edgeArrayList ) { final T target = path.getGraph().getEdgeTarget(edge); // make sure the edge is not already in the path final boolean alreadyVisited = allowCycles ? path.containsEdge(edge) : path.containsVertex(target); if ( ! 
alreadyVisited ) { - final Path newPath = new Path(path, edge); + final Path newPath = new Path(path, edge); n.val++; findBestPaths(newPath, sinks, bestPaths, n); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java new file mode 100644 index 000000000..7327b5736 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java @@ -0,0 +1,170 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. 
LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; + +import java.util.*; + +/** + * Removes low-weight linear chains from a graph. + * Prune all chains from this graph where all edges in the path are non-reference and have pruning multiplicity < pruneFactor + * + * Unlike pruneGraph, this function will remove only linear chains in the graph where all edges have weight < pruneFactor.
+ * + * For A -[1]> B -[1]> C -[1]> D would be removed with pruneFactor 2 + * but A -[1]> B -[2]> C -[1]> D would not be because the linear chain includes an edge with weight >= 2 + * + * User: depristo + * Date: 5/2/13 + * Time: 10:38 AM + */ +public class LowWeightChainPruner { + private final int pruneFactor; + + public LowWeightChainPruner(int pruneFactor) { + if ( pruneFactor < 0 ) throw new IllegalArgumentException("pruneFactor must be >= 0 but got " + pruneFactor); + this.pruneFactor = pruneFactor; + } + + /** + * Remove from graph the edges of every linear chain that mustBeKeep rejects, then drop singleton orphan vertices; does nothing when pruneFactor is 0 + * @param graph the graph to prune, must not be null + */ + public void pruneLowWeightChains(final BaseGraph graph) { + if ( graph == null ) throw new IllegalArgumentException("Graph cannot be null"); + + if ( pruneFactor > 0 ) { + final Set edgesToKeep = new LinkedHashSet<>(); + + for ( final Path linearChain : getLinearChains(graph) ) { + if( mustBeKeep(linearChain, pruneFactor) ) { + // we must keep edges in any path that contains a reference edge or an edge with weight >= pruneFactor + edgesToKeep.addAll(linearChain.getEdges()); + } + } + + // we want to remove all edges not in the keep set + final Set edgesToRemove = new HashSet<>(graph.edgeSet()); + edgesToRemove.removeAll(edgesToKeep); + graph.removeAllEdges(edgesToRemove); + + graph.removeSingletonOrphanVertices(); + } + } + + /** + * Decide whether the edges of path must be kept: true when any edge is a reference edge or has pruning multiplicity >= pruneFactor + * @return true if the chain must be kept, false if all of its edges may be pruned + */ + private boolean mustBeKeep(final Path path, final int pruneFactor) { + for ( final E edge : path.getEdges() ) { + if ( edge.getPruningMultiplicity() >= pruneFactor || edge.isRef() ) + return true; + } + return false; + } + + /** + * Get all of the linear chains in graph + * + * A linear chain is a series of vertices that start from either a source or a vertex with + * in-degree or out-degree > 1 and extend through all vertices accessible via an outgoing edge from this + * vertex that have in == 1 and out degree of 0 or 1.
+ * + * @param graph the graph + * @return a non-null collection of paths in graph + */ + protected final Collection> getLinearChains(final BaseGraph graph) { + final Set chainStarts = new LinkedHashSet<>(); + + for ( final V v : graph.vertexSet() ) { + // we want a list of all chain start vertices. These are all vertices with in or out + // degree > 1, or all source vertices. + final int outDegree = graph.outDegreeOf(v); + final int inDegree = graph.inDegreeOf(v); + if ( outDegree > 1 || inDegree > 1 || (inDegree == 0 && outDegree > 0)) // don't add isolated vertices + chainStarts.add(v); + } + + // must be after since we can add duplicate starts in the above finding algorithm + final List> linearChains = new LinkedList<>(); + for ( final V chainStart : chainStarts ) { + for ( final E outEdge : graph.outgoingEdgesOf(chainStart) ) { + // these chains are composed of the starts + their next vertices + linearChains.add(extendLinearChain(new Path<>(new Path<>(chainStart, graph), outEdge))); + } + } + + return linearChains; + } + + /** + * Extend path while the last vertex has exactly one outgoing edge and at most one incoming edge, stopping before a vertex already in the path + * @param path the path to extend + * @return a fully extended linear path + */ + protected final Path extendLinearChain(final Path path) { + final V last = path.getLastVertex(); + final Set outEdges = path.getGraph().outgoingEdgesOf(last); + + final int outDegree = outEdges.size(); + final int inDegree = path.getGraph().inDegreeOf(last); + + if ( outDegree != 1 || inDegree > 1 ) { + // the last vertex does not have exactly one outgoing edge, or has several incoming edges, so we are done with the linear path + return path; + } else { + final V next = path.getGraph().getEdgeTarget(outEdges.iterator().next()); + if ( path.containsVertex(next) ) { + // we are done if the path contains a cycle + return path; + } else { + // we now know that last has outdegree == 1, so we keep extending the chain + return extendLinearChain(new Path<>(path, outEdges.iterator().next())); + } + } + } +} diff --git
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java new file mode 100644 index 000000000..c1937e5c8 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java @@ -0,0 +1,123 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. 
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; + +/** + * edge class for connecting nodes in the graph that tracks some per-sample information + * + * This class extends BaseEdge with the additional functionality of tracking the maximum + * multiplicity seen within any single sample. 
The workflow for using this class is: + * + * MultiSampleEdge e = new MultiSampleEdge(ref, 1) + * e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1 + * e.getPruningMultiplicity() // = 1 + * e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2 + * e.getPruningMultiplicity() // = 2 + * e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2 + * e.getPruningMultiplicity() // = 2 + * e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3 + * e.getPruningMultiplicity() // = 3 + */ +public class MultiSampleEdge extends BaseEdge { + private int maxSingleSampleMultiplicity, currentSingleSampleMultiplicity; + + /** + * Create a new MultiSampleEdge with weight multiplicity and, if isRef == true, indicates a path through the reference + * + * @param isRef indicates whether this edge is a path through the reference + * @param multiplicity the number of observations of this edge in this sample; also the initial value of both the max and the current single sample multiplicity + */ + public MultiSampleEdge(final boolean isRef, final int multiplicity) { + super(isRef, multiplicity); + maxSingleSampleMultiplicity = multiplicity; + currentSingleSampleMultiplicity = multiplicity; + } + + @Override + public MultiSampleEdge copy() { + return new MultiSampleEdge(isRef(), getMultiplicity()); // TODO -- should I copy values for other features? NOTE: the copy's max and current single sample multiplicities are both set by the constructor to getMultiplicity(), not copied from this edge + } + + /** + * update the max single sample multiplicity based on the current single sample multiplicity, and + * reset the current single sample multiplicity to 0.
+ */ + public void flushSingleSampleMultiplicity() { + if ( currentSingleSampleMultiplicity > maxSingleSampleMultiplicity ) + maxSingleSampleMultiplicity = currentSingleSampleMultiplicity; + currentSingleSampleMultiplicity = 0; + } + + @Override + public void incMultiplicity(final int incr) { + super.incMultiplicity(incr); + currentSingleSampleMultiplicity += incr; + } + + @Override + public int getPruningMultiplicity() { + return getMaxSingleSampleMultiplicity(); + } + + @Override + public String getDotLabel() { + return super.getDotLabel() + "/" + getMaxSingleSampleMultiplicity(); + } + + /** + * Get the maximum multiplicity for this edge seen in any single sample + * @return an integer >= 0 + */ + public int getMaxSingleSampleMultiplicity() { + return maxSingleSampleMultiplicity; + } + + /** only provided for testing purposes */ + protected int getCurrentSingleSampleMultiplicity() { + return currentSingleSampleMultiplicity; + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/Path.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/Path.java index 47676a498..a07b98bb6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/Path.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/Path.java @@ -52,8 +52,8 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.apache.commons.lang.ArrayUtils; -import org.broadinstitute.sting.utils.smithwaterman.Parameters; -import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.smithwaterman.*; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import java.util.*; @@ -68,40 +68,39 @@ import java.util.*; * Time: 2:34 PM * */ -public class Path { - private final static int MAX_CIGAR_ELEMENTS_BEFORE_FAILING_SW = 20; 
+public class Path { + private final static String SW_PAD = "NNNNNNNNNN"; + private final static Logger logger = Logger.getLogger(Path.class); // the last vertex seen in the path private final T lastVertex; // the list of edges comprising the path - private Set edgesAsSet = null; - private final LinkedList edgesInOrder; + private Set edgesAsSet = null; + private final LinkedList edgesInOrder; // the scores for the path private final int totalScore; // the graph from which this path originated - private final BaseGraph graph; + private final BaseGraph graph; // used in the bubble state machine to apply Smith-Waterman to the bubble sequence // these values were chosen via optimization against the NA12878 knowledge base public static final Parameters NEW_SW_PARAMETERS = new Parameters(20.0, -15.0, -26.0, -1.1); - private static final byte[] STARTING_SW_ANCHOR_BYTES = "XXXXXXXXX".getBytes(); - /** * Create a new Path containing no edges and starting at initialVertex * @param initialVertex the starting vertex of the path * @param graph the graph this path with follow through */ - public Path(final T initialVertex, final BaseGraph graph) { + public Path(final T initialVertex, final BaseGraph graph) { if ( initialVertex == null ) throw new IllegalArgumentException("initialVertex cannot be null"); if ( graph == null ) throw new IllegalArgumentException("graph cannot be null"); if ( ! 
graph.containsVertex(initialVertex) ) throw new IllegalArgumentException("Vertex " + initialVertex + " must be part of graph " + graph); lastVertex = initialVertex; - edgesInOrder = new LinkedList(); + edgesInOrder = new LinkedList(); totalScore = 0; this.graph = graph; } @@ -109,10 +108,10 @@ public class Path { /** * Convenience constructor for testing that creates a path through vertices in graph */ - protected static Path makePath(final List vertices, final BaseGraph graph) { - Path path = new Path(vertices.get(0), graph); + protected static Path makePath(final List vertices, final BaseGraph graph) { + Path path = new Path(vertices.get(0), graph); for ( int i = 1; i < vertices.size(); i++ ) - path = new Path(path, graph.getEdge(path.lastVertex, vertices.get(i))); + path = new Path(path, graph.getEdge(path.lastVertex, vertices.get(i))); return path; } @@ -122,7 +121,7 @@ public class Path { * @param p the path to extend * @param edge the edge to extend path by */ - public Path(final Path p, final BaseEdge edge) { + public Path(final Path p, final E edge) { if ( p == null ) throw new IllegalArgumentException("Path cannot be null"); if ( edge == null ) throw new IllegalArgumentException("Edge cannot be null"); if ( ! 
p.graph.containsEdge(edge) ) throw new IllegalArgumentException("Graph must contain edge " + edge + " but it doesn't"); @@ -130,7 +129,7 @@ public class Path { graph = p.graph; lastVertex = p.graph.getEdgeTarget(edge); - edgesInOrder = new LinkedList(p.getEdges()); + edgesInOrder = new LinkedList(p.getEdges()); edgesInOrder.add(edge); totalScore = p.totalScore + edge.getMultiplicity(); } @@ -139,7 +138,7 @@ public class Path { * Get the collection of edges leaving the last vertex of this path * @return a non-null collection */ - public Collection getOutgoingEdgesOfLastVertex() { + public Collection getOutgoingEdgesOfLastVertex() { return getGraph().outgoingEdgesOf(getLastVertex()); } @@ -148,12 +147,12 @@ public class Path { * @param edge the given edge to test * @return true if the edge is found in this path */ - public boolean containsEdge( final BaseEdge edge ) { + public boolean containsEdge( final E edge ) { if( edge == null ) { throw new IllegalArgumentException("Attempting to test null edge."); } if ( edgesInOrder.isEmpty() ) return false; // initialize contains cache if necessary - if ( edgesAsSet == null ) edgesAsSet = new HashSet(edgesInOrder); + if ( edgesAsSet == null ) edgesAsSet = new HashSet(edgesInOrder); return edgesAsSet.contains(edge); } @@ -175,7 +174,7 @@ public class Path { * @param path the other path we might be the same as * @return true if this and path are the same */ - protected boolean pathsAreTheSame(Path path) { + protected boolean pathsAreTheSame(Path path) { return totalScore == path.totalScore && edgesInOrder.equals(path.edgesInOrder); } @@ -199,7 +198,7 @@ public class Path { * @return a non-null graph */ @Ensures("result != null") - public BaseGraph getGraph() { + public BaseGraph getGraph() { return graph; } @@ -208,7 +207,7 @@ public class Path { * @return a non-null list of edges */ @Ensures("result != null") - public List getEdges() { return edgesInOrder; } + public List getEdges() { return edgesInOrder; } /** * Get the list 
of vertices in this path in order defined by the edges of the path @@ -221,7 +220,7 @@ public class Path { else { final LinkedList vertices = new LinkedList(); boolean first = true; - for ( final BaseEdge e : getEdges() ) { + for ( final E e : getEdges() ) { if ( first ) { vertices.add(graph.getEdgeSource(e)); first = false; @@ -246,6 +245,14 @@ public class Path { @Ensures("result != null") public T getLastVertex() { return lastVertex; } + /** + * Get the first vertex in this path without modifying the path; for a path with no edges this is the last vertex + * @return a non-null vertex + */ + public T getFirstVertex() { + return edgesInOrder.isEmpty() ? lastVertex : getGraph().getEdgeSource(edgesInOrder.getFirst()); + } + /** * The base sequence for this path. Pull the full sequence for source nodes and then the suffix for all subsequent nodes * @return non-null sequence of bases corresponding to this path @@ -255,174 +262,114 @@ public class Path { if( getEdges().isEmpty() ) { return graph.getAdditionalSequence(lastVertex); } byte[] bases = graph.getAdditionalSequence(graph.getEdgeSource(edgesInOrder.getFirst())); - for( final BaseEdge e : edgesInOrder ) { + for( final E e : edgesInOrder ) { bases = ArrayUtils.addAll(bases, graph.getAdditionalSequence(graph.getEdgeTarget(e))); } return bases; } /** - * Calculate the cigar string for this path using a bubble traversal of the assembly graph and running a Smith-Waterman alignment on each bubble - * @return non-null Cigar string with reference length equal to the refHaplotype's reference length + * Calculate the cigar elements for this path against the reference sequence + * + * @param refSeq the reference sequence that all of the bases in this path should align to + * @return a Cigar mapping this path to refSeq, or null if no reasonable alignment could be found */ - @Ensures("result != null") - public Cigar calculateCigar() { - final Cigar cigar = new Cigar(); - // special case for paths that start on reference but not at the reference source node - if( edgesInOrder.getFirst().isRef() &&
!graph.isRefSource(edgesInOrder.getFirst()) ) { - for( final CigarElement ce : calculateCigarForCompleteBubble(null, null, graph.getEdgeSource(edgesInOrder.getFirst())).getCigarElements() ) { - cigar.add(ce); - } + public Cigar calculateCigar(final byte[] refSeq) { + if ( getBases().length == 0 ) { + // horrible edge case from the unit tests, where this path has no bases + return new Cigar(Arrays.asList(new CigarElement(refSeq.length, CigarOperator.D))); } - // reset the bubble state machine - final BubbleStateMachine bsm = new BubbleStateMachine(cigar); + final byte[] bases = getBases(); + final Cigar nonStandard; - for( final BaseEdge e : getEdges() ) { - if ( e.hasSameSourceAndTarget(graph, edgesInOrder.getFirst()) ) { - advanceBubbleStateMachine( bsm, graph.getEdgeSource(e), null ); - } - advanceBubbleStateMachine( bsm, graph.getEdgeTarget(e), e ); + final String paddedRef = SW_PAD + new String(refSeq) + SW_PAD; + final String paddedPath = SW_PAD + new String(bases) + SW_PAD; + final SmithWaterman alignment = new SWPairwiseAlignment( paddedRef.getBytes(), paddedPath.getBytes(), NEW_SW_PARAMETERS ); + + if ( isSWFailure(alignment) ) + return null; + + // cut off the padding bases + final int baseStart = SW_PAD.length(); + final int baseEnd = paddedPath.length() - SW_PAD.length() - 1; // -1 because it's inclusive + nonStandard = AlignmentUtils.trimCigarByBases(alignment.getCigar(), baseStart, baseEnd); + + if ( nonStandard.getReferenceLength() != refSeq.length ) { + nonStandard.add(new CigarElement(refSeq.length - nonStandard.getReferenceLength(), CigarOperator.D)); } - // special case for paths that don't end on reference - if( bsm.inBubble ) { - for( final CigarElement ce : calculateCigarForCompleteBubble(bsm.bubbleBytes, bsm.lastSeenReferenceNode, null).getCigarElements() ) { - bsm.cigar.add(ce); - } - } else if( edgesInOrder.getLast().isRef() && !graph.isRefSink(edgesInOrder.getLast()) ) { // special case for paths that end of the reference but haven't 
completed the entire reference circuit - for( final CigarElement ce : calculateCigarForCompleteBubble(bsm.bubbleBytes, graph.getEdgeTarget(edgesInOrder.getLast()), null).getCigarElements() ) { - bsm.cigar.add(ce); - } - } - - return AlignmentUtils.consolidateCigar(bsm.cigar); + // finally, return the cigar with all indels left aligned + return leftAlignCigarSequentially(nonStandard, refSeq, getBases(), 0, 0); } /** - * Advance the bubble state machine by incorporating the next node in the path. - * @param bsm the current bubble state machine - * @param node the node to be incorporated - * @param e the edge which generated this node in the path + * Make sure that the SW didn't fail in some terrible way, and throw exception if it did */ - @Requires({"bsm != null", "graph != null", "node != null"}) - private void advanceBubbleStateMachine( final BubbleStateMachine bsm, final T node, final BaseEdge e ) { - if( graph.isReferenceNode( node ) ) { - if( !bsm.inBubble ) { // just add the ref bases as M's in the Cigar string, and don't do anything else - if( e !=null && !e.isRef() ) { - if( graph.referencePathExists( graph.getEdgeSource(e), node) ) { - for( final CigarElement ce : calculateCigarForCompleteBubble(null, graph.getEdgeSource(e), node).getCigarElements() ) { - bsm.cigar.add(ce); - } - bsm.cigar.add( new CigarElement( graph.getAdditionalSequence(node).length, CigarOperator.M) ); - } else if ( graph.getEdgeSource(e).equals(graph.getEdgeTarget(e)) ) { // alt edge at ref node points to itself - bsm.cigar.add( new CigarElement( graph.getAdditionalSequence(node).length, CigarOperator.I) ); - } else { - bsm.inBubble = true; - bsm.bubbleBytes = null; - bsm.lastSeenReferenceNode = graph.getEdgeSource(e); - bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, graph.getAdditionalSequence(node) ); - } - } else { - bsm.cigar.add( new CigarElement( graph.getAdditionalSequence(node).length, CigarOperator.M) ); - } - } else if( bsm.lastSeenReferenceNode != null && 
!graph.referencePathExists( bsm.lastSeenReferenceNode, node ) ) { // add bases to the bubble string until we get back to the reference path - bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, graph.getAdditionalSequence(node) ); - } else { // close the bubble and use a local SW to determine the Cigar string - for( final CigarElement ce : calculateCigarForCompleteBubble(bsm.bubbleBytes, bsm.lastSeenReferenceNode, node).getCigarElements() ) { - bsm.cigar.add(ce); - } - bsm.inBubble = false; - bsm.bubbleBytes = null; - bsm.lastSeenReferenceNode = null; - bsm.cigar.add( new CigarElement( graph.getAdditionalSequence(node).length, CigarOperator.M) ); - } - } else { // non-ref vertex - if( bsm.inBubble ) { // just keep accumulating until we get back to the reference path - bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, graph.getAdditionalSequence(node) ); - } else { // open up a bubble - bsm.inBubble = true; - bsm.bubbleBytes = null; - bsm.lastSeenReferenceNode = (e != null ? graph.getEdgeSource(e) : null ); - bsm.bubbleBytes = ArrayUtils.addAll( bsm.bubbleBytes, graph.getAdditionalSequence(node) ); - } + private boolean isSWFailure(final SmithWaterman alignment) { + // check that the alignment starts at the first base, which it should given the padding + if ( alignment.getAlignmentStart2wrt1() > 0 ) { + return true; +// throw new IllegalStateException("SW failure ref " + paddedRef + " vs. " + paddedPath + " should always start at 0, but got " + alignment.getAlignmentStart2wrt1() + " with cigar " + alignment.getCigar()); } + + // check that we aren't getting any S operators (which would be very bad downstream) + for ( final CigarElement ce : alignment.getCigar().getCigarElements() ) { + if ( ce.getOperator() == CigarOperator.S ) + return true; + // soft clips at the end of the alignment are really insertions +// throw new IllegalStateException("SW failure ref " + paddedRef + " vs. 
" + paddedPath + " should never contain S operators but got cigar " + alignment.getCigar()); + } + + return false; } /** - * Now that we have a completed bubble run a Smith-Waterman alignment to determine the cigar string for this bubble - * @param bubbleBytes the bytes that comprise the alternate allele path in this bubble - * @param fromVertex the vertex that marks the beginning of the reference path in this bubble (null indicates ref source vertex) - * @param toVertex the vertex that marks the end of the reference path in this bubble (null indicates ref sink vertex) - * @return the cigar string generated by running a SW alignment between the reference and alternate paths in this bubble + * Left align the given cigar sequentially. This is needed because AlignmentUtils doesn't accept cigars with more than one indel in them. + * This is a target of future work to incorporate and generalize into AlignmentUtils for use by others. + * @param cigar the cigar to left align + * @param refSeq the reference byte array + * @param readSeq the read byte array + * @param refIndex 0-based alignment start position on ref + * @param readIndex 0-based alignment start position on read + * @return the left-aligned cigar */ - @Requires({"graph != null"}) - @Ensures({"result != null"}) - private Cigar calculateCigarForCompleteBubble( final byte[] bubbleBytes, final T fromVertex, final T toVertex ) { - final byte[] refBytes = graph.getReferenceBytes(fromVertex == null ? graph.getReferenceSourceVertex() : fromVertex, toVertex == null ? 
graph.getReferenceSinkVertex() : toVertex, fromVertex == null, toVertex == null); - - final Cigar returnCigar = new Cigar(); - - // add padding to anchor ref/alt bases in the SW matrix - byte[] padding = STARTING_SW_ANCHOR_BYTES; - boolean goodAlignment = false; - SWPairwiseAlignment swConsensus = null; - while( !goodAlignment && padding.length < 1000 ) { - padding = ArrayUtils.addAll(padding, padding); // double the size of the padding each time - final byte[] reference = ArrayUtils.addAll( ArrayUtils.addAll(padding, refBytes), padding ); - final byte[] alternate = ArrayUtils.addAll( ArrayUtils.addAll(padding, bubbleBytes), padding ); - swConsensus = new SWPairwiseAlignment( reference, alternate, NEW_SW_PARAMETERS ); - if( swConsensus.getAlignmentStart2wrt1() == 0 && !swConsensus.getCigar().toString().contains("S") && swConsensus.getCigar().getReferenceLength() == reference.length ) { - goodAlignment = true; + @Ensures({"cigar != null", "refSeq != null", "readSeq != null", "refIndex >= 0", "readIndex >= 0"}) + protected static Cigar leftAlignCigarSequentially(final Cigar cigar, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) { + final Cigar cigarToReturn = new Cigar(); + Cigar cigarToAlign = new Cigar(); + for (int i = 0; i < cigar.numCigarElements(); i++) { + final CigarElement ce = cigar.getCigarElement(i); + if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) { + cigarToAlign.add(ce); + final Cigar leftAligned = AlignmentUtils.leftAlignSingleIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false); + for ( final CigarElement toAdd : leftAligned.getCigarElements() ) { cigarToReturn.add(toAdd); } + refIndex += cigarToAlign.getReferenceLength(); + readIndex += cigarToAlign.getReadLength(); + cigarToAlign = new Cigar(); + } else { + cigarToAlign.add(ce); } } - if( !goodAlignment ) { - returnCigar.add(new CigarElement(1, CigarOperator.N)); - return returnCigar; - } - - final Cigar swCigar = 
swConsensus.getCigar(); - if( swCigar.numCigarElements() > MAX_CIGAR_ELEMENTS_BEFORE_FAILING_SW ) { // this bubble is too divergent from the reference - returnCigar.add(new CigarElement(1, CigarOperator.N)); - } else { - for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) { - // now we need to remove the padding from the cigar string - int length = swCigar.getCigarElement(iii).getLength(); - if( iii == 0 ) { length -= padding.length; } - if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; } - if( length > 0 ) { - returnCigar.add(new CigarElement(length, swCigar.getCigarElement(iii).getOperator())); - } - } - if( (refBytes == null && returnCigar.getReferenceLength() != 0) || ( refBytes != null && returnCigar.getReferenceLength() != refBytes.length ) ) { - throw new IllegalStateException("SmithWaterman cigar failure: " + (refBytes == null ? "-" : new String(refBytes)) + " against " + new String(bubbleBytes) + " = " + swConsensus.getCigar()); + if( !cigarToAlign.isEmpty() ) { + for( final CigarElement toAdd : cigarToAlign.getCigarElements() ) { + cigarToReturn.add(toAdd); } } - return returnCigar; + final Cigar result = AlignmentUtils.consolidateCigar(cigarToReturn); + if( result.getReferenceLength() != cigar.getReferenceLength() ) + throw new IllegalStateException("leftAlignCigarSequentially failed to produce a valid CIGAR. Reference lengths differ. 
Initial cigar " + cigar + " left aligned into " + result); + return result; } - // class to keep track of the bubble state machine - private static class BubbleStateMachine { - public boolean inBubble = false; - public byte[] bubbleBytes = null; - public T lastSeenReferenceNode = null; - public Cigar cigar = null; - - public BubbleStateMachine( final Cigar initialCigar ) { - inBubble = false; - bubbleBytes = null; - lastSeenReferenceNode = null; - cigar = initialCigar; - } - } /** * Tests that this and other have the same score and vertices in the same order with the same seq * @param other the other path to consider. Cannot be null * @return true if this and path are equal, false otherwise */ - public boolean equalScoreAndSequence(final Path other) { + public boolean equalScoreAndSequence(final Path other) { if ( other == null ) throw new IllegalArgumentException("other cannot be null"); return getScore() == other.getScore() && equalSequence(other); } @@ -432,7 +379,7 @@ public class Path { * @param other the other path to consider. 
Cannot be null * @return true if this and path are equal, false otherwise */ - public boolean equalSequence(final Path other) { + public boolean equalSequence(final Path other) { final List mine = getVertices(); final List yours = other.getVertices(); if ( mine.size() == yours.size() ) { // hehehe diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java index bb4b26257..20edcb39b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java @@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import org.jgrapht.EdgeFactory; import java.io.File; import java.util.HashSet; @@ -61,7 +62,17 @@ import java.util.Set; * @author: depristo * @since 03/2013 */ -public final class SeqGraph extends BaseGraph { +public final class SeqGraph extends BaseGraph { + /** + * Edge factory that creates non-reference multiplicity 1 edges + */ + private static class MyEdgeFactory implements EdgeFactory { + @Override + public BaseEdge createEdge(SeqVertex sourceVertex, SeqVertex targetVertex) { + return new BaseEdge(false, 1); + } + } + private final static boolean PRINT_SIMPLIFY_GRAPHS = false; /** @@ -82,7 +93,7 @@ public final class SeqGraph extends BaseGraph { * Construct an empty SeqGraph */ public SeqGraph() { - super(); + this(11); } /** @@ -94,7 +105,7 @@ public final class SeqGraph extends BaseGraph { * @param kmer kmer */ public SeqGraph(final int kmer) { - super(kmer); + super(kmer, new MyEdgeFactory()); } /** @@ -154,7 +165,6 @@ public final class SeqGraph extends BaseGraph { didSomeWork |= new MergeCommonSuffices().transformUntilComplete(); if ( PRINT_SIMPLIFY_GRAPHS ) 
printGraph(new File("simplifyGraph." + iteration + ".4.merge_suffix.dot"), 0); - didSomeWork |= new MergeHeadlessIncomingSources().transformUntilComplete(); didSomeWork |= zipLinearChains(); return didSomeWork; } @@ -289,8 +299,8 @@ public final class SeqGraph extends BaseGraph { final BaseEdge inc = new BaseEdge(false, sharedWeightAmongEdges); // template to make .add function call easy // update the incoming and outgoing edges to point to the new vertex - for( final BaseEdge edge : outEdges ) { addEdge(addedVertex, getEdgeTarget(edge), new BaseEdge(edge).add(inc)); } - for( final BaseEdge edge : inEdges ) { addEdge(getEdgeSource(edge), addedVertex, new BaseEdge(edge).add(inc)); } + for( final BaseEdge edge : outEdges ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy().add(inc)); } + for( final BaseEdge edge : inEdges ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy().add(inc)); } removeAllVertices(linearChain); return true; @@ -505,40 +515,4 @@ public final class SeqGraph extends BaseGraph { } } } - - /** - * Merge headless configurations: - * - * Performs the transformation: - * - * { x + S_i + y -> Z } - * - * goes to: - * - * { x -> S_i -> y -> Z } - * - * for all nodes that match this configuration. - * - * Differs from the diamond transform in that no top node is required - */ - protected class MergeHeadlessIncomingSources extends VertexBasedTransformer { - @Override - boolean tryToTransform(final SeqVertex bottom) { - final Set incoming = incomingVerticesOf(bottom); - if ( incoming.size() <= 1 ) - return false; - - for ( final SeqVertex inc : incoming ) - if ( ! 
isSource(inc) || outDegreeOf(inc) > 1 ) - return false; - - if ( dontModifyGraphEvenIfPossible() ) return true; - - final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(SeqGraph.this, incoming); - if (splitter.meetsMinMergableSequenceForPrefix(MIN_COMMON_SEQUENCE_TO_MERGE_SOURCE_SINK_VERTICES)) - return splitter.splitAndUpdate(null, bottom); - else - return false; - } - } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedSequenceMerger.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedSequenceMerger.java index 1c53f2332..0babd8d56 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedSequenceMerger.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedSequenceMerger.java @@ -88,13 +88,13 @@ public class SharedSequenceMerger { for ( final SeqVertex prev : prevs ) { for ( final BaseEdge prevIn : graph.incomingEdgesOf(prev) ) { - graph.addEdge(graph.getEdgeSource(prevIn), newV, new BaseEdge(prevIn)); + graph.addEdge(graph.getEdgeSource(prevIn), newV, prevIn.copy()); edgesToRemove.add(prevIn); } } for ( final BaseEdge e : graph.outgoingEdgesOf(v) ) { - graph.addEdge(newV, graph.getEdgeTarget(e), new BaseEdge(e)); + graph.addEdge(newV, graph.getEdgeTarget(e), e.copy()); } graph.removeAllVertices(prevs); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java index f6ee4c3c3..205d0027a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java @@ -209,7 +209,7 @@ public class 
SharedVertexSequenceSplitter { splitGraph.addEdge(remaining, suffixV, fromMid); } else { // prefix + suffix completely explain this node - splitGraph.addOrUpdateEdge(prefixV, suffixV, new BaseEdge(toMid).add(fromMid)); + splitGraph.addOrUpdateEdge(prefixV, suffixV, toMid.copy().add(fromMid)); } } } @@ -323,7 +323,7 @@ public class SharedVertexSequenceSplitter { } else { // schedule edge for removal, and return a freshly allocated one for our graph to use edgesToRemove.add(e); - return new BaseEdge(e); + return e.copy(); } } } \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/MultiDeBruijnVertex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/MultiDeBruijnVertex.java new file mode 100644 index 000000000..814b3b9a7 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/MultiDeBruijnVertex.java @@ -0,0 +1,118 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. 
DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; + +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnVertex; +import org.broadinstitute.sting.utils.Utils; + +import java.util.LinkedList; +import java.util.List; + +/** + * A DeBruijnVertex that supports multiple copies of the same kmer + * + * This is implemented through the same mechanism as SeqVertex, where each + * created MultiDeBruijnVertex has a unique id assigned upon creation. Two + * MultiDeBruijnVertex are equal iff they have the same ID + * + * User: depristo + * Date: 4/17/13 + * Time: 3:20 PM + */ +final class MultiDeBruijnVertex extends DeBruijnVertex { + private final static boolean KEEP_TRACK_OF_READS = false; + private static int idCounter = 0; + + private final List reads = new LinkedList(); + private int id = idCounter++; // TODO -- potential race condition problem here + + /** + * Create a new MultiDeBruijnVertex with kmer sequence + * @param sequence the kmer sequence + */ + MultiDeBruijnVertex(byte[] sequence) { + super(sequence); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + MultiDeBruijnVertex that = (MultiDeBruijnVertex) o; + + return id == that.id; + } + + @Override + public String toString() { + return "MultiDeBruijnVertex_id_" + id + "_seq_" + getSequenceString(); + } + + /** + * Add name information to this vertex for debugging + * + * This information will be captured as a list of strings, and displayed in DOT if this + * graph is written out to disk + * + * This functionality is only enabled when KEEP_TRACK_OF_READS is true + * + * @param name a non-null string + */ + protected void addRead(final String name) { + if ( name == null ) throw new IllegalArgumentException("name cannot be null"); + if ( KEEP_TRACK_OF_READS ) reads.add(name); + } + + @Override + public int hashCode() { return id; } + + @Override + public String 
additionalInfo() { + return KEEP_TRACK_OF_READS ? (! reads.contains("ref") ? "__" + Utils.join(",", reads) : "") : ""; + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java new file mode 100644 index 000000000..db0ce0880 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -0,0 +1,162 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. 
LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/
+
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading;
+
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.walkers.haplotypecaller.LocalAssemblyEngine;
+import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
+import org.broadinstitute.sting.utils.haplotype.Haplotype;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Local assembly engine that, for each configured kmer size, threads the reference and the reads through a
+ * ReadThreadingGraph, prunes and cleans that graph, and converts it into a SeqGraph.
+ */
+public class ReadThreadingAssembler extends LocalAssemblyEngine {
+    private final static Logger logger = Logger.getLogger(ReadThreadingAssembler.class);
+
+    private final static int DEFAULT_NUM_PATHS_PER_GRAPH = 128;
+
+    /** The kmer sizes to try when building the graph; assemble() builds one graph per entry, in list order. */
+    private final List kmerSizes;
+    private final int maxAllowedPathsForReadThreadingAssembler;
+
+    // when true, assemble() only keeps graphs accepted by reasonableNumberOfPaths(); nothing in this class sets it
+    private boolean requireReasonableNumberOfPaths = false;
+    protected boolean removePathsNotConnectedToRef = true;
+    private boolean justReturnRawGraph = false;
+
+    /** for testing only: uses DEFAULT_NUM_PATHS_PER_GRAPH and a single kmer size of 25 */
+    public ReadThreadingAssembler() {
+        this(DEFAULT_NUM_PATHS_PER_GRAPH, Arrays.asList(25));
+    }
+
+    /**
+     * Create a new assembler.
+     *
+     * @param maxAllowedPathsForReadThreadingAssembler passed to the LocalAssemblyEngine constructor and also used as
+     *                                                 the upper bound checked by reasonableNumberOfPaths()
+     * @param kmerSizes the kmer sizes to build graphs for; the list is stored by reference, not copied
+     */
+    public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes) {
+        super(maxAllowedPathsForReadThreadingAssembler);
+        this.kmerSizes = kmerSizes;
+        this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler;
+    }
+
+    /** for testing purposes */
+    protected void setJustReturnRawGraph(boolean justReturnRawGraph) {
+        this.justReturnRawGraph = justReturnRawGraph;
+    }
+
+    /**
+     * Build one sequence graph per kmer size from the reference haplotype and the reads.
+     *
+     * @param reads the reads to thread through each graph
+     * @param refHaplotype the reference haplotype; its bases are added to each graph as the "ref" sequence
+     * @return the cleaned sequence graphs, one for each kmer size whose cleanup returned a non-null graph; or, when
+     *         justReturnRawGraph is set, a singleton list holding the raw sequence graph of the first kmer size
+     */
+    @Override
+    public List assemble( final List reads, final Haplotype refHaplotype) {
+        final List graphs = new LinkedList<>();
+
+        for ( final int kmerSize : kmerSizes ) {
+            final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly);
+
+            // add the reference sequence to the graph
+            rtgraph.addSequence("ref", refHaplotype.getBases(), null, true);
+
+            // Next pull kmers out of every read and throw them on the graph
+            for( final GATKSAMRecord read : reads ) {
+                rtgraph.addRead(read);
+            }
+
+            // actually build the read threading graph
+            rtgraph.buildGraphIfNecessary();
+            // NOTE(review): the debug file names below do not vary with kmerSize, so with several kmer sizes each
+            // iteration presumably writes to the same paths -- confirm
+            if ( debugGraphTransformations ) rtgraph.printGraph(new File("sequenceGraph.0.0.raw_readthreading_graph.dot"), pruneFactor);
+
+            // go through and prune all of the chains where all edges have <= pruneFactor. This must occur
+            // before recoverDanglingTails in the graph, so that we don't spend a ton of time recovering
+            // tails that we'll ultimately just trim away anyway, as the dangling tail edges have weight of 1
+            rtgraph.pruneLowWeightChains(pruneFactor);
+
+            // look at all chains in the graph that terminate in a non-ref node (dangling sinks) and see if
+            // we can recover them by merging some N bases from the chain back into the reference uniquely, for
+            // N < kmerSize
+            if ( recoverDanglingTails ) rtgraph.recoverDanglingTails();
+
+            // remove all heading and trailing paths
+            if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef();
+
+            if ( debugGraphTransformations ) rtgraph.printGraph(new File("sequenceGraph.0.1.cleaned_readthreading_graph.dot"), pruneFactor);
+
+            final SeqGraph initialSeqGraph = rtgraph.convertToSequenceGraph();
+
+            // if the unit tests don't want us to cleanup the graph, just return the raw sequence graph
+            // (this returns from inside the loop, so only the first kmer size is ever processed in that mode)
+            if ( justReturnRawGraph ) return Collections.singletonList(initialSeqGraph);
+
+            if ( debug ) logger.info("Using kmer size of " + rtgraph.getKmerSize() + " in read threading assembler");
+            if ( debugGraphTransformations ) initialSeqGraph.printGraph(new File("sequenceGraph.0.2.initial_seqgraph.dot"), pruneFactor);
+            initialSeqGraph.cleanNonRefPaths(); // TODO -- I don't think this is possible by construction
+
+            // cleanupSeqGraph may return null, in which case this kmer size contributes no graph
+            final SeqGraph seqGraph = cleanupSeqGraph(initialSeqGraph);
+            if ( seqGraph != null ) {
+                if ( ! requireReasonableNumberOfPaths || reasonableNumberOfPaths(seqGraph) ) {
+                    graphs.add(seqGraph);
+                }
+            }
+        }
+
+        return graphs;
+    }
+
+    /**
+     * Did we find a reasonable number of paths in this graph?
+     *
+     * Asks KBestPaths for up to 100000 paths and logs the number found at INFO level on every call.
+     *
+     * @param graph the sequence graph whose paths are counted
+     * @return true if the number of paths found is <= maxAllowedPathsForReadThreadingAssembler
+     */
+    private boolean reasonableNumberOfPaths(final SeqGraph graph) {
+        final KBestPaths pathFinder = new KBestPaths(false);
+        final List> allPaths = pathFinder.getKBestPaths(graph, 100000);
+        logger.info("Found " + allPaths.size() + " paths through " + graph + " with maximum " + maxAllowedPathsForReadThreadingAssembler);
+        return allPaths.size() <= maxAllowedPathsForReadThreadingAssembler;
+    }
+
+    @Override
+    public String toString() {
+        return "ReadThreadingAssembler{" +
+                "kmerSizes=" + kmerSizes +
+                '}';
+    }
+}
\ No newline at end of file
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
new file mode 100644
index 000000000..6e9223afb
--- /dev/null
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
@@ -0,0 +1,640 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading;
+
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.walkers.haplotypecaller.KMerCounter;
+import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer;
+import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.collections.PrimitivePair;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.jgrapht.EdgeFactory;
+
+import java.io.File;
+import java.util.*;
+
+/**
+ * A kmer graph built by threading sequences through it: sequences are first queued per sample with addSequence(),
+ * then turned into vertices and edges the first time buildGraphIfNecessary() runs.
+ */
+public class ReadThreadingGraph extends BaseGraph {
+    /**
+     * Edge factory that creates non-reference multiplicity 1 edges
+     */
+    private static class MyEdgeFactory implements EdgeFactory {
+        @Override
+        public MultiSampleEdge createEdge(MultiDeBruijnVertex sourceVertex, MultiDeBruijnVertex targetVertex) {
+            return new MultiSampleEdge(false, 1);
+        }
+    }
+
+    private final static Logger logger = Logger.getLogger(ReadThreadingGraph.class);
+
+    /** sample name used by the addSequence overloads that do not take a sample name */
+    private final static String ANONYMOUS_SAMPLE = "XXX_UNNAMED_XXX";
+    /** when true, buildGraphIfNecessary() writes a .dot file after threading each sequence */
+    private final static boolean WRITE_GRAPH = false;
+    private final static boolean DEBUG_NON_UNIQUE_CALC = false;
+
+    /** for debugging info printing */
+    private static int counter = 0;
+
+    /** we require at least this many bases to be uniquely matching to merge a dangling tail */
+    private final static int MIN_MATCH_LENGTH_TO_RECOVER_DANGLING_TAIL = 5;
+
+    /**
+     * Sequences added for read threading before we've actually built the graph, keyed by sample name
+     */
+    private final Map> pending = new LinkedHashMap>();
+
+    /**
+     * A set of non-unique kmers that cannot be used as merge points in the graph
+     */
+    private Set nonUniqueKmers;
+
+    /**
+     * A map from kmers -> their corresponding vertex in the graph
+     */
+    private Map uniqueKmers = new LinkedHashMap();
+
+    /**
+     * Settings fixed at construction time (see the three-argument constructor)
+     */
+    final int kmerSize;
+    final boolean debugGraphTransformations;
+    final byte minBaseQualityToUseInAssembly;
+
+    // when true, threadSequence() also calls increaseCountsInMatchedKmers() for the starting vertex
+    protected boolean increaseCountsBackwards = true;
+    protected boolean increaseCountsThroughBranches = false; // this may increase the branches without bounds
+
+    // --------------------------------------------------------------------------------
+    // state variables, initialized in resetToInitialState()
+    // --------------------------------------------------------------------------------
+    private Kmer refSource;
+    private boolean alreadyBuilt;
+    byte[] refSeq;
+    MultiDeBruijnVertex[] refKmers;
+
+    /** Create a graph with kmer size 25, debugging off and a minimum base quality of 6 */
+    public ReadThreadingGraph() {
+        this(25, false, (byte)6);
+    }
+
+    /** Create a graph with the given kmer size, debugging off and a minimum base quality of 6 */
+    public ReadThreadingGraph(final int kmerSize) {
+        this(kmerSize, false, (byte)6);
+    }
+
+    /**
+     * Create a new ReadThreadingGraph using kmerSize for matching
+     * @param kmerSize must be >= 1
+     * @param debugGraphTransformations when true, vertices record the names of the sequences threaded through them
+     * @param minBaseQualityToUseInAssembly stored on the graph; its use is not visible in this part of the class
+     * @throws IllegalArgumentException if kmerSize < 1 (checked after the superclass constructor has run)
+     */
+    protected ReadThreadingGraph(final int kmerSize, final boolean debugGraphTransformations, final byte minBaseQualityToUseInAssembly) {
+        super(kmerSize, new MyEdgeFactory());
+
+        if ( kmerSize < 1 ) throw new IllegalArgumentException("bad minkKmerSize " + kmerSize);
+        this.kmerSize = kmerSize;
+        this.debugGraphTransformations = debugGraphTransformations;
+        this.minBaseQualityToUseInAssembly = minBaseQualityToUseInAssembly;
+
+        resetToInitialState();
+    }
+
+    /**
+     * Reset this assembler to its initial state, so we can create another
assembly with a different set of reads
+     */
+    private void resetToInitialState() {
+        // NOTE(review): only the bookkeeping fields are reset here; vertices and edges already in the graph are not
+        // removed by this method -- fine for the constructor call, confirm before reusing it elsewhere
+        pending.clear();
+        nonUniqueKmers = null;
+        uniqueKmers.clear();
+        refSource = null;
+        alreadyBuilt = false;
+        refSeq = null;
+        refKmers = null;
+    }
+
+    /**
+     * Add all the bases in sequence to the graph, using "anonymous" as the sequence name and no per-base counts
+     * @param sequence a non-null sequence
+     * @param isRef is this the reference sequence?
+     */
+    protected void addSequence(final byte[] sequence, final boolean isRef) {
+        addSequence("anonymous", sequence, null, isRef);
+    }
+
+    /**
+     * Add all bases in sequence to this graph, under the anonymous sample and covering offsets 0 to sequence.length
+     *
+     * @see #addSequence(String, String, byte[], int, int, int[], boolean) for full information
+     */
+    public void addSequence(final String seqName, final byte[] sequence, final int[] counts, final boolean isRef) {
+        addSequence(seqName, ANONYMOUS_SAMPLE, sequence, 0, sequence.length, counts, isRef);
+    }
+
+    /**
+     * Add bases in sequence to this graph
+     *
+     * The sequence is only queued here; it is threaded into the graph when the graph is built.
+     *
+     * @param seqName a useful seqName for this read, for debugging purposes
+     * @param sampleName the sample this sequence belongs to; queued sequences are grouped by this name
+     * @param sequence non-null sequence of bases
+     * @param counts a vector of counts for each bases, indicating how many times that base was observed in the sequence.
+     *               This allows us to support reduced reads in the ReadThreadingAssembler. Can be null, meaning that
+     *               each base is only observed once. If not null, must have length == sequence.length.
+     * @param start the first base offset in sequence that we should use for constructing the graph using this sequence, inclusive
+     * @param stop the last base offset in sequence that we should use for constructing the graph using this sequence, exclusive
+     * @param isRef is this the reference sequence.
+     */
+    public void addSequence(final String seqName, final String sampleName, final byte[] sequence, final int start, final int stop, final int[] counts, final boolean isRef) {
+        // note that argument testing is taken care of in SequenceForKmers
+        if ( alreadyBuilt ) throw new IllegalStateException("Graph already built");
+
+        // get the list of sequences for this sample
+        List sampleSequences = pending.get(sampleName);
+        if ( sampleSequences == null ) { // need to create
+            sampleSequences = new LinkedList<>();
+            pending.put(sampleName, sampleSequences);
+        }
+
+        // add the new sequence to the list of sequences for sample
+        sampleSequences.add(new SequenceForKmers(seqName, sequence, start, stop, counts, isRef));
+    }
+
+    /**
+     * Return a count appropriate for a kmer starting at kmerStart in sequence for kmers
+     *
+     * The count is read at the offset of the kmer's last base (kmerStart + kmerSize - 1).
+     *
+     * @param seqForKmers a non-null sequence for kmers object
+     * @param kmerStart the position where the kmer starts in sequence
+     * @return a count for a kmer from start -> start + kmerSize in seqForKmers
+     */
+    private int getCountGivenKmerStart(final SequenceForKmers seqForKmers, final int kmerStart) {
+        return seqForKmers.getCount(kmerStart + kmerSize - 1);
+    }
+
+    /**
+     * Thread sequence seqForKmers through the current graph, updating the graph as appropriate
+     *
+     * Does nothing when findStart() returns null for the sequence. For the reference sequence this also records
+     * refSource, refSeq and refKmers, which the dangling tail recovery reads later.
+     *
+     * @param seqForKmers a non-null sequence
+     * @throws IllegalStateException if a second reference sequence is threaded through the graph
+     */
+    private void threadSequence(final SequenceForKmers seqForKmers) {
+        final Pair startingInfo = findStart(seqForKmers);
+        if ( startingInfo == null )
+            return;
+
+        final MultiDeBruijnVertex startingVertex = startingInfo.getFirst();
+        final int uniqueStartPos = startingInfo.getSecond();
+
+        // increase the counts of all edges incoming into the starting vertex supported by going back in sequence
+        if ( increaseCountsBackwards )
+            increaseCountsInMatchedKmers(seqForKmers, startingVertex, startingVertex.getSequence(), kmerSize - 2);
+
+        if ( debugGraphTransformations ) startingVertex.addRead(seqForKmers.name);
+
+        // keep track of information about the reference kmers for merging dangling tails
+        if ( seqForKmers.isRef ) {
+            if ( refSource != null ) throw new IllegalStateException("Found two refSources! prev " + refSource + " new is " + startingVertex);
+            refSource = new Kmer(seqForKmers.sequence, seqForKmers.start, kmerSize);
+            refSeq = seqForKmers.sequence;
+            refKmers = new MultiDeBruijnVertex[refSeq.length];
+            // a freshly allocated reference array is already all-null, so this loop only restates that the
+            // first kmerSize slots carry no vertex
+            for ( int i = 0; i < kmerSize; i++ ) refKmers[i] = null;
+        }
+
+        // loop over all of the bases in sequence, extending the graph by one base at each point, as appropriate
+        MultiDeBruijnVertex vertex = startingVertex;
+        for ( int i = uniqueStartPos + 1; i <= seqForKmers.stop - kmerSize; i++ ) {
+            final int count = getCountGivenKmerStart(seqForKmers, i);
+
+            vertex = extendChainByOne(vertex, seqForKmers.sequence, i, count, seqForKmers.isRef);
+            if ( debugGraphTransformations ) vertex.addRead(seqForKmers.name);
+
+            // keep track of the reference kmers for merging dangling tails
+            // (refKmers is indexed by the offset of the kmer's last base, not by its start)
+            if ( seqForKmers.isRef ) refKmers[i + kmerSize - 1] = vertex;
+        }
+    }
+
+    /**
+     * Attempt to attach vertex with out-degree == 0 to the graph by finding a unique matching kmer to the reference
+     * @param vertex the vertex to recover
+     * @return 1 if an edge from vertex to a reference merge point was added, 0 otherwise
+     * @throws IllegalStateException if vertex has outgoing edges
+     */
+    protected int recoverDanglingChain(final MultiDeBruijnVertex vertex) {
+        if ( outDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling tail for " + vertex + " but it has out-degree > 0");
+
+        final byte[] kmer = vertex.getSequence();
+        // nonUniqueKmers is only assigned by buildGraphIfNecessary(), so this dereference needs a built graph
+        if ( ! nonUniqueKmers.contains(new Kmer(kmer)) ) {
+            // don't attempt to fix non-unique kmers!
+            final MultiDeBruijnVertex uniqueMergePoint = danglingTailMergePoint(kmer);
+            if ( uniqueMergePoint != null ) {
+                addEdge(vertex, uniqueMergePoint, new MultiSampleEdge(false, 1));
+                return 1;
+            }
+        }
+
+        return 0;
+    }
+
+    /**
+     * Find a unique merge point for kmer in the reference sequence
+     *
+     * The candidate is the reference vertex recorded for the base right after the longest unique suffix match,
+     * provided that match is at least MIN_MATCH_LENGTH_TO_RECOVER_DANGLING_TAIL bases long.
+     *
+     * @param kmer the full kmer of the dangling tail
+     * @return a vertex appropriate to merge kmer into, or null if none could be found
+     */
+    private MultiDeBruijnVertex danglingTailMergePoint(final byte[] kmer) {
+        final PrimitivePair.Int endAndLength = GraphUtils.findLongestUniqueSuffixMatch(refSeq, kmer);
+        if ( endAndLength != null && endAndLength.second >= MIN_MATCH_LENGTH_TO_RECOVER_DANGLING_TAIL && endAndLength.first + 1 < refKmers.length) {
+            final int len = endAndLength.second;
+            // may be null: refKmers slots that were never assigned during threading stay null
+            final MultiDeBruijnVertex mergePoint = refKmers[endAndLength.first + 1];
+//            logger.info("recoverDanglingChain of kmer " + new String(kmer) + " merged to " + mergePoint + " with match size " + len);
+            // NOTE(review): determineKmerSizeAndNonUniques() computes its result from the pending sequences, and
+            // buildGraphIfNecessary() clears pending once the graph is built. If this method only runs after the
+            // build (as recoverDanglingTails() requires), nonUniquesAtLength looks like it is always empty and the
+            // rejection branch below never fires -- confirm
+            final Set nonUniquesAtLength = determineKmerSizeAndNonUniques(len, len).nonUniques;
+            final Kmer matchedKmer = new Kmer(kmer, kmer.length - len, len);
+            if ( nonUniquesAtLength.contains(matchedKmer) ) {
+//                logger.info("Rejecting merge " + new String(kmer) + " because match kmer " + matchedKmer + " isn't unique across all reads");
+                return null;
+            } else {
+                return mergePoint;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Build the read threaded assembly graph if it hasn't already been constructed from the sequences that have
+     * been added to the graph.
+     */
+    public void buildGraphIfNecessary() {
+        if ( alreadyBuilt ) return;
+
+        // determine the kmer size we'll use, and capture the set of nonUniques for that kmer size
+        // (min and max are both kmerSize here, so only the non-unique set is actually being computed)
+        final NonUniqueResult result = determineKmerSizeAndNonUniques(kmerSize, kmerSize);
+        nonUniqueKmers = result.nonUniques;
+
+        if ( DEBUG_NON_UNIQUE_CALC ) {
+            logger.info("using " + kmerSize + " kmer size for this assembly with the following non-uniques");
+        }
+
+        // go through the pending sequences, and add them to the graph
+        for ( final List sequencesForSample : pending.values() ) {
+            for ( final SequenceForKmers sequenceForKmers : sequencesForSample ) {
+                threadSequence(sequenceForKmers);
+                if ( WRITE_GRAPH ) printGraph(new File("threading." + counter++ + "." + sequenceForKmers.name.replace(" ", "_") + ".dot"), 0);
+            }
+
+            // flush the single sample edge values from the graph
+            // (done once per sample, after all of that sample's sequences have been threaded)
+            for ( final MultiSampleEdge e : edgeSet() ) e.flushSingleSampleMultiplicity();
+        }
+
+        // clear the queued sequences and mark the graph as built; addSequence() throws from now on
+        pending.clear();
+        alreadyBuilt = true;
+    }
+
+    /**
+     * Try to reconnect every vertex that has no outgoing edges, other than the one for which
+     * isRefNodeAndRefSink() is true, by calling recoverDanglingChain() on it.
+     *
+     * @throws IllegalStateException if the graph has not been built yet
+     */
+    public void recoverDanglingTails() {
+        if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built");
+
+        // both counters only feed the commented-out log statement below
+        int attempted = 0;
+        int nRecovered = 0;
+        for ( final MultiDeBruijnVertex v : vertexSet() ) {
+            if ( outDegreeOf(v) == 0 && ! isRefNodeAndRefSink(v) ) {
+                attempted++;
+                nRecovered += recoverDanglingChain(v);
+            }
+        }
+        //logger.info("Recovered " + nRecovered + " of " + attempted + " dangling tails");
+    }
+
+    /** structure that keeps track of the non-unique kmers for a given kmer size */
+    private static class NonUniqueResult {
+        final Set nonUniques;
+        final int kmerSize;
+
+        private NonUniqueResult(Set nonUniques, int kmerSize) {
+            this.nonUniques = nonUniques;
+            this.kmerSize = kmerSize;
+        }
+    }
+
+    /**
+     * Compute the smallest kmer size >= minKmerSize and <= maxKmerSize that has no non-unique kmers
+     * among all sequences added to the current graph.
Will always return a result for maxKmerSize if
+     * all smaller kmers had non-unique kmers.
+     *
+     * Only sequences still queued in pending are examined, so the result is empty once pending has been cleared.
+     *
+     * @param minKmerSize the minimum kmer size to consider when constructing the graph
+     * @param maxKmerSize the maximum kmer size to consider
+     * @return a non-null NonUniqueResult
+     */
+    protected NonUniqueResult determineKmerSizeAndNonUniques(final int minKmerSize, final int maxKmerSize) {
+        // a private copy of the pending sequences, so removing entries below does not touch pending itself
+        final Collection withNonUniques = getAllPendingSequences();
+        // note: this local and the kmerSize local below shadow the fields of the same names
+        final Set nonUniqueKmers = new HashSet();
+
+        // go through the sequences and determine which kmers aren't unique within each read
+        int kmerSize = minKmerSize;
+        for ( ; kmerSize <= maxKmerSize; kmerSize++) {
+            // clear out set of non-unique kmers
+            nonUniqueKmers.clear();
+
+            // loop over all sequences that have non-unique kmers in them from the previous iterator
+            final Iterator it = withNonUniques.iterator();
+            while ( it.hasNext() ) {
+                final SequenceForKmers sequenceForKmers = it.next();
+
+                // determine the non-unique kmers for this sequence
+                final Collection nonUniquesFromSeq = determineNonUniqueKmers(sequenceForKmers, kmerSize);
+                if ( nonUniquesFromSeq.isEmpty() ) {
+                    // remove this sequence from future consideration
+                    it.remove();
+                } else {
+                    // keep track of the non-uniques for this kmerSize, and keep it in the list of sequences that have non-uniques
+                    nonUniqueKmers.addAll(nonUniquesFromSeq);
+                }
+            }
+
+            if ( nonUniqueKmers.isEmpty() )
+                // this kmerSize produces no non-unique sequences, so go ahead and use it for our assembly
+                break;
+        }
+
+        // necessary because the loop breaks with kmerSize = max + 1
+        // (that is, when it runs to completion without hitting the break above)
+        return new NonUniqueResult(nonUniqueKmers, Math.min(kmerSize, maxKmerSize));
+    }
+
+    /**
+     * Get the collection of all sequences for kmers across all samples in no particular order
+     * @return non-null Collection, newly allocated on each call
+     */
+    private Collection getAllPendingSequences() {
+        final LinkedList result = new LinkedList();
+        for ( final List oneSampleWorth : pending.values() ) result.addAll(oneSampleWorth);
+        return result;
+    }
+
+    /**
+     * Get the collection of non-unique kmers from sequence for kmer size kmerSize
+     *
+     * A kmer is non-unique when it occurs at least twice within this one sequence.
+     *
+     * @param seqForKmers a sequence to get kmers from
+     * @param kmerSize the size of the kmers
+     * @return a non-null collection of non-unique kmers in sequence
+     */
+    private Collection determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) {
+        // count up occurrences of kmers within each read
+        final KMerCounter counter = new KMerCounter(kmerSize);
+        // NOTE(review): the scan begins at offset 0 rather than seqForKmers.start, although addSequence() accepts
+        // a non-zero start -- confirm that counting kmers before start is intended
+        for ( int i = 0; i <= seqForKmers.stop - kmerSize; i++ ) {
+            final Kmer kmer = new Kmer(seqForKmers.sequence, i, kmerSize);
+            counter.addKmer(kmer, 1);
+        }
+
+        return counter.getKmersWithCountsAtLeast(2);
+    }
+
+    /**
+     * Convert this kmer graph to a simple sequence graph.
+     *
+     * Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
+     * graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
+     *
+     * Builds this graph first if that has not happened yet.
+     *
+     * @return a newly allocated SequenceGraph
+     */
+    // TODO -- should override base class method
+    public SeqGraph convertToSequenceGraph() {
+        buildGraphIfNecessary();
+
+        final SeqGraph seqGraph = new SeqGraph(kmerSize);
+        final Map vertexMap = new HashMap();
+
+        // create all of the equivalent seq graph vertices
+        for ( final MultiDeBruijnVertex dv : vertexSet() ) {
+            final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
+            sv.setAdditionalInfo(dv.additionalInfo());
+            vertexMap.put(dv, sv);
+            seqGraph.addVertex(sv);
+        }
+
+        // walk through the nodes and connect them to their equivalent seq vertices
+        // (each new edge copies the ref flag and multiplicity of the kmer-graph edge it replaces)
+        for( final MultiSampleEdge e : edgeSet() ) {
+            final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
+            final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
+            //logger.info("Adding edge " + seqInV + " -> " + seqOutV);
+            seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));
+        }
+
+        return seqGraph;
+    }
+
+    private void increaseCountsInMatchedKmers(final SequenceForKmers 
seqForKmers, + final MultiDeBruijnVertex vertex, + final byte[] originalKmer, + final int offset) { + if ( offset == -1 ) return; + + for ( final MultiSampleEdge edge : incomingEdgesOf(vertex) ) { + final MultiDeBruijnVertex prev = getEdgeSource(edge); + final byte suffix = prev.getSuffix(); + final byte seqBase = originalKmer[offset]; +// logger.warn(String.format("Increasing counts for %s -> %s via %s at %d with suffix %s vs. %s", +// prev, vertex, edge, offset, (char)suffix, (char)seqBase)); + if ( suffix == seqBase && (increaseCountsThroughBranches || inDegreeOf(vertex) == 1) ) { + edge.incMultiplicity(seqForKmers.getCount(offset)); + increaseCountsInMatchedKmers(seqForKmers, prev, originalKmer, offset-1); + } + } + } + + /** + * Find vertex and its position in seqForKmers where we should start assembling seqForKmers + * + * @param seqForKmers the sequence we want to thread into the graph + * @return a pair of the starting vertex and its position in seqForKmer + */ + private Pair findStart(final SequenceForKmers seqForKmers) { + final int uniqueStartPos = seqForKmers.isRef ? 
0 : findUniqueStartPosition(seqForKmers.sequence, seqForKmers.start, seqForKmers.stop); + + if ( uniqueStartPos == -1 ) + return null; + + return getOrCreateKmerVertex(seqForKmers.sequence, uniqueStartPos, true); + } + + /** + * Find a starting point in sequence that begins a unique kmer among all kmers in the graph + * @param sequence the sequence of bases + * @param start the first base to use in sequence + * @param stop the last base to use in sequence + * @return the index into sequence that begins a unique kmer of size kmerSize, or -1 if none could be found + */ + private int findUniqueStartPosition(final byte[] sequence, final int start, final int stop) { + for ( int i = start; i < stop - kmerSize; i++ ) { + final Kmer kmer1 = new Kmer(sequence, i, kmerSize); + if ( uniqueKmers.containsKey(kmer1) ) + return i; + } + return -1; + } + + /** + * Get the vertex for the kmer in sequence starting at start + * @param sequence the sequence + * @param start the position of the kmer start + * @param allowRefSource if true, we will allow matches to the kmer that represents the reference starting kmer + * @return a non-null vertex + */ + private Pair getOrCreateKmerVertex(final byte[] sequence, final int start, final boolean allowRefSource) { + final Kmer kmer = new Kmer(sequence, start, kmerSize); + final MultiDeBruijnVertex vertex = getUniqueKmerVertex(kmer, allowRefSource); + if ( vertex != null ) { + return new Pair<>(vertex, start); + } else { + return new Pair<>(createVertex(kmer), start); + } + } + + /** + * Get the unique vertex for kmer, or null if not possible. + * + * @param allowRefSource if true, we will allow kmer to match the reference source vertex + * @return a vertex for kmer, or null if it's not unique + */ + private MultiDeBruijnVertex getUniqueKmerVertex(final Kmer kmer, final boolean allowRefSource) { + if ( ! allowRefSource && kmer.equals(refSource) ) return null; + return uniqueKmers.get(kmer); + } + + /** + * Create a new vertex for kmer. 
Add it to the uniqueKmers map if appropriate. + * + * kmer must not have an entry in unique kmers, or an error will be thrown + * + * @param kmer the kmer we want to create a vertex for + * @return the non-null created vertex + */ + private MultiDeBruijnVertex createVertex(final Kmer kmer) { + final MultiDeBruijnVertex newVertex = new MultiDeBruijnVertex(kmer.bases()); + final int prevSize = vertexSet().size(); + addVertex(newVertex); + + // make sure we aren't adding duplicates (would be a bug) + if ( vertexSet().size() != prevSize + 1) throw new IllegalStateException("Adding vertex " + newVertex + " to graph didn't increase the graph size"); + + // add the vertex to the unique kmer map, if it is in fact unique + if ( ! nonUniqueKmers.contains(kmer) && ! uniqueKmers.containsKey(kmer) ) // TODO -- not sure this last test is necessary + uniqueKmers.put(kmer, newVertex); + + return newVertex; + } + + /** + * Workhorse routine of the assembler. Given a sequence whose last vertex is anchored in the graph, extend + * the graph one bp according to the bases in sequence. + * + * @param prevVertex a non-null vertex where sequence was last anchored in the graph + * @param sequence the sequence we're threading through the graph + * @param kmerStart the start of the current kmer in graph we'd like to add + * @param count the number of observations of this kmer in graph (can be > 1 for reduced reads) + * @param isRef is this the reference sequence? 
+ * @return a non-null vertex connecting prevVertex to in the graph based on sequence + */ + private MultiDeBruijnVertex extendChainByOne(final MultiDeBruijnVertex prevVertex, final byte[] sequence, final int kmerStart, final int count, final boolean isRef) { + final Set outgoingEdges = outgoingEdgesOf(prevVertex); + + final int nextPos = kmerStart + kmerSize - 1; + for ( final MultiSampleEdge outgoingEdge : outgoingEdges ) { + final MultiDeBruijnVertex target = getEdgeTarget(outgoingEdge); + if ( target.getSuffix() == sequence[nextPos] ) { + // we've got a match in the chain, so simply increase the count of the edge by 1 and continue + outgoingEdge.incMultiplicity(count); + return target; + } + } + + // none of our outgoing edges had our unique suffix base, so we check for an opportunity to merge back in + final Kmer kmer = new Kmer(sequence, kmerStart, kmerSize); + MultiDeBruijnVertex uniqueMergeVertex = getUniqueKmerVertex(kmer, false); + + if ( isRef && uniqueMergeVertex != null ) + throw new IllegalStateException("Found a unique vertex to merge into the reference graph " + prevVertex + " -> " + uniqueMergeVertex); + + // either use our unique merge vertex, or create a new one in the chain + final MultiDeBruijnVertex nextVertex = uniqueMergeVertex == null ? 
createVertex(kmer) : uniqueMergeVertex; + addEdge(prevVertex, nextVertex, new MultiSampleEdge(isRef, count)); + return nextVertex; + } + + /** + * Add the high quality stretches of bases in read to this graph as sequences + * + * Walks the read's base qualities; each run of bases with quality >= minBaseQualityToUseInAssembly + * that passes the kmerSize length check is handed to addSequence + * + * @param read a non-null read + */ + protected void addRead(final GATKSAMRecord read) { + final byte[] sequence = read.getReadBases(); + final byte[] qualities = read.getBaseQualities(); + final int[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not reduced + + int lastGood = -1; // the index of the last good base we've seen + for( int end = 0; end <= sequence.length; end++ ) { + if ( end == sequence.length || qualities[end] < minBaseQualityToUseInAssembly ) { + // the first good base is at lastGood, can be -1 if last base was bad + final int start = lastGood; + // the stop base is end - 1 (if we're not at the end of the sequence) + final int stop = end == sequence.length ? sequence.length : end; + final int len = stop - start + 1; + + if ( start != -1 && len >= kmerSize ) { + // if the sequence is long enough to get some value out of, add it to the graph + final String name = read.getReadName() + "_" + start + "_" + end; + addSequence(name, read.getReadGroup().getSample(), read.getReadBases(), start, stop, reducedReadCounts, false); + } + + lastGood = -1; // reset the last good base + } else if ( lastGood == -1 ) { + lastGood = end; // we're at a good base, the last good one is us + } + } + } + + /** + * Get the set of non-unique kmers in this graph. 
For debugging purposes + * @return a non-null set of kmers + */ + protected Set getNonUniqueKmers() { + return nonUniqueKmers; + } + + @Override + public String toString() { + return "ReadThreadingAssembler{" + + "kmerSize=" + kmerSize + + '}'; + } +} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/SequenceForKmers.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/SequenceForKmers.java new file mode 100644 index 000000000..a4bc0c1c8 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/SequenceForKmers.java @@ -0,0 +1,93 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. 
LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; + +/** + * Keeps track of the information needed to add a sequence to the read threading assembly graph + * + * User: depristo + * Date: 4/18/13 + * Time: 8:59 AM + * To change this template use File | Settings | File Templates. 
+ */ +final class SequenceForKmers { + final String name; + final byte[] sequence; + final int start, stop; + final private int[] counts; + final boolean isRef; + + /** + * Create a new sequence for creating kmers + */ + SequenceForKmers(final String name, byte[] sequence, int start, int stop, int[] counts, boolean ref) { + if ( start < 0 ) throw new IllegalArgumentException("Invalid start " + start); + if ( stop < start ) throw new IllegalArgumentException("Invalid stop " + stop); + if ( sequence == null ) throw new IllegalArgumentException("Sequence is null "); + if ( counts != null && counts.length != sequence.length ) throw new IllegalArgumentException("Sequence and counts don't have the same length " + sequence.length + " vs " + counts.length); + + this.name = name; + this.sequence = sequence; + this.start = start; + this.stop = stop; + this.isRef = ref; + this.counts = counts; + } + + /** + * Get the number of observations of the kmer starting at i in this sequence + * + * Can be > 1 because sequence may be a reduced read and therefore count as N observations + * + * @param i the offset into sequence for the start of the kmer + * @return a count >= 1 that indicates the number of observations of kmer starting at i in this sequence. + */ + public int getCount(final int i) { + if ( i < 0 || i > sequence.length ) throw new ArrayIndexOutOfBoundsException("i must be >= 0 and <= " + sequence.length + " but got " + i); + return counts == null ? 
1 : counts[i]; + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java index e1559a13a..c5574577d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java @@ -80,59 +80,6 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { Assert.assertTrue(g2 != null, "Reference non-cycle graph should not return null during creation."); } - @Test(enabled = !DEBUG) - public void testLeftAlignCigarSequentially() { - String preRefString = "GATCGATCGATC"; - String postRefString = "TTT"; - String refString = "ATCGAGGAGAGCGCCCCG"; - String indelString1 = "X"; - String indelString2 = "YZ"; - int refIndel1 = 10; - int refIndel2 = 12; - - for ( final int indelSize1 : Arrays.asList(1, 2, 3, 4) ) { - for ( final int indelOp1 : Arrays.asList(1, -1) ) { - for ( final int indelSize2 : Arrays.asList(1, 2, 3, 4) ) { - for ( final int indelOp2 : Arrays.asList(1, -1) ) { - - Cigar expectedCigar = new Cigar(); - expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M)); - expectedCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D))); - expectedCigar.add(new CigarElement((indelOp1 < 0 ? refIndel1 - indelSize1 : refIndel1), CigarOperator.M)); - expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M)); - expectedCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D))); - expectedCigar.add(new CigarElement((indelOp2 < 0 ? 
(refIndel2 - indelSize2) * 2 : refIndel2 * 2), CigarOperator.M)); - expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M)); - - Cigar givenCigar = new Cigar(); - givenCigar.add(new CigarElement(refString.length() + refIndel1/2, CigarOperator.M)); - givenCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D))); - givenCigar.add(new CigarElement((indelOp1 < 0 ? (refIndel1/2 - indelSize1) : refIndel1/2) + refString.length() + refIndel2/2 * 2, CigarOperator.M)); - givenCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D))); - givenCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2/2 - indelSize2) * 2 : refIndel2/2 * 2) + refString.length(), CigarOperator.M)); - - String theRef = preRefString + refString + Utils.dupString(indelString1, refIndel1) + refString + Utils.dupString(indelString2, refIndel2) + refString + postRefString; - String theRead = refString + Utils.dupString(indelString1, refIndel1 + indelOp1 * indelSize1) + refString + Utils.dupString(indelString2, refIndel2 + indelOp2 * indelSize2) + refString; - - Cigar calculatedCigar = new DeBruijnAssembler().leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(givenCigar), theRef.getBytes(), theRead.getBytes(), preRefString.length(), 0); - Assert.assertEquals(AlignmentUtils.consolidateCigar(calculatedCigar).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar strings do not match!"); - } - } - } - } - } - - @Test(enabled = true) - public void testLeftAlignCigarSequentiallyAdjacentID() { - final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT"; - final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT"; - final Cigar originalCigar = TextCigarCodec.getSingleton().decode("18M4I12M4D2M"); - - final Cigar result = new DeBruijnAssembler().leftAlignCigarSequentially(originalCigar, ref.getBytes(), hap.getBytes(), 0, 0); - logger.warn("Result is " + result); - 
Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different"); - } - private static class MockBuilder extends DeBruijnGraphBuilder { public final List addedPairs = new LinkedList(); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 9d4c52798..d6c6a4f33 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "0bf5ae740bf9bd14c8d60d7849c45eb3"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fc11b553fbf16beac0da04a69f419365"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "7d2cc5c4ece386beedf6b07dfbe5bf26"); + "90cbcc7e959eb591fb7c5e12d65e0e40"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "a17856f709b546eaed486841d78248d2"); + "50894abb9d156bf480881cb5cb2a8a7d"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index d5e163a88..15516d090 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -80,12 +80,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "2e10ab97afd4492c2a153b85871a2c2d"); + HCTest(CEUTRIO_BAM, "", "37e462379de17bc6c8aeeed6e9735dd3"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "affed81386dfe60e0b0d4e7e0525918f"); + HCTest(NA12878_BAM, "", "983a0d122714d4aa0ff7af20cc686703"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "e2d32d0dce2c5502a8e877f6bbb65a10"); + "dbbc884a975587d8e7255ce47b58f438"); } @Test @@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "125e91ebe43108b2b514c58a9b6d3a4f"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "ce602282e80cca6d4272f940e20e90c3"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -149,7 +149,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "2d295ce36066d9d8d9ee9c67e6e2cbd1"); + HCTestNearbySmallIntervals(NA12878_BAM, "", 
"09335c01d2e90714af7f4c91156da0b1"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -159,14 +159,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("0689d2c202849fd05617648eaf429b9a")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b34ddc93a7b9919e05da499508f44dd9")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("153d2251de7d22f423cd282b1505fbc0")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("98a78b9f58ab197b827ef2ce3ab043d3")); executeTest("HCTestStructuralIndels: ", spec); } @@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("0c29e4049908ec47a3159dce33d477c3")); + Arrays.asList("6e6ef6e0326bee6d20d9fd37349fdb8c")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -196,7 +196,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void 
testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("3306889b8d0735ce575bee281c1b8846")); + Arrays.asList("5e535983b2f7e5fb6c84fecffa092324")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KMerCounterCaseFixUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KMerCounterCaseFixUnitTest.java index c049121a3..9b08e8214 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KMerCounterCaseFixUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KMerCounterCaseFixUnitTest.java @@ -1,48 +1,48 @@ /* - * By downloading the PROGRAM you agree to the following terms of use: - * - * BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY - * - * This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). - * - * WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and - * WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. - * NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: - * - * 1. 
DEFINITIONS - * 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. - * - * 2. LICENSE - * 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. - * The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. - * 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. - * 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. - * - * 3. OWNERSHIP OF INTELLECTUAL PROPERTY - * LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. - * Copyright 2012 Broad Institute, Inc. - * Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. - * LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. - * - * 4. INDEMNIFICATION - * LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. - * - * 5. NO REPRESENTATIONS OR WARRANTIES - * THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. - * IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. - * - * 6. ASSIGNMENT - * This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. - * - * 7. MISCELLANEOUS - * 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. - * 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. - * 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. - * 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. - * 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. - * 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. - * 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
- */ +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. 
LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; @@ -50,6 +50,9 @@ import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.Test; +import java.util.HashSet; +import java.util.Set; + public class KMerCounterCaseFixUnitTest extends BaseTest { @Test public void testMyData() { @@ -76,6 +79,18 @@ public class KMerCounterCaseFixUnitTest extends BaseTest { testCounting(counter, "NNC", 0); Assert.assertNotNull(counter.toString()); + + assertCounts(counter, 5); + assertCounts(counter, 4, "ATG"); + assertCounts(counter, 3, "ATG", "ACC"); + assertCounts(counter, 2, "ATG", "ACC", "AAA"); + assertCounts(counter, 1, "ATG", "ACC", "AAA", "CTG", "NNA", "CCC"); + } + + private void assertCounts(final KMerCounter counter, final int minCount, final String ... 
expecteds) { + final Set expected = new HashSet(); + for ( final String one : expecteds ) expected.add(new Kmer(one)); + Assert.assertEquals(new HashSet(counter.getKmersWithCountsAtLeast(minCount)), expected); } private void testCounting(final KMerCounter counter, final String in, final int expectedCount) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java new file mode 100644 index 000000000..a517e1cb1 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java @@ -0,0 +1,280 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. 
LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.collections.PrimitivePair; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.haplotype.Haplotype; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + +public class LocalAssemblyEngineUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; + private IndexedFastaSequenceFile seq; + private SAMFileHeader header; + + @BeforeClass + public void setup() throws FileNotFoundException { + seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); + genomeLocParser = new GenomeLocParser(seq); + header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary()); + } + + private enum Assembler {DEBRUIJN_ASSEMBLER, READ_THREADING_ASSEMBLER} + private LocalAssemblyEngine createAssembler(final Assembler type) { + switch ( type ) { + case DEBRUIJN_ASSEMBLER: 
return new DeBruijnAssembler(); + case READ_THREADING_ASSEMBLER: return new ReadThreadingAssembler(); + default: throw new IllegalStateException("Unexpected " + type); + } + } + + @DataProvider(name = "AssembleIntervalsData") + public Object[][] makeAssembleIntervalsData() { + List tests = new ArrayList(); + + final String contig = "20"; + final int start = 10000000; + final int end = 10100000; + final int windowSize = 100; + final int stepSize = 200; + final int nReadsToUse = 5; + + for ( final Assembler assembler : Assembler.values() ) { + for ( int startI = start; startI < end; startI += stepSize) { + final int endI = startI + windowSize; + final GenomeLoc refLoc = genomeLocParser.createGenomeLoc(contig, startI, endI); + tests.add(new Object[]{assembler, refLoc, nReadsToUse}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @DataProvider(name = "AssembleIntervalsWithVariantData") + public Object[][] makeAssembleIntervalsWithVariantData() { + List tests = new ArrayList(); + + final String contig = "20"; + final int start = 10000000; + final int end = 10001000; + final int windowSize = 100; + final int stepSize = 200; + final int variantStepSize = 1; + final int nReadsToUse = 5; + + for ( final Assembler assembler : Assembler.values() ) { + for ( int startI = start; startI < end; startI += stepSize) { + final int endI = startI + windowSize; + final GenomeLoc refLoc = genomeLocParser.createGenomeLoc(contig, startI, endI); + for ( int variantStart = windowSize / 2 - 10; variantStart < windowSize / 2 + 10; variantStart += variantStepSize ) { + tests.add(new Object[]{assembler, refLoc, nReadsToUse, variantStart}); + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "AssembleIntervalsData") + public void testAssembleRef(final Assembler assembler, final GenomeLoc loc, final int nReadsToUse) { + final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); + + final List reads = new 
LinkedList(); + for ( int i = 0; i < nReadsToUse; i++ ) { + final byte[] bases = refBases.clone(); + final byte[] quals = Utils.dupBytes((byte) 30, refBases.length); + final String cigar = refBases.length + "M"; + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, loc.getContig(), loc.getContigIndex(), loc.getStart(), bases, quals, cigar); + reads.add(read); + } + + // TODO -- generalize to all assemblers + final Haplotype refHaplotype = new Haplotype(refBases, true); + final List haplotypes = assemble(assembler, refBases, loc, reads); + Assert.assertEquals(haplotypes, Collections.singletonList(refHaplotype)); + } + + @Test(dataProvider = "AssembleIntervalsWithVariantData") + public void testAssembleRefAndSNP(final Assembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { + final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); + final Allele refBase = Allele.create(refBases[variantSite], true); + final Allele altBase = Allele.create((byte)(refBase.getBases()[0] == 'A' ? 
'C' : 'A'), false); + final VariantContextBuilder vcb = new VariantContextBuilder("x", loc.getContig(), variantSite, variantSite, Arrays.asList(refBase, altBase)); + testAssemblyWithVariant(assembler, refBases, loc, nReadsToUse, vcb.make()); + } + + @Test(dataProvider = "AssembleIntervalsWithVariantData") + public void testAssembleRefAndDeletion(final Assembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { + final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); + for ( int deletionLength = 1; deletionLength < 10; deletionLength++ ) { + final Allele refBase = Allele.create(new String(refBases).substring(variantSite, variantSite + deletionLength + 1), true); + final Allele altBase = Allele.create(refBase.getBases()[0], false); + final VariantContextBuilder vcb = new VariantContextBuilder("x", loc.getContig(), variantSite, variantSite + deletionLength, Arrays.asList(refBase, altBase)); + testAssemblyWithVariant(assembler, refBases, loc, nReadsToUse, vcb.make()); + } + } + + @Test(dataProvider = "AssembleIntervalsWithVariantData") + public void testAssembleRefAndInsertion(final Assembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { + final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); + for ( int insertionLength = 1; insertionLength < 10; insertionLength++ ) { + final Allele refBase = Allele.create(refBases[variantSite], false); + final Allele altBase = Allele.create(new String(refBases).substring(variantSite, variantSite + insertionLength + 1), true); + final VariantContextBuilder vcb = new VariantContextBuilder("x", loc.getContig(), variantSite, variantSite + insertionLength, Arrays.asList(refBase, altBase)); + testAssemblyWithVariant(assembler, refBases, loc, nReadsToUse, vcb.make()); + } + } + + private void testAssemblyWithVariant(final Assembler assembler, final byte[] refBases, final 
GenomeLoc loc, final int nReadsToUse, final VariantContext site) { + final String preRef = new String(refBases).substring(0, site.getStart()); + final String postRef = new String(refBases).substring(site.getEnd() + 1, refBases.length); + final byte[] altBases = (preRef + site.getAlternateAllele(0).getBaseString() + postRef).getBytes(); + +// logger.warn("ref " + new String(refBases)); +// logger.warn("alt " + new String(altBases)); + + final List reads = new LinkedList(); + for ( int i = 0; i < nReadsToUse; i++ ) { + final byte[] bases = altBases.clone(); + final byte[] quals = Utils.dupBytes((byte) 30, altBases.length); + final String cigar = altBases.length + "M"; + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, loc.getContig(), loc.getContigIndex(), loc.getStart(), bases, quals, cigar); + reads.add(read); + } + + final Haplotype refHaplotype = new Haplotype(refBases, true); + final Haplotype altHaplotype = new Haplotype(altBases, false); + final List haplotypes = assemble(assembler, refBases, loc, reads); + Assert.assertEquals(haplotypes, Arrays.asList(refHaplotype, altHaplotype)); + } + + + private List assemble(final Assembler assembler, final byte[] refBases, final GenomeLoc loc, final List reads) { + final Haplotype refHaplotype = new Haplotype(refBases, true); + final ActiveRegion activeRegion = new ActiveRegion(loc, null, true, genomeLocParser, 0); + activeRegion.addAll(reads); + final LocalAssemblyEngine engine = createAssembler(assembler); +// logger.warn("Assembling " + activeRegion + " with " + engine); + return engine.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.emptyList()); + } + + @DataProvider(name = "SimpleAssemblyTestData") + public Object[][] makeSimpleAssemblyTestData() { + List tests = new ArrayList(); + + final String contig = "20"; + final int start = 10000000; + final int windowSize = 200; + final int end = start + windowSize; + + final Map edgeExcludesByAssembler = new 
EnumMap<>(Assembler.class); + edgeExcludesByAssembler.put(Assembler.DEBRUIJN_ASSEMBLER, 26); + edgeExcludesByAssembler.put(Assembler.READ_THREADING_ASSEMBLER, 25); // TODO -- decrease to zero when the edge calling problem is fixed + + final String ref = new String(seq.getSubsequenceAt(contig, start, end).getBases()); + final GenomeLoc refLoc = genomeLocParser.createGenomeLoc(contig, start, end); + + for ( final Assembler assembler : Assembler.values() ) { + final int excludeVariantsWithXbp = edgeExcludesByAssembler.get(assembler); + for ( int snpPos = 0; snpPos < windowSize; snpPos++) { + if ( snpPos > excludeVariantsWithXbp && (windowSize - snpPos) >= excludeVariantsWithXbp ) { + final byte[] altBases = ref.getBytes(); + altBases[snpPos] = 'N'; + final String alt = new String(altBases); + tests.add(new Object[]{"SNP at " + snpPos, assembler, refLoc, ref, alt}); + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "SimpleAssemblyTestData") + public void testSimpleAssembly(final String name, final Assembler assembler, final GenomeLoc loc, final String ref, final String alt) { + final byte[] refBases = ref.getBytes(); + final byte[] altBases = alt.getBytes(); + + final List reads = new LinkedList<>(); + for ( int i = 0; i < 20; i++ ) { + final byte[] bases = altBases.clone(); + final byte[] quals = Utils.dupBytes((byte) 30, altBases.length); + final String cigar = altBases.length + "M"; + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, loc.getContig(), loc.getContigIndex(), loc.getStart(), bases, quals, cigar); + reads.add(read); + } + + final Haplotype refHaplotype = new Haplotype(refBases, true); + final Haplotype altHaplotype = new Haplotype(altBases, false); + final List haplotypes = assemble(assembler, refBases, loc, reads); + Assert.assertTrue(haplotypes.size() > 0, "Failed to find ref haplotype"); + Assert.assertEquals(haplotypes.get(0), refHaplotype); + + Assert.assertEquals(haplotypes.size(), 2, 
"Failed to find single alt haplotype"); + Assert.assertEquals(haplotypes.get(1), altHaplotype); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseEdgeUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseEdgeUnitTest.java index 7df6ee6c8..ea1d120b6 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseEdgeUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseEdgeUnitTest.java @@ -83,7 +83,10 @@ public class BaseEdgeUnitTest extends BaseTest { e.setMultiplicity(mult + 1); Assert.assertEquals(e.getMultiplicity(), mult + 1); - final BaseEdge copy = new BaseEdge(e); + e.incMultiplicity(2); + Assert.assertEquals(e.getMultiplicity(), mult + 3); + + final BaseEdge copy = e.copy(); Assert.assertEquals(copy.isRef(), e.isRef()); Assert.assertEquals(copy.getMultiplicity(), e.getMultiplicity()); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraphUnitTest.java index c829488ba..e57f5d6e0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraphUnitTest.java @@ -49,8 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import scala.actors.threadpool.Arrays; import java.io.File; import java.util.*; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java index 8682ae5e4..cfed2f0b8 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java @@ -137,12 +137,12 @@ public class CommonSuffixMergerUnitTest extends BaseTest { public static void assertSameHaplotypes(final String name, final SeqGraph actual, final SeqGraph original) { try { final Set haplotypes = new HashSet(); - final List> originalPaths = new KBestPaths().getKBestPaths(original); - for ( final Path path : originalPaths ) + final List> originalPaths = new KBestPaths().getKBestPaths(original); + for ( final Path path : originalPaths ) haplotypes.add(new String(path.getBases())); - final List> splitPaths = new KBestPaths().getKBestPaths(actual); - for ( final Path path : splitPaths ) { + final List> splitPaths = new KBestPaths().getKBestPaths(actual); + for ( final Path path : splitPaths ) { final String h = new String(path.getBases()); Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitterUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitterUnitTest.java index 1ed20e5f4..9703d76cb 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitterUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitterUnitTest.java @@ -154,16 +154,16 @@ public class CommonSuffixSplitterUnitTest extends BaseTest { original.addEdge(v3, v4, new BaseEdge(false, 34)); original.addEdge(v4, v2, new BaseEdge(false, 42)); - original.printGraph(new 
File("testSplitInfiniteCycleFailure.dot"), 0); +// original.printGraph(new File("testSplitInfiniteCycleFailure.dot"), 0); final SeqGraph graph = (SeqGraph)original.clone(); final boolean success = new CommonSuffixSplitter().split(graph, v2); Assert.assertTrue(success); for ( final SeqVertex v : graph.vertexSet() ) { - graph.printGraph(new File("testSplitInfiniteCycleFailure.first_split.dot"), 0); +// graph.printGraph(new File("testSplitInfiniteCycleFailure.first_split.dot"), 0); final boolean success2 = new CommonSuffixSplitter().split((SeqGraph)graph.clone(), v); - if ( success2 ) graph.printGraph(new File("testSplitInfiniteCycleFailure.fail.dot"), 0); +// if ( success2 ) graph.printGraph(new File("testSplitInfiniteCycleFailure.fail.dot"), 0); Assert.assertFalse(success2, "Shouldn't be able to split any vertices but CommonSuffixSplitter says it could for " + v); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtilsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtilsUnitTest.java new file mode 100644 index 000000000..01a6b5dbb --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtilsUnitTest.java @@ -0,0 +1,120 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.collections.PrimitivePair; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class GraphUtilsUnitTest extends BaseTest { + @DataProvider(name = "findLongestUniqueMatchData") + public Object[][] makefindLongestUniqueMatchData() { + List tests = new ArrayList(); + + { // test all edge conditions + final String ref = "ACGT"; + for ( int start = 0; start < ref.length(); start++ ) { + for ( int end = start + 1; end <= ref.length(); end++ ) { + final String kmer = ref.substring(start, end); + tests.add(new Object[]{ref, kmer, end - 1, end - start}); + tests.add(new Object[]{ref, "N" + kmer, end - 1, end - start}); + tests.add(new Object[]{ref, "NN" + kmer, end - 1, end - start}); + tests.add(new Object[]{ref, kmer + "N", -1, 0}); + tests.add(new Object[]{ref, kmer + "NN", -1, 0}); + } + } + } + + { // multiple matches + final String ref = "AACCGGTT"; + for ( final String alt : Arrays.asList("A", "C", "G", "T") ) + tests.add(new Object[]{ref, alt, -1, 0}); + tests.add(new 
Object[]{ref, "AA", 1, 2}); + tests.add(new Object[]{ref, "CC", 3, 2}); + tests.add(new Object[]{ref, "GG", 5, 2}); + tests.add(new Object[]{ref, "TT", 7, 2}); + } + + { // complex matches that have unique substrings of lots of parts of kmer in the ref + final String ref = "ACGTACGTACGT"; + tests.add(new Object[]{ref, "ACGT", -1, 0}); + tests.add(new Object[]{ref, "TACGT", -1, 0}); + tests.add(new Object[]{ref, "GTACGT", -1, 0}); + tests.add(new Object[]{ref, "CGTACGT", -1, 0}); + tests.add(new Object[]{ref, "ACGTACGT", -1, 0}); + tests.add(new Object[]{ref, "TACGTACGT", 11, 9}); + tests.add(new Object[]{ref, "NTACGTACGT", 11, 9}); + tests.add(new Object[]{ref, "GTACGTACGT", 11, 10}); + tests.add(new Object[]{ref, "NGTACGTACGT", 11, 10}); + tests.add(new Object[]{ref, "CGTACGTACGT", 11, 11}); + } + + return tests.toArray(new Object[][]{}); + } + + /** + * Example testng test using MyDataProvider + */ + @Test(dataProvider = "findLongestUniqueMatchData") + public void testfindLongestUniqueMatch(final String seq, final String kmer, final int start, final int length) { + // adaptor this code to do whatever testing you want given the arguments start and size + final PrimitivePair.Int actual = GraphUtils.findLongestUniqueSuffixMatch(seq.getBytes(), kmer.getBytes()); + if ( start == -1 ) + Assert.assertNull(actual); + else { + Assert.assertNotNull(actual); + Assert.assertEquals(actual.first, start); + Assert.assertEquals(actual.second, length); + } + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPathsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPathsUnitTest.java index d1bae74b2..d6709672a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPathsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestPathsUnitTest.java @@ -114,7 +114,7 @@ public class 
KBestPathsUnitTest extends BaseTest { if ( addCycle ) graph.addEdge(middleBottom, middleBottom); // enumerate all possible paths - final List> paths = new KBestPaths(allowCycles).getKBestPaths(graph, starts, ends); + final List> paths = new KBestPaths(allowCycles).getKBestPaths(graph, starts, ends); final int expectedNumOfPaths = nStartNodes * nBranchesPerBubble * (addCycle && allowCycles ? 2 : 1) * nEndNodes; Assert.assertEquals(paths.size(), expectedNumOfPaths, "Didn't find the expected number of paths"); @@ -127,7 +127,7 @@ public class KBestPathsUnitTest extends BaseTest { // get the best path, and make sure it's the same as our optimal path overall final Path best = paths.get(0); - final List> justOne = new KBestPaths(allowCycles).getKBestPaths(graph, 1, starts, ends); + final List> justOne = new KBestPaths(allowCycles).getKBestPaths(graph, 1, starts, ends); Assert.assertEquals(justOne.size(), 1); Assert.assertTrue(justOne.get(0).pathsAreTheSame(best), "Best path from complete enumerate " + best + " not the same as from k = 1 search " + justOne.get(0)); } @@ -147,7 +147,7 @@ public class KBestPathsUnitTest extends BaseTest { graph.addEdges(v4, v2); // enumerate all possible paths - final List> paths = new KBestPaths(false).getKBestPaths(graph, v1, v5); + final List> paths = new KBestPaths(false).getKBestPaths(graph, v1, v5); Assert.assertEquals(paths.size(), 1, "Didn't find the expected number of paths"); } @@ -163,7 +163,7 @@ public class KBestPathsUnitTest extends BaseTest { graph.addEdges(v1, v2, v3, v3); // enumerate all possible paths - final List> paths = new KBestPaths(false).getKBestPaths(graph, v1, v3); + final List> paths = new KBestPaths(false).getKBestPaths(graph, v1, v3); Assert.assertEquals(paths.size(), 1, "Didn't find the expected number of paths"); } @@ -201,9 +201,9 @@ public class KBestPathsUnitTest extends BaseTest { graph.addEdge(v2Alt, v3, new BaseEdge(false, 5)); // Construct the test path - Path path = new Path(v, graph); - path = new 
Path(path, graph.getEdge(v, v2Alt)); - path = new Path(path, graph.getEdge(v2Alt, v3)); + Path path = new Path(v, graph); + path = new Path(path, graph.getEdge(v, v2Alt)); + path = new Path(path, graph.getEdge(v2Alt, v3)); // Construct the actual cigar string implied by the test path Cigar expectedCigar = new Cigar(); @@ -219,7 +219,8 @@ public class KBestPathsUnitTest extends BaseTest { } expectedCigar.add(new CigarElement(postRef.length(), CigarOperator.M)); - Assert.assertEquals(path.calculateCigar().toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch"); + final String ref = preRef + v2Ref.getSequenceString() + postRef; + Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch"); } @DataProvider(name = "GetBasesData") @@ -251,9 +252,9 @@ public class KBestPathsUnitTest extends BaseTest { } // enumerate all possible paths - final List> paths = new KBestPaths().getKBestPaths(graph); + final List> paths = new KBestPaths().getKBestPaths(graph); Assert.assertEquals(paths.size(), 1); - final Path path = paths.get(0); + final Path path = paths.get(0); Assert.assertEquals(new String(path.getBases()), Utils.join("", frags), "Path doesn't have the expected sequence"); } @@ -296,6 +297,8 @@ public class KBestPathsUnitTest extends BaseTest { SeqVertex v7 = new SeqVertex(postRef); SeqVertex postV = new SeqVertex(postAltOption); + final String ref = preRef + v2Ref.getSequenceString() + midRef1 + v4Ref.getSequenceString() + midRef2 + v6Ref.getSequenceString() + postRef; + graph.addVertex(preV); graph.addVertex(v); graph.addVertex(v2Ref); @@ -324,18 +327,18 @@ public class KBestPathsUnitTest extends BaseTest { graph.addEdge(v7, postV, new BaseEdge(false, 1)); // Construct the test path - Path path = new Path( (offRefBeginning ? preV : v), graph); + Path path = new Path( (offRefBeginning ? 
preV : v), graph); if( offRefBeginning ) { - path = new Path(path, graph.getEdge(preV, v)); + path = new Path(path, graph.getEdge(preV, v)); } - path = new Path(path, graph.getEdge(v, v2Alt)); - path = new Path(path, graph.getEdge(v2Alt, v3)); - path = new Path(path, graph.getEdge(v3, v4Ref)); - path = new Path(path, graph.getEdge(v4Ref, v5)); - path = new Path(path, graph.getEdge(v5, v6Alt)); - path = new Path(path, graph.getEdge(v6Alt, v7)); + path = new Path(path, graph.getEdge(v, v2Alt)); + path = new Path(path, graph.getEdge(v2Alt, v3)); + path = new Path(path, graph.getEdge(v3, v4Ref)); + path = new Path(path, graph.getEdge(v4Ref, v5)); + path = new Path(path, graph.getEdge(v5, v6Alt)); + path = new Path(path, graph.getEdge(v6Alt, v7)); if( offRefEnding ) { - path = new Path(path, graph.getEdge(v7,postV)); + path = new Path(path, graph.getEdge(v7,postV)); } // Construct the actual cigar string implied by the test path @@ -373,7 +376,9 @@ public class KBestPathsUnitTest extends BaseTest { expectedCigar.add(new CigarElement(postAltOption.length(), CigarOperator.I)); } - Assert.assertEquals(path.calculateCigar().toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch"); + Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), + AlignmentUtils.consolidateCigar(expectedCigar).toString(), + "Cigar string mismatch: ref = " + ref + " alt " + new String(path.getBases())); } @Test(enabled = !DEBUG) @@ -389,43 +394,46 @@ public class KBestPathsUnitTest extends BaseTest { graph.addEdges(new BaseEdge(true, 1), top, ref, bot); graph.addEdges(new BaseEdge(false, 1), top, alt, bot); - final KBestPaths pathFinder = new KBestPaths(); - final List> paths = pathFinder.getKBestPaths(graph, top, bot); + final KBestPaths pathFinder = new KBestPaths(); + final List> paths = pathFinder.getKBestPaths(graph, top, bot); Assert.assertEquals(paths.size(), 2); - final Path refPath = paths.get(0); - final Path altPath = paths.get(1); + 
final Path refPath = paths.get(0); + final Path altPath = paths.get(1); - Assert.assertEquals(refPath.calculateCigar().toString(), "10M"); - Assert.assertEquals(altPath.calculateCigar().toString(), "1M3I5M3D1M"); + final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString(); + Assert.assertEquals(refPath.calculateCigar(refString.getBytes()).toString(), "10M"); + Assert.assertEquals(altPath.calculateCigar(refString.getBytes()).toString(), "1M3I5M3D1M"); } @Test(enabled = !DEBUG) public void testHardSWPath() { // Construct the assembly graph SeqGraph graph = new SeqGraph(); - final SeqVertex top = new SeqVertex( "NNN"); - final SeqVertex bot = new SeqVertex( "NNN"); + final SeqVertex top = new SeqVertex( "NNN" ); + final SeqVertex bot = new SeqVertex( "NNN" ); final SeqVertex alt = new SeqVertex( "ACAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA" ); final SeqVertex ref = new SeqVertex( "TGTGTGTGTGTGTGACAGAGAGAGAGAGAGAGAGAGAGAGAGAGA" ); graph.addVertices(top, bot, alt, ref); graph.addEdges(new BaseEdge(true, 1), top, ref, bot); graph.addEdges(new BaseEdge(false, 1), top, alt, bot); - final KBestPaths pathFinder = new KBestPaths(); - final List> paths = pathFinder.getKBestPaths(graph, top, bot); + final KBestPaths pathFinder = new KBestPaths(); + final List> paths = pathFinder.getKBestPaths(graph, top, bot); Assert.assertEquals(paths.size(), 2); - final Path refPath = paths.get(0); - final Path altPath = paths.get(1); + final Path refPath = paths.get(0); + final Path altPath = paths.get(1); - logger.warn("RefPath : " + refPath + " cigar " + refPath.calculateCigar()); - logger.warn("AltPath : " + altPath + " cigar " + altPath.calculateCigar()); + final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString(); - Assert.assertEquals(refPath.calculateCigar().toString(), "51M"); - Assert.assertEquals(altPath.calculateCigar().toString(), "3M6I48M"); + logger.warn("RefPath : " + refPath + " 
cigar " + refPath.calculateCigar(refString.getBytes())); + logger.warn("AltPath : " + altPath + " cigar " + altPath.calculateCigar(refString.getBytes())); + + Assert.assertEquals(refPath.calculateCigar(refString.getBytes()).toString(), "51M"); + Assert.assertEquals(altPath.calculateCigar(refString.getBytes()).toString(), "3M6I48M"); } // ----------------------------------------------------------------- @@ -466,30 +474,87 @@ public class KBestPathsUnitTest extends BaseTest { // Construct the assembly graph SeqGraph graph = new SeqGraph(); - SeqVertex top = new SeqVertex(""); + final int padSize = 0; + SeqVertex top = new SeqVertex(Utils.dupString("N", padSize)); SeqVertex ref = new SeqVertex(prefix + refMid + end); SeqVertex alt = new SeqVertex(prefix + altMid + end); - SeqVertex bot = new SeqVertex(""); + SeqVertex bot = new SeqVertex(Utils.dupString("N", padSize)); graph.addVertices(top, ref, alt, bot); graph.addEdges(new BaseEdge(true, 1), top, ref, bot); graph.addEdges(new BaseEdge(false, 1), top, alt, bot); // Construct the test path - Path path = Path.makePath(Arrays.asList(top, alt, bot), graph); + Path path = Path.makePath(Arrays.asList(top, alt, bot), graph); Cigar expected = new Cigar(); + expected.add(new CigarElement(padSize, CigarOperator.M)); if ( ! prefix.equals("") ) expected.add(new CigarElement(prefix.length(), CigarOperator.M)); for ( final CigarElement elt : TextCigarCodec.getSingleton().decode(midCigar).getCigarElements() ) expected.add(elt); if ( ! end.equals("") ) expected.add(new CigarElement(end.length(), CigarOperator.M)); + expected.add(new CigarElement(padSize, CigarOperator.M)); expected = AlignmentUtils.consolidateCigar(expected); - final Cigar pathCigar = path.calculateCigar(); + final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString(); + final Cigar pathCigar = path.calculateCigar(refString.getBytes()); logger.warn("diffs: " + ref + " vs. 
" + alt + " cigar " + midCigar); logger.warn("Path " + path + " with cigar " + pathCigar); logger.warn("Expected cigar " + expected); - Assert.assertEquals(pathCigar, expected, "Cigar mismatch"); + Assert.assertEquals(pathCigar, expected, "Cigar mismatch: ref = " + refString + " vs alt = " + new String(path.getBases())); + } + + @Test(enabled = !DEBUG) + public void testLeftAlignCigarSequentially() { + String preRefString = "GATCGATCGATC"; + String postRefString = "TTT"; + String refString = "ATCGAGGAGAGCGCCCCG"; + String indelString1 = "X"; + String indelString2 = "YZ"; + int refIndel1 = 10; + int refIndel2 = 12; + + for ( final int indelSize1 : Arrays.asList(1, 2, 3, 4) ) { + for ( final int indelOp1 : Arrays.asList(1, -1) ) { + for ( final int indelSize2 : Arrays.asList(1, 2, 3, 4) ) { + for ( final int indelOp2 : Arrays.asList(1, -1) ) { + + Cigar expectedCigar = new Cigar(); + expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M)); + expectedCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D))); + expectedCigar.add(new CigarElement((indelOp1 < 0 ? refIndel1 - indelSize1 : refIndel1), CigarOperator.M)); + expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M)); + expectedCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D))); + expectedCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 - indelSize2) * 2 : refIndel2 * 2), CigarOperator.M)); + expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M)); + + Cigar givenCigar = new Cigar(); + givenCigar.add(new CigarElement(refString.length() + refIndel1/2, CigarOperator.M)); + givenCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D))); + givenCigar.add(new CigarElement((indelOp1 < 0 ? (refIndel1/2 - indelSize1) : refIndel1/2) + refString.length() + refIndel2/2 * 2, CigarOperator.M)); + givenCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? 
CigarOperator.I : CigarOperator.D))); + givenCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2/2 - indelSize2) * 2 : refIndel2/2 * 2) + refString.length(), CigarOperator.M)); + + String theRef = preRefString + refString + Utils.dupString(indelString1, refIndel1) + refString + Utils.dupString(indelString2, refIndel2) + refString + postRefString; + String theRead = refString + Utils.dupString(indelString1, refIndel1 + indelOp1 * indelSize1) + refString + Utils.dupString(indelString2, refIndel2 + indelOp2 * indelSize2) + refString; + + Cigar calculatedCigar = Path.leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(givenCigar), theRef.getBytes(), theRead.getBytes(), preRefString.length(), 0); + Assert.assertEquals(AlignmentUtils.consolidateCigar(calculatedCigar).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar strings do not match!"); + } + } + } + } + } + + @Test(enabled = true) + public void testLeftAlignCigarSequentiallyAdjacentID() { + final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT"; + final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT"; + final Cigar originalCigar = TextCigarCodec.getSingleton().decode("18M4I12M4D2M"); + + final Cigar result = Path.leftAlignCigarSequentially(originalCigar, ref.getBytes(), hap.getBytes(), 0, 0); + logger.warn("Result is " + result); + Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPrunerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPrunerUnitTest.java new file mode 100644 index 000000000..06d81499c --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPrunerUnitTest.java @@ -0,0 +1,163 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* 
BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. 
LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
+
+import org.broadinstitute.sting.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.*;
+
+/**
+ * Unit tests for {@link LowWeightChainPruner}: each case builds a small {@link SeqGraph}, prunes it with a
+ * given prune factor, and checks which vertices are left.
+ */
+public class LowWeightChainPrunerUnitTest extends BaseTest {
+    /**
+     * Builds the test cases for {@link #testPruneChains}.
+     *
+     * Each row is {@code {case name, graph to prune, prune factor, vertices expected to remain}}.
+     * Rows that pass {@code graph.vertexSet()} as the expectation hand over the graph's own vertex set,
+     * so the test must snapshot it before pruning.
+     */
+    @DataProvider(name = "pruneChainsData")
+    public Object[][] makePruneChainsData() {
+        final List<Object[]> tests = new ArrayList<>();
+
+        final SeqVertex v1 = new SeqVertex("A");
+        final SeqVertex v2 = new SeqVertex("C");
+        final SeqVertex v3 = new SeqVertex("G");
+        final SeqVertex v4 = new SeqVertex("T");
+        final SeqVertex v5 = new SeqVertex("AA");
+        final SeqVertex v6 = new SeqVertex("CC");
+
+        for ( final int edgeWeight : Arrays.asList(1, 2, 3) ) {
+            for ( final int pruneFactor : Arrays.asList(1, 2, 3, 4) ) {
+                for ( final boolean isRef : Arrays.asList(true, false) ) {
+                    { // just an isolated chain
+                        // the whole chain goes away only when it is non-reference and below the prune factor
+                        final boolean expectPruned = edgeWeight < pruneFactor && ! isRef;
+                        SeqGraph graph = new SeqGraph();
+                        graph.addVertices(v1, v2, v3);
+                        graph.addEdges(new BaseEdge(isRef, edgeWeight), v1, v2, v3);
+                        tests.add(new Object[]{"combinatorial", graph, pruneFactor, expectPruned ? Collections.emptySet() : graph.vertexSet()});
+                    }
+                }
+            }
+        }
+
+        { // low-weight non-ref chain hanging off a ref chain: only the ref vertices survive
+            SeqGraph graph = new SeqGraph();
+            graph.addVertices(v1, v2, v3);
+            graph.addVertices(v4, v5);
+            graph.addEdges(new BaseEdge(true, 1), v4, v5);
+            graph.addEdges(new BaseEdge(false, 1), v4, v1, v2, v3, v5);
+            tests.add(new Object[]{"bad internal branch", graph, 2, new HashSet<>(Arrays.asList(v4, v5))});
+        }
+
+        { // has bad cycle
+            SeqGraph graph = new SeqGraph();
+            graph.addVertices(v1, v2, v3, v4);
+            graph.addEdges(new BaseEdge(false, 1), v4, v1, v2, v3, v1);
+            // note that we'll remove v4 as well because its edge is low weight too
+            tests.add(new Object[]{"has bad cycle", graph, 2, Collections.emptySet()});
+        }
+
+        { // has good cycle
+            SeqGraph graph = new SeqGraph();
+            graph.addVertices(v1, v2, v3, v4);
+            graph.addEdges(new BaseEdge(false, 3), v4, v1, v2, v3, v1);
+            // every edge has weight 3 >= prune factor 2, so nothing (v4 included) is expected to be removed
+            tests.add(new Object[]{"has good cycle", graph, 2, graph.vertexSet()});
+        }
+
+        { // has branch
+            SeqGraph graph = new SeqGraph();
+            graph.addVertices(v1, v2, v3, v4, v5, v6);
+            graph.addEdges(new BaseEdge(false, 1), v1, v2, v3, v4, v6);
+            graph.addEdges(new BaseEdge(false, 1), v1, v2, v3, v5, v6);
+            tests.add(new Object[]{"has two bad branches", graph, 2, Collections.emptySet()});
+        }
+
+        { // middle vertex above threshold => no one can be removed
+            SeqGraph graph = new SeqGraph();
+            graph.addVertices(v1, v2, v3, v4, v5);
+            graph.addEdges(new BaseEdge(false, 1), v1, v2);
+            graph.addEdges(new BaseEdge(false, 3), v2, v3);
+            graph.addEdges(new BaseEdge(false, 1), v3, v4, v5);
+            tests.add(new Object[]{"middle vertex above factor", graph, 2, graph.vertexSet()});
+        }
+
+        { // the branching node has value > pruneFactor
+            SeqGraph graph = new SeqGraph();
+            graph.addVertices(v1, v2, v3, v4, v5, v6);
+            graph.addEdges(new BaseEdge(false, 3), v1, v2);
+            graph.addEdges(new BaseEdge(false, 3), v2, v3);
+            graph.addEdges(new BaseEdge(false, 1), v3, v4, v6);
+            graph.addEdges(new BaseEdge(false, 3), v2, v5, v6);
+            tests.add(new Object[]{"branch node greater than pruneFactor", graph, 2, graph.vertexSet()});
+        }
+
+        { // A single isolated chain with weights all below pruning should be pruned
+            SeqGraph graph = new SeqGraph();
+            graph.addVertices(v1, v2, v3, v4, v5);
+            graph.addEdges(new BaseEdge(false, 1), v1, v2, v3);
+            graph.addEdges(new BaseEdge(false, 5), v4, v5);
+            tests.add(new Object[]{"isolated chain", graph, 2, new LinkedHashSet<>(Arrays.asList(v4, v5))});
+        }
+
+        { // A chain with weights all below pruning should be pruned, even if it connects to another good chain
+            SeqGraph graph = new SeqGraph();
+            graph.addVertices(v1, v2, v3, v4, v5, v6);
+            graph.addEdges(new BaseEdge(false, 1), v1, v2, v3, v5);
+            graph.addEdges(new BaseEdge(false, 5), v4, v5, v6);
+            tests.add(new Object[]{"bad chain branching into good one", graph, 2, new HashSet<>(Arrays.asList(v4, v5, v6))});
+        }
+
+        return tests.toArray(new Object[][]{});
+    }
+
+    /**
+     * Prunes {@code graph} with a {@link LowWeightChainPruner} built from {@code pruneFactor} and asserts
+     * that exactly {@code remainingVertices} are left.
+     *
+     * @param name              label of the data-provider case, used in the failure message
+     * @param graph             graph to prune; modified in place
+     * @param pruneFactor       prune factor handed to the pruner
+     * @param remainingVertices vertices expected to remain after pruning
+     */
+    @Test(dataProvider = "pruneChainsData", enabled = true)
+    public void testPruneChains(final String name, final SeqGraph graph, final int pruneFactor, final Set<SeqVertex> remainingVertices) {
+        // snapshot first: the expectation may be the graph's own vertex set, which pruning would alter
+        final Set<SeqVertex> copy = new HashSet<>(remainingVertices);
+//        graph.printGraph(new File("in.dot"), 0);
+        final LowWeightChainPruner<SeqVertex, BaseEdge> pruner = new LowWeightChainPruner<>(pruneFactor);
+        pruner.pruneLowWeightChains(graph);
+//        graph.printGraph(new File("out.dot"), 0);
+        Assert.assertEquals(graph.vertexSet(), copy, "Unexpected vertices remaining after pruning for case: " + name);
+    }
+}
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdgeUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdgeUnitTest.java
new file mode 100644
index 000000000..f11be6635
--- /dev/null
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdgeUnitTest.java
@@ -0,0 +1,103 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE -
SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Unit tests for the multiplicity bookkeeping of {@link MultiSampleEdge}.
+ */
+public class MultiSampleEdgeUnitTest extends BaseTest {
+    /**
+     * Builds one test row per permutation (without replacement) of 1 to 5 values drawn from the
+     * per-sample counts 0..5; each row is a single {@code List<Integer>} of counts, one entry per sample.
+     */
+    @DataProvider(name = "MultiplicityData")
+    public Object[][] makeMultiplicityData() {
+        final List<Object[]> tests = new ArrayList<Object[]>();
+
+        final List<Integer> countsPerSample = Arrays.asList(0, 1, 2, 3, 4, 5);
+        for ( final int nSamples : Arrays.asList(1, 2, 3, 4, 5) ) {
+            for ( final List<Integer> perm : Utils.makePermutations(countsPerSample, nSamples, false) ) {
+                tests.add(new Object[]{perm});
+            }
+        }
+
+        return tests.toArray(new Object[][]{});
+    }
+
+    /**
+     * Replays the given per-sample counts against a fresh edge: for each sample the multiplicity is
+     * incremented one step at a time, checking the running total and the current single-sample count
+     * after every step, and the single-sample count is flushed before moving to the next sample.
+     * At the end the total multiplicity must equal the sum of all counts, and both the pruning
+     * multiplicity and the max single-sample multiplicity must equal the largest per-sample count.
+     *
+     * @param countsPerSample number of increments to apply for each sample, in order
+     */
+    @Test(dataProvider = "MultiplicityData")
+    public void testMultiplicity(final List<Integer> countsPerSample) {
+        final MultiSampleEdge edge = new MultiSampleEdge(false, 0);
+        Assert.assertEquals(edge.getMultiplicity(), 0);
+        Assert.assertEquals(edge.getPruningMultiplicity(), 0);
+
+        int total = 0;
+        for ( final int sampleCount : countsPerSample ) {
+            int countForSample = 0;
+            for ( int count = 0; count < sampleCount; count++ ) {
+                edge.incMultiplicity(1);
+                total++;
+                countForSample++;
+                Assert.assertEquals(edge.getMultiplicity(), total);
+                Assert.assertEquals(edge.getCurrentSingleSampleMultiplicity(), countForSample);
+            }
+            // close out this sample before counting the next one
+            edge.flushSingleSampleMultiplicity();
+        }
+
+        final int max = MathUtils.arrayMax(ArrayUtils.toPrimitive(countsPerSample.toArray(new Integer[countsPerSample.size()])));
+        Assert.assertEquals(edge.getMultiplicity(), total);
+        Assert.assertEquals(edge.getPruningMultiplicity(), max);
+        Assert.assertEquals(edge.getMaxSingleSampleMultiplicity(), max);
+    }
+}
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/PathUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/PathUnitTest.java
new file mode 100644
index 000000000..ee07bea33
--- /dev/null
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/PathUnitTest.java
@@ -0,0 +1,80 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. 
NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; + +import net.sf.samtools.Cigar; +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class PathUnitTest extends BaseTest { + @Test(enabled = true) + public void testAlignReallyLongDeletion() { + final String ref = "ATGGTGGCTCATACCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGAACATCACCTGAGGCCAGGAGTTCAAAACCAGCCTGGCTAACATAGCAAAACCCCATCTCTAATGAAAATACAAAAATTAGCTGGGTGTGGTGGTGTCCGCCTGTAGTCCCAGCTACTCAGGAGACTAAGGCATGAGAATCACTTGAACCCAGGATGCAGAGGCTGTAGTGAGCCGAGATTGCACCACGGCTGCACTCCAGCCTGGGCAACAGAGCGAGACTCTGTCTCAAATAAAATAGCGTAACGTAACATAACATAACATAACATAACATAACATAACATAACATAACATAACATAACATAACACAACAACAAAATAAAATAACATAAATCATGTTGTTAGGAAAAAAATCAGTTATGCAGCTACATGCTATTTACAAGAGATATACCTTAAAATATAAGACACAGAGGCCGGGCGCGGTAGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCAAGCGGATCATGAGGTCAGGAGATCGAGACCATCC"; + final String hap = "ATGGTGGCTCATACCTGTAATCCCAGCACTTTGGGAGGCTGAGGCAAGCGGATCATGAGGTCAGGAGATCGAGACCATCCT"; + + final SeqGraph graph = new SeqGraph(); + final SeqVertex v = new SeqVertex(hap); + graph.addVertex(v); + final Path path = new Path(v, graph); + final Cigar cigar = path.calculateCigar(ref.getBytes()); + Assert.assertNull(cigar, "Should have failed gracefully"); + } + + @Test(enabled = true) + public void testAlignReallyLongDeletion2() { + final String ref = "CGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAGGTGATCCACTCGCCTCGGTCTCCCAAAGTGTTGGGATTACAGGCATGAACCACTGCACCTGGCCTAGTGTTTGGGAAAACTATACTAGGAAAAGAATAGTTGCTTTAAGTCATTCTTTGATTATTCTGAGAATTGGCATATAGCTGCCATTATAACCTACTTTTGCTAAATATAATAATAATAATCATTATTTTTATTTTTTGAGACAGGGTCTTGTTTTGTCACCCCGGCTGGAGTGAAGTGGCGCAATCTCGGCTCACTGCAACCTCCACCTCCGGGTGCAAGCAATTCTCCTGCCTCAGCCTCTTGAGTAGCTAGGATTACAGGCACAAGCCATCATGCCCAGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCAGGT"; + final String hap = "CGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCAGGT"; + + final SeqGraph 
graph = new SeqGraph(); + final SeqVertex v = new SeqVertex(hap); + graph.addVertex(v); + final Path path = new Path(v, graph); + final Cigar cigar = path.calculateCigar(ref.getBytes()); + Assert.assertEquals(cigar.toString(), "48M419D30M"); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraphUnitTest.java index bd2e3cc2c..c72f426be 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraphUnitTest.java @@ -280,16 +280,15 @@ public class SeqGraphUnitTest extends BaseTest { all.addEdges(pre2, top, middle2, bottom, tail2); final SeqGraph expected = new SeqGraph(); + SeqVertex newPre1 = new SeqVertex(Utils.dupString("A", SeqGraph.MIN_COMMON_SEQUENCE_TO_MERGE_SOURCE_SINK_VERTICES) + "C"); + SeqVertex newPre2 = new SeqVertex(Utils.dupString("A", SeqGraph.MIN_COMMON_SEQUENCE_TO_MERGE_SOURCE_SINK_VERTICES) + "G"); + final SeqVertex newTop = new SeqVertex("TA"); final SeqVertex newMiddle1 = new SeqVertex("G"); final SeqVertex newMiddle2 = new SeqVertex("T"); final SeqVertex newBottom = new SeqVertex("C" + bottom.getSequenceString()); - final SeqVertex newTop = new SeqVertex(Utils.dupString("A", SeqGraph.MIN_COMMON_SEQUENCE_TO_MERGE_SOURCE_SINK_VERTICES)); - final SeqVertex newTopDown1 = new SeqVertex("G"); - final SeqVertex newTopDown2 = new SeqVertex("C"); - final SeqVertex newTopBottomMerged = new SeqVertex("TA"); - expected.addVertices(newTop, newTopDown1, newTopDown2, newTopBottomMerged, newMiddle1, newMiddle2, newBottom, tail1, tail2); - expected.addEdges(newTop, newTopDown1, newTopBottomMerged, newMiddle1, newBottom, tail1); - expected.addEdges(newTop, newTopDown2, newTopBottomMerged, newMiddle2, newBottom, tail2); + expected.addVertices(newPre1, newPre2, 
newTop, newMiddle1, newMiddle2, newBottom, tail1, tail2); + expected.addEdges(newPre1, newTop, newMiddle1, newBottom, tail1); + expected.addEdges(newPre2, newTop, newMiddle2, newBottom, tail2); tests.add(new Object[]{all.clone(), expected.clone()}); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitterUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitterUnitTest.java index 2df783b19..5bc13f884 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitterUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitterUnitTest.java @@ -227,8 +227,8 @@ public class SharedVertexSequenceSplitterUnitTest extends BaseTest { } final Set haplotypes = new HashSet(); - final List> originalPaths = new KBestPaths().getKBestPaths((SeqGraph)graph.clone()); - for ( final Path path : originalPaths ) + final List> originalPaths = new KBestPaths().getKBestPaths((SeqGraph)graph.clone()); + for ( final Path path : originalPaths ) haplotypes.add(new String(path.getBases())); final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, v); @@ -238,8 +238,8 @@ public class SharedVertexSequenceSplitterUnitTest extends BaseTest { splitter.updateGraph(top, bot); if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".updated.dot"), 0); - final List> splitPaths = new KBestPaths().getKBestPaths(graph); - for ( final Path path : splitPaths ) { + final List> splitPaths = new KBestPaths().getKBestPaths(graph); + for ( final Path path : splitPaths ) { final String h = new String(path.getBases()); Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h); } diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java new file mode 100644 index 000000000..8efb3d486 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java @@ -0,0 +1,213 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. 
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.haplotype.Haplotype; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.*; + +public class ReadThreadingAssemblerUnitTest extends BaseTest { + private final static boolean DEBUG = false; + + private static class TestAssembler { + final ReadThreadingAssembler assembler; + + Haplotype refHaplotype; + final List reads = new LinkedList(); + + private TestAssembler(final int kmerSize) { + this.assembler = new ReadThreadingAssembler(100000, Arrays.asList(kmerSize)); + assembler.setJustReturnRawGraph(true); + assembler.setPruneFactor(0); + } + + public void addSequence(final byte[] bases, final boolean isRef) { + if ( isRef ) { + refHaplotype = new Haplotype(bases, true); + } else { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, Utils.dupBytes((byte)30,bases.length), bases.length + "M"); + reads.add(read); + } + } + + public SeqGraph assemble() { + assembler.removePathsNotConnectedToRef = false; // need to pass some of the tests + assembler.setDebugGraphTransformations(true); + final SeqGraph graph = assembler.assemble(reads, refHaplotype).get(0); + if ( DEBUG ) graph.printGraph(new File("test.dot"), 0); + return graph; + } + } + + private void assertLinearGraph(final TestAssembler assembler, final String seq) { + final SeqGraph graph = assembler.assemble(); + graph.simplifyGraph(); + Assert.assertEquals(graph.vertexSet().size(), 1); + Assert.assertEquals(graph.vertexSet().iterator().next().getSequenceString(), seq); + } + + private void assertSingleBubble(final TestAssembler assembler, 
final String one, final String two) { + final SeqGraph graph = assembler.assemble(); + graph.simplifyGraph(); + List> paths = new KBestPaths().getKBestPaths(graph); + Assert.assertEquals(paths.size(), 2); + final Set expected = new HashSet(Arrays.asList(one, two)); + for ( final Path path : paths ) { + final String seq = new String(path.getBases()); + Assert.assertTrue(expected.contains(seq)); + expected.remove(seq); + } + } + + @Test(enabled = ! DEBUG) + public void testRefCreation() { + final String ref = "ACGTAACCGGTT"; + final TestAssembler assembler = new TestAssembler(3); + assembler.addSequence(ref.getBytes(), true); + assertLinearGraph(assembler, ref); + } + + @Test(enabled = ! DEBUG) + public void testRefNonUniqueCreation() { + final String ref = "GAAAAT"; + final TestAssembler assembler = new TestAssembler(3); + assembler.addSequence(ref.getBytes(), true); + assertLinearGraph(assembler, ref); + } + + @Test(enabled = ! DEBUG) + public void testRefAltCreation() { + final TestAssembler assembler = new TestAssembler(3); + final String ref = "ACAACTGA"; + final String alt = "ACAGCTGA"; + assembler.addSequence(ref.getBytes(), true); + assembler.addSequence(alt.getBytes(), false); + assertSingleBubble(assembler, ref, alt); + } + + @Test(enabled = ! DEBUG) + public void testPartialReadsCreation() { + final TestAssembler assembler = new TestAssembler(3); + final String ref = "ACAACTGA"; + final String alt1 = "ACAGCT"; + final String alt2 = "GCTGA"; + assembler.addSequence(ref.getBytes(), true); + assembler.addSequence(alt1.getBytes(), false); + assembler.addSequence(alt2.getBytes(), false); + assertSingleBubble(assembler, ref, "ACAGCTGA"); + } + + @Test(enabled = ! 
DEBUG) + public void testStartInMiddle() { + final TestAssembler assembler = new TestAssembler(3); + final String ref = "CAAAATG"; + final String read = "AAATG"; + assembler.addSequence(ref.getBytes(), true); + assembler.addSequence(read.getBytes(), false); + assertLinearGraph(assembler, ref); + } + + @Test(enabled = ! DEBUG) + public void testStartInMiddleWithBubble() { + final TestAssembler assembler = new TestAssembler(3); + final String ref = "CAAAATGGGG"; + final String read = "AAATCGGG"; + assembler.addSequence(ref.getBytes(), true); + assembler.addSequence(read.getBytes(), false); + assertSingleBubble(assembler, ref, "CAAAATCGGG"); + } + + @Test(enabled = ! DEBUG) + public void testNoGoodStarts() { + final TestAssembler assembler = new TestAssembler(3); + final String ref = "CAAAATGGGG"; + final String read = "AAATCGGG"; + assembler.addSequence(ref.getBytes(), true); + assembler.addSequence(read.getBytes(), false); + assertSingleBubble(assembler, ref, "CAAAATCGGG"); + } + + + @Test(enabled = !DEBUG) + public void testCreateWithBasesBeforeRefSource() { + final TestAssembler assembler = new TestAssembler(3); + final String ref = "ACTG"; + final String read = "CTGGGACT"; + assembler.addSequence(ReadThreadingGraphUnitTest.getBytes(ref), true); + assembler.addSequence(ReadThreadingGraphUnitTest.getBytes(read), false); + assertLinearGraph(assembler, "ACTGGGACT"); + } + + @Test(enabled = !DEBUG) + public void testSingleIndelAsDoubleIndel3Reads() { + final TestAssembler assembler = new TestAssembler(25); + // The single indel spans two repetitive structures + final String ref = "GTTTTTCCTAGGCAAATGGTTTCTATAAAATTATGTGTGTGTGTCTCTCTCTGTGTGTGTGTGTGTGTGTGTGTGTATACCTAATCTCACACTCTTTTTTCTGG"; + final String read1 = "GTTTTTCCTAGGCAAATGGTTTCTATAAAATTATGTGTGTGTGTCTCT----------GTGTGTGTGTGTGTGTGTATACCTAATCTCACACTCTTTTTTCTGG"; + final String read2 = "GTTTTTCCTAGGCAAATGGTTTCTATAAAATTATGTGTGTGTGTCTCT----------GTGTGTGTGTGTGTGTGTATACCTAATCTCACACTCTTTTTTCTGG"; + 
assembler.addSequence(ReadThreadingGraphUnitTest.getBytes(ref), true); + assembler.addSequence(ReadThreadingGraphUnitTest.getBytes(read1), false); + assembler.addSequence(ReadThreadingGraphUnitTest.getBytes(read2), false); + + final SeqGraph graph = assembler.assemble(); + final KBestPaths pathFinder = new KBestPaths(); + final List> paths = pathFinder.getKBestPaths(graph); + Assert.assertEquals(paths.size(), 2); + final byte[] refPath = paths.get(0).getBases().length == ref.length() ? paths.get(0).getBases() : paths.get(1).getBases(); + final byte[] altPath = paths.get(0).getBases().length == ref.length() ? paths.get(1).getBases() : paths.get(0).getBases(); + Assert.assertEquals(refPath, ReadThreadingGraphUnitTest.getBytes(ref)); + Assert.assertEquals(altPath, ReadThreadingGraphUnitTest.getBytes(read1)); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java new file mode 100644 index 000000000..10c1cc00d --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java @@ -0,0 +1,191 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer; +import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.MultiSampleEdge; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.*; + +public class ReadThreadingGraphUnitTest extends BaseTest { + private final static boolean DEBUG = false; + + public static byte[] getBytes(final String alignment) { + return alignment.replace("-","").getBytes(); + } + + private void assertNonUniques(final ReadThreadingGraph assembler, String ... nonUniques) { + final Set actual = new HashSet<>(); + assembler.buildGraphIfNecessary(); + for ( final Kmer kmer : assembler.getNonUniqueKmers() ) actual.add(kmer.baseString()); + final Set expected = new HashSet<>(Arrays.asList(nonUniques)); + Assert.assertEquals(actual, expected); + } + + @Test(enabled = ! 
DEBUG) + public void testNonUniqueMiddle() { + final ReadThreadingGraph assembler = new ReadThreadingGraph(3); + final String ref = "GACACACAGTCA"; + final String read1 = "GACAC---GTCA"; + final String read2 = "CAC---GTCA"; + assembler.addSequence(getBytes(ref), true); + assembler.addSequence(getBytes(read1), false); + assembler.addSequence(getBytes(read2), false); + assertNonUniques(assembler, "ACA", "CAC"); + } + + @Test(enabled = ! DEBUG) + public void testReadsCreateNonUnique() { + final ReadThreadingGraph assembler = new ReadThreadingGraph(3); + final String ref = "GCAC--GTCA"; // CAC is unique + final String read1 = "GCACACGTCA"; // makes CAC non unique because it has a duplication + final String read2 = "CACGTCA"; // shouldn't be allowed to match CAC as start + assembler.addSequence(getBytes(ref), true); + assembler.addSequence(getBytes(read1), false); + assembler.addSequence(getBytes(read2), false); +// assembler.convertToSequenceGraph().printGraph(new File("test.dot"), 0); + + assertNonUniques(assembler, "CAC"); + //assertSingleBubble(assembler, ref, "CAAAATCGGG"); + } + + @Test(enabled = ! 
DEBUG) + public void testCountingOfStartEdges() { + final ReadThreadingGraph assembler = new ReadThreadingGraph(3); + final String ref = "NNNGTCAAA"; // ref has some bases before start + final String read1 = "GTCAAA"; // starts at first non N base + + assembler.addSequence(getBytes(ref), true); + assembler.addSequence(getBytes(read1), false); + assembler.buildGraphIfNecessary(); +// assembler.printGraph(new File("test.dot"), 0); + + for ( final MultiSampleEdge edge : assembler.edgeSet() ) { + final MultiDeBruijnVertex source = assembler.getEdgeSource(edge); + final MultiDeBruijnVertex target = assembler.getEdgeTarget(edge); + final boolean headerVertex = source.getSuffix() == 'N' || target.getSuffix() == 'N'; + if ( headerVertex ) { + Assert.assertEquals(edge.getMultiplicity(), 1, "Bases in the unique reference header should have multiplicity of 1"); + } else { + Assert.assertEquals(edge.getMultiplicity(), 2, "Should have multiplicity of 2 for any edge outside the ref header but got " + edge + " " + source + " -> " + target); + } + } + } + + @Test(enabled = !DEBUG) + public void testCountingOfStartEdgesWithMultiplePrefixes() { + final ReadThreadingGraph assembler = new ReadThreadingGraph(3); + assembler.increaseCountsThroughBranches = true; + final String ref = "NNNGTCAXX"; // ref has some bases before start + final String alt1 = "NNNCTCAXX"; // alt1 has SNP right after N + final String read = "TCAXX"; // starts right after SNP, but merges right before branch + + assembler.addSequence(getBytes(ref), true); + assembler.addSequence(getBytes(alt1), false); + assembler.addSequence(getBytes(read), false); + assembler.buildGraphIfNecessary(); +// assembler.printGraph(new File("test.dot"), 0); // debug only: kept disabled so the test does not write test.dot into the working directory + + final List oneCountVertices = Arrays.asList("NNN", "NNG", "NNC", "NGT", "NCT"); + final List threeCountVertices = Arrays.asList("CAX", "AXX"); + + for ( final MultiSampleEdge edge : assembler.edgeSet() ) { + final MultiDeBruijnVertex source = assembler.getEdgeSource(edge); + final
MultiDeBruijnVertex target = assembler.getEdgeTarget(edge); + final int expected = oneCountVertices.contains(target.getSequenceString()) ? 1 : (threeCountVertices.contains(target.getSequenceString()) ? 3 : 2); + Assert.assertEquals(edge.getMultiplicity(), expected, "Bases at edge " + edge + " from " + source + " to " + target + " has bad multiplicity"); + } + } + + // TODO -- update to use determineKmerSizeAndNonUniques directly +// @DataProvider(name = "KmerSizeData") +// public Object[][] makeKmerSizeDataProvider() { +// List tests = new ArrayList(); +// +// // this functionality can be adapted to provide input data for whatever you might want in your data +// tests.add(new Object[]{3, 3, 3, Arrays.asList("ACG"), Arrays.asList()}); +// tests.add(new Object[]{3, 4, 3, Arrays.asList("CAGACG"), Arrays.asList()}); +// +// tests.add(new Object[]{3, 3, 3, Arrays.asList("AAAAC"), Arrays.asList("AAA")}); +// tests.add(new Object[]{3, 4, 4, Arrays.asList("AAAAC"), Arrays.asList()}); +// tests.add(new Object[]{3, 5, 4, Arrays.asList("AAAAC"), Arrays.asList()}); +// tests.add(new Object[]{3, 4, 3, Arrays.asList("CAAA"), Arrays.asList()}); +// tests.add(new Object[]{3, 4, 4, Arrays.asList("CAAAA"), Arrays.asList()}); +// tests.add(new Object[]{3, 5, 4, Arrays.asList("CAAAA"), Arrays.asList()}); +// tests.add(new Object[]{3, 5, 5, Arrays.asList("ACGAAAAACG"), Arrays.asList()}); +// +// for ( int maxSize = 3; maxSize < 20; maxSize++ ) { +// for ( int dupSize = 3; dupSize < 20; dupSize++ ) { +// final int expectedSize = Math.min(maxSize, dupSize); +// final String dup = Utils.dupString("C", dupSize); +// final List nonUnique = dupSize > maxSize ? 
Arrays.asList(Utils.dupString("C", maxSize)) : Collections.emptyList(); +// tests.add(new Object[]{3, maxSize, expectedSize, Arrays.asList("ACGT", "A" + dup + "GT"), nonUnique}); +// tests.add(new Object[]{3, maxSize, expectedSize, Arrays.asList("A" + dup + "GT", "ACGT"), nonUnique}); +// } +// } +// +// return tests.toArray(new Object[][]{}); +// } +// +// /** +// * Example testng test using MyDataProvider +// */ +// @Test(dataProvider = "KmerSizeData") +// public void testDynamicKmerSizing(final int min, final int max, final int expectKmer, final List seqs, final List expectedNonUniques) { +// final ReadThreadingGraph assembler = new ReadThreadingGraph(min, max); +// for ( String seq : seqs ) assembler.addSequence(seq.getBytes(), false); +// assembler.buildGraphIfNecessary(); +// Assert.assertEquals(assembler.getKmerSize(), expectKmer); +// assertNonUniques(assembler, expectedNonUniques.toArray(new String[]{})); +// } + + +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/SequenceForKmersUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/SequenceForKmersUnitTest.java new file mode 100644 index 000000000..7c3160c30 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/SequenceForKmersUnitTest.java @@ -0,0 +1,80 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class SequenceForKmersUnitTest extends BaseTest { + @Test + public void testNoCount() { + final byte[] seq = "ACGT".getBytes(); + final SequenceForKmers sk = new SequenceForKmers("foo", seq, 0, seq.length, null, true); + Assert.assertEquals(sk.name, "foo"); + Assert.assertEquals(sk.sequence, seq); + Assert.assertEquals(sk.start, 0); + Assert.assertEquals(sk.stop, seq.length); + Assert.assertEquals(sk.isRef, true); + for ( int i = 0; i < seq.length; i++ ) + Assert.assertEquals(sk.getCount(i), 1); + } + + @Test + public void testWithCounts() { + final int len = 256; + final int[] counts = new int[len]; + for ( int i = 0; i < len; i++ ) counts[i] = i; + final byte[] seq = Utils.dupBytes((byte)'A', len); + + final SequenceForKmers sk = new SequenceForKmers("foo", seq, 0, seq.length, counts, true); + + for ( int i = 0; i < seq.length; i++ ) + Assert.assertEquals(sk.getCount(i), i); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java 
b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 1daaaf1da..f9a4fcdbb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -438,7 +438,7 @@ public class TraverseActiveRegions extends TraversalEngine= 0 + */ + public static int longestCommonPrefix(final byte[] seq1, final byte[] seq2, final int maxLength) { + if ( seq1 == null ) throw new IllegalArgumentException("seq1 is null"); + if ( seq2 == null ) throw new IllegalArgumentException("seq2 is null"); + if ( maxLength < 0 ) throw new IllegalArgumentException("maxLength < 0 " + maxLength); + + final int end = Math.min(seq1.length, Math.min(seq2.length, maxLength)); + for ( int i = 0; i < end; i++ ) { + if ( seq1[i] != seq2[i] ) + return i; + } + return end; + } + + /** + * Get the length of the longest common suffix of seq1 and seq2 + * @param seq1 non-null byte array + * @param seq2 non-null byte array + * @param maxLength the maximum allowed length to return + * @return the length of the longest common suffix of seq1 and seq2, >= 0 + */ + public static int longestCommonSuffix(final byte[] seq1, final byte[] seq2, final int maxLength) { + if ( seq1 == null ) throw new IllegalArgumentException("seq1 is null"); + if ( seq2 == null ) throw new IllegalArgumentException("seq2 is null"); + if ( maxLength < 0 ) throw new IllegalArgumentException("maxLength < 0 " + maxLength); + + final int end = Math.min(seq1.length, Math.min(seq2.length, maxLength)); + for ( int i = 0; i < end; i++ ) { + if ( seq1[seq1.length - i - 1] != seq2[seq2.length - i - 1] ) + return i; + } + return end; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java index 890faa82a..78f81ec5e 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java @@ -45,7 +45,7 @@ import java.util.*; * Date: Mar 23, 2009 * Time: 1:54:54 PM */ -public final class SWPairwiseAlignment { +public final class SWPairwiseAlignment implements SmithWaterman { private int alignment_offset; // offset of s2 w/respect to s1 private Cigar alignmentCigar; @@ -57,7 +57,7 @@ public final class SWPairwiseAlignment { private static final int CLIP = 3; protected static boolean cutoff = false; - private static boolean DO_SOFTCLIP = true; + private boolean doSoftClipping = true; /** * The SW scoring matrix, stored for debugging purposes if keepScoringMatrix is true @@ -90,10 +90,23 @@ public final class SWPairwiseAlignment { * @param parameters the SW parameters to use */ public SWPairwiseAlignment(byte[] seq1, byte[] seq2, Parameters parameters) { - this.parameters = parameters; + this(parameters); align(seq1,seq2); } + /** + * Create a new SW pairwise aligner, without actually doing any alignment yet + * + * @param parameters the SW parameters to use + */ + protected SWPairwiseAlignment(Parameters parameters) { + this.parameters = parameters; + } + + protected void setDoSoftClipping(final boolean doSoftClipping) { + this.doSoftClipping = doSoftClipping; + } + /** * Create a new SW pairwise aligner * @@ -111,8 +124,10 @@ public final class SWPairwiseAlignment { this(seq1,seq2,SWParameterSet.ORIGINAL_DEFAULT); } + @Override public Cigar getCigar() { return alignmentCigar ; } + @Override public int getAlignmentStart2wrt1() { return alignment_offset; } public void align(final byte[] a, final byte[] b) { @@ -265,7 +280,7 @@ public final class SWPairwiseAlignment { List lce = new ArrayList(5); - if ( segment_length > 0 && DO_SOFTCLIP ) { + if ( segment_length > 0 && doSoftClipping ) { lce.add(makeElement(CLIP, segment_length)); segment_length = 0; } @@ -316,7 +331,7 @@ 
public final class SWPairwiseAlignment { // last 3 bases of the read overlap with/align to the ref), the cigar will be still 5M if // DO_SOFTCLIP is false or 2S3M if DO_SOFTCLIP is true. // The consumers need to check for the alignment offset and deal with it properly. - if (DO_SOFTCLIP ) { + if (doSoftClipping ) { lce.add(makeElement(state, segment_length)); if ( p2> 0 ) lce.add(makeElement(CLIP, p2)); alignment_offset = p1 ; @@ -360,7 +375,7 @@ public final class SWPairwiseAlignment { Cigar cigar = getCigar(); - if ( ! DO_SOFTCLIP ) { + if ( ! doSoftClipping ) { // we need to go through all the hassle below only if we do not do softclipping; // otherwise offset is never negative diff --git a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SmithWaterman.java b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SmithWaterman.java new file mode 100644 index 000000000..44fd889c5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SmithWaterman.java @@ -0,0 +1,56 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils.smithwaterman; + +import net.sf.samtools.Cigar; + +/** + * Generic interface for SmithWaterman calculations + * + * This interface allows clients to use a generic SmithWaterman variable, without propagating the specific + * implementation of SmithWaterman throughout their code: + * + * SmithWaterman sw = new SpecificSmithWatermanImplementation(ref, read, params) + * sw.getCigar() + * sw.getAlignmentStart2wrt1() + * + * User: depristo + * Date: 4/26/13 + * Time: 8:24 AM + */ +public interface SmithWaterman { + /** + * Get the cigar string for the alignment of this SmithWaterman class + * @return a non-null cigar + */ + public Cigar getCigar(); + + /** + * Get the starting position of the read sequence in the reference sequence + * @return an integer >= 0 + */ + public int getAlignmentStart2wrt1(); +} diff --git a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java index 154b000ce..3c68b8753 100644 --- a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java @@ -29,6 +29,7 @@ import org.apache.commons.io.FileUtils; import org.broadinstitute.sting.utils.io.IOUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -189,4 +190,30 @@ public class UtilsUnitTest extends BaseTest { final String sourceString = FileUtils.readFileToString(source); Assert.assertEquals(Utils.calcMD5(sourceString), sourceMD5); } + + @Test + public void testLongestCommonOps() { + for ( int
prefixLen = 0; prefixLen < 20; prefixLen++ ) { + for ( int extraSeq1Len = 0; extraSeq1Len < 10; extraSeq1Len++ ) { + for ( int extraSeq2Len = 0; extraSeq2Len < 10; extraSeq2Len++ ) { + for ( int max = 0; max < 50; max++ ) { + final String prefix = Utils.dupString("A", prefixLen); + final int expected = Math.min(prefixLen, max); + + { + final String seq1 = prefix + Utils.dupString("C", extraSeq1Len); + final String seq2 = prefix + Utils.dupString("G", extraSeq2Len); // independent tail length, so unequal-length inputs are covered + Assert.assertEquals(Utils.longestCommonPrefix(seq1.getBytes(), seq2.getBytes(), max), expected, "LongestCommonPrefix failed: seq1 " + seq1 + " seq2 " + seq2 + " max " + max); + } + + { + final String seq1 = Utils.dupString("C", extraSeq1Len) + prefix; + final String seq2 = Utils.dupString("G", extraSeq2Len) + prefix; // independent head length, so unequal-length inputs are covered + Assert.assertEquals(Utils.longestCommonSuffix(seq1.getBytes(), seq2.getBytes(), max), expected, "longestCommonSuffix failed: seq1 " + seq1 + " seq2 " + seq2 + " max " + max); + } + } + } + } + } + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index ae7c1e01c..6ec4336b0 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -33,8 +33,10 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -86,6 +88,30 @@ public class ReadClipperUnitTest extends BaseTest { } } + @DataProvider(name = "ClippedReadLengthData") + public Object[][] makeClippedReadLengthData() { + List tests = new ArrayList(); + + // this functionality can be
adapted to provide input data for whatever you might want in your data + final int originalReadLength = 50; + for ( int nToClip = 1; nToClip < originalReadLength - 1; nToClip++ ) { + tests.add(new Object[]{originalReadLength, nToClip}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "ClippedReadLengthData", enabled = true) + public void testHardClipReadLengthIsRight(final int originalReadLength, final int nToClip) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(originalReadLength + "M"); + read.getReadLength(); // provoke the caching of the read length + final int expectedReadLength = originalReadLength - nToClip; + GATKSAMRecord clipped = ReadClipper.hardClipByReadCoordinates(read, 0, nToClip - 1); + Assert.assertEquals(clipped.getReadLength(), expectedReadLength, + String.format("Clipped read length %d with cigar %s not equal to the expected read length %d after clipping %d bases from the left from a %d bp read with cigar %s", + clipped.getReadLength(), clipped.getCigar(), expectedReadLength, nToClip, read.getReadLength(), read.getCigar())); + } + @Test(enabled = true) public void testHardClipByReferenceCoordinates() { for (Cigar cigar : cigarList) { From 8b9c6aae3efd4676bd06f90577c6c81cd06bfcfd Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 8 May 2013 23:23:51 -0400 Subject: [PATCH 022/116] Fix bug in Reduce Reads that arises in multi-sample mode. * bitset could legitimately be in an unfinished state but we were trying to access it without finalizing. * added --cancer_mode argument per Mark's suggestion to force the user to explicitly enable multi-sample mode. * tests were easiest to implement as integration tests (this was a really complicated case). 
--- .../compression/reducereads/ReduceReads.java | 13 ++++++++++ .../reducereads/SlidingWindow.java | 4 ++++ .../ReduceReadsIntegrationTest.java | 24 +++++++++++++++++-- 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 71910e566..eb55701ae 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -64,6 +64,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -236,6 +237,15 @@ public class ReduceReads extends ReadWalker, Redu @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false) public int downsampleCoverage = 250; + /** + * Generally, this tool is not meant to be run for more than 1 sample at a time. The one valid exception + * brought to our attention by colleagues is the specific case of tumor/normal pairs in cancer analysis. + * To prevent users from unintentionally running the tool in a less than ideal manner, we require them + * to explicitly enable multi-sample analysis with this argument. 
+ */ + @Argument(fullName = "cancer_mode", shortName = "cancer_mode", doc = "enable multi-samples reduction for cancer analysis", required = false) + public boolean ALLOW_MULTIPLE_SAMPLES = false; + @Hidden @Argument(fullName = "nwayout", shortName = "nw", doc = "", required = false) public boolean nwayout = false; @@ -294,6 +304,9 @@ public class ReduceReads extends ReadWalker, Redu if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 ) throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)"); + if ( SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()).size() > 1 && !ALLOW_MULTIPLE_SAMPLES ) + throw new UserException.BadInput("Reduce Reads is not meant to be run for more than 1 sample at a time except for the specific case of tumor/normal pairs in cancer analysis"); + if ( known.isEmpty() ) knownSnpPositions = null; else diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index d3ca037be..8843d6270 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -877,6 +877,10 @@ public class SlidingWindow { final int start = region.getStart() - windowHeaderStart; int stop = region.getStop() - windowHeaderStart; + // make sure the bitset is complete given the region (it might not be in multi-sample mode) + if ( region.getStop() > markedSites.getStartLocation() + markedSites.getVariantSiteBitSet().length ) + markSites(region.getStop()); + CloseVariantRegionResult closeVariantRegionResult = closeVariantRegion(start, stop, knownSnpPositions); allReads.addAll(closeVariantRegionResult.reads); diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 15b54dbd1..405e616f1 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -53,6 +53,7 @@ import org.testng.annotations.Test; import java.io.File; import java.util.Arrays; +import java.util.Collections; import java.util.List; public class ReduceReadsIntegrationTest extends WalkerTest { @@ -221,13 +222,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testCoReduction() { - String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; + String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5); } @Test(enabled = true) public void testCoReductionWithKnowns() { - String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; + String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5); } @@ -281,5 +282,24 @@ public class ReduceReadsIntegrationTest extends WalkerTest { " -o %s 
--downsample_coverage 250 -dcov 50 "; executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9"); } + + /** + * Confirm that this bam does not fail when multi-sample mode is enabled. The provided example is tricky and used to + * trigger an exception in the code. + */ + @Test(enabled = true) + public void testMultiSampleDoesNotFailWithFlag() { + String cmd = "-T ReduceReads --cancer_mode -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null"; + executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, Collections.emptyList())); + } + + /** + * Confirm that this bam fails when multi-sample mode is not enabled + */ + @Test(enabled = true) + public void testMultiSampleFailsWithoutFlag() { + String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null"; + executeTestWithoutAdditionalRRTests("testMultiSampleFailsWithoutFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class)); + } } From 639030bd6d6e89ebf796e4e05853f7265cf96789 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 10 May 2013 17:26:07 -0400 Subject: [PATCH 023/116] Enable convenient display of diff engine output in Bamboo, plus misc. minor test-related improvements -Diff engine output is now included in the actual exception message thrown as a result of an MD5 mismatch, which allows it to be conveniently viewed on the main page of a build in Bamboo. Minor Additional Improvements: -WalkerTestSpec now auto-detects test class name via new JVMUtils.getCallingClass() method, and the test class name is now included as a regular part of integration test output for each test.
-Fix race condition in MD5DB.ensureMd5DbDirectory() -integrationtests dir is now cleaned by "ant clean" GSA-915 #resolve --- build.xml | 3 + .../sting/utils/classloader/JVMUtils.java | 58 +++++++ .../test/org/broadinstitute/sting/MD5DB.java | 151 ++++++++++-------- .../org/broadinstitute/sting/MD5Mismatch.java | 19 ++- .../org/broadinstitute/sting/WalkerTest.java | 64 +++++--- .../utils/classloader/JVMUtilsUnitTest.java | 75 +++++++++ .../sting/queue/pipeline/PipelineTest.scala | 2 +- 7 files changed, 270 insertions(+), 102 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/classloader/JVMUtilsUnitTest.java diff --git a/build.xml b/build.xml index 56bf4f0cd..2e9df4d5e 100644 --- a/build.xml +++ b/build.xml @@ -1031,6 +1031,7 @@ + @@ -1043,6 +1044,7 @@ + @@ -1078,6 +1080,7 @@ + diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java index 2f5115dfa..8f4958f6c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java @@ -248,4 +248,62 @@ public class JVMUtils { interfaces.add(interfaceClass.getSimpleName()); return Utils.join(", ", interfaces); } + + /** + * Returns the Class that invoked the specified "callee" class by examining the runtime stack. + * The calling class is defined as the first class below the callee class on the stack. + * + * For example, given callee == MyClass and the following runtime stack: + * + * JVMUtils.getCallingClass(MyClass) <-- top + * MyClass.foo() + * MyClass.bar() + * OtherClass.foo() + * OtherClass.bar() + * etc. + * + * this method would return OtherClass, since its methods invoked the methods in MyClass. + * + * Considers only the occurrence of the callee class on the stack that is closest to the top + * (even if there are multiple, non-contiguous occurrences). 
+ * + * @param callee Class object for the class whose calling class we want to locate + * @return Class object for the class that invoked the callee class, or null if + * no calling class was found + * @throws IllegalArgumentException if the callee class is not found on the runtime stack + * @throws IllegalStateException if we get an error while trying to load the Class object for the calling + * class reported on the runtime stack + */ + public static Class getCallingClass( final Class callee ) { + final StackTraceElement[] stackTrace = new Throwable().getStackTrace(); + final String calleeClassName = callee.getName(); + + // Start examining the stack at the second-from-the-top position, to remove + // this method call (i.e., the call to getCallingClass() itself) from consideration. + int stackTraceIndex = 1; + + // Find the first occurrence of the callee on the runtime stack. Need to use String comparison + // unfortunately, due to limitations of the StackTraceElement class. + while ( stackTraceIndex < stackTrace.length && ! stackTrace[stackTraceIndex].getClassName().equals(calleeClassName) ) { + stackTraceIndex++; + } + + // Make sure we actually found the callee class on the stack + if ( stackTraceIndex == stackTrace.length ) { + throw new IllegalArgumentException(String.format("Specified callee %s is not present on the call stack", callee.getSimpleName())); + } + + // Now find the caller class, which will be the class below the callee on the stack + while ( stackTraceIndex < stackTrace.length && stackTrace[stackTraceIndex].getClassName().equals(calleeClassName) ) { + stackTraceIndex++; + } + + try { + return stackTraceIndex < stackTrace.length ?
Class.forName(stackTrace[stackTraceIndex].getClassName()) : null; + } + catch ( ClassNotFoundException e ) { + throw new IllegalStateException(String.format("Could not find caller class %s from the runtime stack in the classpath", + stackTrace[stackTraceIndex].getClassName())); + } + } } diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java index 2b0d52a11..7bd6f7bc4 100644 --- a/public/java/test/org/broadinstitute/sting/MD5DB.java +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -97,7 +97,12 @@ public class MD5DB { if ( ! dir.exists() ) { System.out.printf("##### Creating MD5 db %s%n", LOCAL_MD5_DB_DIR); if ( ! dir.mkdir() ) { - throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + LOCAL_MD5_DB_DIR); + // Need to check AGAIN whether the dir exists, because we might be doing multi-process parallelism + // within the same working directory, and another GATK instance may have come along and created the + // directory between the calls to exists() and mkdir() above. + if ( ! 
dir.exists() ) { + throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + LOCAL_MD5_DB_DIR); + } } } } @@ -203,98 +208,106 @@ public class MD5DB { } public static class MD5Match { - final String actualMD5, expectedMD5; - final String failMessage; - boolean failed; + public final String actualMD5, expectedMD5; + public final String failMessage; + public final String diffEngineOutput; + public final boolean failed; - public MD5Match(final String actualMD5, final String expectedMD5, final String failMessage, final boolean failed) { + public MD5Match(final String actualMD5, final String expectedMD5, final String failMessage, final String diffEngineOutput, final boolean failed) { this.actualMD5 = actualMD5; this.expectedMD5 = expectedMD5; this.failMessage = failMessage; + this.diffEngineOutput = diffEngineOutput; this.failed = failed; } } /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. - * @param name Name of the test. + * Tests a file MD5 against an expected value, returning an MD5Match object containing a description of the + * match or mismatch. In case of a mismatch, outputs a description of the mismatch to various log files/streams. + * + * NOTE: This function WILL NOT throw an exception if the MD5s are different. + * + * @param testName Name of the test. + * @param testClassName Name of the class that contains the test. * @param resultsFile File to MD5. * @param expectedMD5 Expected MD5 value. * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. + * @return an MD5Match object containing a description of the match/mismatch. 
Will have its "failed" field set + * to true if there was a mismatch (unless we're using the "parameterize" argument) */ - public MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - final String actualMD5 = testFileMD5(name, resultsFile, expectedMD5, parameterize); - String failMessage = null; + public MD5Match testFileMD5(final String testName, final String testClassName, final File resultsFile, final String expectedMD5, final boolean parameterize) { + final String actualMD5 = calculateFileMD5(resultsFile); + String diffEngineOutput = ""; + String failMessage = ""; boolean failed = false; + // copy md5 to integrationtests + updateMD5Db(actualMD5, resultsFile); + if (parameterize || expectedMD5.equals("")) { - // Don't assert - } else if ( actualMD5.equals(expectedMD5) ) { - //BaseTest.log(String.format(" => %s PASSED (expected=%s)", name, expectedMD5)); - } else { + BaseTest.log(String.format("PARAMETERIZATION: file %s has md5 = %s", resultsFile, actualMD5)); + } else if ( ! expectedMD5.equals(actualMD5) ) { failed = true; - failMessage = String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, actualMD5); + failMessage = String.format("%s:%s has mismatching MD5s: expected=%s observed=%s", testClassName, testName, expectedMD5, actualMD5); + diffEngineOutput = logMD5MismatchAndGetDiffEngineOutput(testName, testClassName, expectedMD5, actualMD5); } - return new MD5Match(actualMD5, expectedMD5, failMessage, failed); + return new MD5Match(actualMD5, expectedMD5, failMessage, diffEngineOutput, failed); } - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. 
- * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. + * Calculates the MD5 for the specified file and returns it as a String + * + * @param file file whose MD5 to calculate + * @return file's MD5 in String form + * @throws RuntimeException if the file could not be read */ - public String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + public String calculateFileMD5( final File file ) { try { - final String filemd5sum = Utils.calcMD5(getBytesFromFile(resultsFile)); - - // - // copy md5 to integrationtests - // - updateMD5Db(filemd5sum, resultsFile); - - if (parameterize || expectedMD5.equals("")) { - BaseTest.log(String.format("PARAMETERIZATION: file %s has md5 = %s", resultsFile, filemd5sum)); - } else { - //System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); - //System.out.flush(); - - if ( ! expectedMD5.equals(filemd5sum) ) { - // we are going to fail for real in assertEquals (so we are counted by the testing framework). 
- // prepare ourselves for the comparison - System.out.printf("##### Test %s is going to fail #####%n", name); - String pathToExpectedMD5File = getMD5FilePath(expectedMD5, "[No DB file found]"); - String pathToFileMD5File = getMD5FilePath(filemd5sum, "[No DB file found]"); - BaseTest.log(String.format("expected %s", expectedMD5)); - BaseTest.log(String.format("calculated %s", filemd5sum)); - BaseTest.log(String.format("diff %s %s", pathToExpectedMD5File, pathToFileMD5File)); - - md5MismatchStream.printf("%s\t%s\t%s%n", expectedMD5, filemd5sum, name); - md5MismatchStream.flush(); - - // inline differences - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - final PrintStream ps = new PrintStream(baos); - DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(ps, 20, 10, 0, MAX_RAW_DIFFS_TO_SUMMARIZE, false); - boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), MAX_RECORDS_TO_READ, params); - if ( success ) { - final String content = baos.toString(); - BaseTest.log(content); - System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", - pathToExpectedMD5File, pathToFileMD5File); - } - ps.close(); - } - } - - return filemd5sum; - } catch (Exception e) { - throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); + return Utils.calcMD5(getBytesFromFile(file)); + } + catch ( Exception e ) { + throw new RuntimeException("Failed to read bytes from file: " + file + " for MD5 calculation", e); } } + + /** + * Logs a description (including diff engine output) of the MD5 mismatch between the expectedMD5 + * and actualMD5 to a combination of BaseTest.log(), the md5MismatchStream, and stdout, then returns + * the diff engine output. 
+ * + * @param testName name of the test that generated the mismatch + * @param testClassName name of the class containing the test that generated the mismatch + * @param expectedMD5 the MD5 we were expecting from this test + * @param actualMD5 the MD5 we actually calculated from the test output + * @return the diff engine output produced while logging the description of the mismatch + */ + private String logMD5MismatchAndGetDiffEngineOutput(final String testName, final String testClassName, final String expectedMD5, final String actualMD5) { + System.out.printf("##### Test %s:%s is going to fail #####%n", testClassName, testName); + String pathToExpectedMD5File = getMD5FilePath(expectedMD5, "[No DB file found]"); + String pathToFileMD5File = getMD5FilePath(actualMD5, "[No DB file found]"); + BaseTest.log(String.format("expected %s", expectedMD5)); + BaseTest.log(String.format("calculated %s", actualMD5)); + BaseTest.log(String.format("diff %s %s", pathToExpectedMD5File, pathToFileMD5File)); + + md5MismatchStream.printf("%s\t%s\t%s%n", expectedMD5, actualMD5, testName); + md5MismatchStream.flush(); + + // inline differences + String diffEngineOutput = ""; + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final PrintStream ps = new PrintStream(baos); + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(ps, 20, 10, 0, MAX_RAW_DIFFS_TO_SUMMARIZE, false); + boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), MAX_RECORDS_TO_READ, params); + if ( success ) { + diffEngineOutput = baos.toString(); + BaseTest.log(diffEngineOutput); + System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. 
Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", + pathToExpectedMD5File, pathToFileMD5File); + } + ps.close(); + + return diffEngineOutput; + } } diff --git a/public/java/test/org/broadinstitute/sting/MD5Mismatch.java b/public/java/test/org/broadinstitute/sting/MD5Mismatch.java index e459a24ce..56acedaf0 100644 --- a/public/java/test/org/broadinstitute/sting/MD5Mismatch.java +++ b/public/java/test/org/broadinstitute/sting/MD5Mismatch.java @@ -35,29 +35,32 @@ import java.util.List; * @since Date created */ public class MD5Mismatch extends Exception { - final List actuals, expecteds; + final List actuals, expecteds, diffEngineOutputs; - public MD5Mismatch(final String actual, final String expected) { - this(Collections.singletonList(actual), Collections.singletonList(expected)); + public MD5Mismatch(final String actual, final String expected, final String diffEngineOutput) { + this(Collections.singletonList(actual), Collections.singletonList(expected), Collections.singletonList(diffEngineOutput)); } - public MD5Mismatch(final List actuals, final List expecteds) { - super(formatMessage(actuals, expecteds)); + public MD5Mismatch(final List actuals, final List expecteds, final List diffEngineOutputs) { + super(formatMessage(actuals, expecteds, diffEngineOutputs)); this.actuals = actuals; this.expecteds = expecteds; + this.diffEngineOutputs = diffEngineOutputs; } @Override public String toString() { - return formatMessage(actuals, expecteds); + return formatMessage(actuals, expecteds, diffEngineOutputs); } - private final static String formatMessage(final List actuals, final List expecteds) { + private static String formatMessage(final List actuals, final List expecteds, final List diffEngineOutputs) { final StringBuilder b = new StringBuilder("MD5 mismatch: "); for ( int i = 0; i < actuals.size(); i++ ) { - if ( i > 1 ) b.append("\t\t\n"); + if ( i >= 1 ) b.append("\t\t\n\n"); b.append("actual 
").append(actuals.get(i)); b.append(" expected ").append(expecteds.get(i)); + b.append("\nDiff Engine Output:\n"); + b.append(diffEngineOutputs.get(i)); } return b.toString(); } diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index dd5a2b0a7..40f1f7bcd 100644 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.variant.bcf2.BCF2Utils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.vcf.VCFCodec; @@ -73,10 +74,6 @@ public class WalkerTest extends BaseTest { return md5DB; } - public MD5DB.MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) { - return getMd5DB().assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); - } - public void validateOutputBCFIfPossible(final String name, final File resultFile) { final File bcfFile = BCF2Utils.shadowBCF(resultFile); if ( bcfFile != null && bcfFile.exists() ) { @@ -114,15 +111,15 @@ public class WalkerTest extends BaseTest { } } - public List assertMatchingMD5s(final String name, List resultFiles, List expectedMD5s) { + public List assertMatchingMD5s(final String testName, final String testClassName, List resultFiles, List expectedMD5s) { List md5s = new ArrayList(); List fails = new ArrayList(); for (int i = 0; i < resultFiles.size(); i++) { - MD5DB.MD5Match result = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i)); - validateOutputBCFIfPossible(name, resultFiles.get(i)); + MD5DB.MD5Match result = getMd5DB().testFileMD5(testName, 
testClassName, resultFiles.get(i), expectedMD5s.get(i), parameterize()); + validateOutputBCFIfPossible(testName, resultFiles.get(i)); if ( ! result.failed ) { - validateOutputIndex(name, resultFiles.get(i)); + validateOutputIndex(testName, resultFiles.get(i)); md5s.add(result.expectedMD5); } else { fails.add(result); @@ -132,14 +129,17 @@ public class WalkerTest extends BaseTest { if ( ! fails.isEmpty() ) { List actuals = new ArrayList(); List expecteds = new ArrayList(); + List diffEngineOutputs = new ArrayList(); + for ( final MD5DB.MD5Match fail : fails ) { actuals.add(fail.actualMD5); expecteds.add(fail.expectedMD5); + diffEngineOutputs.add(fail.diffEngineOutput); logger.warn("Fail: " + fail.failMessage); } - final MD5Mismatch failure = new MD5Mismatch(actuals, expecteds); - Assert.fail(failure.toString(), failure); + final MD5Mismatch failure = new MD5Mismatch(actuals, expecteds, diffEngineOutputs); + Assert.fail(failure.toString()); } return md5s; @@ -170,6 +170,9 @@ public class WalkerTest extends BaseTest { boolean includeImplicitArgs = true; boolean includeShadowBCF = true; + // Name of the test class that created this test case + private Class testClass; + // the default output path for the integration test private File outputFileLocation = null; @@ -183,6 +186,7 @@ public class WalkerTest extends BaseTest { this.args = args; this.nOutputFiles = md5s.size(); this.md5s = md5s; + this.testClass = getCallingTestClass(); } public WalkerTestSpec(String args, List exts, List md5s) { @@ -194,12 +198,22 @@ public class WalkerTest extends BaseTest { this.nOutputFiles = md5s.size(); this.md5s = md5s; this.exts = exts; + this.testClass = getCallingTestClass(); } public WalkerTestSpec(String args, int nOutputFiles, Class expectedException) { this.args = args; this.nOutputFiles = nOutputFiles; this.expectedException = expectedException; + this.testClass = getCallingTestClass(); + } + + private Class getCallingTestClass() { + return 
JVMUtils.getCallingClass(getClass()); + } + + public String getTestClassName() { + return testClass.getSimpleName(); } public String getArgsWithImplicitArgs() { @@ -306,7 +320,7 @@ public class WalkerTest extends BaseTest { if ( spec.expectsException() ) { // this branch handles the case were we are testing that a walker will fail as expected - return executeTest(name, spec.getOutputFileLocation(), null, tmpFiles, args, spec.getExpectedException()); + return executeTest(name, spec.getTestClassName(), spec.getOutputFileLocation(), null, tmpFiles, args, spec.getExpectedException()); } else { List md5s = new LinkedList(); md5s.addAll(spec.md5s); @@ -316,7 +330,7 @@ public class WalkerTest extends BaseTest { md5s.add(md5); tmpFiles.add(spec.auxillaryFiles.get(md5)); } - return executeTest(name, spec.getOutputFileLocation(), md5s, tmpFiles, args, null); + return executeTest(name, spec.getTestClassName(), spec.getOutputFileLocation(), md5s, tmpFiles, args, null); } } @@ -337,35 +351,37 @@ public class WalkerTest extends BaseTest { /** * execute the test, given the following: - * @param name the name of the test + * @param testName the name of the test + * @param testClassName the name of the class that contains the test * @param md5s the list of md5s * @param tmpFiles the temp file corresponding to the md5 list * @param args the argument list * @param expectedException the expected exception or null * @return a pair of file and string lists */ - private Pair, List> executeTest(String name, File outputFileLocation, List md5s, List tmpFiles, String args, Class expectedException) { - if ( md5s != null ) qcMD5s(name, md5s); + private Pair, List> executeTest(String testName, String testClassName, File outputFileLocation, List md5s, List tmpFiles, String args, Class expectedException) { + if ( md5s != null ) qcMD5s(testName, md5s); if (outputFileLocation != null) args += " -o " + outputFileLocation.getAbsolutePath(); - executeTest(name, args, expectedException); + 
executeTest(testName, testClassName, args, expectedException); if ( expectedException != null ) { return null; } else { // we need to check MD5s - return new Pair, List>(tmpFiles, assertMatchingMD5s(name, tmpFiles, md5s)); + return new Pair, List>(tmpFiles, assertMatchingMD5s(testName, testClassName, tmpFiles, md5s)); } } /** * execute the test, given the following: - * @param name the name of the test - * @param args the argument list + * @param testName the name of the test + * @param testClassName the name of the class that contains the test + * @param args the argument list * @param expectedException the expected exception or null */ - private void executeTest(String name, String args, Class expectedException) { + private void executeTest(String testName, String testClassName, String args, Class expectedException) { CommandLineGATK instance = new CommandLineGATK(); String[] command = Utils.escapeExpressions(args); @@ -374,7 +390,7 @@ public class WalkerTest extends BaseTest { try { final String now = new SimpleDateFormat("HH:mm:ss").format(new Date()); final String cmdline = Utils.join(" ",command); - System.out.println(String.format("[%s] Executing test %s with GATK arguments: %s", now, name, cmdline)); + System.out.println(String.format("[%s] Executing test %s:%s with GATK arguments: %s", now, testClassName, testName, cmdline)); // also write the command line to the HTML log for convenient follow-up // do the replaceAll so paths become relative to the current BaseTest.log(cmdline.replaceAll(publicTestDirRoot, "").replaceAll(privateTestDirRoot, "")); @@ -388,8 +404,8 @@ public class WalkerTest extends BaseTest { // it's the type we expected //System.out.println(String.format(" => %s PASSED", name)); } else { - final String message = String.format("Test %s expected exception %s but instead got %s with error message %s", - name, expectedException, e.getClass(), e.getMessage()); + final String message = String.format("Test %s:%s expected exception %s but instead 
got %s with error message %s", + testClassName, testName, expectedException, e.getClass(), e.getMessage()); if ( e.getCause() != null ) { final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final PrintStream ps = new PrintStream(baos); @@ -409,7 +425,7 @@ public class WalkerTest extends BaseTest { if ( expectedException != null ) { if ( ! gotAnException ) // we expected an exception but didn't see it - Assert.fail(String.format("Test %s expected exception %s but none was thrown", name, expectedException.toString())); + Assert.fail(String.format("Test %s:%s expected exception %s but none was thrown", testClassName, testName, expectedException.toString())); } else { if ( CommandLineExecutable.result != 0) { throw new RuntimeException("Error running the GATK with arguments: " + args); diff --git a/public/java/test/org/broadinstitute/sting/utils/classloader/JVMUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/classloader/JVMUtilsUnitTest.java new file mode 100644 index 000000000..6ffd47f37 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/classloader/JVMUtilsUnitTest.java @@ -0,0 +1,75 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils.classloader; + +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class JVMUtilsUnitTest { + + // Test classes used by the tests for JVMUtils.getCallingClass(): + private static class DummyTestClass1 { + public static Class getCaller( final Class callee ) { + return DummyTestClass2.getCaller(callee); + } + } + + private static class DummyTestClass2 { + public static Class getCaller( final Class callee ) { + return DummyTestClass3.getCaller(callee); + } + } + + private static class DummyTestClass3 { + public static Class getCaller( final Class callee ) { + return JVMUtils.getCallingClass(callee); + } + } + + @DataProvider( name = "TestGetCallingClassDataProvider" ) + public Object[][] getTestCallingClassTestData() { + return new Object[][] { + { DummyTestClass1.class, JVMUtilsUnitTest.class }, + { DummyTestClass2.class, DummyTestClass1.class }, + { DummyTestClass3.class, DummyTestClass2.class } + }; + } + + @Test( dataProvider = "TestGetCallingClassDataProvider" ) + public void testGetCallingClass( final Class callee, final Class expectedCaller ) { + final Class reportedCaller = DummyTestClass1.getCaller(callee); + + Assert.assertEquals(reportedCaller, expectedCaller, + String.format("Wrong calling class returned from DummyTestClass1.getCaller(%s)", callee.getSimpleName())); + } + + @Test( expectedExceptions = IllegalArgumentException.class ) + public void 
testGetCallingClassCalleeNotFound() { + // Trying to get the calling class of a class not on the runtime stack should produce an exception. + JVMUtils.getCallingClass(DummyTestClass1.class); + } +} diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index 03b38ffe9..6741e4107 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -113,7 +113,7 @@ object PipelineTest extends BaseTest with Logging { private def assertMatchingMD5s(name: String, fileMD5s: Traversable[(File, String)], parameterize: Boolean) { var failed = 0 for ((file, expectedMD5) <- fileMD5s) { - val calculatedMD5 = md5DB.testFileMD5(name, file, expectedMD5, parameterize) + val calculatedMD5 = md5DB.testFileMD5(name, "", file, expectedMD5, parameterize).actualMD5 if (!parameterize && expectedMD5 != "" && expectedMD5 != calculatedMD5) failed += 1 } From 2f5ef6db442859d9ac09ef6de1f9b7819f27098e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 1 May 2013 11:13:58 -0400 Subject: [PATCH 024/116] New faster Smith-Waterman implementation that is edge greedy and assumes that ref and haplotype have same global start/end points. * This version inherits from the original SW implementation so it can use the same matrix creation method. * A bunch of refactoring was done to the original version to clean it up a bit and to have it do the right thing for indels at the edges of the alignments. * Enum added for the overhang strategy to use; added implementation for the INDEL version of this strategy. * Lots of systematic testing added for this implementation. * NOT HOOKED UP TO HAPLOTYPE CALLER YET. Committing so that people can play around with this for now. 
--- ...EdgeGreedySWPairwiseAlignmentUnitTest.java | 259 ++++++++++++++++++ .../org/broadinstitute/sting/utils/Utils.java | 16 ++ .../GlobalEdgeGreedySWPairwiseAlignment.java | 217 +++++++++++++++ .../smithwaterman/SWPairwiseAlignment.java | 255 ++++++++++++----- .../utils/smithwaterman/SmithWaterman.java | 1 + .../sting/utils/UtilsUnitTest.java | 20 ++ .../smithwaterman/SmithWatermanBenchmark.java | 88 ++++++ 7 files changed, 784 insertions(+), 72 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/utils/smithwaterman/GlobalEdgeGreedySWPairwiseAlignmentUnitTest.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/smithwaterman/GlobalEdgeGreedySWPairwiseAlignment.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/smithwaterman/SmithWatermanBenchmark.java diff --git a/protected/java/test/org/broadinstitute/sting/utils/smithwaterman/GlobalEdgeGreedySWPairwiseAlignmentUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/smithwaterman/GlobalEdgeGreedySWPairwiseAlignmentUnitTest.java new file mode 100644 index 000000000..711a60436 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/utils/smithwaterman/GlobalEdgeGreedySWPairwiseAlignmentUnitTest.java @@ -0,0 +1,259 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.utils.smithwaterman; + +import net.sf.samtools.TextCigarCodec; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class GlobalEdgeGreedySWPairwiseAlignmentUnitTest extends BaseTest { + + private final static boolean DEBUG = false; + + @Test(enabled = !DEBUG) + public void testReadAlignedToRefComplexAlignment() { + final String reference = "AAAGGACTGACTG"; + final String read = "ACTGACTGACTG"; + final GlobalEdgeGreedySWPairwiseAlignment sw = new GlobalEdgeGreedySWPairwiseAlignment(reference.getBytes(), read.getBytes()); + Assert.assertEquals(sw.getCigar().toString(), "1M1D11M"); + } + + @Test(enabled = !DEBUG) + public void testIndelsAtStartAndEnd() { + final String match = "CCCCC"; + final String reference = "AAA" + match; + final String read = match + "GGG"; + final int expectedStart = 0; + final String expectedCigar = "3D5M3I"; + final GlobalEdgeGreedySWPairwiseAlignment sw = new GlobalEdgeGreedySWPairwiseAlignment(reference.getBytes(), 
read.getBytes()); + Assert.assertEquals(sw.getAlignmentStart2wrt1(), expectedStart); + Assert.assertEquals(sw.getCigar().toString(), expectedCigar); + } + + @Test(enabled = !DEBUG) + public void testDegenerateAlignmentWithIndelsAtBothEnds() { + logger.warn("testDegenerateAlignmentWithIndelsAtBothEnds"); + final String ref = "TGTGTGTGTGTGTGACAGAGAGAGAGAGAGAGAGAGAGAGAGAGA"; + final String alt = "ACAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA"; + final int expectedStart = 0; + final String expectedCigar = "6I45M"; + final GlobalEdgeGreedySWPairwiseAlignment sw = new GlobalEdgeGreedySWPairwiseAlignment(ref.getBytes(), alt.getBytes(), SWParameterSet.STANDARD_NGS); + Assert.assertEquals(sw.getAlignmentStart2wrt1(), expectedStart); + Assert.assertEquals(sw.getCigar().toString(), expectedCigar); + } + + @Test(enabled = !DEBUG) + public void testAlignReallyLongDeletion() { + final String ref = "CGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAGGTGATCCACTCGCCTCGGTCTCCCAAAGTGTTGGGATTACAGGCATGAACCACTGCACCTGGCCTAGTGTTTGGGAAAACTATACTAGGAAAAGAATAGTTGCTTTAAGTCATTCTTTGATTATTCTGAGAATTGGCATATAGCTGCCATTATAACCTACTTTTGCTAAATATAATAATAATAATCATTATTTTTATTTTTTGAGACAGGGTCTTGTTTTGTCACCCCGGCTGGAGTGAAGTGGCGCAATCTCGGCTCACTGCAACCTCCACCTCCGGGTGCAAGCAATTCTCCTGCCTCAGCCTCTTGAGTAGCTAGGATTACAGGCACAAGCCATCATGCCCAGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCAGGT"; + final String alt = "CGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGTCAGGCTGGTCTTGAACTCCTGACCTCAGGT"; + + final GlobalEdgeGreedySWPairwiseAlignment sw = new GlobalEdgeGreedySWPairwiseAlignment(ref.getBytes(), alt.getBytes(), SWParameterSet.STANDARD_NGS); + Assert.assertEquals(sw.getAlignmentStart2wrt1(), 0); + Assert.assertEquals(sw.getCigar().toString(), "47M419D31M"); + } + + public static final Parameters params = new Parameters(20.0, -10.0, -26.0, -1.1); + @DataProvider(name = "SWData") + public Object[][] makeSWData() { + List tests = new ArrayList(); + + // simple cases + 
tests.add(new Object[]{"A", "C", "1M"}); + tests.add(new Object[]{"AAA", "AAA", "3M"}); + tests.add(new Object[]{"AAA", "AGA", "3M"}); + tests.add(new Object[]{"AAA", "GAA", "3M"}); + tests.add(new Object[]{"AAA", "AAG", "3M"}); + + // small single indels + tests.add(new Object[]{"ACACACAC", "ACACAC", "6M2D"}); + tests.add(new Object[]{"ACACAC", "ACACACAC", "6M2I"}); + tests.add(new Object[]{"XXACACACXX", "XXACACACACXX", "8M2I2M"}); + tests.add(new Object[]{"XXACACACXX", "XXACACXX", "6M2D2M"}); + tests.add(new Object[]{"ACGT", "AACGT", "1I4M"}); + tests.add(new Object[]{"ACGT", "ACCGT", "2M1I2M"}); + tests.add(new Object[]{"ACGT", "ACGGT", "3M1I1M"}); + tests.add(new Object[]{"ACGT", "ACGTT", "4M1I"}); + tests.add(new Object[]{"ACGT", "CGT", "1D3M"}); + tests.add(new Object[]{"ACGT", "AGT", "1M1D2M"}); + tests.add(new Object[]{"ACGT", "ACT", "2M1D1M"}); + tests.add(new Object[]{"ACGT", "ACG", "3M1D"}); + + // mismatches through out the sequences + final String ref = "ACGTAACCGGTT"; + for ( int diff = 0; diff < ref.length(); diff++ ) { + final byte[] altBases = ref.getBytes(); + altBases[diff] = 'N'; + tests.add(new Object[]{ref, new String(altBases), ref.length() + "M"}); + } + for ( int diff1 = 0; diff1 < ref.length(); diff1++ ) { + for ( int diff2 = 0; diff2 < ref.length(); diff2++ ) { + final byte[] altBases = ref.getBytes(); + altBases[diff1] = 'N'; + altBases[diff2] = 'N'; + tests.add(new Object[]{ref, new String(altBases), ref.length() + "M"}); + } + } + + // prefixes and suffixes matching + final String totalPrefix = "ACG"; + final String totalSuffix = "GCT"; + for ( int prefixSize = 0; prefixSize < totalPrefix.length(); prefixSize++) { + for ( int suffixSize = 0; suffixSize < totalPrefix.length(); suffixSize++) { + if ( prefixSize + suffixSize == 0 ) + continue; + for ( int indelSize = 1; indelSize < 50; indelSize++ ) { + final String prefix = totalPrefix.substring(0, prefixSize); + final String suffix = totalSuffix.substring(0, suffixSize); + final String 
insert = Utils.dupString("N", indelSize); + tests.add(new Object[]{prefix + suffix, prefix + insert + suffix, prefix.length() + "M" + indelSize + "I" + suffix.length() + "M"}); + tests.add(new Object[]{prefix + insert + suffix, prefix + suffix, prefix.length() + "M" + indelSize + "D" + suffix.length() + "M"}); + } + } + } + + // larger indels with prefixes/suffixes + tests.add(new Object[]{"ACTGTTTTGAACATCAGTTATTTTAAACTTTTAAGTTGTTAGCACAGCAAAAGCAACAAAATTCTAAGTGCAGTAATCACTTTACTGCGTGGTCATATGAAATCAAGGCAATGTTATGAGTATTACTGGAAAGCTGGACAGAGTAACGGGAAAAGTGACTAAAACTATGC", "CCTGTTTTGAACATCAGTTATTTTAAACTTTTAAGTTGTTAGCACAGCAAAAGCAACAAAATTCTAAGTGCAGTAATCACTTTACTGCGTGGTCATATGAAATCAAGGCAATGTTATGAGTATTACTGGAAAGCTGGACAGAGTAACGGGAAAAGTGACT", "160M10D"}); + tests.add(new Object[]{"LLLLLTATTAAGTAGTGCTCTATGTTGTCAACTAATTTATTTCCCATTTCAAACATTAGTTGACATGTTTTCATTTCTCTTTTGGAAGGAAACAACTAAATATGTTATCAATCCATCATTTACTTGTACAATAAATAAAGTTCTAAATCACTGCACAGTGTAAAATGGCAAATAGACTTCCCCATAACACAAAGCCATCCTGAAAAGTTTTGTTCATTTTAGAAGRRRRR", "LLLLLARRRRR", "5M219D6M"}); + tests.add(new Object[]{"LLLLLTATTTTTTRRRRR", "LLLLLARRRRR", "5M7D6M"}); + + // systematic testing + for ( final int forwardMatches : Arrays.asList(0, 1, 5, 10)) { + for ( final int forwardMismatches : Arrays.asList(0, 1, 2)) { + for ( final int middleMatches : Arrays.asList(0, 1, 5, 10)) { + for ( final int delSize : Arrays.asList(0, 1, 2, 3 )) { + for ( final int insSize : Arrays.asList(0, 1, 2, 3 )) { + for ( final int reverseMismatches : Arrays.asList(0, 1, 2)) { + for ( final int reverseMatches : Arrays.asList(0, 1, 5, 10)) { + // if there is an insertion and deletion, they should cancel each other out (at least partially) + final int overlap = Math.min(delSize, insSize); + final int myDelSize = delSize - overlap; + final int myInsSize = insSize - overlap; + + // this case is too difficult to create a CIGAR for because SW will (legitimately) prefer to switch the indel and mismatches + final int totalMismatches = forwardMismatches + 
reverseMismatches; + if ( (myDelSize > 0 || myInsSize > 0 ) && (totalMismatches >= myDelSize || totalMismatches >= myInsSize) ) + continue; + + final StringBuilder refBuilder = new StringBuilder(); + final StringBuilder altBuilder = new StringBuilder(); + final StringBuilder cigarBuilder = new StringBuilder(); + + refBuilder.append(Utils.dupString('A', forwardMatches + forwardMismatches + middleMatches)); + altBuilder.append(Utils.dupString('A', forwardMatches)); + altBuilder.append(Utils.dupString('C', forwardMismatches)); + altBuilder.append(Utils.dupString('A', middleMatches)); + cigarBuilder.append(forwardMatches + forwardMismatches + middleMatches); + cigarBuilder.append("M"); + + if ( myDelSize > 0 ) { + refBuilder.append(Utils.dupString('G', myDelSize)); + cigarBuilder.append(myDelSize); + cigarBuilder.append("D"); + } + if ( myInsSize > 0 ) { + altBuilder.append(Utils.dupString('T', myInsSize)); + cigarBuilder.append(myInsSize); + cigarBuilder.append("I"); + } + if ( overlap > 0 ) { + refBuilder.append(Utils.dupString('G', overlap)); + altBuilder.append(Utils.dupString('T', overlap)); + cigarBuilder.append(overlap); + cigarBuilder.append("M"); + } + if ( delSize > 0 || insSize > 0 ) { + refBuilder.append(Utils.dupString('A', middleMatches)); + altBuilder.append(Utils.dupString('A', middleMatches)); + cigarBuilder.append(middleMatches); + cigarBuilder.append("M"); + } + + refBuilder.append(Utils.dupString('A', reverseMismatches + reverseMatches)); + altBuilder.append(Utils.dupString('C', reverseMismatches)); + altBuilder.append(Utils.dupString('A', reverseMatches)); + cigarBuilder.append(reverseMismatches + reverseMatches); + cigarBuilder.append("M"); + + if ( refBuilder.length() > 0 && altBuilder.length() > 0 ) + tests.add(new Object[]{refBuilder.toString(), altBuilder.toString(), cigarBuilder.toString()}); + } + } + } + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "SWData", enabled = !DEBUG) + public void 
testSW(final String seq1, final String seq2, final String expectedCigar) { + final GlobalEdgeGreedySWPairwiseAlignment alignment = new GlobalEdgeGreedySWPairwiseAlignment(seq1.getBytes(), seq2.getBytes(), new Parameters(5.0, -5.0, -25.0, -1.0)); + Assert.assertEquals(alignment.getCigar(), AlignmentUtils.consolidateCigar(TextCigarCodec.getSingleton().decode(expectedCigar))); + } + + /** + * For debugging purposes only + */ + @Test(enabled = DEBUG) + public void testDebugging() { + final String ref = "A"; + final String alt = "C"; + + final GlobalEdgeGreedySWPairwiseAlignment sw = new GlobalEdgeGreedySWPairwiseAlignment(ref.getBytes(), alt.getBytes(), new Parameters(5.0, -5.0, -25.0, -1.0)); + Assert.assertEquals(sw.getCigar().toString(), "1M"); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 5b2bba73c..73a538ee5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -789,4 +789,20 @@ public class Utils { } return end; } + + /** + * Trim any number of bases from the front and/or back of an array + * + * @param seq the sequence to trim + * @param trimFromFront how much to trim from the front + * @param trimFromBack how much to trim from the back + * @return a non-null array; can be the original array (i.e. not a copy) + */ + public static byte[] trimArray(final byte[] seq, final int trimFromFront, final int trimFromBack) { + if ( trimFromFront + trimFromBack > seq.length ) + throw new IllegalArgumentException("trimming total is larger than the original array"); + + // don't perform array copies if we need to copy everything anyways + return ( trimFromFront == 0 && trimFromBack == 0 ) ? 
seq : Arrays.copyOfRange(seq, trimFromFront, seq.length - trimFromBack); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/GlobalEdgeGreedySWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/GlobalEdgeGreedySWPairwiseAlignment.java new file mode 100644 index 000000000..27ead2e48 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/GlobalEdgeGreedySWPairwiseAlignment.java @@ -0,0 +1,217 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.utils.smithwaterman; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; + +import java.util.*; + +/** + * Pairwise discrete Smith-Waterman alignment with an edge greedy implementation + * + * ************************************************************************ + * **** IMPORTANT NOTE: **** + * **** This class assumes that all bytes come from UPPERCASED chars! **** + * ************************************************************************ + * + * User: ebanks + */ +public final class GlobalEdgeGreedySWPairwiseAlignment extends SWPairwiseAlignment { + + private final static boolean DEBUG_MODE = false; + + /** + * Create a new greedy SW pairwise aligner + * + * @param reference the reference sequence we want to align + * @param alternate the alternate sequence we want to align + * @param parameters the SW parameters to use + */ + public GlobalEdgeGreedySWPairwiseAlignment(final byte[] reference, final byte[] alternate, final Parameters parameters) { + super(reference, alternate, parameters); + } + + /** + * Create a new SW pairwise aligner + * + * After creating the object the two sequences are aligned with an internal call to align(seq1, seq2) + * + * @param reference the reference sequence we want to align + * @param alternate the alternate sequence we want to align + * @param namedParameters the named parameter set to get our parameters from + */ + public GlobalEdgeGreedySWPairwiseAlignment(final byte[] reference, final byte[] alternate, final SWParameterSet namedParameters) { + this(reference, alternate, namedParameters.parameters); + } + + /** + * @see #GlobalEdgeGreedySWPairwiseAlignment(byte[], byte[], SWParameterSet) with original default parameters + */ + public GlobalEdgeGreedySWPairwiseAlignment(byte[] reference, byte[] alternate) { + this(reference, alternate, 
SWParameterSet.ORIGINAL_DEFAULT); + } + + /** + * Aligns the alternate sequence to the reference sequence + * + * @param reference ref sequence + * @param alternate alt sequence + */ + @Override + protected void align(final byte[] reference, final byte[] alternate) { + if ( reference == null || reference.length == 0 ) + throw new IllegalArgumentException("Non-null, non-empty reference sequences are required for the Smith-Waterman calculation"); + if ( alternate == null || alternate.length == 0 ) + throw new IllegalArgumentException("Non-null, non-empty alternate sequences are required for the Smith-Waterman calculation"); + + final int forwardEdgeMatch = Utils.longestCommonPrefix(reference, alternate, Integer.MAX_VALUE); + + // edge case: one sequence is a strict prefix of the other + if ( forwardEdgeMatch == reference.length || forwardEdgeMatch == alternate.length ) { + alignmentResult = new SWPairwiseAlignmentResult(makeCigarForStrictPrefixAndSuffix(reference, alternate, forwardEdgeMatch, 0), 0); + return; + } + + int reverseEdgeMatch = Utils.longestCommonSuffix(reference, alternate, Integer.MAX_VALUE); + + // edge case: one sequence is a strict suffix of the other + if ( reverseEdgeMatch == reference.length || reverseEdgeMatch == alternate.length ) { + alignmentResult = new SWPairwiseAlignmentResult(makeCigarForStrictPrefixAndSuffix(reference, alternate, 0, reverseEdgeMatch), 0); + return; + } + + final int sizeOfRefToAlign = reference.length - forwardEdgeMatch - reverseEdgeMatch; + final int sizeOfAltToAlign = alternate.length - forwardEdgeMatch - reverseEdgeMatch; + + // edge case: one sequence is a strict subset of the other accounting for both prefix and suffix + final int minSizeToAlign = Math.min(sizeOfRefToAlign, sizeOfAltToAlign); + if ( minSizeToAlign < 0 ) + reverseEdgeMatch += minSizeToAlign; + if ( sizeOfRefToAlign <= 0 || sizeOfAltToAlign <= 0 ) { + alignmentResult = new SWPairwiseAlignmentResult(makeCigarForStrictPrefixAndSuffix(reference, 
alternate, forwardEdgeMatch, reverseEdgeMatch), 0); + return; + } + + final byte[] refToAlign = Utils.trimArray(reference, forwardEdgeMatch, reverseEdgeMatch); + final byte[] altToAlign = Utils.trimArray(alternate, forwardEdgeMatch, reverseEdgeMatch); + + final double[] sw = new double[(sizeOfRefToAlign+1)*(sizeOfAltToAlign+1)]; + if ( keepScoringMatrix ) SW = sw; + final int[] btrack = new int[(sizeOfRefToAlign+1)*(sizeOfAltToAlign+1)]; + + calculateMatrix(refToAlign, altToAlign, sw, btrack, OVERHANG_STRATEGY.INDEL); + + if ( DEBUG_MODE ) { + System.out.println(new String(refToAlign) + " vs. " + new String(altToAlign)); + debugMatrix(sw, sizeOfRefToAlign+1, sizeOfAltToAlign+1); + System.out.println("----"); + debugMatrix(btrack, sizeOfRefToAlign + 1, sizeOfAltToAlign + 1); + System.out.println(); + } + + alignmentResult = calculateCigar(forwardEdgeMatch, reverseEdgeMatch, sizeOfRefToAlign, sizeOfAltToAlign, sw, btrack); + } + + private void debugMatrix(final double[] matrix, final int dim1, final int dim2) { + for ( int i = 0; i < dim1; i++ ) { + for ( int j = 0; j < dim2; j++ ) + System.out.print(String.format("%.1f ", matrix[i * dim2 + j])); + System.out.println(); + } + } + + private void debugMatrix(final int[] matrix, final int dim1, final int dim2) { + for ( int i = 0; i < dim1; i++ ) { + for ( int j = 0; j < dim2; j++ ) + System.out.print(matrix[i*dim2 + j] + " "); + System.out.println(); + } + } + + /** + * Creates a CIGAR for the case where the prefix/suffix match combination encompasses an entire sequence + * + * @param reference the reference sequence + * @param alternate the alternate sequence + * @param matchingPrefix the prefix match size + * @param matchingSuffix the suffix match size + * @return non-null CIGAR + */ + private Cigar makeCigarForStrictPrefixAndSuffix(final byte[] reference, final byte[] alternate, final int matchingPrefix, final int matchingSuffix) { + + final List result = new ArrayList(); + + // edge case: no D or I element + if ( 
reference.length == alternate.length ) { + result.add(makeElement(State.MATCH, matchingPrefix + matchingSuffix)); + } else { + // add the first M element + if ( matchingPrefix > 0 ) + result.add(makeElement(State.MATCH, matchingPrefix)); + + // add the D or I element + if ( alternate.length > reference.length ) + result.add(makeElement(State.INSERTION, alternate.length - reference.length)); + else // if ( reference.length > alternate.length ) + result.add(makeElement(State.DELETION, reference.length - alternate.length)); + + // add the last M element + if ( matchingSuffix > 0 ) + result.add(makeElement(State.MATCH, matchingSuffix)); + } + + return new Cigar(result); + } + + /** + * Calculates the CIGAR for the alignment from the back track matrix + * + * @param matchingPrefix the prefix match size + * @param matchingSuffix the suffix match size + * @param refLength length of the reference sequence + * @param altLength length of the alternate sequence + * @param sw the Smith-Waterman matrix to use + * @param btrack the back track matrix to use + * @return non-null SWPairwiseAlignmentResult object + */ + protected SWPairwiseAlignmentResult calculateCigar(final int matchingPrefix, final int matchingSuffix, + final int refLength, final int altLength, + final double[] sw, final int[] btrack) { + + final SWPairwiseAlignmentResult SW_result = calculateCigar(refLength, altLength, sw, btrack, OVERHANG_STRATEGY.INDEL); + + final LinkedList lce = new LinkedList(SW_result.cigar.getCigarElements()); + if ( matchingPrefix > 0 ) + lce.addFirst(makeElement(State.MATCH, matchingPrefix)); + if ( matchingSuffix > 0 ) + lce.addLast(makeElement(State.MATCH, matchingSuffix)); + + return new SWPairwiseAlignmentResult(AlignmentUtils.consolidateCigar(new Cigar(lce)), 0); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java 
index 78f81ec5e..84c33d4a5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java @@ -45,19 +45,43 @@ import java.util.*; * Date: Mar 23, 2009 * Time: 1:54:54 PM */ -public final class SWPairwiseAlignment implements SmithWaterman { - private int alignment_offset; // offset of s2 w/respect to s1 - private Cigar alignmentCigar; +public class SWPairwiseAlignment implements SmithWaterman { - private final Parameters parameters; + protected SWPairwiseAlignmentResult alignmentResult; - private static final int MSTATE = 0; - private static final int ISTATE = 1; - private static final int DSTATE = 2; - private static final int CLIP = 3; + protected final Parameters parameters; + + /** + * The state of a trace step through the matrix + */ + protected enum State { + MATCH, + INSERTION, + DELETION, + CLIP + } + + /** + * What strategy should we use when the best path does not start/end at the corners of the matrix? 
+ */ + public enum OVERHANG_STRATEGY { + /* + * Add softclips for the overhangs + */ + SOFTCLIP, + /* + * Treat the overhangs as proper insertions/deletions + */ + INDEL, + /* + * Just ignore the overhangs + */ + IGNORE + } protected static boolean cutoff = false; - private boolean doSoftClipping = true; + + protected OVERHANG_STRATEGY overhang_strategy = OVERHANG_STRATEGY.SOFTCLIP; /** * The SW scoring matrix, stored for debugging purposes if keepScoringMatrix is true @@ -103,10 +127,6 @@ public final class SWPairwiseAlignment implements SmithWaterman { this.parameters = parameters; } - protected void setDoSoftClipping(final boolean doSoftClipping) { - this.doSoftClipping = doSoftClipping; - } - /** * Create a new SW pairwise aligner * @@ -125,42 +145,93 @@ public final class SWPairwiseAlignment implements SmithWaterman { } @Override - public Cigar getCigar() { return alignmentCigar ; } + public Cigar getCigar() { return alignmentResult.cigar ; } @Override - public int getAlignmentStart2wrt1() { return alignment_offset; } + public int getAlignmentStart2wrt1() { return alignmentResult.alignment_offset; } - public void align(final byte[] a, final byte[] b) { - final int n = a.length; - final int m = b.length; + /** + * Aligns the alternate sequence to the reference sequence + * + * @param reference ref sequence + * @param alternate alt sequence + */ + protected void align(final byte[] reference, final byte[] alternate) { + if ( reference == null || reference.length == 0 || alternate == null || alternate.length == 0 ) + throw new IllegalArgumentException("Non-null, non-empty sequences are required for the Smith-Waterman calculation"); + + final int n = reference.length; + final int m = alternate.length; double [] sw = new double[(n+1)*(m+1)]; if ( keepScoringMatrix ) SW = sw; int [] btrack = new int[(n+1)*(m+1)]; - calculateMatrix(a, b, sw, btrack); - calculateCigar(n, m, sw, btrack); // length of the segment (continuous matches, insertions or deletions) + 
calculateMatrix(reference, alternate, sw, btrack); + alignmentResult = calculateCigar(n, m, sw, btrack, overhang_strategy); // length of the segment (continuous matches, insertions or deletions) } + /** + * Calculates the SW matrices for the given sequences + * + * @param reference ref sequence + * @param alternate alt sequence + * @param sw the Smith-Waterman matrix to populate + * @param btrack the back track matrix to populate + */ + protected void calculateMatrix(final byte[] reference, final byte[] alternate, double[] sw, int[] btrack) { + calculateMatrix(reference, alternate, sw, btrack, overhang_strategy); + } - private void calculateMatrix(final byte[] a, final byte[] b, double [] sw, int [] btrack ) { - final int n = a.length+1; - final int m = b.length+1; + /** + * Calculates the SW matrices for the given sequences + * + * @param reference ref sequence + * @param alternate alt sequence + * @param sw the Smith-Waterman matrix to populate + * @param btrack the back track matrix to populate + * @param overhang_strategy the strategy to use for dealing with overhangs + */ + protected void calculateMatrix(final byte[] reference, final byte[] alternate, double[] sw, int[] btrack, final OVERHANG_STRATEGY overhang_strategy) { + if ( reference.length == 0 || alternate.length == 0 ) + throw new IllegalArgumentException("Non-null, non-empty sequences are required for the Smith-Waterman calculation"); + + final int n = reference.length+1; + final int m = alternate.length+1; //final double MATRIX_MIN_CUTOFF=-1e100; // never let matrix elements drop below this cutoff final double MATRIX_MIN_CUTOFF; // never let matrix elements drop below this cutoff if ( cutoff ) MATRIX_MIN_CUTOFF = 0.0; else MATRIX_MIN_CUTOFF = -1e100; - double [] best_gap_v = new double[m+1]; - Arrays.fill(best_gap_v,-1.0e40); - int [] gap_size_v = new int[m+1]; - double [] best_gap_h = new double[n+1]; + final double[] best_gap_v = new double[m+1]; + Arrays.fill(best_gap_v, -1.0e40); + final int[] 
gap_size_v = new int[m+1]; + final double[] best_gap_h = new double[n+1]; Arrays.fill(best_gap_h,-1.0e40); - int [] gap_size_h = new int[n+1]; + final int[] gap_size_h = new int[n+1]; + + // we need to initialize the SW matrix with gap penalties if we want to keep track of indels at the edges of alignments + if ( overhang_strategy == OVERHANG_STRATEGY.INDEL ) { + // initialize the first row + sw[1] = parameters.w_open; + double currentValue = parameters.w_open; + for ( int i = 2; i < m; i++ ) { + currentValue += parameters.w_extend; + sw[i] = currentValue; + } + + // initialize the first column + sw[m] = parameters.w_open; + currentValue = parameters.w_open; + for ( int i = 2; i < n; i++ ) { + currentValue += parameters.w_extend; + sw[i*m] = currentValue; + } + } // build smith-waterman matrix and keep backtrack info: for ( int i = 1, row_offset_1 = 0 ; i < n ; i++ ) { // we do NOT update row_offset_1 here, see comment at the end of this outer loop - byte a_base = a[i-1]; // letter in a at the current pos + byte a_base = reference[i-1]; // letter in a at the current pos final int row_offset = row_offset_1 + m; @@ -172,10 +243,10 @@ public final class SWPairwiseAlignment implements SmithWaterman { // data_offset_1 is linearized offset of element [i-1][j-1] - final byte b_base = b[j-1]; // letter in b at the current pos + final byte b_base = alternate[j-1]; // letter in b at the current pos // in other words, step_diag = sw[i-1][j-1] + wd(a_base,b_base); - double step_diag = sw[data_offset_1] + wd(a_base,b_base); + final double step_diag = sw[data_offset_1] + wd(a_base,b_base); // optimized "traversal" of all the matrix cells above the current one (i.e. traversing // all 'step down' events that would end in the current cell. 
The optimized code @@ -251,65 +322,92 @@ public final class SWPairwiseAlignment implements SmithWaterman { } } + /* + * Class to store the result of calculating the CIGAR from the back track matrix + */ + protected final class SWPairwiseAlignmentResult { + public final Cigar cigar; + public final int alignment_offset; + public SWPairwiseAlignmentResult(final Cigar cigar, final int alignment_offset) { + this.cigar = cigar; + this.alignment_offset = alignment_offset; + } + } - private void calculateCigar(int n, int m, double [] sw, int [] btrack) { + /** + * Calculates the CIGAR for the alignment from the back track matrix + * + * @param refLength length of the reference sequence + * @param altLength length of the alternate sequence + * @param sw the Smith-Waterman matrix to use + * @param btrack the back track matrix to use + * @param overhang_strategy the strategy to use for dealing with overhangs + * @return non-null SWPairwiseAlignmentResult object + */ + protected SWPairwiseAlignmentResult calculateCigar(final int refLength, final int altLength, final double[] sw, final int[] btrack, final OVERHANG_STRATEGY overhang_strategy) { // p holds the position we start backtracking from; we will be assembling a cigar in the backwards order int p1 = 0, p2 = 0; double maxscore = Double.NEGATIVE_INFINITY; // sw scores are allowed to be negative int segment_length = 0; // length of the segment (continuous matches, insertions or deletions) - // look for largest score. 
we use >= combined with the traversal direction - // to ensure that if two scores are equal, the one closer to diagonal gets picked - for ( int i = 1, data_offset = m+1+m ; i < n+1 ; i++, data_offset += (m+1) ) { - // data_offset is the offset of [i][m] - if ( sw[data_offset] >= maxscore ) { - p1 = i; p2 = m ; maxscore = sw[data_offset]; + // if we want to consider overhangs as legitimate operators, then just start from the corner of the matrix + if ( overhang_strategy == OVERHANG_STRATEGY.INDEL ) { + p1 = refLength; + p2 = altLength; + } else { + // look for largest score. we use >= combined with the traversal direction + // to ensure that if two scores are equal, the one closer to diagonal gets picked + for ( int i = 1, data_offset = altLength+1+altLength ; i < refLength+1 ; i++, data_offset += (altLength+1) ) { + // data_offset is the offset of [i][m] + if ( sw[data_offset] >= maxscore ) { + p1 = i; p2 = altLength ; maxscore = sw[data_offset]; + } } - } - for ( int j = 1, data_offset = n*(m+1)+1 ; j < m+1 ; j++, data_offset++ ) { - // data_offset is the offset of [n][j] - if ( sw[data_offset] > maxscore || sw[data_offset] == maxscore && Math.abs(n-j) < Math.abs(p1 - p2)) { - p1 = n; - p2 = j ; - maxscore = sw[data_offset]; - segment_length = m - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment + for ( int j = 1, data_offset = refLength*(altLength+1)+1 ; j < altLength+1 ; j++, data_offset++ ) { + // data_offset is the offset of [n][j] + if ( sw[data_offset] > maxscore || sw[data_offset] == maxscore && Math.abs(refLength-j) < Math.abs(p1 - p2)) { + p1 = refLength; + p2 = j ; + maxscore = sw[data_offset]; + segment_length = altLength - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment + } } } List lce = new ArrayList(5); - if ( segment_length > 0 && doSoftClipping ) { - lce.add(makeElement(CLIP, segment_length)); + if ( segment_length > 0 && overhang_strategy == OVERHANG_STRATEGY.SOFTCLIP ) { + 
lce.add(makeElement(State.CLIP, segment_length)); segment_length = 0; } // we will be placing all insertions and deletions into sequence b, so the states are named w/regard // to that sequence - int state = MSTATE; + State state = State.MATCH; - int data_offset = p1*(m+1)+p2; // offset of element [p1][p2] + int data_offset = p1*(altLength+1)+p2; // offset of element [p1][p2] do { int btr = btrack[data_offset]; - int new_state; + State new_state; int step_length = 1; if ( btr > 0 ) { - new_state = DSTATE; + new_state = State.DELETION; step_length = btr; } else if ( btr < 0 ) { - new_state = ISTATE; + new_state = State.INSERTION; step_length = (-btr); - } else new_state = MSTATE; // and step_length =1, already set above + } else new_state = State.MATCH; // and step_length =1, already set above // move to next best location in the sw matrix: switch( new_state ) { - case MSTATE: data_offset -= (m+2); p1--; p2--; break; // move back along the diag in the sw matrix - case ISTATE: data_offset -= step_length; p2 -= step_length; break; // move left - case DSTATE: data_offset -= (m+1)*step_length; p1 -= step_length; break; // move up + case MATCH: data_offset -= (altLength+2); p1--; p2--; break; // move back along the diag in the sw matrix + case INSERTION: data_offset -= step_length; p2 -= step_length; break; // move left + case DELETION: data_offset -= (altLength+1)*step_length; p1 -= step_length; break; // move up } // now let's see if the state actually changed: @@ -320,7 +418,7 @@ public final class SWPairwiseAlignment implements SmithWaterman { segment_length = step_length; state = new_state; } -// next condition is equivalent to while ( sw[p1][p2] != 0 ) (with modified p1 and/or p2: + // next condition is equivalent to while ( sw[p1][p2] != 0 ) (with modified p1 and/or p2: } while ( p1 > 0 && p2 > 0 ); // post-process the last segment we are still keeping; @@ -331,28 +429,41 @@ public final class SWPairwiseAlignment implements SmithWaterman { // last 3 bases of the 
read overlap with/align to the ref), the cigar will be still 5M if // DO_SOFTCLIP is false or 2S3M if DO_SOFTCLIP is true. // The consumers need to check for the alignment offset and deal with it properly. - if (doSoftClipping ) { + final int alignment_offset; + if ( overhang_strategy == OVERHANG_STRATEGY.SOFTCLIP ) { lce.add(makeElement(state, segment_length)); - if ( p2> 0 ) lce.add(makeElement(CLIP, p2)); - alignment_offset = p1 ; - } else { + if ( p2 > 0 ) lce.add(makeElement(State.CLIP, p2)); + alignment_offset = p1; + } else if ( overhang_strategy == OVERHANG_STRATEGY.IGNORE ) { lce.add(makeElement(state, segment_length + p2)); alignment_offset = p1 - p2; + } else { // overhang_strategy == OVERHANG_STRATEGY.INDEL + + // take care of the actual alignment + lce.add(makeElement(state, segment_length)); + + // take care of overhangs at the beginning of the alignment + if ( p1 > 0 ) + lce.add(makeElement(State.DELETION, p1)); + else if ( p2 > 0 ) + lce.add(makeElement(State.INSERTION, p2)); + + alignment_offset = 0; } Collections.reverse(lce); - alignmentCigar = AlignmentUtils.consolidateCigar(new Cigar(lce)); + return new SWPairwiseAlignmentResult(AlignmentUtils.consolidateCigar(new Cigar(lce)), alignment_offset); } - private CigarElement makeElement(int state, int segment_length) { - CigarOperator o = null; - switch(state) { - case MSTATE: o = CigarOperator.M; break; - case ISTATE: o = CigarOperator.I; break; - case DSTATE: o = CigarOperator.D; break; - case CLIP: o = CigarOperator.S; break; + protected CigarElement makeElement(final State state, final int length) { + CigarOperator op = null; + switch (state) { + case MATCH: op = CigarOperator.M; break; + case INSERTION: op = CigarOperator.I; break; + case DELETION: op = CigarOperator.D; break; + case CLIP: op = CigarOperator.S; break; } - return new CigarElement(segment_length,o); + return new CigarElement(length, op); } private double wd(byte x, byte y) { @@ -375,7 +486,7 @@ public final class 
SWPairwiseAlignment implements SmithWaterman { Cigar cigar = getCigar(); - if ( ! doSoftClipping ) { + if ( overhang_strategy != OVERHANG_STRATEGY.SOFTCLIP ) { // we need to go through all the hassle below only if we do not do softclipping; // otherwise offset is never negative diff --git a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SmithWaterman.java b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SmithWaterman.java index 44fd889c5..3a8afca8c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SmithWaterman.java +++ b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SmithWaterman.java @@ -42,6 +42,7 @@ import net.sf.samtools.Cigar; * Time: 8:24 AM */ public interface SmithWaterman { + /** * Get the cigar string for the alignment of this SmithWaterman class * @return a non-null cigar diff --git a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java index 3c68b8753..0a6f9898e 100644 --- a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java @@ -216,4 +216,24 @@ public class UtilsUnitTest extends BaseTest { } } } + + @DataProvider(name = "trim") + public Object[][] createTrimTestData() { + List tests = new ArrayList(); + + final String s = "AAAA"; + for ( int front = 0; front < s.length(); front++ ) { + for ( int back = 0; back < s.length(); back++ ) { + if ( front + back <= s.length() ) + tests.add(new Object[]{s, front, back}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "trim", enabled = true) + public void testTrim(final String s, final int frontTrim, final int backTrim) { + Assert.assertEquals(s.length() - frontTrim - backTrim, Utils.trimArray(s.getBytes(), frontTrim, backTrim).length); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/smithwaterman/SmithWatermanBenchmark.java 
b/public/java/test/org/broadinstitute/sting/utils/smithwaterman/SmithWatermanBenchmark.java new file mode 100644 index 000000000..ee8f411bf --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/smithwaterman/SmithWatermanBenchmark.java @@ -0,0 +1,88 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.utils.smithwaterman; + +import com.google.caliper.Param; +import com.google.caliper.SimpleBenchmark; +import org.broadinstitute.sting.utils.Utils; + +/** + * Caliper microbenchmark comparing the original and greedy Smith-Waterman pairwise alignment implementations + */ +public class SmithWatermanBenchmark extends SimpleBenchmark { + + @Param({"Original", "Greedy"}) + String version; // set automatically by framework + + @Param({"10", "50", "100", "500"}) + int sizeOfMiddleRegion; // set automatically by framework + + @Param({"10", "50", "100", "500"}) + int sizeOfEndRegions; // set automatically by framework + + String refString; + String hapString; + + @Override protected void setUp() { + final StringBuilder ref = new StringBuilder(); + final StringBuilder hap = new StringBuilder(); + + ref.append(Utils.dupString('A', sizeOfEndRegions)); + hap.append(Utils.dupString('A', sizeOfEndRegions)); + + // introduce a SNP + ref.append("X"); + hap.append("Y"); + + ref.append(Utils.dupString('A', sizeOfMiddleRegion)); + hap.append(Utils.dupString('A', sizeOfMiddleRegion)); + + // introduce a SNP + ref.append("X"); + hap.append("Y"); + + ref.append(Utils.dupString('A', sizeOfEndRegions)); + hap.append(Utils.dupString('A', sizeOfEndRegions)); + + refString = ref.toString(); + hapString = hap.toString(); + } + + public void timeSW(int rep) { + for ( int i = 0; i < rep; i++ ) { + final SmithWaterman sw; + if ( version.equals("Greedy") ) + sw = new GlobalEdgeGreedySWPairwiseAlignment(refString.getBytes(), hapString.getBytes()); + else + sw = new SWPairwiseAlignment(refString.getBytes(), hapString.getBytes()); + sw.getCigar(); + } + } + + public static void main(String[] args) { + com.google.caliper.Runner.main(SmithWatermanBenchmark.class, args); + } +} From b4f482a4212984b9b2927063261917f50b76634f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 15 Apr 2013 08:20:28 -0400 Subject: [PATCH 025/116] NanoScheduled ActiveRegionTraversal and HaplotypeCaller -- Made CountReadsInActiveRegions Nano
schedulable, confirming identical results for linear and nano results -- Made HaplotypeCaller NanoSchedulable, requiring misc. changes in the map/reduce type so that the map() function returns a List and reduce actually prints out the results to disk -- Tests for NanoScheduling -- CountReadsInActiveRegionsIntegrationTest now does NCT 1, 2, 4 with CountReadsInActiveRegions -- HaplotypeCallerParallelIntegrationTest does NCT 1,2,4 calling on 100kb of PCR free data -- Some misc. code cleanup of HaplotypeCaller -- Analysis scripts to assess performance of nano scheduled HC -- In order to make the haplotype caller thread safe we needed to use an AtomicInteger for the class-specific static ID counter in SeqVertex and MultiDeBruijnVertex, avoiding a race condition where multiple new Vertex() could end up with the same id. --- .../haplotypecaller/HaplotypeCaller.java | 45 ++-- .../LikelihoodCalculationEngine.java | 40 +-- .../haplotypecaller/graphs/SeqVertex.java | 8 +- .../readthreading/MultiDeBruijnVertex.java | 7 +- ...aplotypeCallerParallelIntegrationTest.java | 79 ++++++ .../sting/gatk/executive/MicroScheduler.java | 2 +- .../traversals/TraverseActiveRegions.java | 228 +++++++++++++----- .../TraverseActiveRegionsUnitTest.java | 16 +- 8 files changed, 306 insertions(+), 119 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 33d1104bc..f065a0d7d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -139,7 +139,7 @@ import java.util.*; @ActiveRegionTraversalParameters(extension=100, maxRegion=300)
@ReadFilters({HCMappingQualityFilter.class}) @Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=250) -public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { +public class HaplotypeCaller extends ActiveRegionWalker, Integer> implements AnnotatorCompatible, NanoSchedulable { // ----------------------------------------------------------------------------------------------- // general haplotype caller arguments // ----------------------------------------------------------------------------------------------- @@ -645,15 +645,16 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // //--------------------------------------------------------------------------------------------------------------- + private final static List NO_CALLS = Collections.emptyList(); @Override - public Integer map( final ActiveRegion originalActiveRegion, final RefMetaDataTracker metaDataTracker ) { + public List map( final ActiveRegion originalActiveRegion, final RefMetaDataTracker metaDataTracker ) { if ( justDetermineActiveRegions ) // we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work - return 1; + return NO_CALLS; - if( !originalActiveRegion.isActive() ) { return 0; } // Not active so nothing to do! + if( !originalActiveRegion.isActive() ) { return NO_CALLS; } // Not active so nothing to do! - final List activeAllelesToGenotype = new ArrayList(); + final List activeAllelesToGenotype = new ArrayList<>(); if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { for( final VariantContext vc : allelesToGenotype ) { if( originalActiveRegion.getLocation().overlapsP( getToolkit().getGenomeLocParser().createGenomeLoc(vc) ) ) { @@ -662,23 +663,23 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } allelesToGenotype.removeAll( activeAllelesToGenotype ); // No alleles found in this region so nothing to do! 
- if ( activeAllelesToGenotype.isEmpty() ) { return 0; } + if ( activeAllelesToGenotype.isEmpty() ) { return NO_CALLS; } } else { - if( originalActiveRegion.size() == 0 ) { return 0; } // No reads here so nothing to do! + if( originalActiveRegion.size() == 0 ) { return NO_CALLS; } // No reads here so nothing to do! } // run the local assembler, getting back a collection of information on how we should proceed final AssemblyResult assemblyResult = assembleReads(originalActiveRegion, activeAllelesToGenotype); // abort early if something is out of the acceptable range - if( ! assemblyResult.isVariationPresent() ) { return 1; } // only the reference haplotype remains so nothing else to do! - if (dontGenotype) return 1; // user requested we not proceed + if( ! assemblyResult.isVariationPresent() ) { return NO_CALLS; } // only the reference haplotype remains so nothing else to do! + if (dontGenotype) return NO_CALLS; // user requested we not proceed // filter out reads from genotyping which fail mapping quality based criteria final List filteredReads = filterNonPassingReads( assemblyResult.regionForGenotyping ); final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); - if( assemblyResult.regionForGenotyping.size() == 0 ) { return 1; } // no reads remain after filtering so nothing else to do! + if( assemblyResult.regionForGenotyping.size() == 0 ) { return NO_CALLS; } // no reads remain after filtering so nothing else to do! // evaluate each sample's reads against all haplotypes //logger.info("Computing read likelihoods with " + assemblyResult.regionForGenotyping.size() + " reads"); @@ -697,12 +698,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem getToolkit().getGenomeLocParser(), activeAllelesToGenotype ); - for( final VariantContext call : calledHaplotypes.getCalls() ) { - // TODO -- uncomment this line once ART-based walkers have a proper RefMetaDataTracker. 
- // annotationEngine.annotateDBs(metaDataTracker, getToolkit().getGenomeLocParser().createGenomeLoc(call), call); - vcfWriter.add( call ); - } - + // TODO -- must disable if we are doing NCT, or set the output type of ! presorted if ( bamWriter != null ) { haplotypeBAMWriter.writeReadsAlignedToHaplotypes(assemblyResult.haplotypes, assemblyResult.paddedReferenceLoc, bestHaplotypes, @@ -712,7 +708,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem if( DEBUG ) { logger.info("----------------------------------------------------------------------------------"); } - return 1; // One active region was processed during this map call + return calledHaplotypes.getCalls(); } private final static class AssemblyResult { @@ -855,8 +851,13 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } @Override - public Integer reduce(Integer cur, Integer sum) { - return cur + sum; + public Integer reduce(List callsInRegion, Integer numCalledRegions) { + for( final VariantContext call : callsInRegion ) { + // TODO -- uncomment this line once ART-based walkers have a proper RefMetaDataTracker. + // annotationEngine.annotateDBs(metaDataTracker, getToolkit().getGenomeLocParser().createGenomeLoc(call), call); + vcfWriter.add( call ); + } + return (callsInRegion.isEmpty() ? 
0 : 1) + numCalledRegions; } @Override @@ -872,7 +873,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private void finalizeActiveRegion( final ActiveRegion activeRegion ) { if( DEBUG ) { logger.info("Assembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } - final List finalizedReadList = new ArrayList(); + final List finalizedReadList = new ArrayList<>(); final FragmentCollection fragmentCollection = FragmentUtils.create( activeRegion.getReads() ); activeRegion.clearReads(); @@ -883,7 +884,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } // Loop through the reads hard clipping the adaptor and low quality tails - final List readsToUse = new ArrayList(finalizedReadList.size()); + final List readsToUse = new ArrayList<>(finalizedReadList.size()); for( final GATKSAMRecord myRead : finalizedReadList ) { final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? 
myRead : ReadClipper.hardClipAdaptorSequence( myRead ) ); if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { @@ -937,7 +938,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem for( final String sample : samplesList) { List readList = returnMap.get( sample ); if( readList == null ) { - readList = new ArrayList(); + readList = new ArrayList<>(); returnMap.put(sample, readList); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index fbd9b29d5..d5d5f3c09 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -71,7 +71,20 @@ public class LikelihoodCalculationEngine { private final byte constantGCP; private final double log10globalReadMismappingRate; private final boolean DEBUG; - private final PairHMM pairHMM; + private final PairHMM.HMM_IMPLEMENTATION hmmType; + + private final ThreadLocal pairHMM = new ThreadLocal() { + @Override + protected PairHMM initialValue() { + switch (hmmType) { + case EXACT: return new Log10PairHMM(true); + case ORIGINAL: return new Log10PairHMM(false); + case LOGLESS_CACHING: return new LoglessPairHMM(); + default: + throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, and LOGLESS_CACHING."); + } + } + }; /** * The expected rate of random sequencing errors for a read originating from its true haplotype. @@ -96,22 +109,9 @@ public class LikelihoodCalculationEngine { * assigned a likelihood of -13. 
*/ public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType, final double log10globalReadMismappingRate ) { - switch (hmmType) { - case EXACT: - pairHMM = new Log10PairHMM(true); - break; - case ORIGINAL: - pairHMM = new Log10PairHMM(false); - break; - case LOGLESS_CACHING: - pairHMM = new LoglessPairHMM(); - break; - default: - throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, and LOGLESS_CACHING."); - } - + this.hmmType = hmmType; this.constantGCP = constantGCP; - DEBUG = debug; + this.DEBUG = debug; this.log10globalReadMismappingRate = log10globalReadMismappingRate; } @@ -143,7 +143,7 @@ public class LikelihoodCalculationEngine { } // initialize arrays to hold the probabilities of being in the match, insertion and deletion cases - pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH); + pairHMM.get().initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH); } public Map computeReadLikelihoods( final List haplotypes, final Map> perSampleReadList ) { @@ -151,7 +151,7 @@ public class LikelihoodCalculationEngine { initializePairHMM(haplotypes, perSampleReadList); // Add likelihoods for each sample's reads to our stratifiedReadMap - final Map stratifiedReadMap = new HashMap(); + final Map stratifiedReadMap = new LinkedHashMap<>(); for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { // evaluate the likelihood of the reads given those haplotypes final PerReadAlleleLikelihoodMap map = computeReadLikelihoods(haplotypes, sampleEntry.getValue()); @@ -170,7 +170,7 @@ public class LikelihoodCalculationEngine { private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List haplotypes, final List reads) { // first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time) final int numHaplotypes = 
haplotypes.size(); - final Map alleleVersions = new HashMap<>(numHaplotypes); + final Map alleleVersions = new LinkedHashMap<>(numHaplotypes); Allele refAllele = null; for ( final Haplotype haplotype : haplotypes ) { final Allele allele = Allele.create(haplotype, true); @@ -202,7 +202,7 @@ public class LikelihoodCalculationEngine { for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { final Haplotype haplotype = haplotypes.get(jjj); final boolean isFirstHaplotype = jjj == 0; - final double log10l = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), + final double log10l = pairHMM.get().computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype); if ( haplotype.isNonReference() ) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqVertex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqVertex.java index f192b54aa..083747db4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqVertex.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqVertex.java @@ -49,6 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.Utils; import java.util.Arrays; +import java.util.concurrent.atomic.AtomicInteger; /** * A graph vertex containing a sequence of bases and a unique ID that @@ -71,8 +72,9 @@ import java.util.Arrays; * @since 03/2013 */ public final class SeqVertex extends BaseVertex { - private static int idCounter = 0; - public final int id; + // Note that using an AtomicInteger is critical to allow multi-threaded HaplotypeCaller + private static final AtomicInteger idCounter = new AtomicInteger(0); + private int id = idCounter.getAndIncrement(); /** * Create a new SeqVertex with sequence and the next 
available id @@ -80,7 +82,6 @@ public final class SeqVertex extends BaseVertex { */ public SeqVertex(final byte[] sequence) { super(sequence); - this.id = idCounter++; } /** @@ -89,7 +90,6 @@ public final class SeqVertex extends BaseVertex { */ public SeqVertex(final String sequence) { super(sequence); - this.id = idCounter++; } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/MultiDeBruijnVertex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/MultiDeBruijnVertex.java index 814b3b9a7..5752583c7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/MultiDeBruijnVertex.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/MultiDeBruijnVertex.java @@ -51,6 +51,7 @@ import org.broadinstitute.sting.utils.Utils; import java.util.LinkedList; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; /** * A DeBruijnVertex that supports multiple copies of the same kmer @@ -65,10 +66,12 @@ import java.util.List; */ final class MultiDeBruijnVertex extends DeBruijnVertex { private final static boolean KEEP_TRACK_OF_READS = false; - private static int idCounter = 0; + + // Note that using an AtomicInteger is critical to allow multi-threaded HaplotypeCaller + private static final AtomicInteger idCounter = new AtomicInteger(0); + private int id = idCounter.getAndIncrement(); private final List reads = new LinkedList(); - private int id = idCounter++; // TODO -- potential race condition problem here /** * Create a new MultiDeBruijnVertex with kmer sequence diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java new file mode 100644 index 000000000..ff5a501cc --- /dev/null +++ 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -0,0 +1,79 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. 
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. 
ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.haplotypeBAMWriter.HaplotypeBAMWriter; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { + @DataProvider(name = "NCTDataProvider") + public Object[][] makeNCTDataProvider() { + List tests = new ArrayList(); + + for ( final int nct : Arrays.asList(1, 2, 4) ) { + tests.add(new Object[]{nct, "c277fd65365d59b734260dd8423313bb"}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "NCTDataProvider") + public void testHCNCT(final int nct, final String md5) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam -o %s " + + " -L 20:10,000,000-10,100,000 -G none -A -contamination 0.0 
-nct " + nct, 1, + Arrays.asList(md5)); + executeTest("HC test parallel HC with NCT with nct " + nct, spec); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index dc9dfd77e..23b084d66 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -245,7 +245,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { } else if (walker instanceof ReadPairWalker) { return new TraverseReadPairs(); } else if (walker instanceof ActiveRegionWalker) { - return new TraverseActiveRegions(); + return new TraverseActiveRegions(threadAllocation.getNumCPUThreadsPerDataThread()); } else { throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type."); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index f9a4fcdbb..b47a355be 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -41,12 +41,22 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.activeregion.*; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; +import org.broadinstitute.sting.utils.activeregion.BandPassActivityProfile; +import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction; +import 
org.broadinstitute.sting.utils.nanoScheduler.NSProgressFunction; +import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction; +import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; -import java.util.*; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; /** * Implement active region traversal @@ -67,7 +77,8 @@ import java.util.*; * variable spanOfLastReadSeen * */ -public class TraverseActiveRegions extends TraversalEngine,LocusShardDataProvider> { +public final class TraverseActiveRegions extends TraversalEngine,LocusShardDataProvider> { + private final static boolean DEBUG = false; protected final static Logger logger = Logger.getLogger(TraversalEngine.class); protected final static boolean LOG_READ_CARRYING = false; @@ -84,7 +95,32 @@ public class TraverseActiveRegions extends TraversalEngine walker; + + final NanoScheduler nanoScheduler; + + /** + * Create a single threaded active region traverser + */ + public TraverseActiveRegions() { + this(1); + } + + /** + * Create an active region traverser that uses nThreads for getting its work done + * @param nThreads number of threads + */ + public TraverseActiveRegions(final int nThreads) { + nanoScheduler = new NanoScheduler<>(nThreads); + nanoScheduler.setProgressFunction(new NSProgressFunction() { + @Override + public void progress(ActiveRegion lastActiveRegion) { + if ( lastActiveRegion != null ) + // note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon + printProgress(lastActiveRegion.getLocation().getStopLocation()); + } + }); + } /** * Have the debugging output streams been initialized already? @@ -98,7 +134,7 @@ public class TraverseActiveRegions extends TraversalEngine)walker; if ( this.walker.wantsExtendedReads() && ! 
this.walker.wantsNonPrimaryReads() ) { throw new IllegalArgumentException("Active region walker " + this.walker + " requested extended events but not " + "non-primary reads, an inconsistent state. Please modify the walker"); @@ -217,58 +253,108 @@ public class TraverseActiveRegions extends TraversalEngine reads = locusView.getLIBS().transferReadsFromAllPreviousPileups(); - for( final GATKSAMRecord read : reads ) { - if ( ! appearedInLastShard(locOfLastReadAtTraversalStart, read) ) { - rememberLastReadLocation(read); - myReads.add(read); - } - } - - // skip this location -- it's not part of our engine intervals - if ( outsideEngineIntervals(location) ) - continue; - - // we've move across some interval boundary, restart profile - final boolean flushProfile = ! activityProfile.isEmpty() - && ( activityProfile.getContigIndex() != location.getContigIndex() - || location.getStart() != activityProfile.getStop() + 1); - sum = processActiveRegions(walker, sum, flushProfile, false); - - dataProvider.getShard().getReadMetrics().incrementNumIterations(); - - // create reference context. Note that if we have a pileup of "extended events", the context will - // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). 
- final ReferenceContext refContext = referenceView.getReferenceContext(location); - - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); - - // Call the walkers isActive function for this locus and add them to the list to be integrated later - addIsActiveResult(walker, tracker, refContext, locus); - - maxReadsInMemory = Math.max(myReads.size(), maxReadsInMemory); - printProgress(location); - } + nanoScheduler.setDebug(false); + final Iterator activeRegionIterator = new ActiveRegionIterator(dataProvider); + final TraverseActiveRegionMap myMap = new TraverseActiveRegionMap(); + final TraverseActiveRegionReduce myReduce = new TraverseActiveRegionReduce(); + final T result = nanoScheduler.execute(activeRegionIterator, myMap, sum, myReduce); updateCumulativeMetrics(dataProvider.getShard()); - return sum; + return result; + } + + private class ActiveRegionIterator implements Iterator { + private final LocusShardDataProvider dataProvider; + private LinkedList readyActiveRegions = new LinkedList(); + private boolean done = false; + private final LocusView locusView; + private final LocusReferenceView referenceView; + private final ReferenceOrderedView referenceOrderedDataView; + private final GenomeLoc locOfLastReadAtTraversalStart; + + public ActiveRegionIterator( final LocusShardDataProvider dataProvider ) { + this.dataProvider = dataProvider; + locusView = new AllLocusView(dataProvider); + referenceView = new LocusReferenceView( walker, dataProvider ); + referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); + + // We keep processing while the next reference location is within the interval + locOfLastReadAtTraversalStart = spanOfLastSeenRead(); + } + + @Override public void remove() { throw new UnsupportedOperationException("Cannot remove from ActiveRegionIterator"); } + + @Override + 
public ActiveRegion next() { + return readyActiveRegions.pop(); + } + @Override + public boolean hasNext() { + if ( ! readyActiveRegions.isEmpty() ) + return true; + if ( done ) + return false; + else { + + while( locusView.hasNext() ) { + final AlignmentContext locus = locusView.next(); + final GenomeLoc location = locus.getLocation(); + + rememberLastLocusLocation(location); + + // get all of the new reads that appear in the current pileup, and them to our list of reads + // provided we haven't seen them before + final Collection reads = locusView.getLIBS().transferReadsFromAllPreviousPileups(); + for( final GATKSAMRecord read : reads ) { + // note that ActiveRegionShards span entire contigs, so this check is in some + // sense no longer necessary, as any read that appeared in the last shard would now + // by definition be on a different contig. However, the logic here doesn't hurt anything + // and makes us robust should we decided to provide shards that don't fully span + // contigs at some point in the future + if ( ! appearedInLastShard(locOfLastReadAtTraversalStart, read) ) { + rememberLastReadLocation(read); + myReads.add(read); + } + } + + // skip this location -- it's not part of our engine intervals + if ( outsideEngineIntervals(location) ) + continue; + + // we've move across some interval boundary, restart profile + final boolean flushProfile = ! activityProfile.isEmpty() + && ( activityProfile.getContigIndex() != location.getContigIndex() + || location.getStart() != activityProfile.getStop() + 1); + final List newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false); + + dataProvider.getShard().getReadMetrics().incrementNumIterations(); + + // create reference context. Note that if we have a pileup of "extended events", the context will + // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). 
+ final ReferenceContext refContext = referenceView.getReferenceContext(location); + + // Iterate forward to get all reference ordered data covering this location + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); + + // Call the walkers isActive function for this locus and add them to the list to be integrated later + addIsActiveResult(walker, tracker, refContext, locus); + + maxReadsInMemory = Math.max(myReads.size(), maxReadsInMemory); + printProgress(location); + + if ( ! newActiveRegions.isEmpty() ) { + readyActiveRegions.addAll(newActiveRegions); + if ( DEBUG ) + for ( final ActiveRegion region : newActiveRegions ) + logger.info("Adding region to queue for processing " + region); + return true; + } + } + + return false; + } + } } /** @@ -276,7 +362,11 @@ public class TraverseActiveRegions extends TraversalEngine walker, T sum) { - return processActiveRegions((ActiveRegionWalker)walker, sum, true, true); + for ( final ActiveRegion region : prepActiveRegionsForProcessing((ActiveRegionWalker)walker, true, true) ) { + final M x = ((ActiveRegionWalker) walker).map(region, null); + sum = walker.reduce( x, sum ); + } + return sum; } // ------------------------------------------------------------------------------------- @@ -504,7 +594,7 @@ public class TraverseActiveRegions extends TraversalEngine walker, T sum, final boolean flushActivityProfile, final boolean forceAllRegionsToBeActive) { + private List prepActiveRegionsForProcessing(final ActiveRegionWalker walker, final boolean flushActivityProfile, final boolean forceAllRegionsToBeActive) { if ( ! 
walkerHasPresetRegions ) { // We don't have preset regions, so we get our regions from the activity profile final Collection activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMinRegionSize(), getMaxRegionSize(), flushActivityProfile); @@ -513,21 +603,23 @@ public class TraverseActiveRegions extends TraversalEngine readyRegions = new LinkedList(); while( workQueue.peek() != null ) { final ActiveRegion activeRegion = workQueue.peek(); if ( forceAllRegionsToBeActive || regionCompletelyWithinDeadZone(activeRegion) ) { writeActivityProfile(activeRegion.getSupportingStates()); writeActiveRegion(activeRegion); - sum = processActiveRegion( workQueue.remove(), sum, walker ); + readyRegions.add(prepActiveRegionForProcessing(workQueue.remove(), walker)); } else { break; } } - return sum; + return readyRegions; + } - private T processActiveRegion(final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker walker) { + private ActiveRegion prepActiveRegionForProcessing(final ActiveRegion activeRegion, final ActiveRegionWalker walker) { final List stillLive = new LinkedList(); for ( final GATKSAMRecord read : myReads.popCurrentReads() ) { boolean killed = false; @@ -561,7 +653,21 @@ public class TraverseActiveRegions extends TraversalEngine { + @Override + public M apply(final ActiveRegion activeRegion) { + if ( DEBUG ) logger.info("Executing walker.map for " + activeRegion + " in thread " + Thread.currentThread().getName()); + return walker.map(activeRegion, null); + } + } + + private class TraverseActiveRegionReduce implements NSReduceFunction { + @Override + public T apply(M one, T sum) { + return walker.reduce(one, sum); + } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index b6106d4bc..2e6705d77 100644 --- 
a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -77,7 +77,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { @DataProvider(name = "TraversalEngineProvider") public Object[][] makeTraversals() { final List traversals = new LinkedList(); - traversals.add(new Object[]{new TraverseActiveRegions()}); + traversals.add(new Object[]{new TraverseActiveRegions<>()}); return traversals.toArray(new Object[][]{}); } @@ -523,8 +523,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { final int maxTests = Integer.MAX_VALUE; int nTests = 0; - for ( final int readLength : Arrays.asList(10, 100) ) { - for ( final int skips : Arrays.asList(0, 1, 10) ) { + for ( final int readLength : Arrays.asList(100) ) { + for ( final int skips : Arrays.asList(0, 10) ) { for ( final int start : starts ) { for ( final int nReadsPerLocus : Arrays.asList(1, 2) ) { for ( final int nLoci : Arrays.asList(1, 1000) ) { @@ -536,7 +536,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { for ( final GenomeLocSortedSet activeRegions : enumerateActiveRegions(bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd())) { nTests++; if ( nTests < maxTests ) // && nTests == 1238 ) - tests.add(new Object[]{nTests, activeRegions, readStates, bamBuilder}); + tests.add(new Object[]{new TraverseActiveRegions<>(), nTests, activeRegions, readStates, bamBuilder}); } } } @@ -586,7 +586,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { @Test(enabled = true && ! 
DEBUG, dataProvider = "CombinatorialARTTilingProvider") - public void testARTReadsInActiveRegions(final int id, final GenomeLocSortedSet activeRegions, final EnumSet readStates, final ArtificialBAMBuilder bamBuilder) { + public void testARTReadsInActiveRegions(final TraverseActiveRegions traversal, final int id, final GenomeLocSortedSet activeRegions, final EnumSet readStates, final ArtificialBAMBuilder bamBuilder) { logger.warn("Running testARTReadsInActiveRegions id=" + id + " locs " + activeRegions + " against bam " + bamBuilder); final List intervals = Arrays.asList( genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd()) @@ -595,7 +595,6 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions, false); walker.setStates(readStates); - final TraverseActiveRegions traversal = new TraverseActiveRegions(); final Map activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile()); final Set alreadySeenReads = new HashSet(); // for use with the primary / non-primary @@ -640,8 +639,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { // // --------------------------------------------------------------------------------------------------------- - @Test(enabled = true && ! DEBUG) - public void ensureAllInsertionReadsAreInActiveRegions() { + @Test(dataProvider = "TraversalEngineProvider", enabled = true && ! 
DEBUG) + public void ensureAllInsertionReadsAreInActiveRegions(final TraverseActiveRegions traversal) { final int readLength = 10; final int start = 20; @@ -667,7 +666,6 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions, false); - final TraverseActiveRegions traversal = new TraverseActiveRegions(); final Map activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile()); final ActiveRegion region = activeRegionsMap.values().iterator().next(); From 39e4396de0189f0acd39af66f08b0932028906cb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 9 May 2013 11:39:19 -0400 Subject: [PATCH 026/116] New ActiveRegionShardBalancer allows efficient NanoScheduling -- Previously we used the LocusShardBalancer for the haplotype caller, which meant that TraverseActiveRegions saw its shards grouped in chunks of 16kb bits on the genome. These locus shards are useful when you want to use the HierarchicalMicroScheduler, as they provide fine-grained access to the underlying BAM, but they have two major drawbacks: (1) we have to fairly frequently reset our state in TAR to handle moving between shard boundaries and (2) with the nano scheduled TAR we end up blocking at the end of each shard while our threads all finish processing. -- This commit changes the system over to using an ActiveRegionShardBalancer, which combines all of the shard data for a single contig into a single combined shard. This ensures that TAR, and by extension the HaplotypeCaller, gets all of the data on a single contig together so that the NanoScheduler runs efficiently instead of blocking over and over at shard boundaries. This simple change allows us to scale efficiently to around 8 threads in the nano scheduler: -- See https://www.dropbox.com/s/k7f280pd2zt0lyh/hc_nano_linear_scale.pdf -- See https://www.dropbox.com/s/fflpnan802m2906/hc_nano_log_scale.pdf -- Misc. 
changes throughout the codebase so we Use the ActiveRegionShardBalancer where appropriate. -- Added unit tests for ActiveRegionShardBalancer to confirm it does the merging as expected. -- Fix bad toString in FilePointer --- .../sting/gatk/GenomeAnalysisEngine.java | 4 +- .../reads/ActiveRegionShardBalancer.java | 85 +++++++++++++++ .../gatk/datasources/reads/FilePointer.java | 4 +- .../ActiveRegionShardBalancerUnitTest.java | 101 ++++++++++++++++++ .../TraverseActiveRegionsUnitTest.java | 2 +- 5 files changed, 191 insertions(+), 5 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancerUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 82bee7826..9dcba25ff 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -570,9 +570,9 @@ public class GenomeAnalysisEngine { if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Active region walkers can only traverse coordinate-sorted data. 
Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately."); if(intervals == null) - return readsDataSource.createShardIteratorOverMappedReads(new LocusShardBalancer()); + return readsDataSource.createShardIteratorOverMappedReads(new ActiveRegionShardBalancer()); else - return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer()); + return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new ActiveRegionShardBalancer()); } else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) { // Apply special validation to read pair walkers. diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java new file mode 100644 index 000000000..febdc788e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancer.java @@ -0,0 +1,85 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.datasources.reads; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +/** + * ActiveRegionShardBalancer + * + * Merges all of the file pointer information for a single contig index into a single + * combined shard. The purpose of doing this is to ensure that the HaplotypeCaller, which + * doesn't support TreeReduction by construction, gets all of the data on a single + * contig together so the the NanoSchedule runs efficiently + */ +public class ActiveRegionShardBalancer extends ShardBalancer { + /** + * Convert iterators of file pointers into balanced iterators of shards. + * @return An iterator over balanced shards. + */ + public Iterator iterator() { + return new Iterator() { + public boolean hasNext() { + return filePointers.hasNext(); + } + + public Shard next() { + FilePointer current = getCombinedFilePointersOnSingleContig(); + + // FilePointers have already been combined as necessary at the IntervalSharder level. No + // need to do so again here. 
+ + return new LocusShard(parser,readsDataSource,current.getLocations(),current.fileSpans); + } + + public void remove() { + throw new UnsupportedOperationException("Unable to remove from shard balancing iterator"); + } + }; + } + + /** + * Combine all of the file pointers in the filePointers iterator into a single combined + * FilePointer that spans all of the file pointers on a single contig + * @return a non-null FilePointer + */ + private FilePointer getCombinedFilePointersOnSingleContig() { + FilePointer current = filePointers.next(); + + final List toCombine = new LinkedList<>(); + toCombine.add(current); + + while ( filePointers.hasNext() && + current.isRegionUnmapped == filePointers.peek().isRegionUnmapped && + (current.getContigIndex() == filePointers.peek().getContigIndex() || current.isRegionUnmapped) ) { + toCombine.add(filePointers.next()); + } + + return FilePointer.union(toCombine, parser); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java index 56bf5197d..517903da3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java @@ -407,10 +407,10 @@ public class FilePointer { @Override public String toString() { StringBuilder builder = new StringBuilder(); - builder.append("FilePointer:%n"); + builder.append("FilePointer:\n"); builder.append("\tlocations = {"); builder.append(Utils.join(";",locations)); - builder.append("}%n\tregions = %n"); + builder.append("}\n\tregions = \n"); for(Map.Entry entry: fileSpans.entrySet()) { builder.append(entry.getKey()); builder.append("= {"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancerUnitTest.java new 
file mode 100644 index 000000000..e768faba4 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancerUnitTest.java @@ -0,0 +1,101 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.gatk.datasources.reads; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileSpan; +import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.util.*; + +public class ActiveRegionShardBalancerUnitTest extends BaseTest { + // example genome loc parser for this test, can be deleted if you don't use the reference + private GenomeLocParser genomeLocParser; + protected SAMDataSource readsDataSource; + + @BeforeClass + public void setup() throws FileNotFoundException { + // sequence + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(10, 0, 10000); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + readsDataSource = null; + } + + @Test + public void testMergingManyContigs() { + executeTest(genomeLocParser.getContigs().getSequences()); + } + + @Test + public void testMergingAllPointersOnSingleContig() { + executeTest(Arrays.asList(genomeLocParser.getContigs().getSequences().get(1))); + } + + @Test + public void testMergingMultipleDiscontinuousContigs() { + final List all = genomeLocParser.getContigs().getSequences(); + executeTest(Arrays.asList(all.get(1), all.get(3))); + } + + private void executeTest(final Collection records) { + final ActiveRegionShardBalancer balancer = new ActiveRegionShardBalancer(); + + final List> expectedLocs = new LinkedList<>(); + final List pointers = new LinkedList<>(); + + for ( final SAMSequenceRecord record : records ) { + final int size = 10; + int end = 0; + for ( int i = 0; i < record.getSequenceLength(); i += size) { + final int myEnd = i + size - 1; + end = myEnd; + final 
GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getSequenceName(), i, myEnd); + final Map fileSpans = Collections.emptyMap(); + final FilePointer fp = new FilePointer(fileSpans, Collections.singletonList(loc)); + pointers.add(fp); + } + expectedLocs.add(Collections.singleton(genomeLocParser.createGenomeLoc(record.getSequenceName(), 0, end))); + } + + balancer.initialize(readsDataSource, pointers.iterator(), genomeLocParser); + + int i = 0; + int nShardsFound = 0; + for ( final Shard shard : balancer ) { + nShardsFound++; + Assert.assertEquals(new HashSet<>(shard.getGenomeLocs()), expectedLocs.get(i++)); + } + Assert.assertEquals(nShardsFound, records.size(), "Didn't find exactly one shard for each contig in the sequence dictionary"); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index 2e6705d77..1f5cd6d0e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -490,7 +490,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { traverseActiveRegions.initialize(engine, walker); List providers = new ArrayList(); - for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) { + for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) { for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples)) { providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); } From 1466396a31aa0d2014c59dfef691c500c6f0c7c8 
Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 25 Apr 2013 01:18:40 -0400 Subject: [PATCH 027/116] Diagnose target is outputting intervals out of order Problem ------- When the interval had no reads, it was being sent to the VCF before the intervals that just got processed, therefore violating the sort order of the VCF. Solution -------- Use a linked hash map, and make the insertion and removal all happen in one place regardless of having reads or not. Since the input is ordered, the output has to be ordered as well. Itemized changes -------------- * Clean up code duplication in LocusStratification and SampleStratification * Add number of uncovered sites and number of low covered sites to the VCF output. * Add new VCF format fields * Fix outputting multiple status when threshold is 0 (ratio must be GREATER THAN not equal to the threshold to get reported) [fixes #48780333] [fixes #48787311] --- .../AbstractStratification.java | 10 +++- .../diagnosetargets/DiagnoseTargets.java | 55 +++++++++---------- .../IntervalStratification.java | 5 +- .../diagnosetargets/LocusStratification.java | 13 +---- .../diagnosetargets/PluginUtils.java | 2 +- .../diagnosetargets/SampleStratification.java | 25 ++++++--- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java index dca83af44..8b7f3dbf2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java @@ -63,6 +63,10 @@ abstract class AbstractStratification { private Map statusTally = null; protected ThresHolder thresholds; + public AbstractStratification(ThresHolder thresholds) { + this.thresholds = 
thresholds; + } + /** * Calculates the average "good" coverage of this sample. Good means "passes the base and * mapping quality requirements. @@ -120,7 +124,7 @@ abstract class AbstractStratification { /** - * Tally up all the callable status of all the loci in this sample. + * Tally up all the callable status of all elements of the stratification. * * @return a map of callable status and counts */ @@ -136,10 +140,10 @@ abstract class AbstractStratification { return statusTally; } - public static List queryStatus(List statList, AbstractStratification stratification) { + public List queryStatus(List statList) { List output = new LinkedList(); for (Metric stat : statList) { - final CallableStatus status = stat.status(stratification); + final CallableStatus status = stat.status(this); if (status != null) { output.add(status); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index 32f87b973..32d866b0a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -112,6 +112,9 @@ import java.util.*; public class DiagnoseTargets extends LocusWalker { private static final String AVG_INTERVAL_DP_KEY = "IDP"; + private static final String LOW_COVERAGE_LOCI = "LL"; + private static final String ZERO_COVERAGE_LOCI = "ZL"; + @Output(doc = "File to which interval statistics should be written") private VariantContextWriter vcfWriter = null; @@ -134,7 +137,7 @@ public class DiagnoseTargets extends LocusWalker { if (getToolkit().getIntervals() == null || getToolkit().getIntervals().isEmpty()) throw new UserException("This tool only works if you provide one or more intervals (use the -L argument). 
If you want to run whole genome, use -T DepthOfCoverage instead."); - intervalMap = new HashMap(INITIAL_HASH_SIZE); + intervalMap = new LinkedHashMap(INITIAL_HASH_SIZE); intervalListIterator = new PeekableIterator(getToolkit().getIntervals().iterator()); // get all of the unique sample names for the VCF Header @@ -151,8 +154,8 @@ public class DiagnoseTargets extends LocusWalker { // process and remove any intervals in the map that are don't overlap the current locus anymore // and add all new intervals that may overlap this reference locus - outputFinishedIntervals(refLocus, ref.getBase()); addNewOverlappingIntervals(refLocus); + outputFinishedIntervals(refLocus, ref.getBase()); // at this point, all intervals in intervalMap overlap with this locus, so update all of them for (IntervalStratification intervalStratification : intervalMap.values()) @@ -203,24 +206,17 @@ public class DiagnoseTargets extends LocusWalker { * @param refBase the reference allele */ private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) { - GenomeLoc interval = intervalListIterator.peek(); - - // output empty statistics for uncovered intervals - while (interval != null && interval.isBefore(refLocus)) { - final IntervalStratification stats = intervalMap.get(interval); - outputStatsToVCF(stats != null ? 
stats : createIntervalStatistic(interval), UNCOVERED_ALLELE); - if (stats != null) intervalMap.remove(interval); - intervalListIterator.next(); - interval = intervalListIterator.peek(); - } - - // remove any potential leftover interval in intervalMap (this will only happen when we have overlapping intervals) + // output any intervals that were finished + final List toRemove = new LinkedList(); for (GenomeLoc key : intervalMap.keySet()) { if (key.isBefore(refLocus)) { outputStatsToVCF(intervalMap.get(key), Allele.create(refBase, true)); - intervalMap.remove(key); + toRemove.add(key); } } + for (GenomeLoc key : toRemove) { + intervalMap.remove(key); + } } /** @@ -247,10 +243,21 @@ public class DiagnoseTargets extends LocusWalker { GenomeLoc interval = stats.getInterval(); - List alleles = new ArrayList(); - Map attributes = new HashMap(); - ArrayList genotypes = new ArrayList(); + final List alleles = new ArrayList(); + final Map attributes = new HashMap(); + final ArrayList genotypes = new ArrayList(); + for (String sample : samples) { + final GenotypeBuilder gb = new GenotypeBuilder(sample); + + SampleStratification sampleStat = stats.getSampleStatistics(sample); + gb.attribute(AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage(interval.size())); + gb.attribute(LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci()); + gb.attribute(ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci()); + gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false)); + + genotypes.add(gb.make()); + } alleles.add(refAllele); alleles.add(SYMBOLIC_ALLELE); VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles); @@ -262,16 +269,6 @@ public class DiagnoseTargets extends LocusWalker { attributes.put(AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size())); vcb = vcb.attributes(attributes); - for (String sample : samples) { - final GenotypeBuilder gb = new 
GenotypeBuilder(sample); - - SampleStratification sampleStat = stats.getSampleStatistics(sample); - gb.attribute(AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage(interval.size())); - - gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false)); - - genotypes.add(gb.make()); - } vcb = vcb.genotypes(genotypes); vcfWriter.add(vcb.make()); @@ -345,6 +342,8 @@ public class DiagnoseTargets extends LocusWalker { // FORMAT fields for each genotype headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average sample depth across the interval. Sum of the sample specific depth in all loci divided by interval size.")); + headerLines.add(new VCFFormatHeaderLine(LOW_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with low coverage (below the minimum coverage) but not zero.")); + headerLines.add(new VCFFormatHeaderLine(ZERO_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with zero coverage.")); // FILTER fields for (CallableStatus stat : CallableStatus.values()) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java index 6c20403d1..86e9d0142 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java @@ -56,11 +56,10 @@ import java.util.*; final class IntervalStratification extends AbstractStratification { private final Map samples; private final GenomeLoc interval; - private final ThresHolder thresholds; public IntervalStratification(Set samples, GenomeLoc interval, 
ThresHolder thresholds) { + super(thresholds); this.interval = interval; - this.thresholds = thresholds; this.samples = new HashMap(samples.size()); for (String sample : samples) this.samples.put(sample, new SampleStratification(interval, thresholds)); @@ -125,7 +124,7 @@ final class IntervalStratification extends AbstractStratification { } } - output.addAll(queryStatus(thresholds.intervalMetricList, this)); + output.addAll(queryStatus(thresholds.intervalMetricList)); return output; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java index d6acaf850..5902fce31 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java @@ -46,22 +46,20 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets; -import java.util.LinkedList; import java.util.List; final class LocusStratification extends AbstractStratification { private long coverage; private long rawCoverage; - private final List locusStatisticsList; public LocusStratification(ThresHolder thresholds) { this(0,0,thresholds); } protected LocusStratification(int coverage, int rawCoverage, ThresHolder thresholds) { + super(thresholds); this.coverage = coverage; this.rawCoverage = rawCoverage; - this.locusStatisticsList = thresholds.locusMetricList; } @Override @@ -79,14 +77,7 @@ final class LocusStratification extends AbstractStratification { * @return a set of all statuses that apply */ public List callableStatuses() { - List output = new LinkedList(); - for (Metric stats : locusStatisticsList) { - CallableStatus status = stats.status(this); - if (status != null) { - output.add(status); - } - } - return output; + return 
queryStatus(thresholds.locusMetricList); } @Override diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java index 1085e8cac..7984ba7e7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java @@ -58,6 +58,6 @@ final class PluginUtils { final Map totals = sampleStratification.getStatusTally(); final int size = sampleStratification.getIntervalSize(); final int statusCount = totals.containsKey(CALL) ? totals.get(CALL) : 0; - return ( (double) statusCount / size) >= threshold ? CALL: null; + return ( (double) statusCount / size) > threshold ? CALL: null; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java index b9ae1f3cf..49aa10cf6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java @@ -61,15 +61,14 @@ import java.util.List; final class SampleStratification extends AbstractStratification { private final GenomeLoc interval; private final ArrayList loci; - private final ThresHolder thresholds; private int nReads = -1; private int nBadMates = -1; public SampleStratification(final GenomeLoc interval, final ThresHolder thresholds) { + super(thresholds); this.interval = interval; this.loci = new ArrayList(interval.size()); - this.thresholds = thresholds; nReads = 0; nBadMates = 0; @@ -121,7 +120,7 @@ final class SampleStratification extends AbstractStratification { public Iterable 
callableStatuses() { final List output = new LinkedList(); - // get the tally of all the locus callable statuses + // get the sample statuses of all the Loci Metrics for (Metric locusStat : thresholds.locusMetricList) { final CallableStatus status = ((LocusMetric) locusStat).sampleStatus(this); if (status != null) { @@ -130,12 +129,7 @@ final class SampleStratification extends AbstractStratification { } // get the sample specific statitics statuses - for (Metric sampleStat : thresholds.sampleMetricList) { - final CallableStatus status = sampleStat.status(this); - if (status != null) { - output.add(status); - } - } + output.addAll(queryStatus(thresholds.sampleMetricList)); // special case, if there are no reads, then there is no sense reporting coverage gaps. if (output.contains(CallableStatus.NO_READS) && output.contains(CallableStatus.COVERAGE_GAPS)) @@ -159,4 +153,17 @@ final class SampleStratification extends AbstractStratification { read.setTemporaryAttribute("seen", true); } } + + public int getNLowCoveredLoci() { + return getCallableStatusCount(CallableStatus.LOW_COVERAGE); + } + + public int getNUncoveredLoci() { + return getCallableStatusCount(CallableStatus.COVERAGE_GAPS); + } + + private int getCallableStatusCount(CallableStatus status) { + final Integer x = getStatusTally().get(status); + return x == null ? 
0 : x; + } } From 3dbb86b05253ce407b540cb8fe6d1cd66cb92a0d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 26 Apr 2013 23:29:25 -0400 Subject: [PATCH 028/116] Outputting missing intervals in DiagnoseTargets Problem ------ Diagnose Targets identifies holes in the coverage of a targeted experiment, but it only reports them and doesn't list the actual missing loci Solution ------ This commit implements an optional intervals file output listing the exact loci that did not pass filters Itemized changes -------------- * Cache callable statuses (to avoid recalculation) * Add functionality to output missing intervals * Implement new tool to qualify the missing intervals (QualifyMissingIntervals) by gc content, size, type of missing coverage and origin (coding sequence, intron, ...) --- .../AbstractStratification.java | 2 +- .../diagnosetargets/DiagnoseTargets.java | 79 ++++++++++++++++--- .../IntervalStratification.java | 9 ++- .../diagnosetargets/SampleStratification.java | 2 +- .../diagnosetargets/ThresHolder.java | 5 ++ 5 files changed, 85 insertions(+), 12 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java index 8b7f3dbf2..ceccdcb2e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java @@ -120,7 +120,7 @@ abstract class AbstractStratification { * * @return the callable status(es) for the whole object */ - public abstract Iterable callableStatuses(); + public abstract List callableStatuses(); /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index 32d866b0a..a3ac21ae0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -65,6 +65,8 @@ import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.vcf.*; +import java.io.FileWriter; +import java.io.IOException; import java.util.*; /** @@ -122,13 +124,12 @@ public class DiagnoseTargets extends LocusWalker { @ArgumentCollection private ThresHolder thresholds = new ThresHolder(); - private Map intervalMap = null; // maps each interval => statistics + private Map intervalMap = null; // maps each interval => statistics private PeekableIterator intervalListIterator; // an iterator to go over all the intervals provided as we traverse the genome private Set samples = null; // all the samples being processed private static final Allele SYMBOLIC_ALLELE = Allele.create("
", false); // avoid creating the symbolic allele multiple times private static final Allele UNCOVERED_ALLELE = Allele.create("A", true); // avoid creating the 'fake' ref allele for uncovered intervals multiple times - - private static final int INITIAL_HASH_SIZE = 500000; + private static final int INITIAL_HASH_SIZE = 50; // enough room for potential overlapping intervals plus recently finished intervals @Override public void initialize() { @@ -149,7 +150,7 @@ public class DiagnoseTargets extends LocusWalker { } @Override - public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public Long map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { GenomeLoc refLocus = ref.getLocus(); // process and remove any intervals in the map that are don't overlap the current locus anymore @@ -187,7 +188,7 @@ public class DiagnoseTargets extends LocusWalker { * @param result number of loci processed by the walker */ @Override - public void onTraversalDone(Long result) { + public void onTraversalDone(final Long result) { for (GenomeLoc interval : intervalMap.keySet()) outputStatsToVCF(intervalMap.get(interval), UNCOVERED_ALLELE); @@ -197,6 +198,14 @@ public class DiagnoseTargets extends LocusWalker { intervalListIterator.next(); interval = intervalListIterator.peek(); } + + if (thresholds.missingTargets != null) { + try { + thresholds.missingTargets.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } } /** @@ -210,7 +219,11 @@ public class DiagnoseTargets extends LocusWalker { final List toRemove = new LinkedList(); for (GenomeLoc key : intervalMap.keySet()) { if (key.isBefore(refLocus)) { - outputStatsToVCF(intervalMap.get(key), Allele.create(refBase, true)); + final IntervalStratification intervalStats = intervalMap.get(key); + outputStatsToVCF(intervalStats, Allele.create(refBase, true)); + if (hasMissingLoci(intervalStats)) { + outputMissingInterval(intervalStats); + } 
toRemove.add(key); } } @@ -224,7 +237,7 @@ public class DiagnoseTargets extends LocusWalker { * * @param refLocus the current reference locus */ - private void addNewOverlappingIntervals(GenomeLoc refLocus) { + private void addNewOverlappingIntervals(final GenomeLoc refLocus) { GenomeLoc interval = intervalListIterator.peek(); while (interval != null && !interval.isPast(refLocus)) { intervalMap.put(interval, createIntervalStatistic(interval)); @@ -239,10 +252,9 @@ public class DiagnoseTargets extends LocusWalker { * @param stats The statistics of the interval * @param refAllele the reference allele */ - private void outputStatsToVCF(IntervalStratification stats, Allele refAllele) { + private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) { GenomeLoc interval = stats.getInterval(); - final List alleles = new ArrayList(); final Map attributes = new HashMap(); final ArrayList genotypes = new ArrayList(); @@ -274,6 +286,55 @@ public class DiagnoseTargets extends LocusWalker { vcfWriter.add(vcb.make()); } + private boolean hasMissingStatuses(AbstractStratification stats) { + return !stats.callableStatuses().isEmpty(); + } + + private boolean hasMissingLoci(final IntervalStratification stats) { + return thresholds.missingTargets != null && hasMissingStatuses(stats); + } + + private void outputMissingInterval(final IntervalStratification stats) { + final GenomeLoc interval = stats.getInterval(); + final boolean missing[] = new boolean[interval.size()]; + Arrays.fill(missing, true); + for (AbstractStratification sample : stats.getElements()) { + if (hasMissingStatuses(sample)) { + int pos = 0; + for (AbstractStratification locus : sample.getElements()) { + if (locus.callableStatuses().isEmpty()) { + missing[pos] = false; + } + pos++; + } + } + } + int start = -1; + boolean insideMissing = false; + for (int i = 0; i < missing.length; i++) { + if (missing[i] && !insideMissing) { + start = interval.getStart() + i; + insideMissing = true; + } 
else if (!missing[i] && insideMissing) { + final int stop = interval.getStart() + i - 1; + outputMissingInterval(interval.getContig(), start, stop); + insideMissing = false; + } + } + if (insideMissing) { + outputMissingInterval(interval.getContig(), start, interval.getStop()); + } + } + + private void outputMissingInterval(final String contig, final int start, final int stop){ + final FileWriter out = thresholds.missingTargets; + try { + out.write(String.format("%s:%d-%d\n", contig, start, stop)); + } catch (IOException e) { + e.printStackTrace(); + } + } + /** * Function that process a set of statuses into strings * diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java index 86e9d0142..3b5a23d51 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java @@ -56,6 +56,7 @@ import java.util.*; final class IntervalStratification extends AbstractStratification { private final Map samples; private final GenomeLoc interval; + private List callableStatuses; public IntervalStratification(Set samples, GenomeLoc interval, ThresHolder thresholds) { super(thresholds); @@ -113,7 +114,13 @@ final class IntervalStratification extends AbstractStratification { * {@inheritDoc} */ @Override - public Iterable callableStatuses() { + public List callableStatuses() { + if (callableStatuses == null) + callableStatuses = calculateStatus(); + return callableStatuses; + } + + private List calculateStatus() { final List output = new LinkedList(); // check if any of the votes pass the threshold diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java index 49aa10cf6..0f84c7d22 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java @@ -117,7 +117,7 @@ final class SampleStratification extends AbstractStratification { * {@inheritDoc} */ @Override - public Iterable callableStatuses() { + public List callableStatuses() { final List output = new LinkedList(); // get the sample statuses of all the Loci Metrics diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java index b0c999460..8c5a75148 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java @@ -47,7 +47,9 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import java.io.FileWriter; import java.util.LinkedList; import java.util.List; @@ -114,6 +116,9 @@ final class ThresHolder { @Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The proportion of the loci needed for calling POOR_QUALITY", required = false) public double qualityStatusThreshold = 0.50; + @Output(fullName = "missing_intervals", shortName = "missing", doc ="Produces a file with the intervals that don't pass filters", required = false) + public FileWriter missingTargets = null; + public final List locusMetricList = new LinkedList(); public final List sampleMetricList = new LinkedList(); public final List intervalMetricList = new 
LinkedList(); From 9eceae793a249f0025d5f1c18d803121c5564d4c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 2 May 2013 13:41:25 -0400 Subject: [PATCH 029/116] Tool to manipulate intervals outside the GATK Performs basic set operations on intervals like union, intersect and difference between two or more intervals. Useful for techdev and QC purposes. --- .../diagnosetargets/DiagnoseTargets.java | 19 +++++-------------- .../diagnosetargets/ThresHolder.java | 4 ++-- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index a3ac21ae0..4bd08294b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -65,8 +65,7 @@ import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.vcf.*; -import java.io.FileWriter; -import java.io.IOException; +import java.io.PrintStream; import java.util.*; /** @@ -200,11 +199,7 @@ public class DiagnoseTargets extends LocusWalker { } if (thresholds.missingTargets != null) { - try { - thresholds.missingTargets.close(); - } catch (IOException e) { - e.printStackTrace(); - } + thresholds.missingTargets.close(); } } @@ -326,13 +321,9 @@ public class DiagnoseTargets extends LocusWalker { } } - private void outputMissingInterval(final String contig, final int start, final int stop){ - final FileWriter out = thresholds.missingTargets; - try { - out.write(String.format("%s:%d-%d\n", contig, start, stop)); - } catch (IOException e) { - e.printStackTrace(); - } + private void outputMissingInterval(final String contig, final int start, final 
int stop) { + final PrintStream out = thresholds.missingTargets; + out.println(String.format("%s:%d-%d", contig, start, stop)); } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java index 8c5a75148..ebe2192b4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java @@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import java.io.FileWriter; +import java.io.PrintStream; import java.util.LinkedList; import java.util.List; @@ -117,7 +117,7 @@ final class ThresHolder { public double qualityStatusThreshold = 0.50; @Output(fullName = "missing_intervals", shortName = "missing", doc ="Produces a file with the intervals that don't pass filters", required = false) - public FileWriter missingTargets = null; + public PrintStream missingTargets = null; public final List locusMetricList = new LinkedList(); public final List sampleMetricList = new LinkedList(); From adcbf947bfd57b1fef9305dd73ef1f0b2d020b04 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 13 May 2013 11:28:44 -0400 Subject: [PATCH 030/116] Update MD5s and the Diagnose Target scala script --- .../diagnosetargets/DiagnoseTargetsIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java index bac09f30d..52e385957 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java @@ -66,11 +66,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testSingleSample() { - DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "850304909477afa8c2a8f128d6eedde9"); + DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "1771e95aed2b3b240dc353f84e19847d"); } @Test(enabled = true) public void testMultiSample() { - DTTest("testMultiSample ", "-I " + multiSample, "bedd19bcf21d1a779f6706c0351c9d26"); + DTTest("testMultiSample ", "-I " + multiSample, "c7f1691dbe5f121c4a79be823d3057e5"); } } From 7d78a77f1706016aaca35cc1b0879a07a2801011 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 14 Apr 2013 18:45:27 -0400 Subject: [PATCH 032/116] Trivial update to ceutrio.ped file to make it really the CEU trio sample names --- .../org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java index 295b31203..23f8bc1f7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java @@ -45,7 +45,7 @@ import java.util.*; public class SampleDBUnitTest extends BaseTest { private static SampleDBBuilder builder; // all the test sample files are located here - private File testPED = new File(privateTestDir + "ceutrio.ped"); + private File testPED = new File(privateTestDir + "testtrio.ped"); private static final Set testPEDSamples = new HashSet(Arrays.asList( new Sample("kid", "fam1", "dad", "mom", Gender.MALE, 
Affection.AFFECTED), From 6da0aed30ff01d0a35fec96f84d2895c17c6ee12 Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Tue, 14 May 2013 19:45:30 -0400 Subject: [PATCH 033/116] Update GCIT md5s to account for trivial changes to description strings --- .../GenotypeConcordanceIntegrationTest.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java index ffd358a6e..830b9169d 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java @@ -64,7 +64,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"), 0, - Arrays.asList("6fe03c63a76cb61a76e550137ebf8c5e") + Arrays.asList("e4368146ffed2c6abf8265f5fbc5875d") ); executeTest("test indel concordance", spec); @@ -75,7 +75,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"), 0, - Arrays.asList("6246d81b25a9a96e379c47056177a65d") + Arrays.asList("361e00e430f36e4237f888c97d40efca") ); executeTest("test non-overlapping samples", spec); @@ -86,7 +86,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize", 0, - Arrays.asList("ee1da9b0119ce7869b2d05d81cef255e") + 
Arrays.asList("9573b763303d70405ea48ab1515a0802") ); executeTest("Test moltenized output",spec); @@ -97,7 +97,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"), 0, - Arrays.asList("a1c48b041b0f0b8bf9387d5db337e5a1") + Arrays.asList("0105fcde492fe55ee12a4a4508238806") ); executeTest("test multiple records per site",spec); @@ -108,7 +108,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'", 0, - Arrays.asList("7f52e70482c30031bedf2fcc6bd359b2") + Arrays.asList("d70a7a90900560f525b58004ba258111") ); executeTest("Test filtering on the EVAL rod",spec); @@ -119,7 +119,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'", 0, - Arrays.asList("1402712d1ab18bafa5bac130af2f974c") + Arrays.asList("2b01ef6285eefc27d86f5f8050272e51") ); executeTest("Test filtering on the COMP rod", spec); @@ -130,7 +130,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'", 0, - Arrays.asList("6b83695122481d2dcbe3c792caf743a1") + Arrays.asList("323fba26a65596f142cfa387ca464c32") ); executeTest("Test filtering on both rods",spec); From 371f3752c161d13eadd1cf75e06287037f51f3a6 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 14 May 2013 17:03:36 -0400 Subject: [PATCH 034/116] Subshard timeouts in the GATK -- The previous implementation of the maxRuntime would require us to wait until 
all of the work was completed within a shard, which can be a substantial amount of work in the case of a locus walker with 16kb shards. -- This implementation ensures that we exit from the traversal very soon after the max runtime is exceeded, without completing all of our work within the shard. This is done by updating all of the traversal engines to return false for hasNext() in the nano scheduled input provider. So as soon as the timeout is exceeded, we stop generating additional data to process, and we only have to wait until the currently executing data processing unit (locus, read, active region) completes. -- In order to implement this timeout efficiently at this fine scale, the progress meter now lives in the genome analysis engine, and the exceedsRuntimeLimit() call in the engine looks at a periodically updated runtime variable in the meter. This variable contains the elapsed runtime of the engine, but is updated by the progress meter daemon thread so that the engine doesn't call System.nanoTime() in each cycle of the engine, which would be very expensive. Instead we basically wait for the daemon to update this variable, and so our precision of timing out is limited by the update frequency of the daemon, which is on the order of every few hundred milliseconds, totally fine for a timeout. 
-- Added integration tests to ensure that subshard timeouts are working properly --- .../sting/gatk/GenomeAnalysisEngine.java | 76 ++++++++++++++++--- .../sting/gatk/executive/MicroScheduler.java | 24 ++---- .../traversals/TraverseActiveRegions.java | 2 + .../gatk/traversals/TraverseLociNano.java | 2 +- .../gatk/traversals/TraverseReadsNano.java | 2 +- .../utils/progressmeter/ProgressMeter.java | 36 +++++++++ .../progressmeter/ProgressMeterDaemon.java | 1 + .../sting/gatk/MaxRuntimeIntegrationTest.java | 64 ++++++++++++++++ .../ProgressMeterDaemonUnitTest.java | 11 +++ 9 files changed, 190 insertions(+), 28 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 9dcba25ff..314de29c7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -60,6 +60,7 @@ import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; import org.broadinstitute.sting.utils.recalibration.BQSRArgumentSet; import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; @@ -194,6 +195,11 @@ public class GenomeAnalysisEngine { */ private ThreadEfficiencyMonitor threadEfficiencyMonitor = null; + /** + * The global progress meter we are using to track our progress through the genome + */ + private ProgressMeter progressMeter = null; + /** * Set the reference metadata files to use for this traversal. * @param referenceMetaDataFiles Collection of files and descriptors over which to traverse. 
@@ -202,6 +208,12 @@ public class GenomeAnalysisEngine { this.referenceMetaDataFiles = referenceMetaDataFiles; } + /** + * The maximum runtime of this engine, in nanoseconds, set during engine initialization + * from the GATKArgumentCollection command line value + */ + private long runtimeLimitInNanoseconds = -1; + /** * Static random number generator and seed. */ @@ -252,6 +264,9 @@ public class GenomeAnalysisEngine { if (args.BQSR_RECAL_FILE != null) setBaseRecalibration(args); + // setup the runtime limits + setupRuntimeLimits(args); + // Determine how the threads should be divided between CPU vs. IO. determineThreadAllocation(); @@ -1067,22 +1082,52 @@ public class GenomeAnalysisEngine { return CommandLineUtils.createApproximateCommandLineArgumentString(parsingEngine,argumentProviders); } + // ------------------------------------------------------------------------------------- + // + // code for working with progress meter + // + // ------------------------------------------------------------------------------------- + + /** + * Register the global progress meter with this engine + * + * Calling this function more than once will result in an IllegalStateException + * + * @param meter a non-null progress meter + */ + public void registerProgressMeter(final ProgressMeter meter) { + if ( meter == null ) throw new IllegalArgumentException("Meter cannot be null"); + if ( progressMeter != null ) throw new IllegalStateException("Progress meter already set"); + + progressMeter = meter; + } + + /** + * Get the progress meter being used by this engine. May be null if no meter has been registered yet + * @return a potentially null pointer to the progress meter + */ + public ProgressMeter getProgressMeter() { + return progressMeter; + } + /** * Does the current runtime in unit exceed the runtime limit, if one has been provided? 
* - * @param runtime the runtime of this GATK instance in minutes - * @param unit the time unit of runtime * @return false if not limit was requested or if runtime <= the limit, true otherwise */ - public boolean exceedsRuntimeLimit(final long runtime, final TimeUnit unit) { + public boolean exceedsRuntimeLimit() { + if ( progressMeter == null ) + // not yet initialized or not set because of testing + return false; + + final long runtime = progressMeter.getRuntimeInNanosecondsUpdatedPeriodically(); if ( runtime < 0 ) throw new IllegalArgumentException("runtime must be >= 0 but got " + runtime); if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT ) return false; else { - final long actualRuntimeNano = TimeUnit.NANOSECONDS.convert(runtime, unit); final long maxRuntimeNano = getRuntimeLimitInNanoseconds(); - return actualRuntimeNano > maxRuntimeNano; + return runtime > maxRuntimeNano; } } @@ -1090,9 +1135,22 @@ public class GenomeAnalysisEngine { * @return the runtime limit in nanoseconds, or -1 if no limit was specified */ public long getRuntimeLimitInNanoseconds() { - if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT ) - return -1; - else - return TimeUnit.NANOSECONDS.convert(getArguments().maxRuntime, getArguments().maxRuntimeUnits); + return runtimeLimitInNanoseconds; + } + + /** + * Setup the runtime limits for this engine, updating the runtimeLimitInNanoseconds + * as appropriate + * + * @param args the GATKArgumentCollection to retrieve our runtime limits from + */ + private void setupRuntimeLimits(final GATKArgumentCollection args) { + if ( args.maxRuntime == NO_RUNTIME_LIMIT ) + runtimeLimitInNanoseconds = -1; + else if (args.maxRuntime < 0 ) + throw new UserException.BadArgumentValue("maxRuntime", "must be >= 0 or == -1 (meaning no limit) but received negative value " + args.maxRuntime); + else { + runtimeLimitInNanoseconds = TimeUnit.NANOSECONDS.convert(args.maxRuntime, args.maxRuntimeUnits); + } } } diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 23b084d66..4ffdc88d8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -120,8 +120,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { */ ThreadEfficiencyMonitor threadEfficiencyMonitor = null; - final ProgressMeter progressMeter; - /** * MicroScheduler factory function. Create a microscheduler appropriate for reducing the * selected walker. @@ -146,8 +144,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { logger.warn(String.format("Number of requested GATK threads %d is more than the number of " + "available processors on this machine %d", threadAllocation.getTotalNumThreads(), Runtime.getRuntime().availableProcessors())); -// if ( threadAllocation.getNumDataThreads() > 1 && threadAllocation.getNumCPUThreadsPerDataThread() > 1) -// throw new UserException("The GATK currently doesn't support running with both -nt > 1 and -nct > 1"); } if ( threadAllocation.getNumDataThreads() > 1 ) { @@ -206,14 +202,14 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { availableTraversalEngines.add(traversalEngine); } - // Create our progress meter - this.progressMeter = new ProgressMeter(progressLogFile, + // Create the progress meter, and register it with the analysis engine + engine.registerProgressMeter(new ProgressMeter(progressLogFile, availableTraversalEngines.peek().getTraversalUnits(), - engine.getRegionsOfGenomeBeingProcessed()); + engine.getRegionsOfGenomeBeingProcessed())); // Now that we have a progress meter, go through and initialize the traversal engines for ( final TraversalEngine traversalEngine : allCreatedTraversalEngines ) - traversalEngine.initialize(engine, walker, progressMeter); + 
traversalEngine.initialize(engine, walker, engine.getProgressMeter()); // JMX does not allow multiple instances with the same ObjectName to be registered with the same platform MXBean. // To get around this limitation and since we have no job identifier at this point, register a simple counter that @@ -282,7 +278,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { * @return true if we should abort execution, or false otherwise */ protected boolean abortExecution() { - final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS); + final boolean abort = engine.exceedsRuntimeLimit(); if ( abort ) { final AutoFormattingTime aft = new AutoFormattingTime(engine.getRuntimeLimitInNanoseconds(), -1, 4); logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft); @@ -308,7 +304,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { * Currently only starts the progress meter timer running, but other start up activities could be incorporated */ protected void startingExecution() { - progressMeter.start(); + engine.getProgressMeter().start(); } /** @@ -330,7 +326,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { * Must be called by subclasses when execute is done */ protected void executionIsDone() { - progressMeter.notifyDone(engine.getCumulativeMetrics().getNumIterations()); + engine.getProgressMeter().notifyDone(engine.getCumulativeMetrics().getNumIterations()); printReadFilteringStats(); shutdownTraversalEngines(); @@ -347,12 +343,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { * pointers to the traversal engines */ public synchronized void shutdownTraversalEngines() { - // no longer applicable because engines are allocated to keys now -// if ( availableTraversalEngines.size() != allCreatedTraversalEngines.size() ) -// throw new IllegalStateException("Shutting down 
TraversalEngineCreator but not all engines " + -// "have been returned. Expected " + allCreatedTraversalEngines.size() + " but only " + availableTraversalEngines.size() -// + " have been returned"); - for ( final TraversalEngine te : allCreatedTraversalEngines) te.shutdown(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index b47a355be..b1e5b907f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -291,6 +291,8 @@ public final class TraverseActiveRegions extends TraversalEngine extends TraversalEngine, @Override public boolean hasNext() { - return locusView.hasNext(); + return locusView.hasNext() && ! engine.exceedsRuntimeLimit(); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java index 40b3a1812..09c79a168 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java @@ -133,7 +133,7 @@ public class TraverseReadsNano extends TraversalEngine, final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider); final Iterator readIterator = reads.iterator(); - @Override public boolean hasNext() { return readIterator.hasNext(); } + @Override public boolean hasNext() { return ! 
engine.exceedsRuntimeLimit() && readIterator.hasNext(); } @Override public MapData next() { diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java index f76490552..9d1011c8f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java +++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java @@ -149,6 +149,12 @@ public class ProgressMeter { private Position position = new Position(PositionStatus.STARTING); private long nTotalRecordsProcessed = 0; + /** + * The elapsed time in nanosecond, updated by the daemon thread, so that + * we don't pay any system call overhead to determine the the elapsed time. + */ + private long elapsedTimeInNanosecondUpdatedByDaemon = 0; + final ProgressMeterDaemon progressMeterDaemon; /** @@ -225,6 +231,36 @@ public class ProgressMeter { return timer.getElapsedTimeNano(); } + /** + * This function is just like getRuntimeInNanoseconds but it doesn't actually query the + * system timer to determine the value, but rather uses a local variable in this meter + * that is updated by the daemon thread. This means that the result is ridiculously imprecise + * for a nanosecond value (as it's only updated each pollingFrequency of the daemon) but + * it is free for clients to access, which can be critical when one wants to do tests like: + * + * for some work unit: + * do unit if getRuntimeInNanosecondsUpdatedPeriodically < X + * + * and have this operation eventually timeout but don't want to pay the system call time to + * ensure that the loop exits as soon as the elapsed time exceeds X + * + * @return the current runtime in nanoseconds + */ + @Ensures("result >= 0") + public long getRuntimeInNanosecondsUpdatedPeriodically() { + return elapsedTimeInNanosecondUpdatedByDaemon; + } + + /** + * Update the period runtime variable to the current runtime in nanoseconds. 
Should only + * be called by the daemon thread + */ + protected void updateElapsedTimeInNanoseconds() { + elapsedTimeInNanosecondUpdatedByDaemon = getRuntimeInNanoseconds(); + } + + + /** * Utility routine that prints out process information (including timing) every N records or * every M seconds, for N and M set in global variables. diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java index 30abef8b8..38316e537 100644 --- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java +++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java @@ -100,6 +100,7 @@ public final class ProgressMeterDaemon extends Thread { public void run() { while (! done) { meter.printProgress(false); + meter.updateElapsedTimeInNanoseconds(); try { Thread.sleep(getPollFrequencyMilliseconds()); } catch (InterruptedException e) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java index e6176dbe8..5b3f1e790 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java @@ -26,19 +26,52 @@ package org.broadinstitute.sting.gatk; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.utils.SimpleTimer; import org.testng.Assert; import org.testng.annotations.DataProvider; import 
org.testng.annotations.Test; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.PrintStream; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.List; import java.util.concurrent.TimeUnit; /** * */ public class MaxRuntimeIntegrationTest extends WalkerTest { + public static class SleepingWalker extends LocusWalker { + @Output PrintStream out; + + @Argument(fullName="sleepTime",shortName="sleepTime",doc="x", required=false) + public int sleepTime = 100; + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + try {Thread.sleep(sleepTime);} catch (InterruptedException e) {}; + return 1; + } + + @Override public Integer reduceInit() { return 0; } + @Override public Integer reduce(Integer value, Integer sum) { return sum + value; } + + @Override + public void onTraversalDone(Integer result) { + out.println(result); + } + } + private static final long STARTUP_TIME = TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS); private class MaxRuntimeTestProvider extends TestDataProvider { @@ -84,4 +117,35 @@ public class MaxRuntimeIntegrationTest extends WalkerTest { + " exceeded max. 
tolerated runtime " + TimeUnit.SECONDS.convert(cfg.expectedMaxRuntimeNano(), TimeUnit.NANOSECONDS) + " given requested runtime " + cfg.maxRuntime + " " + cfg.unit); } + + @DataProvider(name = "SubshardProvider") + public Object[][] makeSubshardProvider() { + List tests = new ArrayList(); + + // this functionality can be adapted to provide input data for whatever you might want in your data + tests.add(new Object[]{10}); + tests.add(new Object[]{100}); + tests.add(new Object[]{500}); + tests.add(new Object[]{1000}); + tests.add(new Object[]{2000}); + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "SubshardProvider", timeOut = 120 * 1000) + public void testSubshardTimeout(final int sleepTime) throws Exception { + final int maxRuntime = 5000; + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T SleepingWalker -R " + b37KGReference + + " -I " + privateTestDir + "NA12878.100kb.BQSRv2.example.bam -o %s" + + " -maxRuntime " + maxRuntime + " -maxRuntimeUnits MILLISECONDS -sleepTime " + sleepTime, 1, + Collections.singletonList("")); + final File result = executeTest("Subshard max runtime ", spec).getFirst().get(0); + final int cycle = Integer.valueOf(new BufferedReader(new FileReader(result)).readLine()); + + final int maxCycles = (int)Math.ceil((maxRuntime * 5) / sleepTime); + logger.warn(String.format("Max cycles %d saw %d in file %s with sleepTime %d and maxRuntime %d", maxCycles, cycle, result, sleepTime, maxRuntime)); + Assert.assertTrue(cycle < maxCycles, "Too many cycles seen -- saw " + cycle + " in file " + result + " but max should have been " + maxCycles); + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemonUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemonUnitTest.java index d127a2937..767646963 100644 --- 
a/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemonUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemonUnitTest.java @@ -84,10 +84,19 @@ public class ProgressMeterDaemonUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } + @Test + public void testPeriodUpdateNano() { + final ProgressMeter meter = new TestingProgressMeter(10); + final long currentTime = meter.getRuntimeInNanoseconds(); + meter.updateElapsedTimeInNanoseconds(); + Assert.assertTrue( meter.getRuntimeInNanosecondsUpdatedPeriodically() > currentTime, "Updating the periodic runtime failed" ); + } + @Test(dataProvider = "PollingData", invocationCount = 10, successPercentage = 90) public void testProgressMeterDaemon(final long poll, final int ticks) throws InterruptedException { final TestingProgressMeter meter = new TestingProgressMeter(poll); final ProgressMeterDaemon daemon = meter.getProgressMeterDaemon(); + Assert.assertTrue(daemon.isDaemon()); Assert.assertFalse(daemon.isDone()); @@ -106,5 +115,7 @@ public class ProgressMeterDaemonUnitTest extends BaseTest { final int tolerance = (int)Math.ceil(0.8 * meter.progressCalls.size()); Assert.assertTrue(Math.abs(meter.progressCalls.size() - ticks) <= tolerance, "Expected " + ticks + " progress calls from daemon thread, but got " + meter.progressCalls.size() + " and a tolerance of only " + tolerance); + + Assert.assertTrue(meter.getRuntimeInNanosecondsUpdatedPeriodically() > 0, "Daemon should have updated our periodic runtime"); } } From 3e2a0b15ed1fec4915abc15896425b0cf77c5600 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Mon, 13 May 2013 17:19:40 -0400 Subject: [PATCH 035/116] - Added a @Hidden option ( -outputInsertLength ) to PileupWalker that causes it to emit insert sizes together with the pileup (to assist Mark Daly's investigation of the contamination dependance on insert length) - Converted my old GATKBAMIndexText (within 
PileupWalkerIntegrationTest) to use a dataProvider - Added two integration tests to test -outputInsertLength option --- .../sting/gatk/walkers/qc/Pileup.java | 23 +++++-- .../qc/PileupWalkerIntegrationTest.java | 60 +++++++++++++------ 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java index bc98c670a..23bbf1460 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java @@ -26,10 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; import org.broad.tribble.Feature; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -96,6 +93,10 @@ public class Pileup extends LocusWalker implements TreeReducibl @Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false) public List> rods = Collections.emptyList(); + @Hidden + @Argument(fullName="outputInsertLength",shortName = "outputInsertLength",doc="Add a column which contains the length of the insert each base comes from.",required=false) + public boolean outputInsertLength=false; + @Override public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { final String rods = getReferenceOrderedData( tracker ); @@ -104,6 +105,8 @@ public class Pileup extends LocusWalker implements TreeReducibl final StringBuilder s = new StringBuilder(); s.append(String.format("%s %s", 
basePileup.getPileupString((char)ref.getBase()), rods)); + if ( outputInsertLength ) + s.append(" ").append(insertLengthOutput(basePileup)); if ( SHOW_VERBOSE ) s.append(" ").append(createVerboseOutput(basePileup)); s.append("\n"); @@ -143,6 +146,18 @@ public class Pileup extends LocusWalker implements TreeReducibl return rodString; } + private static String insertLengthOutput(final ReadBackedPileup pileup) { + + Integer[] insertSizes=new Integer[pileup.depthOfCoverage()]; + + int i=0; + for ( PileupElement p : pileup ) { + insertSizes[i]=p.getRead().getInferredInsertSize(); + ++i; + } + return Utils.join(",",insertSizes); + } + private static String createVerboseOutput(final ReadBackedPileup pileup) { final StringBuilder sb = new StringBuilder(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java index 76654fb74..6141a484c 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java @@ -26,13 +26,14 @@ package org.broadinstitute.sting.gatk.walkers.qc; import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; public class PileupWalkerIntegrationTest extends WalkerTest { - String gatkSpeedupArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam " - + "-R " + hg19Reference + " -o %s "; @Test public void testGnarleyFHSPileup() { @@ -73,25 +74,50 @@ public class PileupWalkerIntegrationTest extends WalkerTest { //testing speedup to GATKBAMIndex - @Test - public void testPileupOnLargeBamChr20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050", 1, 
Arrays.asList("8702701350de11a6d28204acefdc4775")); - executeTest("Testing single on big BAM at start of chromosome 20", spec); + @DataProvider(name="GATKBAMIndexTest") + public Object[][] makeMyDataProvider() { + List tests = new ArrayList(); + tests.add(new Object[]{"-L 20:1-76,050","8702701350de11a6d28204acefdc4775"}); + tests.add(new Object[]{"-L 20:10,000,000-10,001,100","818cf5a8229efe6f89fc1cd8145ccbe3"}); + tests.add(new Object[]{"-L 20:62,954,114-63,025,520","22471ea4a12e5139aef62bf8ff2a5b63"}); + tests.add(new Object[]{"-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ","08d899ed7c5a76ef3947bf67338acda1"}); + return tests.toArray(new Object[][]{}); } - @Test - public void testPileupOnLargeBamMid20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:10,000,000-10,001,100", 1, Arrays.asList("818cf5a8229efe6f89fc1cd8145ccbe3")); - executeTest("Testing single on big BAM somewhere in chromosome 20", spec); + + @Test(dataProvider = "GATKBAMIndexTest") + public void testGATKBAMIndexSpeedup(final String intervals, final String md5){ + final String gatkArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam " + + "-R " + hg19Reference + " -o %s "; + + WalkerTestSpec spec = new WalkerTestSpec(gatkArgs + intervals, 1, Arrays.asList(md5)); + executeTest("Testing with intervals="+intervals, spec); } + + + /***********************/ + + // testing hidden option -outputInsertLength + private final static String SingleReadAligningOffChromosome1withInsertLengthMD5 = "279e2ec8832e540f47a6e2bdf4cef5ea"; @Test - public void testPileupOnLargeBamEnd20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:62,954,114-63,025,520", 1, Arrays.asList("22471ea4a12e5139aef62bf8ff2a5b63")); - executeTest("Testing single at end of chromosome 20", spec); + public void 
testSingleReadAligningOffChromosome1withInsertLength() { + String gatk_args = "-T Pileup " + + " -I " + privateTestDir + "readOffb37contig1.bam" + + " -R " + b37KGReference + + " -outputInsertLength" + + " -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1withInsertLengthMD5)); + executeTest("Testing single read spanning off chromosome 1 (with insert length)", spec); } + @Test - public void testPileupOnLargeBam20Many(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ", - 1, Arrays.asList("08d899ed7c5a76ef3947bf67338acda1")); - executeTest("Testing single on big BAM many places", spec); + public void testGnarleyFHSPileupwithInsertLength() { + String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam " + + "-R " + hg18Reference + + " -outputInsertLength" + + " -L chr15:46,347,148 -o %s"; + String expected_md5 = "53ced173768f3d4d90b8a8206e72eae5"; + WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5)); + executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases (with insert length)", spec); } + } From 8a442d3c9f33dcda2e5ffececd06f0c5359b0df2 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 17 May 2013 09:38:55 -0400 Subject: [PATCH 036/116] @Output needs to be required for LiftoverVariants to prevent a NPE and documentation needed updating. 
--- .../variantutils/LiftoverVariantsIntegrationTest.java | 10 ++++++++++ .../gatk/walkers/variantutils/LiftoverVariants.java | 8 ++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java index 25f6f3d97..c17c9ca55 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -47,6 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; import java.util.Arrays; @@ -91,4 +92,13 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { Arrays.asList("0909a953291a5e701194668c9b8833ab")); executeTest("test liftover filtering of indels", spec); } + + @Test + public void testLiftoverFailsWithNoOutput() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T LiftoverVariants -R " + hg18Reference + " --variant:vcf " + privateTestDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + 0, + UserException.class); + executeTest("test liftover fails with no output", spec); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 17d50f101..0e38869c6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -54,7 
+54,11 @@ import java.io.File; import java.util.*; /** - * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. + * Lifts a VCF file over from one build to another. + * + * Important note: the resulting VCF is not guaranteed to be valid according to the official specification. The file could + * possibly be mis-sorted and the header may not be complete. LiftoverVariants is intended to be the first of two processing steps + * for the liftover; the second step, FilterLiftedVariants, will produce a valid well-behaved VCF file. */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) public class LiftoverVariants extends RodWalker { @@ -62,7 +66,7 @@ public class LiftoverVariants extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); - @Output(doc="File to which variants should be written") + @Output(doc="File to which variants should be written", required=true, defaultToStdout=false) protected File file = null; protected VariantContextWriter writer = null; From c8b1c477641007efb4d2f6e97891bd33fc125301 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sat, 18 May 2013 11:01:06 -0400 Subject: [PATCH 037/116] Updating gsalib for R-3.0 compatibility * add package namespace that exports all the visible objects * list gsalib dependencies in the package requirements [fixes #49987933] --- .../sting/utils/R/gsalib/DESCRIPTION | 5 ++++- .../sting/utils/R/gsalib/NAMESPACE | 1 + .../sting/utils/R/gsalib/data/tearsheetdrop.jpg | Bin 50343 -> 0 bytes 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 public/R/src/org/broadinstitute/sting/utils/R/gsalib/NAMESPACE delete mode 100755 public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION 
b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION index 6116e8c66..ecf76a95b 100644 --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION @@ -3,8 +3,11 @@ Type: Package Title: Utility functions Version: 1.0 Date: 2010-10-02 +Imports: gplots, ggplot2, png Author: Kiran Garimella -Maintainer: Kiran Garimella +Maintainer: Mauricio Carneiro +BugReports: http://gatkforums.broadinstitute.org Description: Utility functions for GATK NGS analyses License: BSD LazyLoad: yes +NeedsCompilation: no diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/NAMESPACE b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/NAMESPACE new file mode 100644 index 000000000..0bfe475b4 --- /dev/null +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/NAMESPACE @@ -0,0 +1 @@ +exportPattern("^[^\\.]") \ No newline at end of file diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg deleted file mode 100755 index c9d480fa05f4acf066e3bf1cf469db47b8a1afc3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 50343 zcmdSAcT`kOw>Nmm8I+tsL86kgK!Zw_tbjx%2gx}$K|r!(1q2k33@SP2B$65=$3{eQ zw?NZC(|pJ0eeb>Bnl*RLH+RiHQ_bn~qq@@GRl92MT~*if*XzIyO;rt500##LPzOK2 zH5^Y*F_Vy{1;pQ3lIOrel`w&^Ui}<#Ld>p4FGV7 zK`diy=WPRGB@o-&dfT{y_-{FyuCDH2`M4(_=Ckqe0`uarf|&a+U%)pQ+||JBSTH{A zzv5ni?PvhG!1%w#@n``6H5&jl;rv&+WBPx1g+u?%+g@DkpW<-Hy#Rm!eSLilrX^|x zfGe-->&y4o*H^`09SZ=^?P=rd<@ZlokPw_P0Qi6U|6er!t8@UE0%-ulKYjFnkN?G& ze^QA6Kv6dUJP8MY$Kd-N_~vB*fSbVppa+2QN&v37_Joyg{JOWgJ$KZz(Fa*Q^ z2|xsp1;D2LtAD{%z>O*ZD3lSARCzimN)`S$=6?@9b^-tJ?V-CzpqG<_qYtMtIJ)ib za%#BQ2;Jor6A={!t~Y>il>jGu0MOP31i;+H009m)C^>(*4e}d@8I+@cab6RL<+(O*{NI_6oi1!~Dc=b2!^%C$9z{SD&`}gmF zM}YS?5aQ$G5fBj)5fKp*5)u)Ukq{A+5)%@VP>_(4k&%;=6Om9*Qjk-EknC?Ef3xG^ 
z{>@83ModUd_WwFucLO(xfmZ~DcsREK+?zOfH*v1}0S=IQ0&oWUI|=KhuHnp;|X`}zk4hlWSK!KP<^ z%+Ad(EN;Lze{TKS{=Kt{{BwMAiaJA|U;L#D2f+ImS^r7d|AnrbAYHg%AK(-Hr3(kw zAAIm`;uG8wC8So?Cwk#Yb6YHenD#;Hhc7)OT;c`@I%}^fQhIKQjXTJ{r2RwL{~2MC z|0Bx&ld%7yYY`yF!vQ-F?y%^ zC12}S58UFGI^;7<@420k3QLY+`wa{Rb{ENT$x}JYozgawgv%0!->88T$Y#(azEP2& zDBaU)!cQ>;h?jEk97SSvoytMP-cBv#%>q&Bs4e3t-_ivTmOe*IAF_J=hnD?x##Q01}Dmn$0^iu=1<*v%)mh z(BA-;oBGMlW20{Fi|Ika0E`pyL`akpj%$kyE$_I3(-T%^<*W%>K-{JpIAHANhVhTE z&)nJy_ASIAxz*(PATOVUwm3P%hw(^>P<)(#sfTGjSbwgpr28?>umlk8e}n&fVV-Ry zf1xYH)S{U?XcACeo@7zrTd99O?N1r3hnqe;#`1N(_$A$h;c5m&zkeaYK@KHjFw9G1 z$eB{}SNM%YX^A41&rVXCIXK(0drHKLFY-=GSqo|3F18$Ls@pOBoCGQ~)=x`bMC}vY z!b_a`tcOdjt8-HQC%-gaPlR9Eb?cCGEKT^VB@lq?*v~I-Voy`{!AKFw`4gSdtsK~ygJdyskP>O$Eo*RTzBAfBq(`v(` zwb_D)OV!SX(>RffYjN4N=CDPv`s1d55(QV^pVpD0Gt=)Pn;L+$Qd+>3)1i@%Nbmus zXixd{q>wQBZodzXQ_h%1= z(bVhV<}<97@Uny+$E!Z~)Y?kE2226HmL2Wq-aUJoH07lpa-@LMgQ0311$2=t0FO#v z3vm)nQ)G}&yCOv1DbjRalClIQtEP>_JcPpeUuGbkH71P z!^nyVf@Rzdnr>zzkOnIc2l6p`fceE~;ZIcYreZzN1~x)E&q3m#eVeiHXAW>9F+wvy ze`HU|Qf?Dbq7bkh59lmUKUrr-mlFKEm^R7VoDFA9%0uu|;Z5q%&=L%7DpA_*Bxfw# za9~aT6+^_8f6}HQgZT~M_kJ^s*`;v*ZF%e2^lI)NynMa2q%z!?V;e_%SzwBkleHceZUb5(bK)HVE%p|Uj<48%jo7(L+ zO*(-`s;WA&h4^HdMKYTmd@pk>J(X|_l!kG0`q*cR!-KXFAEkb7mKWmQmV-qLMoh0p zMT~Kt2NdGvc`xRLIUet5b5Z12d}t*HdrtR7g?4GPolV`eBy5NMc7g%G&V5#+l9Bx0 z>1jOmlZ`UGHu5|2KBZEIiTN%$un;}e!PeODY4xSq+rvp(&R-F~rH-^?uGD=jR>b3b zezXhU`*4rxTd1dnj5&J{DJ?FgEZgs!eabd>`twbbL-#(wLU^)9TCJI4t#PV-;<%m< z7b)xn|2&ZZe%zKpRYV^|Xcc7BmINoOsCeG>xP^TqL%3+369oRcu*2!;>O+W_gMMA}cr;ThlG{^L-1Nob!&0jHvG%UWroe2i$V zz!}i%j+#HcfH~kvRQ}n{r*d{750d~|mR0Xh0{0U2nwhpC(L(T8_IdzUs=!(Q%zIB; zAqaLz0A&W8WMBHzZj6#X5TTTtYKx=*BsB>0`|y@0ZAb73!#%u}h)4g>{3w^9iW)AZ ziFYI8Ko%0+Y<7UX5#^nZ7ax)-S&Ctg3CEWmclmY=xV?X<59_inNXRx-88~RHc3odF zHRFG|k_P9G&MRT&(S0lPoLCT)ywit$>=Xe<7+R46^z+&zwLEAqSv7Qr{Ks^palfI} zs@1!(B)`5H=n6Cy^HcgD8tH2ih17z}a79wZxd&VW!}ZWU>k#v6;A1J0;~F3}Tf^Sh zLFDIt3JH5OEi7DhV#Ugl_U+=&)0iPkNFT*a)q@_7pu78z+&q6}$YwUX(*+nBBBbMl 
z>qA|1+hGl^Qua-4b#2if@)uQPY<-@Sz6oZ=h9@yoy9tqARdrQzOr%v?@@r@}tR5Yn z-iM-J>LwJNJi&5Z0~=bw*T9hDH4xK}Zn*|jbm0&zZBrM zYX9W_Fk5H$RXAil9>NH9hkV@=F`>dolUg&it5rW3&L&x2=;>QN>!bLBP=3QMm`it; zo-nu4LRIt-hAkN0=~ZJ%E-QbrTp~2mwG&j=laA^^w{b;V#arE6c)N}R*MezW16>WM ziGAa10RP8T1H`4j?uRfY5J|Jq^~%j-^9hpSwL3iDaROGrD#%{U~vdO_Z=2X& zyP#h(?{uIe+G|_yz6lvm!aPDI2lt)(%%N8O<%1be1b@iVH6VCiU>Y`Qg)qGaQVKZo1F|irFgHJqL^RRtHQ>hRPI9C`MZZleR40SC&b@zzgwIY8{ZGK=T6i3nQrxM1!{N= zeD&w}I@AXa@5#!NYhZ`_={jkZzqfIDdC*Qh^sH#bO4);x(&jH6B|k-^&&gdIbo|r zu-r(JAy2J3XPTKD;PEAl@NQAgQ$|ZCU=Y|*qTC4F(1-PYb2gKO_?u65&dDo*R0xwI1(iwQ(k^1u5rE z;x^&M>!}7xEFbVz^Ys)05#toXci%gafXs;&7NQKx0G3gE4}{Ee^rPX~v_Rf^nw;OK zC4*p_RN=F>cH-+2f#R5u-Cxu&3Q&m|`6mB>HBkRXxS&1330@dXCh0#Y6%M6TRdN8d;Hpp^gAVWYCEv(Z7(@Zv<7c8;Ie< zk&0oH^VVJJzA*GqmO142bA1q$u0&E0q%@G>tyx4i#>v>t#)q>ktl;#4O$A`(=cp#CE&G(0 zwcQ~bFkDDr`4$qCj_0OkYVN%lF#)Lt@D49w(wZcj)p$7ofbY;DgmW~;3Ixa-Ja2kT zRSod%%EB{}L!D}HKM&kZCZ3Q0-VQqH+5YiX572p)uFQxxM+-zZ(nM%d;qkl)UWZ)+ z#1p1~3>W8#1Wp_G)bggxfWR9yc|AN`;CMCzxZ%(NcrQkZt^cyZrIZI20CQkjZI4VeO#^-~${kEdJ74QSF=!|5y%e8;$+ zvu;a>Z8F}@QT5zYqOv)*VfFoxYS-dBedBbw-@SgHF62}gdG0^@lB3+sYXSvX^GJzd~}yOY$ZgO;0H_zVXEbh z?=>Lvj6D2PKvEn&F4ar{J!HLtg9LjcY$~UqS8D;IUyf0kzBA1w$DZ=R+?hd|Kdfp; z!uk%^NSvV$?8}pcAY89qVj>vFK)81SEHm5<=d3mzD|ij`*oIMu@FIueed<189wSxL z*I9->BmLtrlIxc%#!84PWVn4Y3PmeWP5s7}m}B(!JZJnN)R6qcEHTTdad(W{T3S`iqk_##`8hFBo9mGBT8^b+CPI3D z^coS(|e;+%Yt#Y&{9W7qj*X2v(%c7Lo^?oW0D398K9Pv@H6Xp_Dp7 zdPH^5fC~d&ordGm17)yQIdt&lJ&e8;-b8W81LRSX^>oaa&<_C|ebRELzf!Grezdlm zsLpe7v%0wsG2x{a5L}f*)*BafV;Zi3g*p?oaVffP5KDlrs~Wi~4J<~>of!mIK8oLA zAD_7$!~Dvh{u)qdB#)2(Zp=DK^jmh_VU2c8geGcQrZy*x2|I(OMmY2zEgBmmg$J~C zww2HhP#eWMyQ-a02b=Y`xjM-0yhiCHUuY_0v;Scn#Me9kpFIC{$hptO~K^pg3HTVy_F8LAiqIBNu zx2{bI%RRZ(Yd}vu(NlUMr1zGFNABD7dbbJ5{p>JBDEiJ1m}SV*tHLm0jB6KiFG;W$ zOIM)W*!9D`#^&m0tZx0ZYt}Q9TLcT8D(1G4s!EEd&HADCy<#{$OnZwhY!7vE293lb zRfF>2M))=u5NUeGT+q? 
zzmgZ>zZ6~cj7IEX$z%~$@S{Ww)5ngK{s{`FYk>Ob#dHV@oZ}{%!vIzfp7{5`=Ey8uzZ-n)?sj+|5gzIeF8qzFd39Fvj z=h-iz(z?FqyV{F;S@({G9Pnpz$~A>V8^1c0_`S>RhsC$0sZDQ2kMsLsx=$zQK%uKe z^YLQDYcX_q{rNOVf@>iDbb@jPqcAP(Lq!$2)w~ZEpcn*!NB@bOTRY;0Lr;&z!f@DkpQ}dxdIjTp-Oyg_3~}4+SP#O`fRI zy-O@3)WB`^roe-)xK;rZcmSQjhCtLa+3zfCY7g6 zQ*z6vaPj1KdzMs)U@WQNx^TiZz;H5wtMv)AoqF;G=MJ{pr|yQBZMdw^?tNDJu$4Bo zoyeVgGm3WDLsmeq+-UHA|v({)NkH~j!-f3DCKBdmdcw=hv zZ0SMUcDi%))2F?dPQ5LcMY{C8xI15vwT`r$GwdR+u@4fdsR+{p0^sO9o4!s>5DkTws z8H{kVc%UJ#g>94>4YZ}>_2BffmQj-|QHGaK=IyA8WE6i8GP!(|XXtvzKa{yOd%@M& zpqB95Y|g6tOp?hkhmlUtJ7v!bKOhAajr1QP6UN&EogUabPQu6lEnfs^`jz$#e*N;p z%_qZ3`clnkre@+WE@C}3QVUmQoBO%eEuN|n=Bp}6UA%zRHT}tU6(m>x8r8iJr~_hX zfY~9%ratQ$cr+`N#clk#v+2$!6QP=W2M!vEub;jDkQ(_*x40n21U;3JY0?@8@n5ZT z)?uwMs!PwwxIK5cCG$Z18JLvDhi9)q0> zBSGFqpACNA(MNl^lcBA8vFy3eIvS!+X0tAQ;_<(!{E{cgQRNUXGf$64t)T-MdY?h< zgwYlxwdT?hx4UAs+FM*SZa2dYi(8Us`h4*tOc^xn^ftm572S;S&!45lE*#=)EcT9j z)G||2C)&=u5ArN0z9*UfYpZcwKP@4os@Zx+6MI@6jCud=@SsN)673uiHn{c4`0?xG z9V>--Xpc6Q+62QCHI#M@^g1ZMt8!UGB%|F%7Fh$sC1HE92Vi$|?Im7mO7v$W!Fy#*#e8-&c* zTgq~ADTZ-H)qUq-|0K{!TI|%bsNGXz?8^`;QJ<3#%rD8{bf5kUZSl;L%!LC-%J1oE zGCMlU#yhK>Mu<5Kz4-cC+$of*D;r6rgD6dCAoi)o8#GOcl4Q}PaA7bQwuzT<|M`k< zbv@djG!{RGlUnX*kpoMrg4jlRpuaFgxECTz<1yxURVzb_(Q19hIj7?(wmJl3@WrA) z(KJf&|rq* zkZs||ee)SVU&Pj{@QXm(YnfBeRp#xtj~^lG9W|@fHTBb8@gDvW&~98d(2)C*FpGpz zYYZY;?fCET?00Q|KAH<>C3Ts-_+P$PipSsY9gA_GJS{ECp*g1uSvDD{|ULXW3LIWibH2hY0=)I^CebSyns5ws1Gc~b{5 z(Z9kqcnwgU*!`%%a8;od|EyX5;o*@H`uKHGyPy5}{d}>W2coXc+apCi&klU~@6tUg zf;jNN=kCsAuv*L}i)GjdZ@Kc?PZjmUtejk-sr!$mkdtS$mF#fc$8&?wy4Zs?$idS~ z5$u?v2-50xyA|T7A1jLVZA=QpSELIL=3ugD$g5)sNbE~s;1Fp^OzosDL^v1hBf)9O z$ejj#hDysG>|{eWCT&nf%~4w|E-wc>IQ+iggjiDg)93StoUAd>^igzUo2oQUy?w{B zZgdNZ4gS2`TPJ!3X0WytjQ?@j;Qe;Rmb=y}%t*{+S2uGF`lki;SspaLh(U*|A{Ta+ zoeHgwSnBB%oD~xO2$T0O-^tO*m|CnG;l4-^SRzvXhHGw{p9%Iv9WRDf_X!$QatUzJ zn}{K!NqPO!0GA$)g|7KIV@sx%y5#QAEaSG|%WM`d4NDQ!{^g@Dkt5CCmXvr=buWjA z;)h^`SDD1JBW=}O2DCqmR`oO08(Rgtc#FT#Ztx9sSe;8>a9H4_giL3Ok?iH1$M#tv 
zjD@3Cd67cz@V(IAM*El#3nkw34*GX8b~GXy%l*th4p4qVzE0jYsQCHHTE@mo$2Qz0 zN_qn`!I^RE-HTFa5rq=TZhiXaWQ{3IR=xzCk|`^nvd5Y5L?w|F|uk?t6}?G52q zl2=xtO)00sBxr9vS3whGP40%-XcxJYF<%e2Mg#F=&CR!haxqV3Mt(Y!Ull^vi@Qj> z?7LWZ@>!9DB^4`sL8z!}fP^y1sc0wN*=x^(DT_9F>^Zr8X2Kz95ewb&n+o-lYhcFm zHY%g6sRO1?&2<{Z83CTp<%IpX z1`v9#vJjgCJX9Xq0?u+^+-*fAZ%_$0lZZ>>xbb_;|0*}MQHn{JOub2OShS#SGxJT< zK+;CY=Dl7uj`-du(Ij4rUJ3dkBh9-%I)3IAt;$taYbFJGXWzBWRh(Yv*~F8erlXRd zq4)p1o8@6{_APyB`SvL7>!mCeu3Hgo%J{4^m#6|$g!ZFxNG3~g;>A=Vr3y|RES#1# z@mN)9FGqV`hL(PDL4Fc9Vn}y9S>tGcm-`f+L5)*LAZx!XI0RY$(4R8TyQ#O0ePrQs z4XE4KK3^pn>Ih`O&{9fdqiE6J@4OkH3E+B0x#9nk<3%nn8*+~BM+B%9lwj|i=QSUl zId^SxBoCxs z_!kF6&i|=fun-;q=fd2VdW|5TU{`JQp2hdKEJTT`!<2>Ot&0pGb0*(7zq>;8J%i-OHB8NW05_ zbNqR0CVJG9Z1!&BDwc}$MPo}YFh5;cG zq3?yJRzMBgZ1m3`Qo-3SJC1o9cmCQd+8Xf`*}h(e+S9T>cw#X zEmfulyh|SoYUVwi5JHG86_N5LbTJ*Slj8U0#hSME!%IO`V?0Iu_wmaX(jSMmX!2a$ zjzlIz0$qMq3QiQ;R27V3_L=-t62H$W$Skti-Mifh-SYk;yuQ}$&WxR0y{k>uy-GLN z{;X1wq!G4X+~U|HvvN!Ck<0FPCAP7!xUf+O!ews2Do)V7EJP0}lz`aQ4H2I=ji_{f zyZiLjOU@JpX%gEZ-_r{+cnXG9`cg4$D4)iiIBdd-V$QO${#!pP%can9=}U_ETg-WG zV%pRP_oL$+v@{A-8Y9tI7_{ES+-zX;4uU>aO6d!tn5QSh?;6m&J10A2u8DI}%WhJ2 zBz_I}7nk+5pJuO2Pu0HLS0hW^f*#RX1fpV6FE20!!&f?pSPZoqN)RkF$Psl>+s_{& zTekM7ZT1>~6E={=4ewHq#4GbEFnsenNMGBviZNacD@RQCHOXm&?-(=N)i?mgfFkFj)R)ZRd+=p)_g6pJd>R(FGaQ5B+kSa(}!Zxf?oCyQ_uL*+xA> zyA)i|`E*@z$9F!A|5LFLD)&?ne#gMTJv6gTux>vl2mONGiuIsN?m!J%O)eG$db7u=WKf0F$~|mOxQ8!;^p?W{ z3JG|*_LTLL<>IBb%K7Hk zp!9g7!64s4K+(9=)UdnAjVW}8WovjZ9-#UzQ!==Pm&`Akc`_yp;H-wzBnB}*2!ee* zq@A9jJONrZn_%I~`68mT(M(M8)9oFj=t*3HajFu&m)o)!zHu2;u^Ui`CxdEA|+yj$jS0{(dMPk#p|gPhSP54mVNrdmFy%P z>3O8NKqOlTlpQQ4+MdBv1DHw@NI#SnRh)UDOkxL_zpgY=8!ny35O+1b#nUE>+V7-{cJ;5T!BwO$H1K0Z++@Lr5 zc3OWVgxO38uR5~~N<~damX5b)r@iICt6L#oRPT*`m3Sx@yRpXhb8-iUu8~@A=7cw{^CpDwpKRmTjDH zjol;PmtvpID}nAv99FgccORZDgUf>FHQ@4IN6WE5uO879>G~nB_ULxJnaf}*&OTG& ztJF2?3*)7!JQ4Pp0+1<_L_gQ|2`;@bLf}jZ`rmpWotz;Y=z+?4M=Qp5hH7*gg;noG zKU@8g%)n*i*kEWJ*e)%IO6@8+nDQ5B10`)HG>=T zEZ`0`{T~-*XhInI&Z16gdEZN94?GY%QEo<-U~fkl&_0Fo&A_d49fy6xQDnPRf%ZJ= 
zWWs>X?nttbJ80#N_pYa1uT52ABkQfGf4_M)hN>l#dLLRk_FJfT|bJ=$#Sa9EcK;~V`Dklvevtw_69``TyELVjt~)%|uk#l8B1Td&v- zennQON%mtW*Erg5xg$1F(&#l`#G!Bm2YKuu*^1>K!AgmGt^2{bS|_pk?a6!@N^bKc zU%Lxd%k5>y!pfb4>}qPhl&q9Az#hRLQ4g|mB`4yr-rO@VH!Fcjczsk)`C($qQ6_({M&`8~(= z5L|+|xi9fbvffqXph;6tKmiphK^|6c;KEp+_DMby)lpYRJq}c9LsuV7wWz- zW}xw?B3l);8sIv+d-u^Df#SErtAy)N?K9+QnIwhcnn_*O;_fO-LSRN!s%8XCDJaIU zhr<-Wq$0ADrZqobRbmxF&4pv~+*x?dkeZ${}@xRo|7| z{rWXYxane)v(4Sl18ieGp`RYsAKZa@`5eftsZGNh8)8^o0vKL;^hZ!jvSq3Pp#p-s zR8FWM^F|MoXAfP(#Yf0JDu!;^WZf-%o)&a5Ni<@G&>r?#y9w=4hPuDkk*O{TH5_1P zPb%qF3=!Nwzhh9CJ4?HXMb^fhx|1(~{!4_tT5Qch>IavWFUhuwsmDEvZp%Yj2sjPX2b5Dsi@r#Bl|1-LX9atAH#DPU4L=@~Dk*nHvK`7}! zZGiZIqQcZ^){(;GyV7H-8tdj*qSp1Z+68U1hbj81>m;H*10gSvby$)jxYk zca+G^i`E5AQHRi46URnc@eIGDzDOyrwEdRRrBlOu{~T!& zAAh6NfX2;y_KZ2RfZO4ot4rCpxsF6p0iBGDr%Pr|7Ykv$V{2bxV`{tB-&)mAyaIPM zN4pr%YN-6FlVNLQRwcPh@TY(@mKR$)U%wS?)#vaGwA;Q(%%y#PpMw{HfV=Eaf7U9LuiB~n;ZBc&b4uA&NqeT50})YzyDUi%UCAbw3?g` zlh{~nUa)6Mexd2hQ(h7G;A^Szf-Il6K|X1Tp{=O z6L6`KQ51$wtzgVR_zz9m=~$8mq}MMZ4TwuYf-p;DRdwmJ8{G(H0+(;yNDj&kWY}t_ z;;WrYc?D@o^QYCZdAvmd4oEMF^sak{=Rc-lTGX3%sv0Y8t87oZa=)MZgxDj-z>`>A zC^IBi*rXyadZEN*_hL1-JRt3Ge0-u$PW#|j>RqEOzX=CdT8LA=YER+_S2!1u+eTX~ zS`CrjUr=|#TzlZ;6kHLoq>eU2+6?$z483w_N;V7SeUPh{B{+JZpuXIc`sFz1ak=B1 z)2$namj+a59^rxe_laGtuc{RJRb1Tb2S}Za$XFk-CRH%t5v9g|hOFmtFic=p)&{yT z3=2@CPd9cm9-W7? 
zw3mH5Yn_$5e`nSW)nxb$%M-AqfzG~k!-OFzqoD2;zjL?T8> zF1|Km(+t+hdDH}JSGKloa`d%eUe};DCk(#D)9<=uC#r5^yKRV)K))zLx53Ag5Qn4i z(%3Nm0N!3jp_wo)vNf_D!xhEa3Jw2z{S?)^Q{vCmxW&ewalVSptcF|D=~V{Vonfet z;H3%G+L=cOX2=KFRdcuaZOzYFQWurC=%#ic2+fp)EU7OQ*tr7wd$S^R)Cxj<&JmMT z*%6kF!f{{a(3%@pGa%Th12jpBO*lG?Qri=vVw+10P~<-E#v1{${%Eev#i zeq(%B%B1giR*kp<$q6>I8tw;Mn1D{uy~^ua{|LHcsRlWi1)qiRBRXndhd@HBHCR|6 zq*iNYkFuthX^BQPA|;!>aBIS>2?8e)6&3m}A3=MR6!|gkXu(elA-agtUia|Dg4oi| zhPXqYHCj+>Y1o&0x3KjWIw`db!WTQa|`GPs@_!C|=0mtYTy5rM)$@_u>U!?uw_?-n-QKv@H!Kg`rG9XlNc+@+YxqBUfD1Qrk zAN0g7R=djJ(~eGk#b^#0#n|{&@rJMdy(i~^>0T{Y_6Dx{PST+p!3+1!+uJxO!lti* z`qflqI*J;hSO#LnFPkeyi z?K$yr^LdM&@|a&ExU4>g#v6H>t1%o{Ayv8C{G5JBR1(mLdwgFP{;um4G+hJgPjA;~ zYcVMh)Ms>w^n$qt(Yq28>9oG^k<4@TQ@RtT0(wM@Q&f4DE)(&_uZm9}D|$6_N=PfS z({UJup1pwQdSuJn9v$7elW{BIRmN3%Ym-Swx`jhn$3%)7M#~j`guc@?v|`=&xP9uS zWXj@WtHlM2#fIvEc7r{+C$GOg=$6oJ$jIQN<7H&5ZSGos-}*|51EB|95S=j5uU^Bzc2 z=ij(mK}C3Cf^7GuhsgJGWn*vuMgj8*!(Ppat9RaJyNw+`&W}okJ4*QA{NKcUEVK5g zj-bJpYZ1q1efVe>2!`6bPXwb2dh+H}5S<0cK9j7Wflh`N`ZEs7BV(hkOR0*(hs1@d zeu-byo{A@it8@K;*nkHg^Y5bHT^e9i(Y_xsPZ4Ja3hTcWnO4lzHb)h3XO|2n4_DoV zDA-*ed?yyd{1BE}I`CbBO1mf({)pQd8BJAmc7tl=Re_RL8f}z|$ZpP&Kz)QqggZ zB$X?N zhOV#02ZcpcFBRHlYjwqHcR1bqqzY8Uf4#+tdLBIIuz9k#YXkm*aC5UXRqgO#gyHf>Df;!PzkuQ!F*%i}9wm%KI5)G>&6p+4YENWLe9 zEvu#DUWaMaAAQfM)6Bo6!mDzH#9CVYIxi3in_g?0=vjx}m|sti%-|C)1{PjE=_6Islx#ZQKv<~UfRlY!!FLSmE0QC+Q#7nQj1sL2 zy?lVtL-I6K&O3!M7p(tE*hr#OaM$UG*f4JJaiPs$y#MTlklP)f^B2qJ=@YxrM?+Q= zdr%t5i6}Yu*SrvFpEc4XyUk!HsITiSDdh4yRN%{yWxI!NM~TK6O&Dqicr5CB(G_m#|5NWS`RJuTQmDOG&G!|eJ&-%n$ z&Lh#zsMsL-QKo6l`=uuciJ6Z@+C0S_#V20nwjZ68;f~PJER@-h-JbN?^zgYMD&Xp? 
zB1_mr7ee*GYCSftBuWe2SNNNGE~*C0u1#B8wQJh&J)57^(@Y@yy_+RmR(`mThX*1} z*tWL7u|EC*QG_L>x|Bg%4d|&!$I0Kkl)TD$!NWSgG8F8DAD;+m{;EZ<_LO(FD!e9N-LSh#BREOQ-x4q33C;Tn@3<>l9qD1rDL$vmWP2`uaK8*yRFh6a0&Ujq&t zi`m_42yg;fq(NnX8z2K$nwx#>yhw-s`?U;?x}G)?)$mY(UyCcQHlIa{LQ6Isw_f+Z zv$IKbkUt08etu6?qHa(s;S>tI``G2Kx< z|KH(=h0!mf3;H09?#Z|7+TJRBbG{H;86g^toJ{qofb8W|P-=`jul0ap&P8$7*CE); zPCpkWfi4<^QpiK##O>N;0S4E=HGGEoEhYGl$H|8OCmThK`{gnxjK?Y_bKjo?Y} z94#;1W*0(l75Y3S$qL2gVs-IQG8i{$Y{>Z!c)-%2F02Q_9QiYS-PQ3q`Sul|^KSPw z&<-BJrN&H+CSL=7l~{&z@$)9M6;k~1^j)*QPT|kZvmv+q`BSr2GTl!CO#rtli)O0u zHn{uB_X<2$yJVBj$gZnodBIES-%=bmI|YV(HxFn`e4&H^VZE!r2HBt@LvmV`!*V$sqFXd^lcMzm*Z*EkBBl5*Atim7d!FG-#qpj^hz7_5KCs#k)=E0xB+=?A-p-!_x>T=VSmH7ETFG1C@Am( z9M3`~8t@dt^tljCqNQr@Ga+lHAC6wA>3NVRoc3`2rx6C!qiiohyF?y9in-qQZ+Jw#H%h)XrC zrRDA&at456HB5;t`n?#fKZG1qBv#j=uYoVZu?S$KBzK(J=ylj^>ZCWF6og5o2V@z-l?% zD9xQZ-){4v@yGU^ZqLG;1iUX2e0J`eQd*w2{%KRxIx+V9Gk;<7OV(WA;oiuTs-~Qj zwy?Z!dy8h3U%TENLF$_@tg&Uk6=|*T!^SM}rmzeFTDlM29m>oXl;%83Z3uGk+(Cm4 z_qc8@$_g|zvC`l zi4;z#jlkK2`Rus0SDn52_$F%QHQsDjX~9Cy3@`k)?xoNTM=(WF{JJ|Cy(y>z0wA8(j6y0nB|59T&x?t9gJInq!Inv_R-8Mn7dp zqC`Y|HJivyW2gZTXMwUiEv%AENX*F$F%@3HzzefOTW(Z4k1ZBtWll;(mzBmChnU6y z(&9)bzeI=C$ahR9#RfXwO8=wff;Q=xlAqSi?Y`l8PtO1#?iX|-@gZ7J$aqg(Rm}># zN%+e4Vp0S^i2nB`&M)m>{1# z?0*sVrr}Wj;oJBidyJj3j8I6(zMGIOSt9!~l?rJ{2pKaXWDi9VqwLwT%Qp5Up{!ZL z43T{%8h6dobANyT<9{5_@w|ARm%K2y&;6NeIj{3PFD1}1A~?oRy#A;r;S{TSy#C3@ zc8q&VI^gLEyxAM{-NtZ09+ttq{7SlR`EEWKx^hu17s9!={JcvKEua>h#JIkx4k`Tc zz(#G)qNACa`2jZ|Y1rM>aMF@}I+a)XY={gD){wNalJLojNWo zHH(rL`0p*Ru2ZhHRuHoEY$B74y5Zx|5C+4e{h*_b81()9F(uB5kwTVVwg&RwAisC4 zPoz+;^@x#jA!=FMTvD5JOp`fpu#b&Df*N6yfUVgVCt*9*F7`GG0{~ zDpN0pu;K)ww#^_+dt(rhMPRamY0IMM;ikCnknKH)&LY-LfaUL$<<5{B>+%4^0Q+R( zimC`{;WgF8uwyETdoY@jP3Lzp=S0c*QdK`D)bdk1$k0h6g9?+Ap(W=pji^U&=7NfB zu>$B7e^hL>65Q>7J|Du}`S{v7k+rE@ZLT+yv~AEb1d5hdX(7FhhERfyQ%B!~7Bbtj zPG;qQ{9Pn&P*MDQts0_pDX>}r&Q$QC=eW+CbmQ;gCFrpC&^=@D0SXIueXkE%LpVCa zi=u6-CGWS`nChn{Y|Am^b%qa@oLZYqEvxY}#823Ps{fX<;47-a8Do;qTgKAqO^8YL 
zx{!gD^=BC2YnQmbHo9uCx9~n2QruM-0EsuXDP$N&2ZpG16#?^KX2sq&s+h(R}X}NMU%#Q;3a{E!H-vmm4xT^^%+7tW=g*beLIuAzusFS)%0iB z)ieH3pxx-(x4&Ae*g*sr{VSZk5zNRY4#{rBp``n_rb8CL%{;f|Oo?%fz9aqC2~u%_ zhfU*G7?pR^tSO5iyqEO=iC6nhMb9BO9!9_Rc30GYuk}9m!W&F;)I9{C^~elMXPY3J zM5+naP4n;tM{8JhvwnfF;xC z*){esj-7F3X`jaZW+oRz-Y!hYrF0RUpBsq>b|1uG8!@~26vuFHlN%GN>n zK(Hoh6rX)fNap|s{auSE-%g)t(mc)$UHbXdNagmSoeF1zSj?r`Jn9|cT^dMtbRhfA z;O@{A`u{d3f+X4VoPnpp$EVz8Ya4|M!tX{#>FOveNw61V&PTY9*z=f0HzSY`bPDw` z`0v(I6#;gVIfD428NoxnSpjH@?CU#fJ=H%ORkIL&MTcj&x_kmDDkD><+;9PpzEv3P z+@*@?xlU)JauL(}YmRVBFO>leGv{rY;-e#3bwqu9u*u}XTe zJ&C#jD1v-1XxvQbI)|R|lB2!pFbO4TxspP=p6eSFdDbQ$EzVjhYKWdvzJD%v zFQ4IQ4`$*LR~yWJd_8wFt*KHGKIqgu`?oW5qSVvt z^0}C$2)q38`l@>jmp-wbWP`v8Vf6`A0ljW)(@IK*13pS(Xs|$llpcF4e)BrXmbWMH*z0VS;-1x43DMyd%6gt;5jo*T+wdZodUIGfAI zLsTkmpezjl{Yn1!a;BrewF#;a6W)Axiu67(9xW-MHm8qn)9Ctm=WTdkUyj=tz4FB5m1MGS zbw=__ihf+;Lb>Mt%~qc$)0kB}X1?|sQO5AhR1I1>JkO}<*%(1#PrB#y%V|TlYYN6I zi&yJzjl>qAQnMABqR)RC11rS3oIbtUcm&aN>b6 z-k^+qntCd-JQ)jJ(afpd0yQL>uPc8#-&4KOQQzvAE!;Ca+{gWNb?p~*aT~#c@dNc| z2trsAUEO~LLS-f#%*Xc81tw8ZEIDOf^X}WvpVgF2?w3p>ed4Fb1d??>q3SoJo;*9r zI)@;dPe6Xc_`2ZqFIYj84vCZCJdlSFT=HE|U)GsBiqg9}sqd~4*HO8#6*O-Ic`a$_ z^u0_a?qQ*{w!JUo3RMU}t|gi8{#eC5{mcDiO%ERDh8cj1x&uC*B-)aSB`of~+o`h| zUp}9_X04o4clO17+t+CqPX!smK%_(r&4BU({0Qk!|3So2-V_t+1=6|tTuf>RPGq~y zlj8F23OFX_w>G4UtV)ld_yh7r6CmT1$XY$h~+H3WH5D2EX4+wh)ERv&0nkB&{ zD0AF&NcN0FUyz82%XiCrf-}|B-*;Zli%8C=sMw)7Ny~{8jqosX4Im#yVF`a~MDlkd zjlV)!76aV$Uz^75#ElKDgG%>(&1PcpZHc5mviGtXo--YZf)`Q{gAeS6CH_5rdZ!+U zTN8KasWT5ERG`k_74{xY^^sZd^)E8*0k4q446XGhyGpA~}jP z+}l|DDq-JpRs(lMNeV8LbV)ybz^Q~*FE$M>Jk+=2iweg7gGlrhecx#T+3&LPN=H%VSDTx8h5hiY zrqT~ybExh+_aSdNM(-ivf$8zp6c*TmlmD12T1Y8VqbTHp$amVS(H~4?)c$u|it`{{ zj8ix=HZ({R?#m1hbR)XdVJXf=nTrnJBQ$p1;a~Ad0%3<@ zaWd~>TnN!{uYV9fC?J>K7dS5qvi=j4c02smprSy7Ez;SVSR0Gd3$69f`c}cPzdZZh zGKu-V@~a<4_v1!bPvwA|ci59=Y`x$=$l$!bZ7A$Xqi29J*N(LzRs+OO6MhSSOq#E* z-8*+Zd)8vTK$T!BdM)->F+UNp$DM;~NT&+P_7|v;a6@BZe{K9@@s>E$tz4Uz 
zp{3hgciRJwUMqzvSgy6a&=Ec(FzBIabC4xrO_~5@JtSu;KZYDj8q6RBKEv@I!IQAU zD2a1mhZd5|r8+fG?O*Y&=PW(n&+p`UWQmi@=W4z`rF=`jELgXWd!PVVt@J9V;Uv^; z6SAFFc&G+d;a!siN^cnY95ITXS_TYqh8%*}c)^B-Fh?Ew_}ve~n;9v;aE&;sP#@m5 z8=6>fj@a(E!l&;+Kf#L?-bV8i^OqY+!!PL`z!tPrl!l+#zJ3{2pR&%#CdNt?AmH=J zWZJVDZPzRUOEq6xifYl%mNPGSzwoe_qnsyjbq9Mb#8Emsc6DXJryC}AaO*2t5}SyY zBB{Fx{;10Iq#D&vU+H{9=G|2YQG)!^tl2BMNHOze+8*;KPZFJ}8UfhaMe6n0P}oT; z;@!g`BJgm?%C)iD{A${^Ed85RZI=(*Wh>Uu=LPRRP&L5YG6wJyMmo-hfnXgE^eM_G z*r#khe9)F{MK=CsR~!3}dFu)hYRMb*7%j-l)R~!Ag7;Ciq+1qcwD(^A7)TGXo)i16 zwm*y)Fcw)$MQPVY@h2N+ypvMD*9{T#?2F?cvM8*R8sE61$9!$IM9Y!go_$29rsdlS+{ce0W1jqnp};SUcG zGg2R%e=Yp-kGy_24f4lxFSY-oUB4I-J&bM9>ccFST_y>Y5vN=}&hF|fR48SYTWO@s z-c`st5&VjL^*#ROUB?^O1s{Cs7L@9;oyPb=Sus6^AWHB7D^7JKRf430rzU+kVSPn> zDbe}a!QQ%V2&j+_79W)6;u|KIM7OX(m;`;c>%!NgLlG9*f#hk(Uw`sDTDv}!A_pW~B9a>d2{^v+ zp@9cW9zW@)h&8dn)%T8xJuTy}Z?u^F7|fefwHWSL<&RCX8KrZy69esN*`Q%bAjQ02 zs6ZS*JW@Xp{D$xib8;Jx3++04(Zgjt@qDY=(6=-j_JyFsIjTT1x$z?aG7Ond5hr3a zCK9_OXn7s5P$J#a4VI_|JQ_cAoZUBEwnqxh{{5>X*=(9Bm7nFUr{ckVeXb9!OWH^d zrf^|;EJA!cTnPt!+inBnu>^75!$F>8&#?Z}Hz_CmD_-fp!hBu7RjTQdaXF9&5_Abx ziQvMl)_`>pS`HORT#30qH{`@L#BYVf!+AmBZRZE#aq`(z*VpQM#xDI~r>@_15^)w; z82cFfmb6Ldz!Do)p@T@S!th&__i)&dA3MGDzA$*s*@-QnW_j{Ej5V{5 zHFpBUO(O%O{~NhQ=#zYp4JFb7#lzoDU1?1i&b!L${84Jz0;G%LR27C#|8%GXfu*Ov0N3Vg2@sxH>3)a5p1y$HV3vz`A3 zm%chs{;#(u_UXDAZy@&ve(8yV7G2{7!fz;LS- z7%n;^_0}wku?^mjlK?7vXJ_ZSr0a>cv4OwAcEjvOt1;0s>TJgyspTXOGfoSKjrqn{ zFJ}sPwR$d15ezdZr-8-Qmel2)Zj1!0mbeO^<| z%PJz(6%G_p?wh_xgA$EK;W$(TX&W>D7|!LN-G}AFf#+i0Ax)@_g*P97?9ZHDp}(oksT9ojpO3R&hxsOq zeA%!>j}cx&DQ@m^pZ#?1fKkb^2oYtx4^&kev&qldfT;4?lus%4{^wG@o$ zD_3vXeb)@pdsgeXn!mSnp;!3V2;bYKZ*~v^HNw!|#3~{M$6zQ3to7pfsMl8W6}*6w zV!+L|;U{+}XTHfE@?;x#k^h6YyT`+&Iv&x(5NXMnP_! 
zh9^?bk^%o>vI+knACO(ruIAxblD1~-j}@8t>#AqumYo>%tR%9???`367?8Aq z+2gGZcPD3%V7sv-x2fxg9VYm6;raP{bO|7)((hEu%!S6CIG@)_cCSX=ak(6t-mIe0 za2vD)>eAOP1%Tg_WS$Tz8ilw-5=_y}ejUYdPGQvVxusY|!GqyXW)U0BBI|^XFgklD zVL}+?PK!Yc0JVc&JPn|eSLh0~Cp7Y%>%6E!q*U;Sp@4`uPeu!A{j%3BO_5?rH}y#e zmmq^G7MH{;9z4(%U}`r|yTo`~qTjABNZ^(1LYnhWIfy=w{QLw{U2%7CE6Ic@B@M){ z7^(BawKBA>P$6(;c5@;nOg=sm!|cyjeql7T*+JsH33t8`>zAb8^_r9<(gYxP7rHA* zssi)CpSIpTYG0gGm3Woih}RYUzC#p4Uf5k`tKDBL*!ZQ-#d5 z9a|xo%3GH#qdEGcA9dLU=NhI)5ER|!Y^U}D0&5RSTom`J34V85GoCqKHa*e6cL{Pb z)4!Tl5TJh(IuNCU;u5v38J9^P2~FMeX_33S^QZy6zVPSf=dQ5*r#F-FMjB+BCqDQ+ zXb)Yci}vE(WT2sn`{m94_yxZj4KAg{VML==Y<4L0?1y08j-jaSbDHP_RosDgipPiA z?76Oq+qnL}N)t7+U`&2;^Fz=;P2|F2^&%K%w<$Gw%ZqjZLm6OBXWJntz$K`crUR*_ zq+bhsf6b%S4Xi=82E7esXsu^2`VTAorty#nUkZF=tL-0TPlwn}=d3RB1sje^kdDrQ zLQYC+wh00qxd>MOt3y(1NcE?#pND=7-*Er&XI<|W?A7tH+A_jgV>)v*_(ICBjw;vh zBcnpHnU>?0<(0Xm)~6@QzshMXmNf-ebZZc@k-x*Hx69sqNRod}6M7y?w5?6Teh5s#)pwl%0wNVG*X6yd+%}?=9hf>m*Jm3RR5Pcf_;u&!zHWlY zNg8>cdRH6Tr$agCokSzuUEKU?WW3}4dmH@SklS;+vQ(aleq1Ab023E>8rEM8H6bB( zJIUr&mAGfDygiKE+0gTK1!sp8Brld53t0^J{2d}Y0iG0n0M;h~pZN#zwpPGH6VbA6 zmgo7Ssy(hMzIr3*`YhXbFWr=9H_2E9my!suDh3ZAPK6qBP?rHhCRu2Z;ElZA?S@UOUO3 zY(5<~(N_x#Pww5qeU7w>#HtS7{hc{^>C14`Jl9>os|KOvEMK#_GM`6aM|kg= zB6wjTraWgME%P5lc|dv@?rRH3ChTbt8OmOxPU;7i8exxCIF?e)KZAqzMb3wqgBkQ5 zdh}7|)=GnNU%UUSJ1rkuh=X}zbrx|FnnD?jRb|hOPVo5uS|Nky5nJ97fxq5T50tEm z^A9xdJdH9pecd?vIV>zOc0f8(`31aohqc#M&>h&so5yR6y8K95ah*g4zO%fd2yV5qsumF=dOL(m4cf#=f@VV%^&a3y%J`L`C|1N=ch;CTYgs;XCdF<<+B1Jo@m4R1 z*vz`btzTOv(09zbV>2W+$%mnZK#-g2-{1V+vE@pO`0L<;>MLgj{Yztgdy2Wk9w+IX zR1M!U*H40oD(+pelnfauhNP9I2ud5URzsMdt-hg5wk+*fJd9E?5&;!y3{>gZD~1P? zS?`FJ>PqegUy>=MHV33l-LbHmbTu?M1t(_b#m7jkdJEZRacE^+s;*5a>Hx*ZY}Gs8G-@QBV>UQRgvX7?%$o^yBmq2SsEV zZ?3gm2)0@*9Mb|Xh=5q3!I(Uy^!Y>cx4-vHNJH>A$cZ~fM@63twTuQ;ETe-{K$)X` z`0T}@ZDiX|ZLvU(;n5mt z#{ycnb4BO(+g9;%NKiq(JmS=F=^B%4n0j&0TnU6v<%Zm#@QxX`&ekQ#V|9q+S)0e? 
z<0Ac-$VY{1b0t!5vZgm@qlHh2O@jWjFWv-=oxpmeKZe+tubJHDG1#&dtE2gj1iXS@ zfjIOWjGZ|p^}Lx&{*-|=L@B&@F@31|Tx9To9QYS_VBWJY2Oo!kk5Tz+h|Zo*OkSJV zJD`YcAq1Nc3!8+MxTYv?`SEO^#+~{FWv%O)@lMkqY6*IEzfayGqn`}=2^VmMV|EX~a#|Aj9}a+*#>^^i26a}ePhB;9$ei@( zwNmo?a5uPoDWBKkI76CdqJ-&lA!7l2J)1-z2u1=*vfb8q*ZQVBas5$DHl%_Q|5Cu& zZnlHBVvB637@7DkUPpcB^;?%s>nUm2tBY5=z85}cAWP(xow~jJ4{|}eqHH@iXF>Yc zyFcErC$)KK6|lfIe@ zvbos(3xAGX1P<)sh%Shy2zJyp!j3%BPzH##1R^3d^T7!aFM#QBORFdZyfy^6)SYua z^xLUl=^puU@awPP)Bq&Po`F4mHkw6eJ?YQl+5uHiYjtyXEsRJPk940=&Ld=JC^g}Y z-fd}}F7p3S_~a$BsKqSPQbySBg{i%T3{fCs;|rJ)Bj_ymbkDB&wP6O3i2gSH!@inu zo6+vJr^}K=LT~xxSh?%7LnKFK#o?*uW-%o~ngG+D;=BP{@z7ZA4?|Yeed5I48Dc`; zvOe){g+v1$Bj{>g?lXL7UikyYPDkP%4c z?^WAQ7I#xD#bUp{Aezr&k^Wq9@7))N#4cWbVy``YUQ!&w6m3ws0SCD$MCZV#;J8vd zJg`T|YAxRY6|CO8%uf=kz`HL(q2cu|Gx=FI(`Qr6gI*dt#tRN=HUxN3z;1=0HvvlS zyYyxn7vr0zLvY;N!jBpXncrsX`H749q{Q1r92IhB*%8UuDlfN*NN>L|jjV{~Nh^+F z@&RrAf@;~4BlX8aZH+%5(}(2%y@=q&_+g{Fq=^%QFrEnl0tQ5d;CL6dgNMXyG4Hm? zmO=L}nc5BNc(=zr6BB??e!=T}_6|u_(FH;B|L^r(J?whMY-W%txj++ z`R5LO<89X1{a~yjp!ci&P3mVhy9W;#j+7)j8!{*CT3T>*Z;;vSYp&iwKR@-cl4@|5 zvuCYD-Zi@1OcP+skmRT<7vXrX4Pp4=JgkW$I$~m4K&Rco9MWSL?>$w(+jUmB&nY{| zckbbi+Riz%6hHqmnY*jYf#wo+HnHeOsF~nZC{-Y*2hk{-r0=l0cY1Ni@Fb9oALD8o zP3H=4>C1U6QJQ^qjL&mO?5B_~s#_R!znhN%w6cxzFZcVfBeW;*wNg5!O5`L@pvE2bZhZbGpO$Pel1Vme@+8ae>ZoA;$Zfr)6;f(<(;g5%eZB49Ze z-MxUcp&mmn9EpKA*4Zl@Z?)NyQtbZK>RFE;2WnCCxi8ZUYcvw&9ObXEpYIoZ_GtmK zqoKQI7QExP*d6v28XCV;zisqgPdv-r?s4&C6_QRONSBxif5K)|iT|(`|-@i9rO+JhJ-Qj$`)IxvIQL%?PTGEaM`|Ka&mQfWb zhv{G@wN1D+Of`ZWw{T?#V&+ekZ{8CW&whUPY&*R7^Mu zI^8*QYw4Y+f$KZXNy|1ZN15G+5aa$7+E?PJa~lcTr_8;AusIeE%k``}yfU=R!4Zk^%is;qVeEH6GE7PsVkFifC;&3m2L?Qyw`PCfl-}qlj zE?mI%RN5|tfjtxqea_EIiFmL(U*qbg+;6)J?2cC=BLdF4yB0PvOYEQ0>~=c4bRbnT z`Tde1?Pf+&tziZ-GWf*QqPw+0h2n{H5uc|8CG4j>D9+=!hJO&MAovJFxD+>^=&~<# z8LB?$gH?T}lSoTX)0Yn|#Pap<9lYF?xU;Bo`OLhknEdz>Q=0c67=3JOaojjn?~!H^m{cv6lpKzL@!; zwY$}%K01H;Q{1U4eYL=dl7Z3{z+u1(mgV{7^`zO&$oqAkMrWJ4PcrnWb`1=R<*v}c zRO9_0V*?!KEuU$g*0&B_3lsr%`n9|+fT!crUynAK# 
zmKr}`jNpK!=&@`s0+2o4T_L~{A15SsID%93q1cecJT7IEud*{uIR52SUD4!^XPTN) zh;NVEpJ$YkGF2J=1Gk-_Y}=T+^uZO;a8_GVM}Zb#RdOXaC%#FU4PjCh{+RCWoc;CS z=#R9k(Bc#6&uSv6GPl0jUt*opElu{RVnfgQAs->v`As+a>mzJ*Ro78f2Id;d^Y5&( zv=dMKU_7OnIuQKEr$*IQ;x`GpyG03W**jZ`9$Fx`6Vk;1Xg`K`_W(VIA3@2H9~o+O zB%4^}C9GWh&h_U0SkQ^#E2Gg2(z$!+3z(kUhP+gH5-usE=>Opc%p*C>1JC*T!3e^% z-Ip2MJ?4!2djB_PrRzRYu|~Ebiu}uxuf4nf#tWG{Hrm6r48S2+1zG`G6g^LXgUiPg zH@!lL&(dtb!*>l-qg*?T_##BT8?8cpa$V6g(Cnn*5C0WE?UXa< z(AP_bDE?Wf>kRQA@yPNh%G<*4Yw?rP=2wh-*Uja+pBtZskPGL%-|#PFICES-+*jBL z1kNk(MG?~H5qQ?P5L|+j+oQK6Tf$sY6$*+MoW9Z9Bo~w2PVzp0Zni<*7 zbyN<5b%`x*Aq;Qcmj~jNa9R~wDjZan=lce_UnXhhm9)g`TL+1(kkuP`-hQxr=G&jF z=@70L|9zU__|b#1G+c?~_mmhTaxEW#r67G*8iYae#DOpsF?B$dB?JnX-?^o)P-Yxj zZNKJ{D>+f-YT~mY`cbeyT4Yq-amhk#*12Z_kT2^~PVo{V?X4q-+2P*qz|B&_1_I(F zNOozR)Ywz~ddu(W#&{@~%>D93FV#;rEDu5R+r;_;(WqOT^VP^79WiQRVO)H#y&Yy8 zCnUulswHiyce;&-MI{SB)dHVK3xB@PkUiuO%@P%dI*Z2wN_inC9@FS8b^T!t#{Od%2jG}NRKti9cPclde zso!1*%`Wq6g=d8_T1()+>^(3uuSP!x^9ydZA$x@bRF9v=4k^cIITu`L?QCoLFr_pY z8yw+Z?7`Vu)SB-2G|peDVq+k&8U35!Wu+9=f=BE{6j5)`+EKE1V9997@O#KGeCTo) zmb(eelA5ni_vaSd^(Ohn2b1&6pW@R0w^K1jjm0&ctgZ$-R)0l5;c#H>l|{ zTyMg%ZW&5-Hhr8{@6`Rf(SOjAOPD{K$@1Im#uaQ((jTU)VE!UeS*EGTn@AY-0vs?7 zMf%fu7nPrl;YA0C)-!){hI!7_IFCw>QE*xkUJt|em=kPC$8-+xZnm$&1`q;(CCC*( ztA7MAVh@47Ys7@uz|d;7-ZKP#hH{VI67QS5e1g`ym6_c)R++YAOj3Ak175lv7`Jqo zynxUhP4Gkg*8GIczCGvw`t)>lQ#v#}vzN=^Dy~IlAOflHFJu2!fA994n{Ooiefcxq zpamGZH>Gk}xD6t|v?uD5OKy*=U4onmdNY|ZvM9J$#a?zFT2XZF* z&QmcV$FQYOc{&FiK;?hCLN0{n3)^*0h9Xr%KQ?qWzJ6OU>!{AyT9x&T?tcfUeWz*obYTK|lX|Q#r3Q{~kbDG3iXab@!zWC(hu*9kh>|gam zfqn9>lf;j@6XX0~!jmdUAXB zzG+MHKUFFYF;DSWAljCx z;yR8ys%CySw&+vHYv83WMY+;CnxJ4JckvYNneFF{2A>kMGJCaTN>xVz8j>(y*cY`7z~=9B}282&--e2G!}lJ@k0{{cJ|OEeYN zTEq6u(`D(ybd_eHZZ&6g33wEdt<2+FW46XA&3wpqs#iPBSaXkWGYDWBkPw=t@)AVH zq}`@!k5Zj&U-X6Ei3}hg3r%uY$OJG+LyZ|~ofWHNs?ERQ?Ja%R{59xuooE_$LRPIP zt;p4BKyz{C%CRj~)h%i<0tPCfLE5F*gRb+$On{SINHohK3HKf+Nn%}ErV6utyk56| zc+Tdtllk?}ZH8W*S{G6Rl~;l637$tPV7563Z?Nr84&?H((4P;X__@)0S@oY5jNf&y 
z)b#$=458E!a033ppy|&9FZB!wS%bq{hdC3MTW*ILVM;wm{DXuiErul}*0c?FCTjaW zy~y`yt}HSfb%DEd8XmMB$pwj43!1AIR~m&rwtj5se|)e6GBrSCKSm1u6;U6%w6p@8 zTN_y5>ar!{;*#$@K#IE(9Qpep>kWhN4gA+XzKx71(BUo0RCRnA&yg|B`)zKkb{FLB zvg)FofGgQ;hyeqB8{l=Z`BN>D;8bY%T&dN&S9iO2y`8*g{)|PH6{BZXJ~&eaGX-J? zg(UbskQ}$;RKg3EeBHXeyDv{mhw_OXUCvLZXpCOiR2x+v!ha-meK$b=ra#gx%DVHq zwDqK|3@)=28;mHK*qp_^htBIQs!=#b8>LUxgg{8nUY6&ZGNRSIivfA4Nq0Cdf9rl%zEFRY_L|}U70vAcN2zW z>N%)2b*A4F;oiiSM1XTIYswYCfviWv>N}Fu2Ip|Mh$~O;pC+%@oNOo%(v~_AJvk~Z z z)s^3iz0P6#oMunY`(BWJG*vTJU)_eMzRtEFm_I1K%^WHTD~|ZnN+{rVUQwR}Ad!<~ zn5IqeWHh%0+)eMsaqWcdd@}hR0OIn@NMMK5q%ThLb^!E;MOZ-u%?t%MU zjrTZ(GMZ|Zi2PLpeL>7ru+W0e^Pog~k7{q`o=;>>w8K z6gTx^g)Met~!PgG|qD6n9!Vh;7@1}YT0(5nmBJR({j|#cJ z#0YG>L5G6tw`5|+I3f&RKprGt(u+HB8?DX+Dh`q$#%9na(fY`wKkok}UIHR%g%xZh z0#8w`0Y{RmE#o~bccWG3U4MCXQAPVjcb8sV&Y9NKf^!^cg3q7i{CW97<*;Q}rxvu^ zXkMZ6>-8c|qqHa{u03ljyx$OBMtGVnxEf|l%fToti1E_hB<&y1w4Y!ivKQR7C2vK; zuPz=)*f5Lsh}44Z%j#1c(W%;s&e23M2nT;2(B(=`phm7L8Y^mf#CNOCd$M24J%u;F z8{QJ}u+r@FHpgKa?-Lu{PJq*_t(Rj|cESrcVNS!{^*I(hbi8ypCt8f^WtPQA%#%P z2_>~$dmV!TvZ59Rei*Gx&qhjt?26P zcWH;F8&Ya2X&?gS7<$rq!_0%_d?5_o^1szJ2w6*ldbjDok&6M|HvJm=y@!V-Mk^Xm>jy;H5YWkxa#5KEU#qn`A0#x($7<;sj9))TnsDUo%YQyMCtm`nyW)dB)*9>}aU8437)b?hS@( zOOvf^<`u#plD}zRK20*cn z%pn3)baEIffVgEP&U=LewlI@?48xW+<`|cec7i&OJCi@Wq!Y-*29zSs&a4h z#Crx6QA@j-W1+)L^DE#~^4q}`+bx6+h@xKUkR>%Ata8^9Y@e(MC{h^+w(NdkZ5bar zh_uoQKLV53Ez`K=5aobxsWYCcYOQZxc9UyQ? z;CCl`P}ho=dUI6t9~53cmtcIH3woVK8BgB6$vmu!rh-uhIxzhKz6(_g#N>fIpE)-oz&!UWrFb_n1BwF=! 
zoh3s1BDs_YmQ~wGWE{&{X!S(tqTkpfD?Ug|^R?6GV+0too{Y%mLUxQ-`ONR#ebiF7 zXTq>_cvL%b0LYu~!70{g&<4%HHX3)b5t_Cq8)InbzjG^NWOYww8F-}G6)$A*!|_&pY1s6M>w4_p2IIV;j!IUgQXBe@KIaOwzJ`FFjh0;prXp#k6pgXBFUV)it|T zFt=M@a@3q*&>jAu%YohX*DFz55pZT3HvfM~Wkn*aUH%|st}rnGPw(PQUM&Mf2R>1E zM=z1m`_o0axPSdUw2HNBX{>WsNKPz$dDomH;{uEEb(S-lX*O)E^OtXY`F{A#c^yX; z^d~>3m4Xg0XgDpO3NJx_nOi;=m`p#X%TgH>wpuO3;QP*9`*Zfs=FJndigEEgQFGZh ze>eB6|3SF?Dbn;w1Va;-)|v@utzx^TEWU*nCWYo7#2&S~@9FC0#hE_04upJG zoYs;b4g3Rv8;`Kk|A52i#s{|Il`nz0)fK*Q3Vwi|gI5;$9?pqqT|kDmM`&^hX|y5l?8vV37E);{sWP>B}Od^gZ^lcS|S{ zl7Q|B;nP~N>UOyqs>}VqbjR)wL^Rk#;DZmXqZo?j>9ZIH%xW2oLtm#Bb)^+t5+gh~ z`?!{zzd}7j))YRi_`Nfwen4=E{2@9~CPrE;y6~1b+o~{xQu)-4VM$I}np7y>srLq4 zr+YD~{Cn1t^QRS}>&l{*J1rh}19mFBwrArm@T6jhmVv(=KOb=0eO@|3A~f;kOZo(o zYv$|8s@4|0C#P!C>Fm~&yJdTSC=jYP@LJ}pRrPl6y#@bIPn<`m_eNrN)3b$%xCWG1!MgU& zYq)P@C6*m1>zmvm`E#!33uNrJJiy86qFQb~<3egTkIBNI61bM;{ zAQ30*fP^(grnRu1z-P{xF3Y1q_-`S|U=!bzyC$LwyeG?Uzn!kX+0kOmgCAzyt`r*@ z5N+YTd|P$>ArzcVf6p3#u<7XN+kfPo$_-Fk{9PihYsWA$(J6aAc-j#z5S~5NvwUh= zBg*(1b9|1!;*ZY>SS9T*?JcYCti?Yp=W^6{o&;v;Hgq{iVro$MK6vfCEU+r&!Rx zbji^^qV=g_0O@^rFt}DC*DLW{BBscX$u_|~W~*kAVw4liD^vcVayXJs%&6Y4$eYM`EUn0+p8Wp2l34f6lg5X)$WwhWq4-zLYc@ z`;cv1qj^${4>|xN2H^G_NC$KdVWP4#&_Z%1e3(@qgL}0Rf8*k-^)<@oP0pk(EF&7N zf(4B2=RbCvE*M_TobQIvl%&r+ubp^9LcGKhAK32sQ$Qwhk3kcLF}wwj85H$b=AxR; z#mDHwqwvwbI@=Pze?beLK4CJhfDWQ6c_-85-Hcr{P}1qXusOT$M8j}bg5BTh9B ze}}C{3w~JH{p!y8D&L~Q?I@Suh1^{>L(B2T{k&E!KgPqi5U;^*B#0FZl}4BzgD!S( zBrXhQPLK2}Ujv&KtywUf%i#!{jx?sT9TS^G7Pr#S@y7ixyaq}#w zD{ZQ>9|{VPN_8^DsL5u_Ek1u!0hWsk`AZBIJJ~yCHc`wdQBp%cripcM1yC?7Ax_|1 z`kxt!QpJg7vFdwaUemVm_e1sK+dt-RYwztX5Y#9m%pU`6rd?jGW->vd*26`p@~)V$ zVZlEJ;1_?vmYhYBr3&o~VCqNdUv#UyM8K*s4y>wN5v;y<>8@@Iv&a%-zqh7Rl6MEmC-3J{t`e{Rg?9&gu^*I$#9o^H^R(rk-E|h9&*!U-Ovz zq1NYR{V}KSJ!*SC9k5n+3%6x0{@$QYWnCykFic2r3EXq@Um9eF`@^4L8T8K(dw&9% zbmb&3?k=ot%ap7W@%lByU|wLDq`PZ9;oIe*phHne`8k|5{UTK(j}X>l9oF;LtFsa~ z2MW(J2Pd4qtP36I>eWBba({lYDm8}HBt?P?)p-X@Du+NH2{Qoh!$EdwQST9t(>kEU zAK{-a~ymOzXGmhv{yFhWW 
za`E~#+S|q268{j_ktAPTd-}4&H#H+(=^0^guN3KEFBrbZL=;}z05xVQ3qyTSagS{; z*-1r1MwA{gKfkoL{5zT^rB$y{b)16?>GH^08vJTr_a^%)A3U&e&$LQLS}&umwp!Sb zyUu;SDN@hsX0R^XMyt!qJE7u&c?UdDqlIX=H0G8qpD&D`Ziwko4-9Ev}=Oxji)7H6JspCr52vOA{EmFBNx6Ci8(_re)dFiDh_tc_B0jzPhxb44&g{K+Yw5 zpr*V?f0fG%U;rS@C?xpBwM8y~wHzeH6vtZKb^9~vTn37JoG=}bvE<~PS{1~bOk=chH5ST^u@KB z*Q|@T=haYG(INny2~aBjJD_GGoTJ^Fa_GcJK*^-FPR&IwveVO39L$C0)#0w2Y|XYe z1-aCoE}rQMc8olc)n*wV527yH`P(X=W`K412;($M%iN=2Loiw%bJ`^vO&aeS`2*Zt zT-{C6PrtA3{;bgSx*XOO4L1gw!DFC8GaWTh#&(>=;*05WBkI&aU1PFfYDFyvOJjAo zh4NCCzWnI!*Jo^w4fe`{_nAMmKRa4k>H!@rAs|^$fhO(Pfr>?ob|;?98tTSj&FH)O@Cu_c@8h!z4GBAoi#uX*zgCmZ z?XXGWlWvK?XqAU=k^YGssge2Idj9xSM#vKn?`lgx3f2nd6RtEa$*Hu0GP$dIm+I0l zJY|`nmY=+KjZGoJIgw3h6?w#(D&&2YnFtzV5xnV z|8XP0CwNNjs#ovqvRWs_#bu5;`gl0%z-lFGh2*qC-~DP%tc9Ti!Ex69&;ERpnM!1c(l=!Mbjmqw zu?GgCydmYfA(; zE@eSLk1Eg(V0-!eb5VrU{dTUKt_Z65{0i5r*?976zNsJeZyXTfP%P(s%`_OX{bs|{ zq#-k1d&HW$qEO(k%_J*s4HKpaj9=Xd!1X;3$Rmo&F_FFyWw(hW3_!t9*u@X$@}k|r zX5RG^En800J$^RX*L=L@AX$0mV+A{FoOaHlYhNxY1`*l_qc_up+yE1*H1~stzR1I4 znVZLcW?h+m{=3R~w)o)d=|Jl$xvbl$rVr2nlPoMJ-hs6260e}yRsqB4(A$CicL^7b ztDEW@o?}BnFP8wc!7-m4Y@dGkrD$`sS(*3yI)d&aP4z1wc2cy_#`&+VuRUQ5V2dMN z&Yv#qzpG4bBZRgGa;MPT3Vzm&0@{>fS3n&8&O+|byN25i9M!79 zJY9$5#Qk{N(W0?sg>Pdmj>LNReTF}6RBQS;1_mZI9W#UcoF9i_fnHfaF%kGwvC7nD ziw|B&Q^!L6E$VAtis&{*Z^wRgzxBj)`oUKQ#)dXXwvEP*!l!PzksT=C#xSx_USEd@ za2dt)EI$|ryf>0`3tm7!{&jcy1ogs?vzphfhV-Bz+#>to#Zh%}WEZ(VOH(t;QupPp z8u%T>lX+7NXS zDM{b=8`^DB?Ppu{?{JgwpAK14**Z$LB;3i5z^gF)%u^@Vy(xXQ3F>h#PNCxV*8>*r zvh`nazNyqE%K2FHOaKlzKee)PNZ(XQ4siMsuq+D}<)PMd6px0nhYL6SmQ$u&uI}uL zSZuqs@4ay@%0Z}_j*`q7vi;*pY|h=uD9DHAu%V6epm8fV*rs)zw>i+$y

Wi2kB9vvK-Kn(WvvTD}Y_TktzT ziFUB2mCX%?crd!Qe|C}H>*4ehGh%#P z)Z}^?jbCD39imTltPAwiL`GgQpBK2XX^&|~)vI*k{%Hf>W*><4w5s}0MBu$DEWQ}& zn5(4Lf^;s+UMInjY(AGkPu7Jg?;tUfY3lJ6Nv3Byl@rqiDf9VCJOVGO*j3Z5E&iql zI_^9aH`sHGru&LoRBjtxz2`3AeS_`2lavCyssKKKoA#t&rdk;=KaeEr%A=vHy_Q&k z(2%8&qskoj$Goc5LtFyOdN`EQ!Me0eNeiO=j z#&?2qu}DRDzp`d8Yfu}XS^-0Y!V#9Y0AnK2{|Inq2bNUeKJLoR5EGHAa@?m$<`th+ z#CvZR363AWyldZI@?Llb-zm(SHV~i&U?!1OW;A}jj$@mD^;e6@8HB^$M43!Hjsx<& zJaziOH?e+k#8@M@=+g&bhf&zR!z5N10J9bVb z2$;Q2iJj)G0;~c{t=WZ8w((&m*eG7N9oI7=4)dY)NH&ij;*NQr`wy19y-$s>wmE`A zsbu;F28nO+^;UfyY58Jx@}^Szj=gC_Mpv+b7&J$AqQpS)AgG)K@{KXb!6lWbbGSGa zJzwRtBIi(#y9CR}V<+=EXW_N;rXJn{w(COAqL*GMVd`v{ow9xzt6Qj_Ep;+u$c)-J zqHpy6Ve;|e^Pdt%dnadmhq$HU7Eh&U_Xhqdi)51qHqoyyl$z0kA3);qR7TOO9`(P_ zJfy!H$kTrEiW}$DYGTBNsvBiLUi~|%)#F;8Iuk?ZV4<3BwCW$e)fk?swt`-H6!_4O zPg!Ra4VOS=L!*6LV{=MFg`Q{ols*)so)CBLp8J~rd}?!|;(2yuEq~SU=*I<`%2>1I zy)<7<&78+_ohEUs0+~$QMzG~uVIypWz8;&$b|9O(g8{&;@IzNN0R+kx<|H&gb8pH| zP*3zTSnEELll6*{>@qntD%X9bTIoZE^3NarR~{7bJE{Qj79A8&0RQ zDA#L-oBsR{On=9DJ6xYVq9ZH>?8B*UTr7^c%@;AN*e$_-SgdDBMbNI+PcSc@B=seJ z*b2gO)6P5Aq;CB!&U$(O!`P{>cTP_JdQ?z&=S;wtgFg=w%@1T+kJk}?UEtAZPexA~ zKH#rAYq($5)b{vcDzCfCJZ?QK=&&clzwTegNqFE!QhTNWk)MHxB3<%-x~KqnQD8J# zvRQJo)D651zi*yI^aj=V^JmQ~lNZ)*B+TzIHZealePkrFII+{cdyU=b*XtDTQw==7 z2Ky-m+R>o7&356FDu|(n=|?LPe@uP><5?>`g33f^3kp()V~?V!AX9Z*s$1 zx%Ih%u45Oyu}?9c%DRWMl@Mxqcy)LQ)%gu2;5t|Vr8*c$kb-k%vqLZ^`L4N{!~CnA z&ztY<9Fa;Ylf3OM&L16YpoSR@$v@VtavM1R@EL69DH74_#qfIyCFnZlgT!41m`R6NI+FyI6j(KlRaGa zE5xica&QyR_pU<0Kh(lqW#z}8a<%CZs1GgqW?k8SV9E zLNEQS@R>!cQ6f}$e{l93ZHHVL71S2DDAV)>*;$}08EV}T&IMxCx4OO4AXIq%N+ z(%CJS5|KygV+i=eRL;#yd6&Xs_gQzXujujlV}%4YFTBnRE{pm8uDSGP^`#PM|F8wQ znv&sOHA8FltPoRnd<8&1F`Xs9K;nbNy;iIgSvleGEp!0v;(<^OqHH{<)vn&{5jyc{ z+;-5@yUe=jWA=TxjSL;9T)K0`V;7Z=0MMd5ILgz*%vl2V7+QN$!|EhwM%hn}!*b70 zSW%&~WR>RknF~<_F;?V4iKBdQRw@~{5*Q=i)fk7`y<9Yp%`x~z)BH2Ng!Qz64h?z^;P8@I8m_IFecU?Dw+o}8?I!9pU* z^rg?oCQLPP^K$ERi6be!6mz1%_b%+@2~>rU@Md3QB@1{HvoRg52=M*?)fENfDgelQmH1U&7y`h 
znJbXy*!%PNq^bn4OK+(NfaqW!0#ZV3P56<)u{NB%8$(wFKA*)nVFImExPjuX)jlu^ zB@gnZ(a28px2Yy#n^1d~97`H+-4{|1O#2Kq-bo$b9DG81b&-Pm4nL zy6ECDu*db}i7Jqs8BGvQ34^|ns7saI za+u?^Q=f8hE@Z8zHAkCun71VxulrpeXLb@~e|NR_igFP-(h7zt4%O;TgQhw~+^Yrh zic8*=dkYX;HjY>NhTM}jT#}7#_bR%!loqLA#FplAsmDey;9?2Kj)Lx&pw_x{nGR>$ z(!V?jFk1Lw2^ODRAE z6@CVJw+tLB_Ji##HC~f>`^!)JzKFghf2}^8M5Wvr$KJDnl=|Y3KE4QmiqV1PHV_Oo z+SfOrcDr2qYw(7sbkVWp$v1)6e#<8nZ=-2Zr+>T&HenZjWD6e^uqw>+K5MjT{#_fP zTA7Cge7$wnAcq$uIquK|NJN5DcO_!uZ^Sr8Y^+&v$#UV2+{Xz=k93M_XpU}Z=CWzm zvmu^NMFDLX2!;}5w>_x3AdFUu^&|fdF zyOx@@dLn_!K|eA_(AC9Go3*#{ppKv~kuZ(;Ly1Xo3|ttpry4gljjvtFqlC_GXt<`G z8k;vC<}=_(o<6PiXOFzsyLk362LREzVpFjk25gv-ML81u$$l*G=($r+xSdOPtVhT10@h4PZrW%}#_02hl!8`m$y+0AphlVG zoR$imLi~$U-PA1k5B651RT8|>1~mV};_Ki-1FcFb>8OmF)T*~%Z_2F8B>EqNnmVV< zqj2_L0MD`D&W4Una zA||PiX{X-%-nboP{`=ZDUyi=Jg@c@WllDuf>o<$IIp|z-WI!hdhh!@wTX{fm1|+MX z7@)a!iqke0{*)$Ix~}=4aKqtiGRt;12lEH}RkfA)J?+4HpR5AzUHXv@uy$3{fvz1f z-?Q^Xny2`>>FVSw1wpC28)=G4%^BNAp3TKEG`}AgQ=@$!+K%Pq!L+e6@lC0ZgsAo? z-?wS#fuViD_RRhXr2A{2ABrOh4&d(+4wv;d{F4%Y8m|+c6)xLB$?u;6QJszsjNUjJ z?B~o>D2DW^vy}lVCANt`YnN(8&=P{AD<$Q#rf>hsc<8S*b1`2!eiWWHyz1dS7stE{ zk`G$t+=r0lu$g;o8hhZxQjO3Bvd$A)PyD1{cmWEXeqe8_ z&_s=uETFNEt*jj=YRVZh9~+`dTi2~OnfzH%XbDJHHALj%NQS6XETS1K=gcXjomf%^ zUR~mBW~~xxOXAcd+U>@>b)HMVtpk-l1%C-jP7yvYvt8nwwAu>dQPC>c4tW(UJ|`cp z=T{!u)t!~63{{X(4sNKPu;lc!Gg5_@esuXaI=(c%Hm|GWIDd;;AXj+`*cEqZMwMuZ z!+3T7m^>ZGmmJcLQTq|ZWqp|Xq#0i3Jsb3$cX+kjEaEgH*K~%2>$ygSX?s@R{rZnq zFx}n8%9kKvTM=ll%KxpsFaKGQio{_^X}J5<7x=Uw!7Os$h8cZ{Je5E{3HeYjj@|`H zb6mzQCg~1LkkHq1Z2 zZ}pw-p6;}eKF}ZmfL0WxE`#C(=FA5~5>aFoeEs%`g3!WFJKU`M9LS}5JGJY#WAD28 z^haUn>3IRwp@O(sV^*Qwqq}V&`qu|Iko|$B7hF|S4N@7imrB~UMsMfiizl4W%!{8&PLd@L0{h)Zr62LYH146iN@sR&uR%26@ z50Nf^aCG)u@->?N|5hA6DCYktjy_PF{C|qG(g+l1#EN2`>o@i=aqp{L7*YCWq(K09 zj851U?mSK+-NU7io;84a`wh2=%E%`frp~I_9fGL@P~=Ws*rzx)q!*4m^p)|%0Y|F5 z0WTCsQG8$W;%iE}>r4XzUCLFn5^=_N8f3{~XG=hd>~<^|YAD&gq7(g}AmKL6>WHlVCPvTul7$?#mVVe@gB zzv=ApE2^QPHi}RW>lfucSIy%0rpi!km?j$B2%ewDl7V?L%H5XgYmV3sJobs1YxW 
z(%8td1lcDv%fgN0A_mz@a1QrP>)))5Pbt}TPe&*9M@xoJFh|ZYWu9#AqaVqnUf7h- z0!kr{)+UE^!4pwj&7P#G&a88%_T3c#NMPpe(5$pRpY%DmVcs73AoICz^U}ip&tsJg zC3yZtQBITAXaaBw0%XDjlyF4ocK^lpA(+<}6@ECRQaF zux+TWCEaNg1xM2PkhFgS)>8#iM0M;Yp;&9S-ROd}OQ6b&T8a26at*H3ni0WTDBj+h zF3o(Zn&))fd06cC!UrEZd?|+V+5j`+f2`WRR7hLX>K+18Ox==L6p6psNM?kdrsN(k zQGYfw5O7#n^2b*%t+2_|RWzaem|A3*)0P2i7B2EbXZx<|~f6vn(A=^PT zn4<@Pneh=$gb;M8h`8DIxF9#vnPpea^aYb%-bqs@)I#g>?6O!SyifxtfEmq3kesnI z!!}UWJCKsE$E2J_@P3#N8P_U)nZCDc62EJ*xnRP_cH!4|AE~+cXK^0S^51S54hZ}* z>w!ZGZSfD`%nO@gTJ5{DsGhtNV->~O|GX_-0>`6wuitP8<%MgcNcOQ0H{kiY9%{DX z6X}v*2TGk*GbgL`zJl|6BY4n`mA+xEh3_^g^~bZre7=>=3te{&e+b@2CjAg&yN{ETq>6hI!Jem@h62EEWHy={OoRI-+(`4|n=RPml=A**b z=n3lie;tdbHs6`+`;z17+&tvAM4+t5StKPK!EJC9&b}PmjHrzN@e$Ds@nfH|ssrr96hP@Fz73e7zWnI13lnDu4{7qAq@3 z(Dvwp-y;zn)QcPA{CCAvJKEXq9KGqK6e(})kreb1MHK8wL`1v8TF4T`C87CFwV&m< zO5$ro2mNhdRI@PZT51=){39i;%zvv2#%Bl|Pty5ZW1>mwwhh%E+xcb`Be?M(|B38& zNzdQfgC{9Abv~_5&y|$kEtdR#RWHyhC_K;y_=`yATfzxiPXQ*aO9NDwW<(aGAICkB zn#4yXy1ERtd9o_r&7j5#@%(!5%G&w?2E_fMMbPeK|1E#2IcBVBxEn-_YD(hwZ4z5$ zQc)ux3nAwU_nR7xZGmFzEY)WX<{He~rQ(^BI1hQK!E_vo$3QLg%aUi8e-x7SKbTjt z$Z!wtn7VVbH-VZO?=l>}y}zKUQZak~PR^nh>WtjpZNK*?ECil%2S;L_|6i6~r3OfE z^nQZ9je1;fAd6+C8IiX~0$K7B@i=Zb->}%_Mc1a%lm*cN<4-Z_#UA#TkAIVsL`Y&r z5p}~IjY#|l-tywMMyM=&G z5+!8Gtd&F?L9ij(;RnskKQ}csxA;X*?hpjx98;#9JFDUs zc@a&FuhDP*E#w)!iy*~1x*E*^Es<>S0Eu;1G_-B$Ve&huW8WX<^Vxeg>$R}+jpzeE zTImeFx$SZD?eTlIt}8t-%r};SK?cK{VXF6%&v0>U)?|a& zptnB3A4)hs&(6PX!pFqH%DEk19P|D7Nbgg3^nG2<{I8Kn;kLWLfD3)qfh7guy12|b z7Wr&MHc4pR0RO_)a)}qp#?BjMYujhb75wZ6`*r404xE-&4%=vAz;Zy4jjN!a0yP@zwDx?JR$%*~V5x`yh z0l<;10_duts@twtf85=F6@?}Eoin*?bFS-p?#c!1$*a!Hae|WDXBZ3-Wp_j5n3qSz z%to7aDME+UD~F#kVkYl5aGZ2`ngUrPvqq)-XR9iymXa>{vWu~g5|Oj9aTX1F)|(HC-# zdlk1+ycuI}{V~Pb1B0Vzk$oH^jqT8F)$>*ZEImkg#{3;ae!QNHUc94;TK<~(jI)SjoVnW zg~$bFH8X=(EbW$A$xef@lusGj)Va%yHjos8CV=a}ksSObLVL%qK|%hcIZ7m{?iE-5 zlWiBVLF^6Yox*P~f84noM<5tZ{z~eeV&bGur;eE1Gw;DX0^(vlk^_6dd*tY!l3zPi 
zvZUm(^gE7*Ep~+hJ7<-veeNFknF^$Q+dh{$@kV!BZ{RfFE&H;5cHnS7o*!h$NX6X7SN}EuqU4WGl6c9KACUHvlqbty<6)MY&t9+YQZO4 zcU^3(3wOQ1wEemLWD1xBuJp56e}Z?UBN+%-(t{Ro_T$?Ym(LjAldQ6&5tpSGAzbBu zbR#Twq04GT?CF(_*;7aB^-5*6i2~FdZo|sHd`O3Y5~Ba0;5cY-(%KTDw^G7q5k|f~ zr4e2s*zQs>^djwuppS8w675x-zuJdO8^z97d`?0;{>XlSP4esz%Yvp(| zI<8F)i(lu!@ne2qKj-C=!k5-MaY-0m^6kom(?R8>`znk+*-dUOpIzo-5`S)gKile= zbT|2R>Aa}7y&NNMyNd{)ZL@~pRDI)euQv95Tx>l?k95gEvY9kgg$*e`4e+wDcbu~b zi~Q#9*y{LcxB9!NrmOiA8sfuY00~Tx#^2RtL1OEqv`Rv%)QfXIS?8?M7uNY`!eo}N zt^iK2;CNTfrwRR$^Ic}q7s~hA;I|Z=S?P z8?R(9VjSu@xnSrh0!olRK?hD~jGwv_$vu%&hGbOf@=B-E$DG=*yvA=|R0Sv)POdYW zpV-q%Q}naydC=}b328r@O_E7`oqqSabOvB|n`}d3@!9!x3nd*$NY70W%xdNPXc7;aYPG5w7bpxn9qy>YTAFpp6 z>=$HAjaH*Q3FTNu$kKlDBYu1bAVE7TuO?cLt&Jl>hol}oejT7Um=F-9g7K|~~~!y)Z2AR$?xuU%!N~iZI zKdj8Wf7@IvN#vgtjpAMmmdk;YFRsQN6hepIe|G0b7|#rG$?N?eA!gKJT1LYsBehi2 z%V0V}gy&j5FyEMza}xV6WTR(F)Q)MdTjX|WuNyMa8inwVEox4F%GZgj?u0VGh2b0K zC24766)?ZW^{S1@cZ9jp<@aj>Dfn)4|7KPNUoOX*nGRFk@R)v$s&EhZlM3c%+s^UQ zo|Slm8iq)n<^5t|DQWv*&a97iDsbc7KXl5etx>aN#7bv zKHEEEhr?j&vXR$%YU7OWo7~*M9H0L{_DrHR3{)h#Htc%fL`#L%^L(uO{O^viaVdT2 zkeOl$*pPqBySP|8=kEOjWl}qSukYH;#s!BihuIyk-et-_0cT62_I@!Fe*$~PGvlz3 zV(mGgdWbx1ZDj&Ff*eTakV15dBZ;`gR!->TFn*yek^DAS-(c!}t`A{k$G2Y^p$@V? z|ASrD-wOwP*qdbd5|+jv-h*U0P)RnU#6pj5fKygF?_9DE;cJHmz|m0p8Flxg@@Yqp z?4iW(&!j3}XfiUsKh=z+v+EK7r&%YK=zgi16Dukj(wi!71%yAC&tAU$)- zluJl4{&SMJ+Go|JH-^1qL@ES%>h^6MKDesj%!qfi6N#NJyELV5EfQZTU7bxawN)aQ z0($P3ax9+er7ZBSeUtWNXopc9{s;213C(}_Ve3H2xa04~FIw>r5nSI05gy%IO?s3Z zU%+cvft~B3OhF*d#3FQ`)ttx`}T zF_kCOpNj1I>oWij_M!`_zeE#OS1J09HI`TLp$o+vzX4Vtyt-8v+hK(fr*T2`<&f~h z{3mp9G92f(y{l(@8$Wym<%=Vw7=e1N^gr7~`Q!Wcgfo|7F@7&3Co1{Cf^6N&0eMg^ zk=ANJWsV+&7j^d>M0a5FQUvVNwz_L! 
z`P*IOwa&$J5D~dGlqWX1efQxoSEzAkomo#lkKbU?O%C#-K;MA>mEorA_4|J^+|auK z6`TGIkK1}hJ%%X?vj!GQoJuQT1Fb?M= zIFw)!Q=~pR`z5S53R3vpg=2zlZuUut{E#9Yug4NlY*2pJZxNX{exVuDubfNjnv!pI zq&Nqk<4w*IK4q;RPiLo+GrIo=kcZEV$d;@pT0ZY(AV;D+o&qo=jO7--Dz8L#Z1JVq zH+9%5&};a%*~E54EsiEwzLZx%8lunjgqdcND&5CQ$eQ`07A2@5mD9_bpWHm|uCio0 zQ;uKOGUWSmUjhU`SqP-@EMRe(siana>#*Uh>O<}=D!@FS04w@E7LN4nApP!G)ciuW zE+O;v-Tr%{}V!Zf?`53&ml>^&x)j%9ji^opI{^N)h zq6rYH-nc=D79-7$`0#BNTe5y;S=`H-hFqK3BHJJI6`NPooZS^1HLj}D880L>G-=jH=oq~HnRT7Idi=z>S|ln z?`@k>Z!pbvso>ZSjuHE3i)<$X?&hkAwQ-s8eM`mRV>GpzgyDxy_M2H=jas~qg155S zKhE?Xy;0wRHYUSOkR&tz6fE~E+XN)Q=Ak6UxnWJ7opl3QZqSfc-J@eszrI#AUJ+*V z?jS@OP@sPqlvgJXKm`PVCyx4C5{veSN7EFS%EP}XcHuYsCueumf`4l{2|m8-DsR7V zWzpktCjIhFYsEQ`&=>uTrjkfuM%Gj{gUo`ZE4k9XSZ^xRVf9uoN)hQvovY8w+%s8A ztjT8q4mY{8i`~HAGy^Ncf z06RPZLNf0+l|TcBfTEt@g?|z*zJ_8PPXe5=T2#T9-^fh^)IG8&_$TEpRU{(-U+lvJ-kSSRgcJXtM-d!feLT+Jm$!KlKiK7Qc z?+qv)th80dcTylF5G6fBdg}H-1u~^6y<2~)bekAqI&b19a^&MB=cKQZ%zlqQUdE>- z?EmNln4;$kk;ys@h**WFnzhCvreHIGnR|pyFUtHiOsCMhV>P+GbkmS+aVWqvpid@S zH@o&ES8H4(%nUsVz{TSKg2gg^b}hDc(0(+`C5GXy@(=&Pru$0&gZ;8%q94(qDk1)q zIH4fB4J<7Kb>s1H3rkF}u)sRt;#)=R^C4;MsMSaL(=S5~nT-E~F{J)MNaRD3eOvW; z24dh;ZprL_T4MX3mMBm_5U9+N059o)2IZ)JpO2%iX?PB@ZV&$VXg-}qWe>h}=?gvT z4IpUlqot;@(T;giT#0n0i7DiP>OTR@XxiWtuK!@t8*2#4|9phHQ8jG?8xJsqM+VHKFi7>eDyc`fBGdR;%C%Eli#6LZ3;p zvFd|-mhoG&cT&9tl8ba_Xzy#jmnx`p=gWX3y@XL&D+^-NrzU0#tP)H(eK zJJAKd_=}h9Y*Az_+w5vD88-RFJ9`j z$`lF6L_KxZA$z}RQ?R2c(Sbg1do;t=RZOa%LFwB?% z(2MiNi;nAu7BhP;Mv5@3{T)%Ae|_k^_Gj$AU`QBDbXEL}SwRtd`(9E`06Hq=T~3H+ z#`p+hr9&k5xzV(Y*9kTR820Z^r2G&I?1Wu@@9TMZXQtt7;QT1V7b$>%%*>Q$SY!tih^beONxFZ0a@7dzB=HWsiZ;o&Z@Sz?P$N@pnRvo&FT6PV}&_(j9}s*{<| z8<-#NT71(IZyI4LlssrJ(o2BrUBC6CX1fQ*j3*}93o7oHoV!tabUg?5|9%><|Gzv8 zd9ffwu8f7)Yg=mq*NM9tFma*POmpcxZ`XSaVyig@zxiSG^2M}QALWJeOZXG_{zbhx zl2WwaJdfHn-MSu9e6MI3pgA+)9}dEPodr(mMU!QW`iBUM0w%qKO{04O>VEgYvZ9=q~A;Z`G z0#(^Gqt(m@jcYmw1q>-RcWupCqXzUI7TuB_0YKcFrsp0O8HSHlrFb!XFxnl21q=|f z8B$DG8hY9IVGIo(e@0kw7q9Ku7+dMR286i>I{Gd0rb53F?)%*O@~ln(-SCL^Kp(#c 
z$LMqPw0cP-z=oCymDm>p^sj?#IWQ*PNBkcigx`$)y7qc)@?XaVuxDH+#BC}#ALm?P zYk?_){%^uD-qU^RFE}5_F~Eo!J#wOP$lU*tpm9DgF7g;x>+_(0{laA>o-cvILnhAf z)XFd~qO6#G(nlNCsJ-hWFl})zCgI@Lv}MUjQX1p6@E{}sCj#{s5~GS z_dL@e(`ui!7lzyH;t}QmLk^iyR@hL4LWHhugy!}B?i8ZlWf*ONRsI8qs#K)D98*b_ zE|zDO_1fw7TSUf2>b8NELBz+3s1ag?O#UO8{4<#re?2Q628Y}a&6?vZ`wX|EN60a~ zc-XI=)L=Kf;-nFA(n^`QI$(oxDP#SSF&Zf~%ky-ZY$F>rs7?wT7pX2@f8bNkV0PqO z4lHGHX7@ISknH%B!&@(&^#V1n-Q6LD2&84aWulj(^0_L3+zKf7`uF>uA-|h!S>sA7 zOiDVt&!|j$o@|V;an7(qsvUzHayd0*u4M$i&KOvI(T;fzfL{#Yq?kyHmGi^^wBTl0v1lR8 z!|P|SFGbOObAHflCkH>@{k%UQo%7RAIsMm}e#`;Pp@phwpJ>nN`W)77^v zRo)AK?7{pzSqS0}%x5@tV+_SkAPsbc4vKHlHs6gmpU(DiIV~?saZI+VuVv? zc&4uFTH&tPTNCa2)3%R)^^`Y&8#E?@lA$ZTukcrQx;wp2@VN2bv5Mil1q+EbxW}rq zrC2cbQ10$CuCgKIlN6tg=ea1{Ung*|+~>n~Mf>-I!qBK3V}b$}u}rS}m(gNG?;F%kbp zXRKyh%~H^6)dnP(;rCwUzobMp94wD(waV1j<(NkCea~Qct)i0hQtG23`wrc;(ICWQ z%j$y1LjR`WaSf#_R;IT-*@@jlx2M5xW(h^7LwvjTJ~%{ zFHBuqbeU74Bc-wyWaQj*X2Rp zyf@d9+d)lL1A=Q%ekjoJ=of!PEL#$TMT2LaJoQ2(J_mL~hqgX5%PsddQUuOC`ZhW) zL5^YikDQdyUpo_+M);xEi#-#57!UQ$#lmHNK!7auJ7zI61pzMEyB-a^1$V1v8H}^) zTJhU586!3?mIMca`^)Z|Z(FAsqKf%#8KroW_luZdB60;ReL6)RQQ+%UogR(GzDe-o zZV$x8ODoCXwzVXO;OM}`1UVQoIAs&u)UcXnZNhI0>&oxVY$ZvI+ZnP*B2?c3)d%8= zRq)#~yuSXsSV{)8Qna>L;_0OgzVp)KyO_WZ`eO4~E=clN2klJk9=!D!1Pz((#44md z1H)2JU4(~S(ME$yMXoi_Kml{_RjRGfwmKLfC$>T%cLvEMT-R*7zsa5H6rAo}>tQwJh*sjyzSX*`vc@as{x%SzRC^BQeKB zhu_YSi5n|&h1wntdgPBp_kux6d1)6ssIfC-4TxEMo|FSbxc9OsSQ=cgv7T-o^NUe}x1uemr6ExXDAM9E@#$1J9z$~&QEh=`YE^-A7eFNr&jNqOQBZo`= z&##AoS%h^zT%v#YC+xo#z%cT>K?`yPOxKG*3!vdMv)aX=1@O&*(7~I&0q{}68F|nG zxaj?3$`%G%0J6frwk7L~$o2nwoTF!%LIxT7*}w={j51|gU?6*)feojwwFf#h)xNHK z`Pui$Dakh-U})kIrSy?%#_M7{*Xke^&S!h;z1AAa`6dAk8iA6{CwwWtQg3 zqu=1y{bZR!xv#99VK*BOgfmmaz#I}4t=u1n|1g0m>-^Kv=|5_A@iOi$@83p>=Xncx z$^y}u)QrU!zyp;Ih^5IF?i^YHjcQmxG7d;8Jo#6@X@mJmA6rzu&jdBf5dTm8ODayPo(`M_606u+-S!Q8 zUYB+zxsDF*7h4$oHecuV*+;bpbk`h>dCrsuevouuZax^?u6#CflM&lQ Date: Mon, 20 May 2013 13:45:01 -0400 Subject: [PATCH 038/116] Active region boundary parameters need to be bigger when 
running in GGA mode. CGL performance is quite a bit better as a result. -- The troule stems from the fact that we may be trying to genotype indels even though it appears there are only SNPs in the reads. --- .../gatk/walkers/haplotypecaller/HaplotypeCaller.java | 11 ++++++++--- ...llerComplexAndSymbolicVariantsIntegrationTest.java | 4 ++-- .../HaplotypeCallerIntegrationTest.java | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index f065a0d7d..fd8a1968b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -418,7 +418,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In private final static int PADDING_AROUND_OTHERS_FOR_CALLING = 150; // the maximum extent into the full active region extension that we're willing to go in genotyping our events - private final static int MAX_GENOTYPING_ACTIVE_REGION_EXTENSION = 25; + private final static int MAX_DISCOVERY_ACTIVE_REGION_EXTENSION = 25; + private final static int MAX_GGA_ACTIVE_REGION_EXTENSION = 100; private ActiveRegionTrimmer trimmer = null; @@ -549,7 +550,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In haplotypeBAMWriter = HaplotypeBAMWriter.create(bamWriterType, bamWriter, getToolkit().getSAMFileHeader()); trimmer = new ActiveRegionTrimmer(DEBUG, PADDING_AROUND_SNPS_FOR_CALLING, PADDING_AROUND_OTHERS_FOR_CALLING, - MAX_GENOTYPING_ACTIVE_REGION_EXTENSION, getToolkit().getGenomeLocParser()); + UAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ? 
MAX_GGA_ACTIVE_REGION_EXTENSION : MAX_DISCOVERY_ACTIVE_REGION_EXTENSION, + getToolkit().getGenomeLocParser()); } //--------------------------------------------------------------------------------------------------------------- @@ -751,7 +753,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype ); if ( ! dontTrimActiveRegions ) { - return trimActiveRegion(activeRegion, haplotypes, fullReferenceWithPadding, paddedReferenceLoc); + return trimActiveRegion(activeRegion, haplotypes, activeAllelesToGenotype, fullReferenceWithPadding, paddedReferenceLoc); } else { // we don't want to trim active regions, so go ahead and use the old one return new AssemblyResult(haplotypes, activeRegion, fullReferenceWithPadding, paddedReferenceLoc, true); @@ -763,6 +765,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In * * @param originalActiveRegion our full active region * @param haplotypes the list of haplotypes we've created from assembly + * @param activeAllelesToGenotype additional alleles we might need to genotype (can be empty) * @param fullReferenceWithPadding the reference bases over the full padded location * @param paddedReferenceLoc the span of the reference bases * @return an AssemblyResult containing the trimmed active region with all of the reads we should use @@ -771,12 +774,14 @@ public class HaplotypeCaller extends ActiveRegionWalker, In */ private AssemblyResult trimActiveRegion(final ActiveRegion originalActiveRegion, final List haplotypes, + final List activeAllelesToGenotype, final byte[] fullReferenceWithPadding, final GenomeLoc paddedReferenceLoc) { if ( DEBUG ) logger.info("Trimming active region " + originalActiveRegion + " with " + haplotypes.size() + " haplotypes"); EventMap.buildEventMapsForHaplotypes(haplotypes, fullReferenceWithPadding, paddedReferenceLoc, DEBUG); final 
TreeSet allVariantsWithinFullActiveRegion = EventMap.getAllVariantContexts(haplotypes); + allVariantsWithinFullActiveRegion.addAll(activeAllelesToGenotype); final ActiveRegion trimmedActiveRegion = trimmer.trimRegion(originalActiveRegion, allVariantsWithinFullActiveRegion); if ( trimmedActiveRegion == null ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index d6c6a4f33..9ef9fea77 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "90cbcc7e959eb591fb7c5e12d65e0e40"); + "008029ee34e1becd8312e3c4d608033c"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "50894abb9d156bf480881cb5cb2a8a7d"); + "ae8d95ffe77515cc74a55c2afd142826"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 15516d090..2d4223e5c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public 
class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "dbbc884a975587d8e7255ce47b58f438"); + "bb30d0761dc9e2dfd57bfe07b72d06d8"); } @Test From 62fc88f92e239edd44ec05ac75e0f99fdc9e62e7 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 20 May 2013 13:36:30 -0400 Subject: [PATCH 039/116] CombineVariants no longer adds PASS to unfiltered records -- [Delivers #49876703] -- Add integration test and test file -- Update SymbolicAlleles combine variant tests, which was turning unfiltered records into PASS! --- .../variantutils/CombineVariantsIntegrationTest.java | 11 +++++++++++ .../sting/utils/variant/GATKVariantContextUtils.java | 6 +++++- .../walkers/CNV/SymbolicAllelesIntegrationTest.java | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 6c4072962..917cbd542 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -189,4 +189,15 @@ public class CombineVariantsIntegrationTest extends WalkerTest { Arrays.asList("aa926eae333208dc1f41fe69dc95d7a6")); cvExecuteTest("combineDBSNPDuplicateSites:", spec, true); } + + @Test + public void combineLeavesUnfilteredRecordsUnfiltered() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants --no_cmdline_in_header -o %s " + + " -R " + b37KGReference + + " -V " + privateTestDir + "combineVariantsLeavesRecordsUnfiltered.vcf", + 1, + 
Arrays.asList("f8c014d0af7e014475a2a448dc1f9cef")); + cvExecuteTest("combineLeavesUnfilteredRecordsUnfiltered: ", spec, false); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index 4565402b9..b5a6e82a0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -697,6 +697,7 @@ public class GATKVariantContextUtils { int maxAC = -1; final Map attributesWithMaxAC = new LinkedHashMap(); double log10PError = CommonInfo.NO_LOG10_PERROR; + boolean anyVCHadFiltersApplied = false; VariantContext vcWithMaxAC = null; GenotypesContext genotypes = GenotypesContext.create(); @@ -729,6 +730,7 @@ public class GATKVariantContextUtils { log10PError = vc.getLog10PError(); filters.addAll(vc.getFilters()); + anyVCHadFiltersApplied |= vc.filtersWereApplied(); // // add attributes @@ -841,7 +843,9 @@ public class GATKVariantContextUtils { builder.alleles(alleles); builder.genotypes(genotypes); builder.log10PError(log10PError); - builder.filters(filters.isEmpty() ? filters : new TreeSet(filters)); + if ( anyVCHadFiltersApplied ) { + builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters)); + } builder.attributes(new TreeMap(mergeInfoWithMaxAC ? 
attributesWithMaxAC : attributes)); // Trim the padded bases of all alleles if necessary diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index 4aaba0d70..bfabe2bc1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -57,7 +57,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("bf5a09f783ab1fa44774c81f91d10921")); + Arrays.asList("30f66a097987330d42e87da8bcd6be21")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } From 1f3624d2046738a0a8f827489e49b6a8282c7477 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 7 May 2013 11:59:18 -0400 Subject: [PATCH 041/116] Base Recalibrator doesn't recalibrate all reads, so the final output line was confusing --- .../sting/gatk/walkers/bqsr/BaseRecalibrator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index dde49b7db..278317da3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -519,7 +519,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche generatePlots(); } - logger.info("Processed: " + result + " reads"); + logger.info("BaseRecalibrator was able to recalibrate " + result + " reads"); } private RecalibrationTables getRecalibrationTable() { From 58f4b8122221e052143e4d0e4771bd0a52995c17 Mon Sep 17 
00:00:00 2001 From: Eric Banks Date: Tue, 7 May 2013 12:23:24 -0400 Subject: [PATCH 042/116] Count Reads should use a Long instead of an Integer for counts to prevent overflows. Added unit test. --- .../sting/gatk/walkers/qc/CountReads.java | 11 +++-- .../traversals/TraverseReadsUnitTest.java | 8 +-- .../gatk/walkers/qc/CountReadsUnitTest.java | 49 +++++++++++++++++++ 3 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java index 825fcac90..45beea28f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java @@ -66,11 +66,16 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) -public class CountReads extends ReadWalker implements NanoSchedulable { +public class CountReads extends ReadWalker implements NanoSchedulable { public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) { return 1; } - @Override public Integer reduceInit() { return 0; } - @Override public Integer reduce(Integer value, Integer sum) { return value + sum; } + @Override public Long reduceInit() { return 0L; } + + public Long reduce(Integer value, Long sum) { return (long) value + sum; } + + public void onTraversalDone(Long result) { + logger.info("CountReads counted " + result + " reads in the traversal"); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index 8bc373fe8..e8840c39f 100644 --- 
a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -153,11 +153,11 @@ public class TraverseReadsUnitTest extends BaseTest { countReadWalker.onTraversalDone(accumulator); - if (!(accumulator instanceof Integer)) { - fail("Count read walker should return an interger."); + if (!(accumulator instanceof Long)) { + fail("Count read walker should return a Long."); } - if (((Integer) accumulator) != 10000) { - fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator)); + if (!accumulator.equals(new Long(10000))) { + fail("there should be 10000 mapped reads in the index file, there was " + (accumulator)); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java new file mode 100644 index 000000000..cf115cc76 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java @@ -0,0 +1,49 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.testng.Assert; +import org.testng.annotations.Test; + +public class CountReadsUnitTest { + + @Test + public void testReadsDoNotOverflowInt() { + + final CountReads walker = new CountReads(); + + final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L; + + Long sum = walker.reduceInit(); + + for ( long i = 0L; i < moreThanMaxInt; i++ ) { + final Integer x = walker.map(null, null, null); + sum = walker.reduce(x, sum); + } + + Assert.assertEquals(sum.longValue(), moreThanMaxInt); + } +} From 20c7a8903020900b1dbc9af0c7e1877118bbe764 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 7 May 2013 13:53:43 -0400 Subject: [PATCH 043/116] Fixes to get accurate read counts for Read traversals 1. Don't clone the dataSource's metrics object (because then the engine won't continue to get updated counts) 2. Use the dataSource's metrics object in the CountingFilteringIterator and not the first shard's object! 3. Synchronize ReadMetrics.incrementMetrics to prevent race conditions. Also: * Make sure users realize that the read counts are approximate in the print outs. * Removed a lot of unused cruft from the metrics object while I was in there. * Added test to make sure that the ReadMetrics read count does not overflow ints. * Added unit tests for traversal metrics (reads, loci, and active region traversals); these test counts of reads and records. 
--- .../sting/gatk/ReadMetrics.java | 135 +------- .../providers/LocusReferenceView.java | 4 +- .../gatk/datasources/reads/SAMDataSource.java | 14 +- .../sting/gatk/executive/MicroScheduler.java | 3 +- .../filters/CountingFilteringIterator.java | 65 ++-- .../gatk/traversals/TraversalEngine.java | 9 - .../traversals/TraverseActiveRegions.java | 2 - .../gatk/traversals/TraverseDuplicates.java | 1 - .../gatk/traversals/TraverseLociNano.java | 1 - .../gatk/traversals/TraverseReadPairs.java | 1 - .../gatk/traversals/TraverseReadsNano.java | 2 - .../sting/gatk/ReadMetricsUnitTest.java | 321 ++++++++++++++++++ .../gatk/walkers/qc/CountReadsUnitTest.java | 4 +- 13 files changed, 384 insertions(+), 178 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java index aadb57985..f73e7ccd5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java @@ -40,37 +40,27 @@ public class ReadMetrics implements Cloneable { private long nRecords; // How many reads have we processed, along with those skipped for various reasons private long nReads; - private long nSkippedReads; - private long nUnmappedReads; - private long nNotPrimary; - private long nBadAlignments; - private long nSkippedIndels; - private long nDuplicates; - private Map counter = new HashMap(); + + // keep track of filtered records by filter type (class) + private Map filterCounter = new HashMap<>(); /** * Combines these metrics with a set of other metrics, storing the results in this class. * @param metrics The metrics to fold into this class. 
*/ - public void incrementMetrics(ReadMetrics metrics) { + public synchronized void incrementMetrics(ReadMetrics metrics) { nRecords += metrics.nRecords; nReads += metrics.nReads; - nSkippedReads += metrics.nSkippedReads; - nUnmappedReads += metrics.nUnmappedReads; - nNotPrimary += metrics.nNotPrimary; - nBadAlignments += metrics.nBadAlignments; - nSkippedIndels += metrics.nSkippedIndels; - nDuplicates += metrics.nDuplicates; - for(Map.Entry counterEntry: metrics.counter.entrySet()) { + for(Map.Entry counterEntry: metrics.filterCounter.entrySet()) { Class counterType = counterEntry.getKey(); - long newValue = (counter.containsKey(counterType) ? counter.get(counterType) : 0) + counterEntry.getValue(); - counter.put(counterType,newValue); + long newValue = (filterCounter.containsKey(counterType) ? filterCounter.get(counterType) : 0) + counterEntry.getValue(); + filterCounter.put(counterType, newValue); } } /** * Create a copy of the given read metrics. - * @return + * @return a non-null clone */ public ReadMetrics clone() { ReadMetrics newMetrics; @@ -82,13 +72,7 @@ public class ReadMetrics implements Cloneable { } newMetrics.nRecords = nRecords; newMetrics.nReads = nReads; - newMetrics.nSkippedReads = nSkippedReads; - newMetrics.nUnmappedReads = nUnmappedReads; - newMetrics.nNotPrimary = nNotPrimary; - newMetrics.nBadAlignments = nBadAlignments; - newMetrics.nSkippedIndels = nSkippedIndels; - newMetrics.nDuplicates = nDuplicates; - newMetrics.counter = new HashMap(counter); + newMetrics.filterCounter = new HashMap<>(filterCounter); return newMetrics; } @@ -96,16 +80,16 @@ public class ReadMetrics implements Cloneable { public void incrementFilter(SamRecordFilter filter) { long c = 0; - if ( counter.containsKey(filter.getClass()) ) { - c = counter.get(filter.getClass()); + if ( filterCounter.containsKey(filter.getClass()) ) { + c = filterCounter.get(filter.getClass()); } - counter.put(filter.getClass(), c + 1L); + filterCounter.put(filter.getClass(), c + 1L); } 
public Map getCountsByFilter() { - final TreeMap sortedCounts = new TreeMap(); - for(Map.Entry counterEntry: counter.entrySet()) { + final TreeMap sortedCounts = new TreeMap<>(); + for(Map.Entry counterEntry: filterCounter.entrySet()) { sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue()); } return sortedCounts; @@ -143,95 +127,4 @@ public class ReadMetrics implements Cloneable { public void incrementNumReadsSeen() { nReads++; } - - /** - * Gets the cumulative number of reads skipped in the course of this run. - * @return Cumulative number of reads skipped in the course of this run. - */ - public long getNumSkippedReads() { - return nSkippedReads; - } - - /** - * Increments the cumulative number of reads skipped in the course of this run. - */ - public void incrementNumSkippedReads() { - nSkippedReads++; - } - - /** - * Gets the number of unmapped reads skipped in the course of this run. - * @return The number of unmapped reads skipped. - */ - public long getNumUnmappedReads() { - return nUnmappedReads; - } - - /** - * Increments the number of unmapped reads skipped in the course of this run. 
- */ - public void incrementNumUnmappedReads() { - nUnmappedReads++; - } - - /** - * - * @return - */ - public long getNumNonPrimaryReads() { - return nNotPrimary; - } - - /** - * - */ - public void incrementNumNonPrimaryReads() { - nNotPrimary++; - } - - /** - * - * @return - */ - public long getNumBadAlignments() { - return nBadAlignments; - } - - /** - * - */ - public void incrementNumBadAlignments() { - nBadAlignments++; - } - - /** - * - * @return - */ - public long getNumSkippedIndels() { - return nSkippedIndels; - } - - /** - * - */ - public void incrementNumSkippedIndels() { - nSkippedIndels++; - } - - /** - * - * @return - */ - public long getNumDuplicates() { - return nDuplicates; - } - - /** - * - */ - public void incrementNumDuplicates() { - nDuplicates++; - } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index d5b7d0487..b5efbc693 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -176,13 +176,13 @@ public class LocusReferenceView extends ReferenceView { /** * Gets the reference context associated with this particular point or extended interval on the genome. - * @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beoynd current bounds, it will be trimmed down. + * @param genomeLoc Region for which to retrieve the base(s). If region spans beyond contig end or beyond current bounds, it will be trimmed down. * @return The base at the position represented by this genomeLoc. 
*/ public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) { //validateLocation( genomeLoc ); - GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), bounds.getContigIndex(), + GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), genomeLoc.getContigIndex(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) ); int refStart = -1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 1223dd2af..bf25582ab 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -440,9 +440,8 @@ public class SAMDataSource { * @return Cumulative read metrics. */ public ReadMetrics getCumulativeReadMetrics() { - synchronized(readMetrics) { - return readMetrics.clone(); - } + // don't return a clone here because the engine uses a pointer to this object + return readMetrics; } /** @@ -450,9 +449,7 @@ public class SAMDataSource { * @param readMetrics The 'incremental' read metrics, to be incorporated into the cumulative metrics. 
*/ public void incorporateReadMetrics(final ReadMetrics readMetrics) { - synchronized(this.readMetrics) { - this.readMetrics.incrementMetrics(readMetrics); - } + this.readMetrics.incrementMetrics(readMetrics); } public StingSAMIterator seek(Shard shard) { @@ -548,7 +545,10 @@ public class SAMDataSource { MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap); - return applyDecoratingIterators(shard.getReadMetrics(), + // The readMetrics object being passed in should be that of this dataSource and NOT the shard: the dataSource's + // metrics is intended to keep track of the reads seen (and hence passed to the CountingFilteringIterator when + // we apply the decorators), whereas the shard's metrics is used to keep track of the "records" seen. + return applyDecoratingIterators(readMetrics, enableVerification, readProperties.useOriginalBaseQualities(), new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 4ffdc88d8..7077db49c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -52,7 +52,6 @@ import javax.management.ObjectName; import java.io.File; import java.lang.management.ManagementFactory; import java.util.*; -import java.util.concurrent.TimeUnit; /** @@ -368,7 +367,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values()) nSkippedReads += countsByFilter; - logger.info(String.format("%d reads were filtered out during traversal out of %d total (%.2f%%)", + logger.info(String.format("%d reads were filtered out during the traversal out of approximately %d total reads (%.2f%%)", nSkippedReads, cumulativeMetrics.getNumReadsSeen(),
100.0 * MathUtils.ratio(nSkippedReads, cumulativeMetrics.getNumReadsSeen()))); diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java index 3e50632d9..6c926e3cf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java @@ -1,28 +1,28 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; @@ -41,7 +41,8 @@ import java.util.NoSuchElementException; * @author Mark DePristo */ public class CountingFilteringIterator implements CloseableIterator { - private final ReadMetrics runtimeMetrics; + private final ReadMetrics globalRuntimeMetrics; + private final ReadMetrics privateRuntimeMetrics; private final Iterator iterator; private final Collection filters; private SAMRecord next = null; @@ -54,7 +55,8 @@ public class CountingFilteringIterator implements CloseableIterator { * @param filters the filter (which may be a FilterAggregator) */ public CountingFilteringIterator(ReadMetrics metrics, Iterator iterator, Collection filters) { - this.runtimeMetrics = metrics; + this.globalRuntimeMetrics = metrics; + privateRuntimeMetrics = new ReadMetrics(); this.iterator = iterator; this.filters = filters; next = getNextRecord(); @@ -95,6 +97,8 @@ public class CountingFilteringIterator implements CloseableIterator { public void close() { CloserUtil.close(iterator); + // update the global metrics with all the data we collected here + globalRuntimeMetrics.incrementMetrics(privateRuntimeMetrics); } /** @@ -105,12 +109,15 @@ public class CountingFilteringIterator implements CloseableIterator { private SAMRecord getNextRecord() { while (iterator.hasNext()) { SAMRecord record = iterator.next(); - runtimeMetrics.incrementNumReadsSeen(); + + // update only the private copy of the metrics so that we don't need to worry about race conditions + // that can arise when trying to update the global copy; it was agreed that this is the cleanest solution. 
+ privateRuntimeMetrics.incrementNumReadsSeen(); boolean filtered = false; for(SamRecordFilter filter: filters) { if(filter.filterOut(record)) { - runtimeMetrics.incrementFilter(filter); + privateRuntimeMetrics.incrementFilter(filter); filtered = true; break; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 0811e5e70..529b3ef17 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -100,15 +100,6 @@ public abstract class TraversalEngine,Provide // by default there's nothing to do } - /** - * Update the cumulative traversal metrics according to the data in this shard - * - * @param shard a non-null shard - */ - public void updateCumulativeMetrics(final Shard shard) { - updateCumulativeMetrics(shard.getReadMetrics()); - } - /** * Update the cumulative traversal metrics according to the data in this shard * diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index b1e5b907f..cac93cb07 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -259,8 +259,6 @@ public final class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine, final TraverseResults result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum ); sum = result.reduceResult; dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations); - updateCumulativeMetrics(dataProvider.getShard()); } // We have a final map call to execute here to clean up the skipped based from the diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index aed88509e..764011a48 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -90,7 +90,6 @@ public class TraverseReadPairs extends TraversalEngine extends TraversalEngine, final Iterator aggregatedInputs = aggregateMapData(dataProvider); final T result = nanoScheduler.execute(aggregatedInputs, myMap, sum, myReduce); - updateCumulativeMetrics(dataProvider.getShard()); - return result; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java new file mode 100644 index 000000000..32fd35d95 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java @@ -0,0 +1,321 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.*; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.reads.*; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.executive.WindowMaker; +import org.broadinstitute.sting.gatk.filters.ReadFilter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; +import org.broadinstitute.sting.gatk.traversals.*; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.sam.*; +import 
org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.*; + +public class ReadMetricsUnitTest extends BaseTest { + + @Test + public void testReadsSeenDoNotOverflowInt() { + + final ReadMetrics metrics = new ReadMetrics(); + + final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L; + + for ( long i = 0L; i < moreThanMaxInt; i++ ) { + metrics.incrementNumReadsSeen(); + } + + Assert.assertEquals(metrics.getNumReadsSeen(), moreThanMaxInt); + Assert.assertTrue(metrics.getNumReadsSeen() > (long) Integer.MAX_VALUE); + + logger.warn(String.format("%d %d %d", Integer.MAX_VALUE, moreThanMaxInt, Long.MAX_VALUE)); + } + + + // Test the accuracy of the read metrics + + private IndexedFastaSequenceFile reference; + private SAMSequenceDictionary dictionary; + private SAMFileHeader header; + private GATKSAMReadGroupRecord readGroup; + private GenomeLocParser genomeLocParser; + private File testBAM; + + private static final int numReadsPerContig = 250000; + private static final List contigs = Arrays.asList("1", "2", "3"); + + @BeforeClass + private void init() throws IOException { + reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); + dictionary = reference.getSequenceDictionary(); + genomeLocParser = new GenomeLocParser(dictionary); + header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); + header.setSequenceDictionary(dictionary); + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + readGroup = new GATKSAMReadGroupRecord(header.getReadGroup("test")); + + final List reads = new ArrayList<>(); + for ( final String contig : contigs ) { + for ( int i = 1; i <= numReadsPerContig; i++ ) { + reads.add(buildSAMRecord("read" + contig + "_" + i, contig, i)); + } + } + + createBAM(reads); + } + + private void createBAM(final List reads) throws IOException { + testBAM = 
File.createTempFile("TraverseActiveRegionsUnitTest", ".bam"); + testBAM.deleteOnExit(); + + SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, testBAM); + for (GATKSAMRecord read : reads ) { + out.addAlignment(read); + } + out.close(); + + new File(testBAM.getAbsolutePath().replace(".bam", ".bai")).deleteOnExit(); + new File(testBAM.getAbsolutePath() + ".bai").deleteOnExit(); + } + + // copied from LocusViewTemplate + protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) { + GATKSAMRecord record = new GATKSAMRecord(header); + + record.setReadName(readName); + record.setReferenceIndex(dictionary.getSequenceIndex(contig)); + record.setAlignmentStart(alignmentStart); + + record.setCigarString("1M"); + record.setReadString("A"); + record.setBaseQualityString("A"); + record.setReadGroup(readGroup); + + return record; + } + + @Test + public void testCountsFromReadTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + + final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1); + final DummyReadWalker walker = new DummyReadWalker(); + traverseReadsNano.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) { + final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), 
dataSource.seek(shard), reference, new ArrayList()); + traverseReadsNano.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + + Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + @Test + public void testCountsFromLocusTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); + + final TraverseLociNano traverseLociNano = new TraverseLociNano(1); + final DummyLocusWalker walker = new DummyLocusWalker(); + traverseLociNano.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer()) ) { + final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples); + for ( WindowMaker.WindowMakerIterator window : windowMaker ) { + final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + traverseLociNano.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + windowMaker.close(); + } + + //dataSource.close(); + Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + 
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + @Test + public void testCountsFromActiveRegionTraversal() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); + + final List intervals = new ArrayList<>(contigs.size()); + for ( final String contig : contigs ) + intervals.add(genomeLocParser.createGenomeLoc(contig, 1, numReadsPerContig)); + + final TraverseActiveRegions traverseActiveRegions = new TraverseActiveRegions(); + final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + traverseActiveRegions.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer()) ) { + final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples); + for ( WindowMaker.WindowMakerIterator window : windowMaker ) { + final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + traverseActiveRegions.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + windowMaker.close(); + } + traverseActiveRegions.endTraversal(walker, 0); + + 
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); + Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); + } + + class DummyLocusWalker extends LocusWalker { + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } + + class DummyReadWalker extends ReadWalker { + @Override + public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } + + class DummyActiveRegionWalker extends ActiveRegionWalker { + @Override + public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return new ActivityProfileState(ref.getLocus(), 0.0); + } + + @Override + public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java index cf115cc76..8f5541c41 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/CountReadsUnitTest.java @@ -25,10 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.Test; -public class 
CountReadsUnitTest { +public class CountReadsUnitTest extends BaseTest { @Test public void testReadsDoNotOverflowInt() { @@ -45,5 +46,6 @@ public class CountReadsUnitTest { } Assert.assertEquals(sum.longValue(), moreThanMaxInt); + Assert.assertTrue(sum.longValue() > (long) Integer.MAX_VALUE); } } From d9cdc5d006e470fb5429c55efa1e74b0be68a4bc Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 21 May 2013 15:32:24 -0400 Subject: [PATCH 044/116] Optimization: track alleles in the PerReadAlleleLikelihoodMap with a HashSet -- The previous version of PerReadAlleleLikelihoodMap only stored the alleles in an ArrayList, and used ArrayList.contains() to determine if an allele was already present in the map. This is very slow with many alleles. Now keeps both the ArrayList (for get() performance) and a Set of alleles for contains(). --- .../genotyper/PerReadAlleleLikelihoodMap.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index 150e24c51..c8bb7ff79 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -42,13 +42,13 @@ import java.util.*; * For each read, this holds underlying alleles represented by an aligned read, and corresponding relative likelihood. 
*/ public class PerReadAlleleLikelihoodMap { - protected final List alleles; - protected final Map> likelihoodReadMap; + /** A set of all of the alleles, so we can efficiently determine if an allele is already present */ + private final Set allelesSet = new HashSet<>(); + /** A list of the unique alleles, as an ArrayList so we can call get(i) efficiently */ + protected final List alleles = new ArrayList<>(); + protected final Map> likelihoodReadMap = new LinkedHashMap<>(); - public PerReadAlleleLikelihoodMap() { - likelihoodReadMap = new LinkedHashMap>(); - alleles = new ArrayList(); - } + public PerReadAlleleLikelihoodMap() { } /** * Add a new entry into the Read -> ( Allele -> Likelihood ) map of maps. @@ -61,18 +61,20 @@ public class PerReadAlleleLikelihoodMap { if ( a == null ) throw new IllegalArgumentException("Cannot add a null allele to the allele likelihood map"); if ( likelihood == null ) throw new IllegalArgumentException("Likelihood cannot be null"); if ( likelihood > 0.0 ) throw new IllegalArgumentException("Likelihood must be negative (L = log(p))"); + Map likelihoodMap = likelihoodReadMap.get(read); if (likelihoodMap == null){ // LinkedHashMap will ensure iterating through alleles will be in consistent order - likelihoodMap = new LinkedHashMap(); + likelihoodMap = new LinkedHashMap<>(); } likelihoodReadMap.put(read,likelihoodMap); likelihoodMap.put(a,likelihood); - if (!alleles.contains(a)) + if (!allelesSet.contains(a)) { + allelesSet.add(a); alleles.add(a); - + } } public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) { @@ -165,6 +167,7 @@ public class PerReadAlleleLikelihoodMap { } public void clear() { + allelesSet.clear(); alleles.clear(); likelihoodReadMap.clear(); } From a1093ad230f1c24e4b9c2bf0e622d43ff2138e05 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 21 May 2013 15:35:43 -0400 Subject: [PATCH 045/116] Optimization for ActiveRegion.removeAll -- Previous
version took a Collection to remove, and called ArrayList.removeAll() on this collection to remove reads from the ActiveRegion. This can be very slow when there are lots of reads, as ArrayList.removeAll ultimately calls indexOf() that searches through the list calling equals() on each element. New version takes a set, and uses an iterator on the list to remove() from the iterator any read that is in the set. Given that we were already iterating over the list of reads to update the read span, this algorithm is actually simpler and faster than the previous one. -- Update HaplotypeCaller filterNonPassingReads to use a Set not a List. -- Expanded the unit tests a bit for ActiveRegion.removeAll --- .../haplotypecaller/HaplotypeCaller.java | 11 ++++------- .../sting/utils/activeregion/ActiveRegion.java | 14 +++++++++----- .../activeregion/ActiveRegionUnitTest.java | 18 +++++++++++++++--- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index fd8a1968b..24499def8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -678,7 +678,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In if (dontGenotype) return NO_CALLS; // user requested we not proceed // filter out reads from genotyping which fail mapping quality based criteria - final List filteredReads = filterNonPassingReads( assemblyResult.regionForGenotyping ); + final Collection filteredReads = filterNonPassingReads( assemblyResult.regionForGenotyping ); final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); if( assemblyResult.regionForGenotyping.size() == 0 ) { return NO_CALLS; } // no reads remain after filtering so nothing else to
do! @@ -918,17 +918,14 @@ public class HaplotypeCaller extends ActiveRegionWalker, In activeRegion.addAll(DownsamplingUtils.levelCoverageByPosition(ReadUtils.sortReadsByCoordinate(readsToUse), maxReadsInRegionPerSample, minReadsPerAlignmentStart)); } - private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { - final List readsToRemove = new ArrayList<>(); -// logger.info("Filtering non-passing regions: n incoming " + activeRegion.getReads().size()); + private Set filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { + final Set readsToRemove = new LinkedHashSet<>(); for( final GATKSAMRecord rec : activeRegion.getReads() ) { if( rec.getReadLength() < MIN_READ_LENGTH || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); -// logger.info("\tremoving read " + rec + " len " + rec.getReadLength()); } } activeRegion.removeAll( readsToRemove ); -// logger.info("Filtered non-passing regions: n remaining " + activeRegion.getReads().size()); return readsToRemove; } @@ -938,7 +935,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getExtendedLoc().getContig(), padLeft, padRight); } - private Map> splitReadsBySample( final List reads ) { + private Map> splitReadsBySample( final Collection reads ) { final Map> returnMap = new HashMap>(); for( final String sample : samplesList) { List readList = returnMap.get( sample ); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 2f4c1b55d..7f2fe6833 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ 
-336,13 +336,17 @@ public class ActiveRegion implements HasGenomeLocation { /** * Remove all of the reads in readsToRemove from this active region - * @param readsToRemove the collection of reads we want to remove + * @param readsToRemove the set of reads we want to remove */ - public void removeAll( final Collection readsToRemove ) { - reads.removeAll(readsToRemove); + public void removeAll( final Set readsToRemove ) { + final Iterator it = reads.iterator(); spanIncludingReads = extendedLoc; - for ( final GATKSAMRecord read : reads ) { - spanIncludingReads = spanIncludingReads.union( genomeLocParser.createGenomeLoc(read) ); + while ( it.hasNext() ) { + final GATKSAMRecord read = it.next(); + if ( readsToRemove.contains(read) ) + it.remove(); + else + spanIncludingReads = spanIncludingReads.union( genomeLocParser.createGenomeLoc(read) ); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActiveRegionUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActiveRegionUnitTest.java index ad5fd3642..0f9b8531a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActiveRegionUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActiveRegionUnitTest.java @@ -144,7 +144,7 @@ public class ActiveRegionUnitTest extends BaseTest { } @Test(enabled = !DEBUG, dataProvider = "ActiveRegionReads") - public void testActiveRegionReads(final GenomeLoc loc, final GATKSAMRecord read) { + public void testActiveRegionReads(final GenomeLoc loc, final GATKSAMRecord read) throws Exception { final GenomeLoc expectedSpan = loc.union(genomeLocParser.createGenomeLoc(read)); final ActiveRegion region = new ActiveRegion(loc, null, true, genomeLocParser, 0); @@ -176,19 +176,31 @@ public class ActiveRegionUnitTest extends BaseTest { Assert.assertEquals(region.getReadSpanLoc(), expectedSpan); Assert.assertTrue(region.equalExceptReads(region2)); - region.removeAll(Collections.emptyList()); + 
region.removeAll(Collections.emptySet()); Assert.assertEquals(region.getReads(), Collections.singletonList(read)); Assert.assertEquals(region.size(), 1); Assert.assertEquals(region.getExtendedLoc(), loc); Assert.assertEquals(region.getReadSpanLoc(), expectedSpan); Assert.assertTrue(region.equalExceptReads(region2)); - region.removeAll(Collections.singletonList(read)); + region.removeAll(Collections.singleton(read)); Assert.assertEquals(region.getReads(), Collections.emptyList()); Assert.assertEquals(region.size(), 0); Assert.assertEquals(region.getExtendedLoc(), loc); Assert.assertEquals(region.getReadSpanLoc(), loc); Assert.assertTrue(region.equalExceptReads(region2)); + + final GATKSAMRecord read2 = (GATKSAMRecord)read.clone(); + read2.setReadName(read.getReadName() + ".clone"); + + for ( final GATKSAMRecord readToKeep : Arrays.asList(read, read2)) { + region.addAll(Arrays.asList(read, read2)); + final GATKSAMRecord readToDiscard = readToKeep == read ? read2 : read; + region.removeAll(Collections.singleton(readToDiscard)); + Assert.assertEquals(region.getReads(), Arrays.asList(readToKeep)); + Assert.assertEquals(region.size(), 1); + Assert.assertEquals(region.getExtendedLoc(), loc); + } } // ----------------------------------------------------------------------------------------------- From 010034a65024a3c6c2b89f8a68c34efae881c59a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 21 May 2013 16:01:57 -0400 Subject: [PATCH 046/116] Optimization/bugfix for PerReadAlleleLikelihoodMap -- Add() call had a misplaced map.put call, so that we were always putting the result of get() back into the map, when what we really intended was to only put the value back in if the original get() resulted in a null and so initialized the result --- .../sting/utils/genotyper/PerReadAlleleLikelihoodMap.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java 
b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index c8bb7ff79..f253fc9c9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -66,8 +66,8 @@ public class PerReadAlleleLikelihoodMap { if (likelihoodMap == null){ // LinkedHashMap will ensure iterating through alleles will be in consistent order likelihoodMap = new LinkedHashMap<>(); + likelihoodReadMap.put(read,likelihoodMap); } - likelihoodReadMap.put(read,likelihoodMap); likelihoodMap.put(a,likelihood); From 881b2b50abe450f5d28a3d26d5b2a63f826687e0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 21 May 2013 18:19:23 -0400 Subject: [PATCH 047/116] Optimized counting of filtered records by filter. Don't map class to counts in the ReadMetrics (necessitating 2 HashMap lookups for every increment). Instead, wrap the ReadFilters with a counting version and then set those counts only when updating global metrics. --- .../sting/gatk/ReadMetrics.java | 23 +++----- .../filters/CountingFilteringIterator.java | 31 ++++++++--- .../sting/gatk/ReadMetricsUnitTest.java | 53 ++++++++++++++++++- 3 files changed, 84 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java index f73e7ccd5..29372abcd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java @@ -42,7 +42,7 @@ public class ReadMetrics implements Cloneable { private long nReads; // keep track of filtered records by filter type (class) - private Map filterCounter = new HashMap<>(); + private Map filterCounter = new HashMap<>(); /** * Combines these metrics with a set of other metrics, storing the results in this class. 
@@ -51,9 +51,9 @@ public class ReadMetrics implements Cloneable { public synchronized void incrementMetrics(ReadMetrics metrics) { nRecords += metrics.nRecords; nReads += metrics.nReads; - for(Map.Entry counterEntry: metrics.filterCounter.entrySet()) { - Class counterType = counterEntry.getKey(); - long newValue = (filterCounter.containsKey(counterType) ? filterCounter.get(counterType) : 0) + counterEntry.getValue(); + for(Map.Entry counterEntry: metrics.filterCounter.entrySet()) { + final String counterType = counterEntry.getKey(); + final long newValue = (filterCounter.containsKey(counterType) ? filterCounter.get(counterType) : 0) + counterEntry.getValue(); filterCounter.put(counterType, newValue); } } @@ -78,21 +78,12 @@ public class ReadMetrics implements Cloneable { } - public void incrementFilter(SamRecordFilter filter) { - long c = 0; - if ( filterCounter.containsKey(filter.getClass()) ) { - c = filterCounter.get(filter.getClass()); - } - - filterCounter.put(filter.getClass(), c + 1L); + public void setFilterCount(final String filter, final long count) { + filterCounter.put(filter, count); } public Map getCountsByFilter() { - final TreeMap sortedCounts = new TreeMap<>(); - for(Map.Entry counterEntry: filterCounter.entrySet()) { - sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue()); - } - return sortedCounts; + return new TreeMap<>(filterCounter); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java index 6c926e3cf..1942fc19a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/CountingFilteringIterator.java @@ -31,9 +31,7 @@ import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.CloserUtil; import org.broadinstitute.sting.gatk.ReadMetrics; -import java.util.Collection; 
-import java.util.Iterator; -import java.util.NoSuchElementException; +import java.util.*; /** * Filtering Iterator which takes a filter and an iterator and iterates @@ -44,9 +42,27 @@ public class CountingFilteringIterator implements CloseableIterator { private final ReadMetrics globalRuntimeMetrics; private final ReadMetrics privateRuntimeMetrics; private final Iterator iterator; - private final Collection filters; + private final List filters = new ArrayList<>(); private SAMRecord next = null; + // wrapper around ReadFilters to count the number of filtered reads + private final class CountingReadFilter extends ReadFilter { + protected final ReadFilter readFilter; + protected long counter = 0L; + + public CountingReadFilter(final ReadFilter readFilter) { + this.readFilter = readFilter; + } + + @Override + public boolean filterOut(final SAMRecord record) { + final boolean result = readFilter.filterOut(record); + if ( result ) + counter++; + return result; + } + } + /** * Constructor * @@ -58,7 +74,8 @@ public class CountingFilteringIterator implements CloseableIterator { this.globalRuntimeMetrics = metrics; privateRuntimeMetrics = new ReadMetrics(); this.iterator = iterator; - this.filters = filters; + for ( final ReadFilter filter : filters ) + this.filters.add(new CountingReadFilter(filter)); next = getNextRecord(); } @@ -97,8 +114,11 @@ public class CountingFilteringIterator implements CloseableIterator { public void close() { CloserUtil.close(iterator); + // update the global metrics with all the data we collected here globalRuntimeMetrics.incrementMetrics(privateRuntimeMetrics); + for ( final CountingReadFilter filter : filters ) + globalRuntimeMetrics.setFilterCount(filter.readFilter.getClass().getSimpleName(), filter.counter); } /** @@ -117,7 +137,6 @@ public class CountingFilteringIterator implements CloseableIterator { boolean filtered = false; for(SamRecordFilter filter: filters) { if(filter.filterOut(record)) { - 
privateRuntimeMetrics.incrementFilter(filter); filtered = true; break; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java index 32fd35d95..3225a128c 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java @@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.executive.WindowMaker; @@ -263,6 +262,43 @@ public class ReadMetricsUnitTest extends BaseTest { Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); } + @Test + public void testFilteredCounts() { + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + final Collection samFiles = new ArrayList<>(); + final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); + samFiles.add(readerID); + + final List filters = new ArrayList<>(); + filters.add(new EveryTenthReadFilter()); + + final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + filters, + new ArrayList(), + false, (byte)30, false, true); + + engine.setReadsDataSource(dataSource); + + final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1); + final DummyReadWalker 
walker = new DummyReadWalker(); + traverseReadsNano.initialize(engine, walker, null); + + for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) { + final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), dataSource.seek(shard), reference, new ArrayList()); + traverseReadsNano.traverse(walker, dataProvider, 0); + dataProvider.close(); + } + + Assert.assertEquals((long)engine.getCumulativeMetrics().getCountsByFilter().get(EveryTenthReadFilter.class.getSimpleName()), contigs.size() * numReadsPerContig / 10); + } + class DummyLocusWalker extends LocusWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -318,4 +354,19 @@ public class ReadMetricsUnitTest extends BaseTest { return 0; } } + + private final class EveryTenthReadFilter extends ReadFilter { + + private int myCounter = 0; + + @Override + public boolean filterOut(final SAMRecord record) { + if ( ++myCounter == 10 ) { + myCounter = 0; + return true; + } + + return false; + } + } } \ No newline at end of file From d167743852085aaef28d3d202e97805263a14b53 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 10 May 2013 11:09:53 -0400 Subject: [PATCH 048/116] Archived banded logless PairHMM BandedHMM --------- -- An implementation of a linear runtime, linear memory usage banded logless PairHMM. Though about 50% faster than current PairHMM, this implementation will be superseded by the GraphHMM when it becomes available.
The implementation is being archived for future reference Useful infrastructure changes ----------------------------- -- Split PairHMM into a N2MemoryPairHMM that allows smarter implementation to not allocate the double[][] matrices if they don't want, which was previously occurring in the base class PairHMM -- Added functionality (controlled by private static boolean) to write out likelihood call information to a file from inside of LikelihoodCalculationEngine for using in unit or performance testing. Added example of 100kb of data to private/testdata. Can be easily read in with the PairHMMTestData class. -- PairHMM now tracks the number of possible cell evaluations, and the LoglessCachingPairHMM updates the nCellsEvaluated so we can see how many cells are saved by the caching calculation. --- .../haplotypecaller/HaplotypeCaller.java | 1 + .../LikelihoodCalculationEngine.java | 47 ++++- .../sting/utils/pairhmm/LoglessPairHMM.java | 16 +- .../sting/utils/pairhmm/PairHMMTestData.java | 162 ++++++++++++++++++ .../sting/utils/pairhmm/Log10PairHMM.java | 2 +- .../sting/utils/pairhmm/N2MemoryPairHMM.java | 91 ++++++++++ .../sting/utils/pairhmm/PairHMM.java | 58 +------ 7 files changed, 316 insertions(+), 61 deletions(-) create mode 100644 protected/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMTestData.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 24499def8..2ebfbcee9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -867,6 +867,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Override public void onTraversalDone(Integer result) 
{ + likelihoodCalculationEngine.close(); logger.info("Ran local assembly on " + result + " active regions"); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index d5d5f3c09..ca1877142 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -48,20 +48,27 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; -import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.haplotype.HaplotypeScoreComparator; -import org.broadinstitute.sting.utils.pairhmm.*; +import org.broadinstitute.sting.utils.pairhmm.Log10PairHMM; +import org.broadinstitute.sting.utils.pairhmm.LoglessPairHMM; +import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.variant.variantcontext.Allele; +import java.io.File; +import 
java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.PrintStream; import java.util.*; public class LikelihoodCalculationEngine { @@ -71,6 +78,7 @@ public class LikelihoodCalculationEngine { private final byte constantGCP; private final double log10globalReadMismappingRate; private final boolean DEBUG; + private final PairHMM.HMM_IMPLEMENTATION hmmType; private final ThreadLocal pairHMM = new ThreadLocal() { @@ -86,6 +94,10 @@ public class LikelihoodCalculationEngine { } }; + private final static boolean WRITE_LIKELIHOODS_TO_FILE = false; + private final static String LIKELIHOODS_FILENAME = "likelihoods.txt"; + private final PrintStream likelihoodsStream; + /** * The expected rate of random sequencing errors for a read originating from its true haplotype. * @@ -113,12 +125,28 @@ public class LikelihoodCalculationEngine { this.constantGCP = constantGCP; this.DEBUG = debug; this.log10globalReadMismappingRate = log10globalReadMismappingRate; + + if ( WRITE_LIKELIHOODS_TO_FILE ) { + try { + likelihoodsStream = new PrintStream(new FileOutputStream(new File(LIKELIHOODS_FILENAME))); + } catch ( FileNotFoundException e ) { + throw new RuntimeException(e); + } + } else { + likelihoodsStream = null; + } } public LikelihoodCalculationEngine() { this((byte)10, false, PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3); } + public void close() { + if ( likelihoodsStream != null ) likelihoodsStream.close(); + } + + + /** * Initialize our pairHMM with parameters appropriate to the haplotypes and reads we're going to evaluate * @@ -205,6 +233,17 @@ public class LikelihoodCalculationEngine { final double log10l = pairHMM.get().computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype); + if ( WRITE_LIKELIHOODS_TO_FILE ) { + likelihoodsStream.printf("%s %s %s %s %s %s %f%n", + haplotype.getBaseString(), + new String(read.getReadBases()), + 
SAMUtils.phredToFastq(readQuals), + SAMUtils.phredToFastq(readInsQuals), + SAMUtils.phredToFastq(readDelQuals), + SAMUtils.phredToFastq(overallGCP), + log10l); + } + if ( haplotype.isNonReference() ) bestNonReflog10L = Math.max(bestNonReflog10L, log10l); else diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java index ab2a5bb2a..184a2689d 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java @@ -55,7 +55,7 @@ import org.broadinstitute.sting.utils.QualityUtils; * User: rpoplin, carneiro * Date: 10/16/12 */ -public final class LoglessPairHMM extends PairHMM { +public final class LoglessPairHMM extends N2MemoryPairHMM { protected static final double INITIAL_CONDITION = Math.pow(2, 1020); protected static final double INITIAL_CONDITION_LOG10 = Math.log10(INITIAL_CONDITION); @@ -99,8 +99,13 @@ public final class LoglessPairHMM extends PairHMM { } } - if ( ! constantsAreInitialized || recacheReadValues ) - initializeProbabilities(insertionGOP, deletionGOP, overallGCP); + if ( ! 
constantsAreInitialized || recacheReadValues ) { + initializeProbabilities(transition, insertionGOP, deletionGOP, overallGCP); + + // note that we initialized the constants + constantsAreInitialized = true; + } + initializePriors(haplotypeBases, readBases, readQuals, hapStartIndex); for (int i = 1; i < paddedReadLength; i++) { @@ -159,7 +164,7 @@ public final class LoglessPairHMM extends PairHMM { "overallGCP != null" }) @Ensures("constantsAreInitialized") - private void initializeProbabilities(final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) { + protected static void initializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) { for (int i = 0; i < insertionGOP.length; i++) { final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE); transition[i+1][matchToMatch] = QualityUtils.qualToProb((byte) qualIndexGOP); @@ -169,9 +174,6 @@ public final class LoglessPairHMM extends PairHMM { transition[i+1][matchToDeletion] = QualityUtils.qualToErrorProb(deletionGOP[i]); transition[i+1][deletionToDeletion] = QualityUtils.qualToErrorProb(overallGCP[i]); } - - // note that we initialized the constants - constantsAreInitialized = true; } /** diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMTestData.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMTestData.java new file mode 100644 index 000000000..3d8137ecf --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMTestData.java @@ -0,0 +1,162 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. 
with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.utils.pairhmm; + +import net.sf.samtools.SAMUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.text.XReadLines; + +import java.io.*; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +/** + * Useful single class carrying test data for PairHMMs (for use in benchmarking and unit tests) + * + * User: depristo + * Date: 5/12/13 + * Time: 3:52 PM + * To change this template use File | Settings | File Templates. 
+ */ +public class PairHMMTestData { + public final String ref; + private final String read; + public final byte[] baseQuals, insQuals, delQuals, gcp; + public final double log10l; + + PairHMMTestData(String ref, String read, byte[] baseQuals, byte[] insQuals, byte[] delQuals, byte[] gcp, double log10l) { + this.ref = ref; + this.read = read; + this.baseQuals = baseQuals; + this.insQuals = insQuals; + this.delQuals = delQuals; + this.gcp = gcp; + this.log10l = log10l; + } + + PairHMMTestData(String ref, String read, final byte qual) { + this.ref = ref; + this.read = read; + this.baseQuals = this.insQuals = this.delQuals = Utils.dupBytes(qual, read.length()); + this.gcp = Utils.dupBytes((byte)10, read.length()); + this.log10l = -1; + } + + public double runHMM(final PairHMM hmm) { + hmm.initialize(getRead().length(), ref.length()); + return hmm.computeReadLikelihoodGivenHaplotypeLog10(ref.getBytes(), getRead().getBytes(), + baseQuals, insQuals, delQuals, gcp, true); + } + + @Override + public String toString() { + return "Info{" + + "ref='" + ref + '\'' + + ", read='" + getRead() + '\'' + + ", log10l=" + log10l + + '}'; + } + + public static void runHMMs(final PairHMM hmm, final List data, final boolean runSingly) { + if ( runSingly ) { + for ( final PairHMMTestData datum : data ) + datum.runHMM(hmm); + } else { + // running in batch mode + final PairHMMTestData first = data.get(0); + int maxHaplotypeLen = calcMaxHaplotypeLen(data); + hmm.initialize(first.getRead().length(), maxHaplotypeLen); + for ( final PairHMMTestData datum : data ) { + hmm.computeReadLikelihoodGivenHaplotypeLog10(datum.ref.getBytes(), datum.getRead().getBytes(), + datum.baseQuals, datum.insQuals, datum.delQuals, datum.gcp, false); + + } + } + } + + public static int calcMaxHaplotypeLen(final List data) { + int maxHaplotypeLen = 0; + for ( final PairHMMTestData datum : data ) + maxHaplotypeLen = Math.max(maxHaplotypeLen, datum.ref.length()); + return maxHaplotypeLen; + } + + public static Map> 
readLikelihoods(final File file) throws IOException { + final Map> results = new LinkedHashMap<>(); + + InputStream in = new FileInputStream(file); + if ( file.getName().endsWith(".gz") ) { + in = new GZIPInputStream(in); + } + + for ( final String line : new XReadLines(in) ) { + final String[] parts = line.split(" "); + final PairHMMTestData info = new PairHMMTestData( + parts[0], parts[1], + SAMUtils.fastqToPhred(parts[2]), + SAMUtils.fastqToPhred(parts[3]), + SAMUtils.fastqToPhred(parts[4]), + SAMUtils.fastqToPhred(parts[5]), + Double.parseDouble(parts[6])); + + if ( ! results.containsKey(info.read) ) { + results.put(info.read, new LinkedList()); + } + final List byHap = results.get(info.read); + byHap.add(info); + } + + return results; + } + + public String getRead() { + return read; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java index ab6c321e8..ddc1a4559 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java @@ -38,7 +38,7 @@ import java.util.Arrays; * User: rpoplin, carneiro * Date: 3/1/12 */ -public final class Log10PairHMM extends PairHMM { +public final class Log10PairHMM extends N2MemoryPairHMM { /** * Should we use exact log10 calculation (true), or an approximation (false)? 
*/ diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java new file mode 100644 index 000000000..a091a0716 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java @@ -0,0 +1,91 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.utils.pairhmm; + +import com.google.java.contract.Requires; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.MathUtils; + +import java.util.Arrays; + +/** + * Superclass for PairHMM that want to use a full read x haplotype matrix for their match, insertion, and deletion matrix + * + * User: rpoplin + * Date: 10/16/12 + */ +abstract class N2MemoryPairHMM extends PairHMM { + protected double[][] transition = null; // The transition probabilities cache + protected double[][] prior = null; // The prior probabilities cache + protected double[][] matchMatrix = null; + protected double[][] insertionMatrix = null; + protected double[][] deletionMatrix = null; + + /** + * Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths + * + * Note: Do not worry about padding, just provide the true max length of the read and haplotype. The HMM will take care of the padding. + * + * @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM + * @param readMaxLength the max length of reads we want to use with this PairHMM + */ + public void initialize( final int readMaxLength, final int haplotypeMaxLength ) { + super.initialize(readMaxLength, haplotypeMaxLength); + + matchMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; + insertionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; + deletionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; + } + + /** + * Print out the core hmm matrices for debugging + */ + protected void dumpMatrices() { + dumpMatrix("matchMetricArray", matchMatrix); + dumpMatrix("insertionMatrix", insertionMatrix); + dumpMatrix("deletionMatrix", deletionMatrix); + } + + /** + * Print out in a human readable form the matrix for debugging + * @param name the name of this matrix + * @param matrix the matrix of values + */ + @Requires({"name != null", "matrix != null"}) + private void 
dumpMatrix(final String name, final double[][] matrix) { + System.out.printf("%s%n", name); + for ( int i = 0; i < matrix.length; i++) { + System.out.printf("\t%s[%d]", name, i); + for ( int j = 0; j < matrix[i].length; j++ ) { + if ( Double.isInfinite(matrix[i][j]) ) + System.out.printf(" %15s", String.format("%f", matrix[i][j])); + else + System.out.printf(" % 15.5e", matrix[i][j]); + } + System.out.println(); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index 6b57a1354..85ac97f95 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -40,8 +40,6 @@ import java.util.Arrays; public abstract class PairHMM { protected final static Logger logger = Logger.getLogger(PairHMM.class); - protected double[][] transition = null; // The transition probabilities cache - protected double[][] prior = null; // The prior probabilities cache protected boolean constantsAreInitialized = false; protected byte[] previousHaplotypeBases; @@ -52,12 +50,9 @@ public abstract class PairHMM { /* PairHMM as implemented for the UnifiedGenotyper. 
Uses log10 sum functions accurate to only 1E-4 */ ORIGINAL, /* Optimized version of the PairHMM which caches per-read computations and operations in real space to avoid costly sums of log10'ed likelihoods */ - LOGLESS_CACHING + LOGLESS_CACHING, } - protected double[][] matchMatrix = null; - protected double[][] insertionMatrix = null; - protected double[][] deletionMatrix = null; protected int maxHaplotypeLength, maxReadLength; protected int paddedMaxReadLength, paddedMaxHaplotypeLength; protected int paddedReadLength, paddedHaplotypeLength; @@ -82,18 +77,12 @@ public abstract class PairHMM { paddedMaxReadLength = readMaxLength + 1; paddedMaxHaplotypeLength = haplotypeMaxLength + 1; - matchMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; - insertionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; - deletionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; - previousHaplotypeBases = null; constantsAreInitialized = false; initialized = true; } - - /** * Compute the total probability of read arising from haplotypeBases given base substitution, insertion, and deletion * probabilities. 
@@ -152,44 +141,15 @@ public abstract class PairHMM { * To be overloaded by subclasses to actually do calculation for #computeReadLikelihoodGivenHaplotypeLog10 */ @Requires({"readBases.length == readQuals.length", "readBases.length == insertionGOP.length", "readBases.length == deletionGOP.length", - "readBases.length == overallGCP.length", "matchMatrix!=null", "insertionMatrix!=null", "deletionMatrix!=null"}) + "readBases.length == overallGCP.length", "matchMatrix!=null", "insertionMatrix!=null", "deletionMatrix!=null"}) protected abstract double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases, - final byte[] readBases, - final byte[] readQuals, - final byte[] insertionGOP, - final byte[] deletionGOP, - final byte[] overallGCP, - final int hapStartIndex, - final boolean recacheReadValues ); - - /** - * Print out the core hmm matrices for debugging - */ - protected void dumpMatrices() { - dumpMatrix("matchMetricArray", matchMatrix); - dumpMatrix("insertionMatrix", insertionMatrix); - dumpMatrix("deletionMatrix", deletionMatrix); - } - - /** - * Print out in a human readable form the matrix for debugging - * @param name the name of this matrix - * @param matrix the matrix of values - */ - @Requires({"name != null", "matrix != null"}) - private void dumpMatrix(final String name, final double[][] matrix) { - System.out.printf("%s%n", name); - for ( int i = 0; i < matrix.length; i++) { - System.out.printf("\t%s[%d]", name, i); - for ( int j = 0; j < matrix[i].length; j++ ) { - if ( Double.isInfinite(matrix[i][j]) ) - System.out.printf(" %15s", String.format("%f", matrix[i][j])); - else - System.out.printf(" % 15.5e", matrix[i][j]); - } - System.out.println(); - } - } + final byte[] readBases, + final byte[] readQuals, + final byte[] insertionGOP, + final byte[] deletionGOP, + final byte[] overallGCP, + final int hapStartIndex, + final boolean recacheReadValues ); /** * Compute the first position at which two haplotypes differ From 
da21924b44342321150d1f05ee9f6850969ffb44 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 22 May 2013 14:22:54 -0400 Subject: [PATCH 049/116] Make the missing targets output never use stdout Problem -------- Diagnose Targets is outputting missing intervals to stdout if the argument -missing is not provided Solution -------- Make it NOT default to stdout [Delivers #50386741] --- .../gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java index ebe2192b4..a6cbc1da3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java @@ -116,7 +116,7 @@ final class ThresHolder { @Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The proportion of the loci needed for calling POOR_QUALITY", required = false) public double qualityStatusThreshold = 0.50; - @Output(fullName = "missing_intervals", shortName = "missing", doc ="Produces a file with the intervals that don't pass filters", required = false) + @Output(fullName = "missing_intervals", shortName = "missing", defaultToStdout = false, doc ="Produces a file with the intervals that don't pass filters", required = false) public PrintStream missingTargets = null; public final List locusMetricList = new LinkedList(); From 85905dba9238a0f558d939d66e23eaca758ccf1d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 23 May 2013 15:15:56 -0400 Subject: [PATCH 050/116] Bugfix for GGA mode in UG silently ignoring indels -- Started by Mark. Finished up by Ryan. 
-- GGA mode still respected glm argument for SNP and INDEL models, so that you would silently fail to genotype indels at all if the -glm INDEL wasn't provided, but you'd still emit the sites, so you'd see records in the VCF but all alleles would be no calls. -- https://www.pivotaltracker.com/story/show/48924339 for more information -- [resolves #48924339] --- .../genotyper/UnifiedGenotyperEngine.java | 38 +++++++++---------- ...perGeneralPloidySuite1IntegrationTest.java | 2 +- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3380efcc9..fc11706e5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -83,6 +83,9 @@ public class UnifiedGenotyperEngine { public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3; public static final double HUMAN_INDEL_HETEROZYGOSITY = 1e-4; + private static final int SNP_MODEL = 0; + private static final int INDEL_MODEL = 1; + public enum OUTPUT_MODE { /** produces calls only at variant sites */ EMIT_VARIANTS_ONLY, @@ -693,13 +696,13 @@ public class UnifiedGenotyperEngine { } private void determineGLModelsToUse() { - String modelPrefix = ""; if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY ) modelPrefix = GPSTRING; - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { - modelPrefix += UAC.GLmodel.name().toUpperCase().replaceAll("BOTH",""); + // GGA mode => must initialize both the SNP and indel models + if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES || + UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { 
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); } @@ -712,31 +715,24 @@ public class UnifiedGenotyperEngine { private List getGLModelsToUse(final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext rawContext) { - if ( UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) return modelsToUse; + if ( modelsToUse.size() != 2 ) + throw new IllegalStateException("GGA mode assumes that we have initialized both the SNP and indel models but found " + modelsToUse); + // if we're genotyping given alleles then we need to choose the model corresponding to the variant type requested - final List GGAmodel = new ArrayList(1); final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); - if ( vcInput == null ) - return GGAmodel; // no work to be done - if ( vcInput.isSNP() ) { - // use the SNP model unless the user chose INDEL mode only - if ( modelsToUse.size() == 2 || modelsToUse.get(0).name().endsWith("SNP") ) - GGAmodel.add(modelsToUse.get(0)); + if ( vcInput == null ) { + return Collections.emptyList(); // no work to be done + } else if ( vcInput.isSNP() ) { + return Collections.singletonList(modelsToUse.get(SNP_MODEL)); + } else if ( vcInput.isIndel() || vcInput.isMixed() ) { + return Collections.singletonList(modelsToUse.get(INDEL_MODEL)); + } else { + return Collections.emptyList(); // No support for other types yet } - else if ( vcInput.isIndel() || vcInput.isMixed() ) { - // use the INDEL model unless the user chose SNP mode only - if ( modelsToUse.size() == 2 ) - GGAmodel.add(modelsToUse.get(1)); - else if ( modelsToUse.get(0).name().endsWith("INDEL") ) - GGAmodel.add(modelsToUse.get(0)); - } - // No support for other types yet - - return GGAmodel; } /** diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 88506fda3..1cfc41a27 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -74,7 +74,7 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_GGA_Pools() { - executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "3f7d763c654f1d708323f369ea4a099b"); + executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "ceb105e3db0f2b993e3d725b0d60b6a3"); } @Test(enabled = true) From f1affa9fbb061720a7b67d2e26083006f206aeb7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 28 May 2013 14:58:50 -0400 Subject: [PATCH 051/116] Turn off downsampling for DiagnoseTargets Diagnose targets should never be downsampled. 
(and I didn't know there was a default downsampling going on for locus walkers) --- .../walkers/diagnostics/diagnosetargets/DiagnoseTargets.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index 4bd08294b..bde324e3c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -52,6 +52,7 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; @@ -110,6 +111,7 @@ import java.util.*; @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @By(value = DataSource.READS) @PartitionBy(PartitionType.INTERVAL) +@Downsample(by = DownsampleType.NONE) public class DiagnoseTargets extends LocusWalker { private static final String AVG_INTERVAL_DP_KEY = "IDP"; From 38e765f00d340bc600fa6645b29f69b5551fc445 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 28 May 2013 15:29:43 -0400 Subject: [PATCH 052/116] Somehow the index of exampleDBSNP.vcf was missing This was missed when we added all the indices of our testdata --- public/testdata/exampleDBSNP.vcf.idx | Bin 0 -> 330 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 public/testdata/exampleDBSNP.vcf.idx diff --git a/public/testdata/exampleDBSNP.vcf.idx 
b/public/testdata/exampleDBSNP.vcf.idx new file mode 100644 index 0000000000000000000000000000000000000000..7239e366f87c568e1698e7959710af9af88be441 GIT binary patch literal 330 zcmZ9I-Acni5QRrXe4A|cC)q@=Vm4|(Q)#!wyReC?DaIyjQt6HF;FV7x_(muCKZs8cdCz)cB0Hn_*N{t5LtL+I5Xa zJ&X@(R7mgCOW$g7u_*4m*ZL8@-!0Fo`TSzc@tsa=U1o_~&a?6+E3V{qd7C8#P%jLk zI0`WjFb;^%`G4C&ic(}Nz@9ZQ&U_MJ!f70Y;0Wb`x=QAGQlyzR62?eGsPA)1V@%l} tGMz082?TQxc`DE9#$dp{6S$!J9p0vooVwUnKhxOvkMn71%4r@>{RLQ8OlJT9 literal 0 HcmV?d00001 From a7cb599945889d9011e5d71d9ffcee94c4bba5ee Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 28 May 2013 16:52:28 -0400 Subject: [PATCH 053/116] Require a minimum dcov value of 200 for Locus and ActiveRegion walkers when downsampling to coverage -Throw a UserException if a Locus or ActiveRegion walker is run with -dcov < 200, since low dcov values can result in problematic downsampling artifacts for locus-based traversals. -Read-based traversals continue to have no minimum for -dcov, since dcov for read traversals controls the number of reads per alignment start position, and even a dcov value of 1 might be safe/desirable in some circumstances. -Also reorganize the global downsampling defaults so that they are specified as annotations to the Walker, LocusWalker, and ActiveRegionWalker classes rather than as constants in the DownsamplingMethod class. -The default downsampling settings have not been changed: they are still -dcov 1000 for Locus and ActiveRegion walkers, and -dt NONE for all other walkers. 
--- .../HaplotypeCallerIntegrationTest.java | 2 +- .../sting/gatk/GenomeAnalysisEngine.java | 3 +- .../gatk/downsampling/DownsamplingMethod.java | 35 ++++++--------- .../gatk/walkers/ActiveRegionWalker.java | 2 + .../sting/gatk/walkers/LocusWalker.java | 2 + .../sting/gatk/walkers/Walker.java | 2 + .../reads/DownsamplerBenchmark.java | 4 +- .../DownsamplingIntegrationTest.java | 44 +++++++++++++++++++ 8 files changed, 68 insertions(+), 26 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingIntegrationTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 2d4223e5c..91e80b45c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -101,7 +101,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerInsertionOnEdgeOfContig() { - HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae"); + HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 314de29c7..3a8431dca 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -463,9 +463,8 @@ public class GenomeAnalysisEngine { DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod(); DownsamplingMethod walkerMethod = 
WalkerManager.getDownsamplingMethod(walker); - DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker); - DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod); + DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : walkerMethod; method.checkCompatibilityWithWalker(walker); return method; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java index 5aa27608d..8e92b1ff3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java @@ -61,20 +61,10 @@ public class DownsamplingMethod { public static final DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; /** - * Default target coverage for locus-based traversals + * Don't allow dcov values below this threshold for locus-based traversals (ie., Locus + * and ActiveRegion walkers), as they can result in problematic downsampling artifacts */ - public static final int DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000; - - /** - * Default downsampling method for locus-based traversals - */ - public static final DownsamplingMethod DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD = - new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_COVERAGE, null); - - /** - * Default downsampling method for read-based traversals - */ - public static final DownsamplingMethod DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD = NONE; + public static final int MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS = 200; public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction ) { @@ -118,6 +108,16 @@ public class DownsamplingMethod { if ( isLocusTraversal && type == DownsampleType.ALL_READS 
&& toCoverage != null ) { throw new UserException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not currently supported (though it is supported for ReadWalkers)."); } + + // For locus traversals, ensure that the dcov value (if present) is not problematically low + if ( isLocusTraversal && type != DownsampleType.NONE && toCoverage != null && + toCoverage < MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS ) { + throw new UserException(String.format("Locus-based traversals (ie., Locus and ActiveRegion walkers) require " + + "a minimum -dcov value of %d when downsampling to coverage. Values less " + + "than this can produce problematic downsampling artifacts while providing " + + "only insignificant improvements in memory usage in most cases.", + MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS)); + } } public String toString() { @@ -139,13 +139,4 @@ public class DownsamplingMethod { return builder.toString(); } - - public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker ) { - if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) { - return DEFAULT_LOCUS_TRAVERSAL_DOWNSAMPLING_METHOD; - } - else { - return DEFAULT_READ_TRAVERSAL_DOWNSAMPLING_METHOD; - } - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 9595b8f42..962f81d0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -31,6 +31,7 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import 
org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; @@ -57,6 +58,7 @@ import java.util.*; @PartitionBy(PartitionType.READ) @ActiveRegionTraversalParameters(extension=50,maxRegion=1500) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) +@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000) @RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 788bf11f9..9997723b8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; @@ -44,6 +45,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @Requires({DataSource.READS,DataSource.REFERENCE}) @PartitionBy(PartitionType.LOCUS) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) +@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 1000) @RemoveProgramRecords public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 522414c00..40485596d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -29,6 +29,7 @@ import net.sf.samtools.SAMSequenceDictionary; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.samples.Sample; @@ -50,6 +51,7 @@ import java.util.List; */ @ReadFilters(MalformedReadFilter.class) @PartitionBy(PartitionType.NONE) +@Downsample(by = DownsampleType.NONE) @BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) @BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) @DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class}) diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java index 00389be97..25c71d570 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.datasources.reads; import com.google.caliper.Param; +import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.qc.CountLoci; /** @@ 
-86,7 +88,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark { }, PER_SAMPLE { @Override - DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci()); } + DownsamplingMethod create() { return WalkerManager.getDownsamplingMethod(LocusWalker.class); } }; abstract DownsamplingMethod create(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingIntegrationTest.java new file mode 100644 index 000000000..85f9169da --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingIntegrationTest.java @@ -0,0 +1,44 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.gatk.downsampling; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.Test; + +public class DownsamplingIntegrationTest extends WalkerTest { + + @Test + public void testDetectLowDcovValueWithLocusTraversal() { + final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T CountLoci -R " + publicTestDir + "exampleFASTA.fasta -I " + publicTestDir + "exampleBAM.bam -o %s " + + "-dcov " + (DownsamplingMethod.MINIMUM_SAFE_COVERAGE_TARGET_FOR_LOCUS_BASED_TRAVERSALS - 1), + 1, + UserException.class + ); + executeTest("testDetectLowDcovValueWithLocusTraversal", spec); + } +} From eb206e9f716f73a7fe6b6a69d8fb02fa4a08b05b Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 29 May 2013 14:43:57 -0400 Subject: [PATCH 054/116] Fix confusing log output from the engine -ReadShardBalancer was printing out an extra "Loading BAM index data for next contig" message at traversal end, which was confusing users and making the GATK look stupid. Suppress the extraneous message, and reword the log messages to be less confusing. -Improve log message output when initializing the shard iterator in GenomeAnalysisEngine. Don't mention BAMs when there are none, and say "Preparing for traversal" rather than mentioning the meaningless-for-users concept of "shard strategy" -These log messages are needed because the operations they surround might take a while under some circumstances, and the user should know that the GATK is actively doing something rather than being hung. 
--- .../broadinstitute/sting/gatk/GenomeAnalysisEngine.java | 6 ++++-- .../sting/gatk/datasources/reads/ReadShardBalancer.java | 8 +++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 3a8431dca..de7439b85 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -293,9 +293,11 @@ public class GenomeAnalysisEngine { // create the output streams initializeOutputStreams(microScheduler.getOutputTracker()); - logger.info("Creating shard strategy for " + readsDataSource.getReaderIDs().size() + " BAM files"); + // Initializing the shard iterator / BAM schedule might take some time, so let the user know vaguely what's going on + logger.info("Preparing for traversal" + + (readsDataSource.getReaderIDs().size() > 0 ? String.format(" over %d BAM files", readsDataSource.getReaderIDs().size()) : "")); Iterable shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals); - logger.info("Done creating shard strategy"); + logger.info("Done preparing for traversal"); // execute the microscheduler, storing the results return microScheduler.execute(this.walker, shardStrategy); diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancer.java index 7772dbc1f..dc1b80efd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancer.java @@ -177,7 +177,9 @@ public class ReadShardBalancer extends ShardBalancer { currentContigFilePointer = null; List nextContigFilePointers = new ArrayList(); - logger.info("Loading BAM index data for next 
contig"); + if ( filePointers.hasNext() ) { + logger.info("Loading BAM index data"); + } while ( filePointers.hasNext() ) { @@ -215,8 +217,8 @@ public class ReadShardBalancer extends ShardBalancer { } if ( currentContigFilePointer != null ) { - logger.info("Done loading BAM index data for next contig"); - logger.debug(String.format("Next contig FilePointer: %s", currentContigFilePointer)); + logger.info("Done loading BAM index data"); + logger.debug(String.format("Next FilePointer: %s", currentContigFilePointer)); } } From a5a68c09fac06d2ebc3ee6b9b94b31bded7cbfdd Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 29 May 2013 14:42:15 -0400 Subject: [PATCH 055/116] Fix for the "Removed too many insertions, header is now negative" bug in ReduceReads. The problem ultimately was that ReadUtils.readStartsWithInsertion() ignores leading hard/softclips, but ReduceReads does not. So I refactored that method to include a boolean argument as to whether or not clips should be ignored. Also rebased so that return type is no longer a Pair. Added unit test to cover this situation. 
--- .../reducereads/HeaderElement.java | 2 +- .../reducereads/SlidingWindow.java | 2 +- .../reducereads/SlidingWindowUnitTest.java | 19 ++++++++++ .../sting/utils/sam/ReadUtils.java | 35 ++++++++++--------- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java index 38b9e957b..ba2c2ae56 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java @@ -207,7 +207,7 @@ public class HeaderElement { public void removeInsertionToTheRight() { this.insertionsToTheRight--; if (insertionsToTheRight < 0) - throw new ReviewedStingException("Removed too many insertions, header is now negative!"); + throw new ReviewedStingException("Removed too many insertions, header is now negative at position " + location); } public boolean hasInsertionToTheRight() { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 8843d6270..0425af3df 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -1199,7 +1199,7 @@ public class SlidingWindow { } // Special case for leading insertions before the beginning of the sliding read - if ( ReadUtils.readStartsWithInsertion(read).getFirst() && (readStart == headerStart || headerStart < 0) ) { + if ( (readStart == headerStart || headerStart < 0) && ReadUtils.readStartsWithInsertion(read.getCigar(), false) != null ) { // create a new first element to the window 
header with no bases added header.addFirst(new HeaderElement(readStart - 1)); // this allows the first element (I) to look at locationIndex - 1 when we update the header and do the right thing diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java index 56ad02084..c9bb2f084 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java @@ -89,6 +89,25 @@ public class SlidingWindowUnitTest extends BaseTest { return variantRegionBitset; } + ////////////////////////////////////////////////////////////////////////////////////// + //// Test for leading softclips immediately followed by an insertion in the CIGAR //// + ////////////////////////////////////////////////////////////////////////////////////// + + @Test(enabled = true) + public void testLeadingClipThenInsertion() { + + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 10); + read.setReadBases(Utils.dupBytes((byte) 'A', 10)); + read.setBaseQualities(Utils.dupBytes((byte)30, 10)); + read.setMappingQuality(30); + read.setCigarString("2S2I6M"); + + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 1); + slidingWindow.addRead(read); + Pair, CompressionStash> result = slidingWindow.close(null); + + } + ////////////////////////////////////////////////////////////////////////////////////// //// This section tests the findVariantRegions() method and related functionality //// ////////////////////////////////////////////////////////////////////////////////////// diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 
0db3aa043..5b15fdd1b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -424,9 +424,9 @@ public class ReadUtils { // clipping the left tail and first base is insertion, go to the next read coordinate // with the same reference coordinate. Advance to the next cigar element, or to the // end of the read if there is no next element. - Pair firstElementIsInsertion = readStartsWithInsertion(cigar); - if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst()) - readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), cigar.getReadLength() - 1); + final CigarElement firstElementIsInsertion = readStartsWithInsertion(cigar); + if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion != null) + readCoord = Math.min(firstElementIsInsertion.getLength(), cigar.getReadLength() - 1); return readCoord; } @@ -595,25 +595,28 @@ public class ReadUtils { } /** - * Checks if a read starts with an insertion. It looks beyond Hard and Soft clips - * if there are any. - * - * @param read - * @return A pair with the answer (true/false) and the element or null if it doesn't exist + * @see #readStartsWithInsertion(net.sf.samtools.Cigar, boolean) with ignoreClipOps set to true */ - public static Pair readStartsWithInsertion(GATKSAMRecord read) { - return readStartsWithInsertion(read.getCigar()); + public static CigarElement readStartsWithInsertion(final Cigar cigarForRead) { + return readStartsWithInsertion(cigarForRead, true); } - public static Pair readStartsWithInsertion(final Cigar cigar) { - for (CigarElement cigarElement : cigar.getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.INSERTION) - return new Pair(true, cigarElement); + /** + * Checks if a read starts with an insertion. 
+ * + * @param cigarForRead the CIGAR to evaluate + * @param ignoreClipOps should we ignore S and H operators when evaluating whether an I operator is at the beginning? + * @return the element if it's a leading insertion or null otherwise + */ + public static CigarElement readStartsWithInsertion(final Cigar cigarForRead, final boolean ignoreClipOps) { + for ( final CigarElement cigarElement : cigarForRead.getCigarElements() ) { + if ( cigarElement.getOperator() == CigarOperator.INSERTION ) + return cigarElement; - else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP) + else if ( !ignoreClipOps || (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP) ) break; } - return new Pair(false, null); + return null; } /** From 61af37d0d25a553eab872d86ebeed38927398ff3 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 29 May 2013 16:17:43 -0400 Subject: [PATCH 056/116] Create a new normalDistributionLog10 function that is unit tested for use in the VQSR. 
--- .../GaussianMixtureModel.java | 5 +-- .../broadinstitute/sting/utils/MathUtils.java | 42 +++++++++++++++++-- .../activeregion/BandPassActivityProfile.java | 3 +- .../sting/utils/MathUtilsUnitTest.java | 17 ++++++++ .../activeregion/ActivityProfileUnitTest.java | 2 +- 5 files changed, 59 insertions(+), 10 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index eef9da84a..92b0d4df2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -47,7 +47,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import Jama.Matrix; -import cern.jet.random.Normal; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MathUtils; @@ -243,12 +242,10 @@ public class GaussianMixtureModel { public Double evaluateDatumInOneDimension( final VariantDatum datum, final int iii ) { if(datum.isNull[iii]) { return null; } - final Normal normal = new Normal(0.0, 1.0, null); final double[] pVarInGaussianLog10 = new double[gaussians.size()]; int gaussianIndex = 0; for( final MultivariateGaussian gaussian : gaussians ) { - normal.setState( gaussian.mu[iii], gaussian.sigma.get(iii, iii) ); - pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + Math.log10( normal.pdf( datum.annotations[iii] ) ); + pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + MathUtils.normalDistributionLog10(gaussian.mu[iii], gaussian.sigma.get(iii, iii), datum.annotations[iii]); } return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java 
b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 38c131bc6..c8cf9d6a1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -63,6 +63,8 @@ public class MathUtils { */ public final static double LOG10_P_OF_ZERO = -1000000.0; public final static double FAIR_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5); + private final static double NATURAL_LOG_OF_TEN = Math.log(10.0); + private final static double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI); static { log10Cache = new double[LOG10_CACHE_SIZE]; @@ -301,12 +303,46 @@ public class MathUtils { return 1; } - public static double NormalDistribution(final double mean, final double sd, final double x) { - double a = 1.0 / (sd * Math.sqrt(2.0 * Math.PI)); - double b = Math.exp(-1.0 * (Math.pow(x - mean, 2.0) / (2.0 * sd * sd))); + /** + * Calculate f(x) = Normal(x | mu = mean, sigma = sd) + * @param mean the desired mean of the Normal distribution + * @param sd the desired standard deviation of the Normal distribution + * @param x the value to evaluate + * @return a well-formed double + */ + public static double normalDistribution(final double mean, final double sd, final double x) { + final double a = 1.0 / (sd * SQUARE_ROOT_OF_TWO_TIMES_PI); + final double b = Math.exp(-1.0 * (square(x - mean) / (2.0 * square(sd)))); return a * b; } + /** + * Calculate f(x) = log10 ( Normal(x | mu = mean, sigma = sd) ) + * @param mean the desired mean of the Normal distribution + * @param sd the desired standard deviation of the Normal distribution + * @param x the value to evaluate + * @return a well-formed double + */ + + public static double normalDistributionLog10(final double mean, final double sd, final double x) { + if( sd < 0 ) + throw new IllegalArgumentException("sd: Standard deviation of normal must be >0"); + if ( ! wellFormedDouble(mean) || ! wellFormedDouble(sd) || ! 
wellFormedDouble(x) ) + throw new IllegalArgumentException("mean, sd, or, x : Normal parameters must be well formatted (non-INF, non-NAN)"); + final double a = -1.0 * Math.log10(sd * SQUARE_ROOT_OF_TWO_TIMES_PI); + final double b = -1.0 * (square(x - mean) / (2.0 * square(sd))) / NATURAL_LOG_OF_TEN; + return a + b; + } + + /** + * Calculate f(x) = x^2 + * @param x the value to square + * @return x * x + */ + public static double square(final double x) { + return x * x; + } + /** * Calculates the log10 of the binomial coefficient. Designed to prevent * overflows even with very large numbers. diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java index f2bc86dfc..f352bc332 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.MathUtils; -import java.util.ArrayList; import java.util.Collection; import java.util.LinkedList; @@ -108,7 +107,7 @@ public class BandPassActivityProfile extends ActivityProfile { final int bandSize = 2 * filterSize + 1; final double[] kernel = new double[bandSize]; for( int iii = 0; iii < bandSize; iii++ ) { - kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii); + kernel[iii] = MathUtils.normalDistribution(filterSize, sigma, iii); } return MathUtils.normalizeFromRealSpace(kernel); } diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index 27af8ec68..e4c74a0ad 100644 --- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils; +import cern.jet.random.Normal; import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -398,4 +399,20 @@ public class MathUtilsUnitTest extends BaseTest { Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0,-3.0,2.0}, new double[]{6.0,7.0,8.0}),10.0,1e-3); Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}),1.0,1e-3); } + + @Test + public void testNormalDistribution() { + final double requiredPrecision = 1E-10; + + final Normal n = new Normal(0.0, 1.0, null); + for( final double mu : new double[]{-5.0, -3.2, -1.5, 0.0, 1.2, 3.0, 5.8977} ) { + for( final double sigma : new double[]{1.2, 3.0, 5.8977} ) { + for( final double x : new double[]{-5.0, -3.2, -1.5, 0.0, 1.2, 3.0, 5.8977} ) { + n.setState(mu, sigma); + Assert.assertEquals(n.pdf(x), MathUtils.normalDistribution(mu, sigma, x), requiredPrecision); + Assert.assertEquals(Math.log10(n.pdf(x)), MathUtils.normalDistributionLog10(mu, sigma, x), requiredPrecision); + } + } + } + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 9be250b8e..f208815f7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -450,7 +450,7 @@ public class ActivityProfileUnitTest extends BaseTest { private double[] makeGaussian(final int mean, final int range, final double sigma) { final double[] gauss = new double[range]; for( int iii = 0; iii < range; iii++ ) { - gauss[iii] = MathUtils.NormalDistribution(mean, sigma, iii) + ActivityProfile.ACTIVE_PROB_THRESHOLD; + gauss[iii] = MathUtils.normalDistribution(mean, sigma, 
iii) + ActivityProfile.ACTIVE_PROB_THRESHOLD; } return gauss; } From b16de45ce436fda30ef425f80b30e25a65c9f741 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 30 May 2013 16:53:23 -0400 Subject: [PATCH 057/116] Command-line read filters are now applied before Walker default filters -- This allows us to use -rf ReassignMappingQuality to reassign mapping qualities to 60 *before* the BQSR filters them out with MappingQualityUnassignedFilter. -- delivers #50222251 --- .../sting/gatk/GenomeAnalysisEngine.java | 9 +++- .../gatk/EngineFeaturesIntegrationTest.java | 48 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index de7439b85..6fa1b741c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -344,11 +344,18 @@ public class GenomeAnalysisEngine { * @return A collection of available filters. */ public Collection createFilters() { - final List filters = WalkerManager.getReadFilters(walker,this.getFilterManager()); + final List filters = new LinkedList<>(); + + // First add the user requested filters if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0) filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList)); for(final String filterName: this.getArguments().readFilters) filters.add(this.getFilterManager().createByName(filterName)); + + // now add the walker default filters. 
This ordering is critical important if + // users need to apply filters that fix up reads that would be removed by default walker filters + filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager())); + return Collections.unmodifiableList(filters); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 8d0874ea1..c60c6430c 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -26,12 +26,20 @@ package org.broadinstitute.sting.gatk; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ReadFilters; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.qc.ErrorThrowing; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.PrintStream; import java.util.Arrays; /** @@ -126,4 +134,44 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { executeTest(cfg.toString(), spec); } } + + // -------------------------------------------------------------------------------- + // + // Test that read filters are being applied in the order we expect + // + // -------------------------------------------------------------------------------- + + @ReadFilters({MappingQualityUnavailableFilter.class}) + public static class 
DummyReadWalkerWithMapqUnavailableFilter extends ReadWalker { + @Output + PrintStream out; + + @Override + public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { + return 1; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + @Override + public void onTraversalDone(Integer result) { + out.println(result); + } + } + + @Test(enabled = true) + public void testUserReadFilterAppliedBeforeWalker() { + WalkerTestSpec spec = new WalkerTestSpec("-R " + b37KGReference + " -I " + privateTestDir + "allMAPQ255.bam" + + " -T DummyReadWalkerWithMapqUnavailableFilter -o %s -L MT -rf ReassignMappingQuality", + 1, Arrays.asList("ecf27a776cdfc771defab1c5d19de9ab")); + executeTest("testUserReadFilterAppliedBeforeWalker", spec); + } } \ No newline at end of file From 199476eae1431653a2d71f3552f7f89c6e7478af Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Thu, 30 May 2013 22:48:37 -0400 Subject: [PATCH 058/116] Three squashed commits: 1) Add in checks for input parameters in MathUtils method. I was careful to use the bottom-level methods whenever possible, so that parameters don't needlessly go through multiple checks (so for instance, the parameters n and k for a binomial aren't checked on log10binomial, but rather in the log10binomialcoefficient subroutine). This addresses JIRA GSA-767 Unit tests pass (we'll let bamboo deal with the integrations) 2) Address reviewer comments (change UserExceptions to IllegalArgumentExceptions). 3) .isWellFormedDouble() tests for infinity and not strictly positive infinity. Allow negative-infinity values for log10sumlog10 (as these just correspond to p=0). After these commits, unit and integration tests now pass, and GSA-767 is done. 
rebase and fix conflict: public/java/src/org/broadinstitute/sting/utils/MathUtils.java --- build.xml | 2 +- .../broadinstitute/sting/utils/MathUtils.java | 42 ++++++++++++++++--- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/build.xml b/build.xml index 2e9df4d5e..d9b37f4de 100644 --- a/build.xml +++ b/build.xml @@ -39,7 +39,7 @@ - + diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index c8cf9d6a1..49157a206 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.lang.IllegalArgumentException; import java.math.BigDecimal; import java.util.*; @@ -205,15 +206,16 @@ public class MathUtils { } /** - * Converts a real space array of probabilities into a log10 array + * Converts a real space array of numbers (typically probabilities) into a log10 array * * @param prRealSpace * @return */ public static double[] toLog10(final double[] prRealSpace) { double[] log10s = new double[prRealSpace.length]; - for (int i = 0; i < prRealSpace.length; i++) + for (int i = 0; i < prRealSpace.length; i++) { log10s[i] = Math.log10(prRealSpace[i]); + } return log10s; } @@ -229,6 +231,9 @@ public class MathUtils { return maxValue; for (int i = start; i < finish; i++) { + if ( Double.isNaN(log10p[i]) || log10p[i] == Double.POSITIVE_INFINITY ) { + throw new IllegalArgumentException("log10p: Values must be non-infinite and non-NAN"); + } sum += Math.pow(10.0, log10p[i] - maxValue); } @@ -311,8 +316,12 @@ public class MathUtils { * @return a well-formed double */ public static double normalDistribution(final double mean, final double sd, final double x) { - final double a = 1.0 / 
(sd * SQUARE_ROOT_OF_TWO_TIMES_PI); - final double b = Math.exp(-1.0 * (square(x - mean) / (2.0 * square(sd)))); + if( sd < 0 ) + throw new IllegalArgumentException("sd: Standard deviation of normal must be >0"); + if ( ! wellFormedDouble(mean) || ! wellFormedDouble(sd) || ! wellFormedDouble(x) ) + throw new IllegalArgumentException("mean, sd, or, x : Normal parameters must be well formatted (non-INF, non-NAN)"); + double a = 1.0 / (sd * Math.sqrt(2.0 * Math.PI)); + double b = Math.exp(-1.0 * (Math.pow(x - mean, 2.0) / (2.0 * sd * sd))); return a * b; } @@ -359,6 +368,13 @@ public class MathUtils { * @see #binomialCoefficient(int, int) with log10 applied to result */ public static double log10BinomialCoefficient(final int n, final int k) { + if ( n < 0 ) { + throw new IllegalArgumentException("n: Must have non-negative number of trials"); + } + if ( k > n || k < 0 ) { + throw new IllegalArgumentException("k: Must have non-negative number of successes, and no more successes than number of trials"); + } + return log10Factorial(n) - log10Factorial(k) - log10Factorial(n - k); } @@ -382,6 +398,8 @@ public class MathUtils { * @see #binomialProbability(int, int, double) with log10 applied to result */ public static double log10BinomialProbability(final int n, final int k, final double log10p) { + if ( log10p > 1e-18 ) + throw new IllegalArgumentException("log10p: Log-probability must be 0 or less"); double log10OneMinusP = Math.log10(1 - Math.pow(10, log10p)); return log10BinomialCoefficient(n, k) + log10p * k + log10OneMinusP * (n - k); } @@ -441,10 +459,20 @@ public class MathUtils { * @return */ public static double log10MultinomialCoefficient(final int n, final int[] k) { + if ( n < 0 ) + throw new IllegalArgumentException("n: Must have non-negative number of trials"); double denominator = 0.0; + int sum = 0; for (int x : k) { + if ( x < 0 ) + throw new IllegalArgumentException("x element of k: Must have non-negative observations of group"); + if ( x > n ) + throw new 
IllegalArgumentException("x element of k, n: Group observations must be bounded by k"); denominator += log10Factorial(x); + sum += x; } + if ( sum != n ) + throw new IllegalArgumentException("k and n: Sum of observations in multinomial must sum to total number of trials"); return log10Factorial(n) - denominator; } @@ -459,9 +487,11 @@ public class MathUtils { */ public static double log10MultinomialProbability(final int n, final int[] k, final double[] log10p) { if (log10p.length != k.length) - throw new UserException.BadArgumentValue("p and k", "Array of log10 probabilities must have the same size as the array of number of sucesses: " + log10p.length + ", " + k.length); + throw new IllegalArgumentException("p and k: Array of log10 probabilities must have the same size as the array of number of sucesses: " + log10p.length + ", " + k.length); double log10Prod = 0.0; for (int i = 0; i < log10p.length; i++) { + if ( log10p[i] > 1e-18 ) + throw new IllegalArgumentException("log10p: Log-probability must be <= 0"); log10Prod += log10p[i] * k[i]; } return log10MultinomialCoefficient(n, k) + log10Prod; @@ -504,7 +534,7 @@ public class MathUtils { */ public static double multinomialProbability(final int[] k, final double[] p) { if (p.length != k.length) - throw new UserException.BadArgumentValue("p and k", "Array of log10 probabilities must have the same size as the array of number of sucesses: " + p.length + ", " + k.length); + throw new IllegalArgumentException("p and k: Array of log10 probabilities must have the same size as the array of number of sucesses: " + p.length + ", " + k.length); int n = 0; double[] log10P = new double[p.length]; From b5b9d745a7b5b12927414e104b6259e25dd26508 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 22 May 2013 10:35:19 -0400 Subject: [PATCH 059/116] New implementation of the GGA mode in the HaplotypeCaller -- We now inject the given alleles into the reference haplotype and add them to the graph. 
-- Those paths are read off of the graph and then evaluated with the appropriate marginalization for GGA mode. -- This unifies how Smith-Waterman is performed between discovery and GGA modes. -- Misc minor cleanup in several places. --- .../haplotypecaller/DeBruijnAssembler.java | 24 ++- .../haplotypecaller/GenotypingEngine.java | 103 +++---------- .../haplotypecaller/HaplotypeCaller.java | 38 +++-- .../LikelihoodCalculationEngine.java | 7 +- .../haplotypecaller/LocalAssemblyEngine.java | 143 +++++------------- .../readthreading/ReadThreadingAssembler.java | 9 +- .../readthreading/ReadThreadingGraph.java | 4 +- .../indels/PairHMMIndelErrorModel.java | 6 +- .../DeBruijnAssemblerUnitTest.java | 6 +- ...lexAndSymbolicVariantsIntegrationTest.java | 4 +- .../HaplotypeCallerIntegrationTest.java | 2 +- .../LocalAssemblyEngineUnitTest.java | 7 + .../ReadThreadingAssemblerUnitTest.java | 2 +- .../broadinstitute/sting/utils/MathUtils.java | 12 +- .../genotyper/PerReadAlleleLikelihoodMap.java | 3 +- .../sting/utils/haplotype/Haplotype.java | 34 +---- 16 files changed, 135 insertions(+), 269 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 48972dfd5..3c0642f83 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -77,6 +77,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { private final static int NUM_PATHS_PER_GRAPH = 25; private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers private static final int GRAPH_KMER_STEP = 6; + private static final int GGA_MODE_ARTIFICIAL_COUNTS = 1000; private final int minKmer; private final int 
onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms; @@ -92,8 +93,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } @Override - protected List assemble(final List reads, final Haplotype refHaplotype) { - final List graphs = new LinkedList(); + protected List assemble(final List reads, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { + final List graphs = new LinkedList<>(); final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1; if( maxKmer < minKmer) { @@ -106,7 +107,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { continue; if ( debug ) logger.info("Creating de Bruijn graph for " + kmer + " kmer using " + reads.size() + " reads"); - DeBruijnGraph graph = createGraphFromSequences( reads, kmer, refHaplotype); + DeBruijnGraph graph = createGraphFromSequences(reads, kmer, refHaplotype, activeAlleleHaplotypes); if( graph != null ) { // graphs that fail during creation ( for example, because there are cycles in the reference graph ) will show up here as a null graph object // do a series of steps to clean up the raw assembly graph to make it analysis-ready if ( debugGraphTransformations ) graph.printGraph(new File("unpruned.dot"), pruneFactor); @@ -133,7 +134,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } @Requires({"reads != null", "kmerLength > 0", "refHaplotype != null"}) - protected DeBruijnGraph createGraphFromSequences( final List reads, final int kmerLength, final Haplotype refHaplotype ) { + protected DeBruijnGraph createGraphFromSequences( final List reads, final int kmerLength, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { final DeBruijnGraph graph = new DeBruijnGraph(kmerLength); final DeBruijnGraphBuilder builder = new DeBruijnGraphBuilder(graph); @@ -142,8 +143,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { // something went wrong, so abort right now with a null graph return null; - // now go through the graph already 
seeded with the reference sequence and add the read kmers to it - if ( ! addReadKmersToGraph(builder, reads) ) + // now go through the graph already seeded with the reference sequence and add the read kmers to it as well as the artificial GGA haplotypes + if ( ! addReadKmersToGraph(builder, reads, activeAlleleHaplotypes) ) // some problem was detected adding the reads to the graph, return null to indicate we failed return null; @@ -156,11 +157,20 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { * * @param builder a debruijn graph builder to add the read kmers to * @param reads a non-null list of reads whose kmers we want to add to the graph + * @param activeAlleleHaplotypes a list of haplotypes to add to the graph for GGA mode * @return true if we successfully added the read kmers to the graph without corrupting it in some way */ - protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads) { + protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads, final List activeAlleleHaplotypes) { final int kmerLength = builder.getKmerSize(); + // First pull kmers out of the artificial GGA haplotypes and throw them on the graph + for( final Haplotype haplotype : activeAlleleHaplotypes ) { + final int end = haplotype.length() - kmerLength; + for( int start = 0; start < end; start++ ) { + builder.addKmerPairFromSeqToGraph( haplotype.getBases(), start, GGA_MODE_ARTIFICIAL_COUNTS ); + } + } + // Next pull kmers out of every read and throw them on the graph for( final GATKSAMRecord read : reads ) { final byte[] sequence = read.getReadBases(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 419ea378f..9bb456230 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -71,7 +71,7 @@ public class GenotypingEngine { private final boolean DEBUG; private final boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS; - private final static List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied + private final static List noCall = new ArrayList<>(); // used to noCall all genotypes until the exact model is applied private final VariantAnnotatorEngine annotationEngine; private final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger; @@ -162,8 +162,8 @@ public class GenotypingEngine { final TreeSet startPosKeySet = decomposeHaplotypesIntoVariantContexts(haplotypes, haplotypeReadMap, ref, refLoc, activeAllelesToGenotype); // Walk along each position in the key set and create each event to be outputted - final Set calledHaplotypes = new HashSet(); - final List returnCalls = new ArrayList(); + final Set calledHaplotypes = new HashSet<>(); + final List returnCalls = new ArrayList<>(); for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region final List eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype); @@ -183,7 +183,7 @@ public class GenotypingEngine { if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { throw new ReviewedStingException("Record size mismatch! 
Something went wrong in the merging of alleles."); } - final Map mergeMap = new LinkedHashMap(); + final Map mergeMap = new LinkedHashMap<>(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) { mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function @@ -244,7 +244,7 @@ public class GenotypingEngine { if ( in_GGA_mode ) startPosKeySet.clear(); - cleanUpSymbolicUnassembledEvents( haplotypes ); + //cleanUpSymbolicUnassembledEvents( haplotypes ); // We don't make symbolic alleles so this isn't needed currently if ( !in_GGA_mode ) { // run the event merger if we're not in GGA mode final boolean mergedAnything = crossHaplotypeEventMerger.merge(haplotypes, haplotypeReadMap, startPosKeySet, ref, refLoc); @@ -267,7 +267,7 @@ public class GenotypingEngine { * @return the list of the sources of vcs in the same order */ private List makePriorityList(final List vcs) { - final List priorityList = new LinkedList(); + final List priorityList = new LinkedList<>(); for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource()); return priorityList; } @@ -276,7 +276,7 @@ public class GenotypingEngine { final int loc, final List activeAllelesToGenotype) { // the overlapping events to merge into a common reference view - final List eventsAtThisLoc = new ArrayList(); + final List eventsAtThisLoc = new ArrayList<>(); if( activeAllelesToGenotype.isEmpty() ) { for( final Haplotype h : haplotypes ) { @@ -292,7 +292,7 @@ public class GenotypingEngine { if( compVC.getStart() == loc ) { int alleleCount = 0; for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - List alleleSet = new ArrayList(2); + List alleleSet = new ArrayList<>(2); alleleSet.add(compVC.getReference()); alleleSet.add(compAltAllele); final String 
vcSourceName = "Comp" + compCount + "Allele" + alleleCount; @@ -348,7 +348,7 @@ public class GenotypingEngine { final Map> perSampleFilteredReadList, final VariantContext call ) { - final Map returnMap = new LinkedHashMap(); + final Map returnMap = new LinkedHashMap<>(); final GenomeLoc callLoc = parser.createGenomeLoc(call); for( final Map.Entry sample : perSampleReadMap.entrySet() ) { final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); @@ -384,7 +384,7 @@ public class GenotypingEngine { // TODO - split into input haplotypes and output haplotypes as not to share I/O arguments @Requires("haplotypes != null") protected static void cleanUpSymbolicUnassembledEvents( final List haplotypes ) { - final List haplotypesToRemove = new ArrayList(); + final List haplotypesToRemove = new ArrayList<>(); for( final Haplotype h : haplotypes ) { for( final VariantContext vc : h.getEventMap().getVariantContexts() ) { if( vc.isSymbolic() ) { @@ -407,7 +407,7 @@ public class GenotypingEngine { final Map> alleleMapper, final double downsamplingFraction ) { - final Map alleleReadMap = new LinkedHashMap(); + final Map alleleReadMap = new LinkedHashMap<>(); for( final Map.Entry haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap(); for( final Map.Entry> alleleMapperEntry : alleleMapper.entrySet() ) { // for each output allele @@ -430,7 +430,7 @@ public class GenotypingEngine { } protected static Map> createAlleleMapper( final Map mergeMap, final Map> eventMap ) { - final Map> alleleMapper = new LinkedHashMap>(); + final Map> alleleMapper = new LinkedHashMap<>(); for( final Map.Entry entry : mergeMap.entrySet() ) { alleleMapper.put(entry.getValue(), eventMap.get(new Event(entry.getKey()))); } @@ -441,100 +441,33 @@ public class GenotypingEngine { @Ensures({"result.size() == eventsAtThisLoc.size() + 1"}) protected static Map> createEventMapper( 
final int loc, final List eventsAtThisLoc, final List haplotypes ) { - final Map> eventMapper = new LinkedHashMap>(eventsAtThisLoc.size()+1); - VariantContext refVC = eventsAtThisLoc.get(0); // the genome loc is the only safe thing to pull out of this VC because ref/alt pairs might change reference basis - eventMapper.put(new Event(null), new ArrayList()); + final Map> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1); + final Event refEvent = new Event(null); + eventMapper.put(refEvent, new ArrayList()); for( final VariantContext vc : eventsAtThisLoc ) { eventMapper.put(new Event(vc), new ArrayList()); } - final List undeterminedHaplotypes = new ArrayList(haplotypes.size()); for( final Haplotype h : haplotypes ) { - if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) { - final List alleles = new ArrayList(2); - alleles.add(h.getArtificialRefAllele()); - alleles.add(h.getArtificialAltAllele()); - final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype") - .alleles(alleles) - .loc(refVC.getChr(), refVC.getStart(), refVC.getStart() + h.getArtificialRefAllele().length() - 1).make() ); - if( eventMapper.containsKey(artificialVC) ) { - eventMapper.get(artificialVC).add(h); - } - } else if( h.getEventMap().get(loc) == null ) { // no event at this location so let's investigate later - undeterminedHaplotypes.add(h); + if( h.getEventMap().get(loc) == null ) { + eventMapper.get(refEvent).add(h); } else { - boolean haplotypeIsDetermined = false; for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) { if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) { eventMapper.get(new Event(vcAtThisLoc)).add(h); - haplotypeIsDetermined = true; break; } } - - if( !haplotypeIsDetermined ) - undeterminedHaplotypes.add(h); } } - for( final Haplotype h : undeterminedHaplotypes ) { - Event matchingEvent = new Event(null); - for( final Map.Entry> eventToTest : eventMapper.entrySet() ) { - // don't test against 
the reference allele - if( eventToTest.getKey().equals(new Event(null)) ) - continue; - - // only try to disambiguate for alleles that have had haplotypes previously assigned above - if( eventToTest.getValue().isEmpty() ) - continue; - - final Haplotype artificialHaplotype = eventToTest.getValue().get(0); - if( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap(), true) ) { - matchingEvent = eventToTest.getKey(); - break; - } - } - - eventMapper.get(matchingEvent).add(h); - } - return eventMapper; } - protected static boolean isSubSetOf(final Map subset, final Map superset, final boolean resolveSupersetToSubset) { - - for ( final Map.Entry fromSubset : subset.entrySet() ) { - final VariantContext fromSuperset = superset.get(fromSubset.getKey()); - if ( fromSuperset == null ) - return false; - - List supersetAlleles = fromSuperset.getAlternateAlleles(); - if ( resolveSupersetToSubset ) - supersetAlleles = resolveAlternateAlleles(fromSubset.getValue().getReference(), fromSuperset.getReference(), supersetAlleles); - - if ( !supersetAlleles.contains(fromSubset.getValue().getAlternateAllele(0)) ) - return false; - } - - return true; - } - - private static List resolveAlternateAlleles(final Allele targetReference, final Allele actualReference, final List currentAlleles) { - if ( targetReference.length() <= actualReference.length() ) - return currentAlleles; - - final List newAlleles = new ArrayList(currentAlleles.size()); - final byte[] extraBases = Arrays.copyOfRange(targetReference.getBases(), actualReference.length(), targetReference.length()); - for ( final Allele a : currentAlleles ) { - newAlleles.add(Allele.extend(a, extraBases)); - } - return newAlleles; - } - @Ensures({"result.size() == haplotypeAllelesForSample.size()"}) protected static List findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final List> alleleMapper, final List haplotypes ) { if( 
haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; } - final List eventAllelesForSample = new ArrayList(); + final List eventAllelesForSample = new ArrayList<>(); for( final Allele a : haplotypeAllelesForSample ) { final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a)); for( int iii = 0; iii < alleleMapper.size(); iii++ ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 2ebfbcee9..e0a755c7b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -47,6 +47,9 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; @@ -433,8 +436,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In private final static int MIN_READ_LENGTH = 10; private List samplesList = new ArrayList(); - private final static double LOG_ONE_HALF = -Math.log10(2.0); - private final static double LOG_ONE_THIRD = -Math.log10(3.0); private final List allelesToGenotype = new ArrayList(); private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. 
Should never appear anywhere in a VCF file @@ -603,7 +604,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // if we don't have any data, just abort early return new ActivityProfileState(ref.getLocus(), 0.0); - final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied + final List noCall = new ArrayList<>(); // used to noCall all genotypes until the exact model is applied noCall.add(Allele.NO_CALL); final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); @@ -625,14 +626,14 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } } genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual); - genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF ); - genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; + genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + MathUtils.LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD + MathUtils.LOG_ONE_HALF ); + genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD; } } genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() ); } - final List alleles = new ArrayList(); + final List alleles = new ArrayList<>(); alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_ALT_ALLELE ); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); @@ -746,9 +747,9 
@@ public class HaplotypeCaller extends ActiveRegionWalker, In // Create the reference haplotype which is the bases from the reference that make up the active region finalizeActiveRegion(activeRegion); // merge overlapping fragments, clip adapter and low qual tails - final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); final byte[] fullReferenceWithPadding = activeRegion.getActiveRegionReference(referenceReader, REFERENCE_PADDING); final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion); + final Haplotype referenceHaplotype = createReferenceHaplotype(activeRegion, paddedReferenceLoc); final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype ); @@ -760,6 +761,21 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } } + /** + * Helper function to create the reference haplotype out of the active region and a padded loc + * @param activeRegion the active region from which to generate the reference haplotype + * @param paddedReferenceLoc the GenomeLoc which includes padding and shows how big the reference haplotype should be + * @return a non-null haplotype + */ + private Haplotype createReferenceHaplotype(final ActiveRegion activeRegion, final GenomeLoc paddedReferenceLoc) { + final Haplotype refHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); + refHaplotype.setAlignmentStartHapwrtRef(activeRegion.getExtendedLoc().getStart() - paddedReferenceLoc.getStart()); + final Cigar c = new Cigar(); + c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); + refHaplotype.setCigar(c); + return refHaplotype; + } + /** * Trim down the active region to just enough to properly genotype the events among the haplotypes * @@ -791,7 +807,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } // trim down the haplotypes - final Set 
haplotypeSet = new HashSet(haplotypes.size()); + final Set haplotypeSet = new HashSet<>(haplotypes.size()); for ( final Haplotype h : haplotypes ) { final Haplotype trimmed = h.trim(trimmedActiveRegion.getExtendedLoc()); if ( trimmed != null ) { @@ -802,7 +818,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } // create the final list of trimmed haplotypes - final List trimmedHaplotypes = new ArrayList(haplotypeSet); + final List trimmedHaplotypes = new ArrayList<>(haplotypeSet); // sort haplotypes to take full advantage of haplotype start offset optimizations in PairHMM Collections.sort( trimmedHaplotypes, new HaplotypeBaseComparator() ); @@ -816,7 +832,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // trim down the reads and add them to the trimmed active region - final List trimmedReads = new ArrayList(originalActiveRegion.getReads().size()); + final List trimmedReads = new ArrayList<>(originalActiveRegion.getReads().size()); for( final GATKSAMRecord read : originalActiveRegion.getReads() ) { final GATKSAMRecord clippedRead = ReadClipper.hardClipToRegion( read, trimmedActiveRegion.getExtendedLoc().getStart(), trimmedActiveRegion.getExtendedLoc().getStop() ); if( trimmedActiveRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) { @@ -937,7 +953,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } private Map> splitReadsBySample( final Collection reads ) { - final Map> returnMap = new HashMap>(); + final Map> returnMap = new HashMap<>(); for( final String sample : samplesList) { List readList = returnMap.get( sample ); if( readList == null ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index ca1877142..4a1a5993a 100644 --- 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -74,7 +74,6 @@ import java.util.*; public class LikelihoodCalculationEngine { private final static Logger logger = Logger.getLogger(LikelihoodCalculationEngine.class); - private static final double LOG_ONE_HALF = -Math.log10(2.0); private final byte constantGCP; private final double log10globalReadMismappingRate; private final boolean DEBUG; @@ -299,7 +298,7 @@ public class LikelihoodCalculationEngine { // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) // First term is approximated by Jacobian log with table lookup. haplotypeLikelihood += ReadUtils.getMeanRepresentativeReadCount( entry.getKey() ) * - ( MathUtils.approximateLog10SumLog10(entry.getValue().get(iii_allele), entry.getValue().get(jjj_allele)) + LOG_ONE_HALF ); + ( MathUtils.approximateLog10SumLog10(entry.getValue().get(iii_allele), entry.getValue().get(jjj_allele)) + MathUtils.LOG_ONE_HALF ); } } haplotypeLikelihoodMatrix[iii][jjj] = haplotypeLikelihood; @@ -397,11 +396,11 @@ public class LikelihoodCalculationEngine { if ( haplotypes.size() == 2 ) return haplotypes; // fast path -- we'll always want to use 2 haplotypes // all of the haplotypes that at least one sample called as one of the most likely - final Set selectedHaplotypes = new HashSet(); + final Set selectedHaplotypes = new HashSet<>(); selectedHaplotypes.add(findReferenceHaplotype(haplotypes)); // ref is always one of the selected // our annoying map from allele -> haplotype - final Map allele2Haplotype = new HashMap(); + final Map allele2Haplotype = new HashMap<>(); for ( final Haplotype h : haplotypes ) { h.setScore(h.isReference() ? 
Double.MAX_VALUE : 0.0); // set all of the scores to 0 (lowest value) for all non-ref haplotypes allele2Haplotype.put(Allele.create(h, h.isReference()), h); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 20b005b40..3a377409c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -111,7 +111,11 @@ public abstract class LocalAssemblyEngine { * @param refHaplotype the reference haplotype * @return a non-null list of reads */ - protected abstract List assemble(List reads, Haplotype refHaplotype); + protected abstract List assemble(List reads, Haplotype refHaplotype, List activeAlleleHaplotypes); + + protected List assemble(List reads, Haplotype refHaplotype) { + return assemble(reads, refHaplotype, Collections.emptyList()); + } /** * Main entry point into the assembly engine. 
Build a set of deBruijn graphs out of the provided reference sequence and list of reads @@ -128,8 +132,11 @@ public abstract class LocalAssemblyEngine { if( fullReferenceWithPadding.length != refLoc.size() ) { throw new IllegalArgumentException("Reference bases and reference loc must be the same size."); } if( pruneFactor < 0 ) { throw new IllegalArgumentException("Pruning factor cannot be negative"); } + // create the list of artificial haplotypes that should be added to the graph for GGA mode + final List activeAlleleHaplotypes = createActiveAlleleHaplotypes(refHaplotype, activeAllelesToGenotype, activeRegion.getExtendedLoc()); + // create the graphs by calling our subclass assemble method - final List graphs = assemble(activeRegion.getReads(), refHaplotype); + final List graphs = assemble(activeRegion.getReads(), refHaplotype, activeAlleleHaplotypes); // do some QC on the graphs for ( final SeqGraph graph : graphs ) { sanityCheckGraph(graph, refHaplotype); } @@ -138,45 +145,53 @@ public abstract class LocalAssemblyEngine { if ( graphWriter != null ) { printGraphs(graphs); } // find the best paths in the graphs and return them as haplotypes - return findBestPaths( graphs, refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); + return findBestPaths( graphs, refHaplotype, refLoc, activeRegion.getExtendedLoc() ); } - @Requires({"refWithPadding.length > refHaplotype.getBases().length", "refLoc.containsP(activeRegionWindow)"}) - @Ensures({"result.contains(refHaplotype)"}) - protected List findBestPaths(final List graphs, final Haplotype refHaplotype, final byte[] refWithPadding, final GenomeLoc refLoc, final List activeAllelesToGenotype, final GenomeLoc activeRegionWindow) { - // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes - final Set returnHaplotypes = new LinkedHashSet(); - refHaplotype.setAlignmentStartHapwrtRef(activeRegionWindow.getStart() - 
refLoc.getStart()); - final Cigar c = new Cigar(); - c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); - refHaplotype.setCigar(c); - returnHaplotypes.add( refHaplotype ); - + /** + * Create the list of artificial GGA-mode haplotypes by injecting each of the provided alternate alleles into the reference haplotype + * @param refHaplotype the reference haplotype + * @param activeAllelesToGenotype the list of alternate alleles in VariantContexts + * @param activeRegionWindow the window containing the reference haplotype + * @return a non-null list of haplotypes + */ + private List createActiveAlleleHaplotypes(final Haplotype refHaplotype, final List activeAllelesToGenotype, final GenomeLoc activeRegionWindow) { + final Set returnHaplotypes = new LinkedHashSet<>(); final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef(); - final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength(); - // for GGA mode, add the desired allele into the haplotype for( final VariantContext compVC : activeAllelesToGenotype ) { for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()); - addHaplotypeForGGA( insertedRefHaplotype, refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true ); + if( insertedRefHaplotype != null ) { // can be null if the requested allele can't be inserted into the haplotype + returnHaplotypes.add(insertedRefHaplotype); + } } } + return new ArrayList<>(returnHaplotypes); + } + + @Ensures({"result.contains(refHaplotype)"}) + protected List findBestPaths(final List graphs, final Haplotype refHaplotype, final GenomeLoc refLoc, final GenomeLoc activeRegionWindow) { + // add the reference haplotype separately from all the others to ensure that it is present in 
the list of haplotypes + final Set returnHaplotypes = new LinkedHashSet<>(); + returnHaplotypes.add( refHaplotype ); + + final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef(); + for( final SeqGraph graph : graphs ) { final SeqVertex source = graph.getReferenceSourceVertex(); final SeqVertex sink = graph.getReferenceSinkVertex(); if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph); - final KBestPaths pathFinder = new KBestPaths(allowCyclesInKmerGraphToGeneratePaths); + final KBestPaths pathFinder = new KBestPaths<>(allowCyclesInKmerGraphToGeneratePaths); for ( final Path path : pathFinder.getKBestPaths(graph, numBestHaplotypesPerGraph, source, sink) ) { -// logger.info("Found path " + path); Haplotype h = new Haplotype( path.getBases() ); if( !returnHaplotypes.contains(h) ) { final Cigar cigar = path.calculateCigar(refHaplotype.getBases()); if ( cigar == null ) { - // couldn't produce a meaningful alignment of haplotype to reference, fail quitely + // couldn't produce a meaningful alignment of haplotype to reference, fail quietly continue; } else if( cigar.isEmpty() ) { throw new IllegalStateException("Smith-Waterman alignment failure. 
Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + @@ -197,25 +212,6 @@ public abstract class LocalAssemblyEngine { if ( debug ) logger.info("Adding haplotype " + h.getCigar() + " from debruijn graph with kmer " + graph.getKmerSize()); - - // for GGA mode, add the desired allele into the haplotype if it isn't already present - if( !activeAllelesToGenotype.isEmpty() ) { - final Map eventMap = GenotypingEngine.generateVCsFromAlignment( h, refWithPadding, refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place - for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present - final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); - - // This if statement used to additionally have: - // "|| !vcOnHaplotype.hasSameAllelesAs(compVC)" - // but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto - // a haplotype that already contains a 1bp insertion (so practically it is reference but - // falls into the bin for the 1bp deletion because we keep track of the artificial alleles). - if( vcOnHaplotype == null ) { - for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - addHaplotypeForGGA( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), refWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ); - } - } - } - } } } } @@ -238,7 +234,7 @@ public abstract class LocalAssemblyEngine { } } - return new ArrayList(returnHaplotypes); + return new ArrayList<>(returnHaplotypes); } /** @@ -256,71 +252,6 @@ public abstract class LocalAssemblyEngine { return false; } - /** - * Take a haplotype which was generated by injecting an allele into a string of bases and run SW against the reference to determine the variants on the haplotype. 
- * Unfortunately since this haplotype didn't come from the assembly graph you can't straightforwardly use the bubble traversal algorithm to get this information. - * This is a target for future work as we rewrite the HaplotypeCaller to be more bubble-caller based. - * @param haplotype the candidate haplotype - * @param ref the reference bases to align against - * @param haplotypeList the current list of haplotypes - * @param activeRegionStart the start of the active region in the reference byte array - * @param activeRegionStop the stop of the active region in the reference byte array - * @param FORCE_INCLUSION_FOR_GGA_MODE if true will include in the list even if it already exists - * @return true if the candidate haplotype was successfully incorporated into the haplotype list - */ - @Requires({"ref != null", "ref.length >= activeRegionStop - activeRegionStart"}) - private boolean addHaplotypeForGGA( final Haplotype haplotype, final byte[] ref, final Set haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) { - if( haplotype == null ) { return false; } - - final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SWParameterSet.STANDARD_NGS ); - haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() ); - - if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 || swConsensus.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments - return false; - } - - haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0, true) ); - - final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate(haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true); - int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( 
haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true ); - if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) { - hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal - } - byte[] newHaplotypeBases; - // extend partial haplotypes to contain the full active region sequence - if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll(ArrayUtils.addAll(ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), - haplotype.getBases()), - ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop)); - } else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) ); - } else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { - newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) ); - } else { - newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop); - } - - final Haplotype h = new Haplotype( newHaplotypeBases ); - final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SWParameterSet.STANDARD_NGS ); - - h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() ); - if ( haplotype.isArtificialHaplotype() ) { - h.setArtificialEvent(haplotype.getArtificialEvent()); - } - if( 
swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart || swConsensus2.getAlignmentStart2wrt1() < 0 ) { // protect against unhelpful haplotype alignments - return false; - } - - h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0, true) ); - - if( FORCE_INCLUSION_FOR_GGA_MODE || !haplotypeList.contains(h) ) { - haplotypeList.add(h); - return true; - } else { - return false; - } - } - protected SeqGraph cleanupSeqGraph(final SeqGraph seqGraph) { if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor); @@ -372,7 +303,6 @@ public abstract class LocalAssemblyEngine { * Perform general QC on the graph to make sure something hasn't gone wrong during assembly * @param graph the graph to check * @param refHaplotype the reference haplotype - * @param */ private void sanityCheckGraph(final BaseGraph graph, final Haplotype refHaplotype) { sanityCheckReferenceGraph(graph, refHaplotype); @@ -383,7 +313,6 @@ public abstract class LocalAssemblyEngine { * * @param graph the graph to check * @param refHaplotype the reference haplotype - * @param */ private void sanityCheckReferenceGraph(final BaseGraph graph, final Haplotype refHaplotype) { if( graph.getReferenceSourceVertex() == null ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index db0ce0880..3d4d38d8e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -62,6 +62,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { 
private final static Logger logger = Logger.getLogger(ReadThreadingAssembler.class); private final static int DEFAULT_NUM_PATHS_PER_GRAPH = 128; + private final static int GGA_MODE_ARTIFICIAL_COUNTS = 1000; /** The min and max kmer sizes to try when building the graph. */ private final List kmerSizes; @@ -88,7 +89,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { } @Override - public List assemble( final List reads, final Haplotype refHaplotype) { + public List assemble( final List reads, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { final List graphs = new LinkedList<>(); for ( final int kmerSize : kmerSizes ) { @@ -96,6 +97,12 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // add the reference sequence to the graph rtgraph.addSequence("ref", refHaplotype.getBases(), null, true); + int hapCount = 0; + for( final Haplotype h : activeAlleleHaplotypes ) { + final int[] counts = new int[h.length()]; + Arrays.fill(counts, GGA_MODE_ARTIFICIAL_COUNTS); + rtgraph.addSequence("activeAllele" + hapCount++, h.getBases(), counts, false); + } // Next pull kmers out of every read and throw them on the graph for( final GATKSAMRecord read : reads ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index 6e9223afb..8e879377f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -590,11 +590,9 @@ public class ReadThreadingGraph extends BaseGraph(), 10, new Haplotype(refCycle.getBytes(), true)); - final DeBruijnGraph g2 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList(), 10, new Haplotype(noCycle.getBytes(), true)); + final 
DeBruijnGraph g1 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList(), 10, new Haplotype(refCycle.getBytes(), true), Collections.emptyList()); + final DeBruijnGraph g2 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList(), 10, new Haplotype(noCycle.getBytes(), true), Collections.emptyList()); Assert.assertTrue(g1 == null, "Reference cycle graph should return null during creation."); Assert.assertTrue(g2 != null, "Reference non-cycle graph should not return null during creation."); @@ -147,7 +147,7 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { } } - assembler.addReadKmersToGraph(builder, Arrays.asList(read)); + assembler.addReadKmersToGraph(builder, Arrays.asList(read), Collections.emptyList()); Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size()); for ( final Kmer addedKmer : builder.addedPairs ) { Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 9ef9fea77..3f3b295f8 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "008029ee34e1becd8312e3c4d608033c"); + "38b4596c3910fdde51ea59aa1a8f848f"); } @Test public void 
testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "ae8d95ffe77515cc74a55c2afd142826"); + "08147870d73d9749ced8cfc7cdd4714f"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 91e80b45c..5fc0f4f52 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "bb30d0761dc9e2dfd57bfe07b72d06d8"); + "ffd69c410dca0d2f9fe75f3cb5d08179"); } @Test diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java index a517e1cb1..74361de1b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java @@ -47,6 +47,9 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import 
org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; @@ -216,6 +219,10 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { private List assemble(final Assembler assembler, final byte[] refBases, final GenomeLoc loc, final List reads) { final Haplotype refHaplotype = new Haplotype(refBases, true); + final Cigar c = new Cigar(); + c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); + refHaplotype.setCigar(c); + final ActiveRegion activeRegion = new ActiveRegion(loc, null, true, genomeLocParser, 0); activeRegion.addAll(reads); final LocalAssemblyEngine engine = createAssembler(assembler); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java index 8efb3d486..3f10fc72c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java @@ -85,7 +85,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest { public SeqGraph assemble() { assembler.removePathsNotConnectedToRef = false; // need to pass some of the tests assembler.setDebugGraphTransformations(true); - final SeqGraph graph = assembler.assemble(reads, refHaplotype).get(0); + final SeqGraph graph = assembler.assemble(reads, refHaplotype, Collections.emptyList()).get(0); if ( DEBUG ) graph.printGraph(new File("test.dot"), 0); return graph; } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 49157a206..b158d1509 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -55,17 +55,19 @@ public class MathUtils { private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP; private static final double MAX_JACOBIAN_TOLERANCE = 8.0; private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1; - private static final int MAXN = 70000; + private static final int MAXN = 70_000; private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients /** * The smallest log10 value we'll emit from normalizeFromLog10 and other functions * where the real-space value is 0.0. */ - public final static double LOG10_P_OF_ZERO = -1000000.0; - public final static double FAIR_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5); - private final static double NATURAL_LOG_OF_TEN = Math.log(10.0); - private final static double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI); + public static final double LOG10_P_OF_ZERO = -1000000.0; + public static final double FAIR_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5); + public static final double LOG_ONE_HALF = -Math.log10(2.0); + public static final double LOG_ONE_THIRD = -Math.log10(3.0); + private static final double NATURAL_LOG_OF_TEN = Math.log(10.0); + private static final double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI); static { log10Cache = new double[LOG10_CACHE_SIZE]; diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index f253fc9c9..b309ef633 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -221,7 +221,7 @@ public class PerReadAlleleLikelihoodMap { final int count = 
ReadUtils.getMeanRepresentativeReadCount(read); final double likelihood_iii = entry.getValue().get(iii_allele); final double likelihood_jjj = entry.getValue().get(jjj_allele); - haplotypeLikelihood += count * (MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + LOG_ONE_HALF); + haplotypeLikelihood += count * (MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + MathUtils.LOG_ONE_HALF); // fast exit. If this diploid pair is already worse than the max, just stop and look at the next pair if ( haplotypeLikelihood < maxElement ) break; @@ -241,7 +241,6 @@ public class PerReadAlleleLikelihoodMap { return new MostLikelyAllele(alleles.get(hap1), alleles.get(hap2), maxElement, maxElement); } - private static final double LOG_ONE_HALF = -Math.log10(2.0); /** * Given a map from alleles to likelihoods, find the allele with the largest likelihood. diff --git a/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java index bacee7942..1f932b222 100644 --- a/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java @@ -46,7 +46,6 @@ public class Haplotype extends Allele { private EventMap eventMap = null; private Cigar cigar; private int alignmentStartHapwrtRef; - private Event artificialEvent = null; private double score = 0; /** @@ -93,11 +92,6 @@ public class Haplotype extends Allele { super(allele, true); } - protected Haplotype( final byte[] bases, final Event artificialEvent ) { - this(bases, false); - this.artificialEvent = artificialEvent; - } - public Haplotype( final byte[] bases, final GenomeLoc loc ) { this(bases, false); this.genomeLocation = loc; @@ -189,7 +183,7 @@ public class Haplotype extends Allele { } /** - * Get the cigar for this haplotype. Note that cigar is guarenteed to be consolidated + * Get the cigar for this haplotype. 
Note that the cigar is guaranteed to be consolidated * in that multiple adjacent equal operates will have been merged * @return the cigar of this haplotype */ @@ -223,30 +217,6 @@ public class Haplotype extends Allele { throw new IllegalArgumentException("Read length " + length() + " not equal to the read length of the cigar " + cigar.getReadLength()); } - public boolean isArtificialHaplotype() { - return artificialEvent != null; - } - - public Event getArtificialEvent() { - return artificialEvent; - } - - public Allele getArtificialRefAllele() { - return artificialEvent.ref; - } - - public Allele getArtificialAltAllele() { - return artificialEvent.alt; - } - - public int getArtificialAllelePosition() { - return artificialEvent.pos; - } - - public void setArtificialEvent( final Event artificialEvent ) { - this.artificialEvent = artificialEvent; - } - @Requires({"refInsertLocation >= 0"}) public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) { // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates @@ -260,7 +230,7 @@ public class Haplotype extends Allele { newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, 0, haplotypeInsertLocation)); // bases before the variant newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, haplotypeInsertLocation + refAllele.length(), myBases.length)); // bases after the variant - return new Haplotype(newHaplotypeBases, new Event(refAllele, altAllele, genomicInsertLocation)); + return new Haplotype(newHaplotypeBases); } public static LinkedHashMap makeHaplotypeListFromAlleles(final List alleleList, From ed4f19d79b3d8039ae50c2122c88a76a6b9e5796 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 31 May 2013 11:28:29 -0400 Subject: [PATCH 060/116] 
Restore scala compilation by default in build.xml -This was accidentally clobbered in a recent commit. -If you want to compile Java-only, easiest thing to do is run "ant gatk" rather than modifying build.xml --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index d9b37f4de..2e9df4d5e 100644 --- a/build.xml +++ b/build.xml @@ -39,7 +39,7 @@ - + From 64b4d8072923612b38662aad984a836dc8093fcb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 31 May 2013 13:16:00 -0400 Subject: [PATCH 061/116] Make BQSR calculateIsIndel robust to indel CIGARs at start/end of read -- The previous implementation attempted to be robust to this, but not all cases were handled properly. Added a helper function updateIndel() that bounds up the update to be in the range of the indel array, and cleaned up logic of how the method works. The previous behavior was inconsistent across read fwd/rev strand, so that the indel cigars at the end of read were put at the start of reads if the reads were in the forward strand but not if they were in the reverse strand. 
Everything is now consistent, as can be seen in the symmetry of the unit tests: tests.add(new Object[]{"1D3M", false, EventType.BASE_DELETION, new int[]{0,0,0}}); tests.add(new Object[]{"1M1D2M", false, EventType.BASE_DELETION, new int[]{1,0,0}}); tests.add(new Object[]{"2M1D1M", false, EventType.BASE_DELETION, new int[]{0,1,0}}); tests.add(new Object[]{"3M1D", false, EventType.BASE_DELETION, new int[]{0,0,1}}); tests.add(new Object[]{"1D3M", true, EventType.BASE_DELETION, new int[]{1,0,0}}); tests.add(new Object[]{"1M1D2M", true, EventType.BASE_DELETION, new int[]{0,1,0}}); tests.add(new Object[]{"2M1D1M", true, EventType.BASE_DELETION, new int[]{0,0,1}}); tests.add(new Object[]{"3M1D", true, EventType.BASE_DELETION, new int[]{0,0,0}}); tests.add(new Object[]{"4M1I", false, EventType.BASE_INSERTION, new int[]{0,0,0,1,0}}); tests.add(new Object[]{"3M1I1M", false, EventType.BASE_INSERTION, new int[]{0,0,1,0,0}}); tests.add(new Object[]{"2M1I2M", false, EventType.BASE_INSERTION, new int[]{0,1,0,0,0}}); tests.add(new Object[]{"1M1I3M", false, EventType.BASE_INSERTION, new int[]{1,0,0,0,0}}); tests.add(new Object[]{"1I4M", false, EventType.BASE_INSERTION, new int[]{0,0,0,0,0}}); tests.add(new Object[]{"4M1I", true, EventType.BASE_INSERTION, new int[]{0,0,0,0,0}}); tests.add(new Object[]{"3M1I1M", true, EventType.BASE_INSERTION, new int[]{0,0,0,0,1}}); tests.add(new Object[]{"2M1I2M", true, EventType.BASE_INSERTION, new int[]{0,0,0,1,0}}); tests.add(new Object[]{"1M1I3M", true, EventType.BASE_INSERTION, new int[]{0,0,1,0,0}}); tests.add(new Object[]{"1I4M", true, EventType.BASE_INSERTION, new int[]{0,1,0,0,0}}); -- delivers #50445353 --- .../gatk/walkers/bqsr/BaseRecalibrator.java | 22 ++++++++++--------- .../walkers/bqsr/BQSRIntegrationTest.java | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 278317da3..c60eceaa4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -366,9 +366,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche } protected static int[] calculateIsIndel( final GATKSAMRecord read, final EventType mode ) { - final byte[] readBases = read.getReadBases(); - final int[] indel = new int[readBases.length]; - Arrays.fill(indel, 0); + final int[] indel = new int[read.getReadBases().length]; int readPos = 0; for ( final CigarElement ce : read.getCigar().getCigarElements() ) { final int elementLength = ce.getLength(); @@ -383,21 +381,19 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche } case D: { - final int index = ( read.getReadNegativeStrandFlag() ? readPos : ( readPos > 0 ? readPos - 1 : readPos ) ); - indel[index] = ( mode.equals(EventType.BASE_DELETION) ? 1 : 0 ); + final int index = ( read.getReadNegativeStrandFlag() ? readPos : readPos - 1 ); + updateIndel(indel, index, mode, EventType.BASE_DELETION); break; } case I: { final boolean forwardStrandRead = !read.getReadNegativeStrandFlag(); if( forwardStrandRead ) { - indel[(readPos > 0 ? readPos - 1 : readPos)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 ); - } - for (int iii = 0; iii < elementLength; iii++) { - readPos++; + updateIndel(indel, readPos - 1, mode, EventType.BASE_INSERTION); } + readPos += elementLength; if( !forwardStrandRead ) { - indel[(readPos < indel.length ? readPos : readPos - 1)] = ( mode.equals(EventType.BASE_INSERTION) ? 
1 : 0 ); + updateIndel(indel, readPos, mode, EventType.BASE_INSERTION); } break; } @@ -412,6 +408,12 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche return indel; } + private static void updateIndel(final int[] indel, final int index, final EventType mode, final EventType requiredMode) { + if ( mode == requiredMode && index >= 0 && index < indel.length ) + // protect ourselves from events at the start or end of the read (1D3M or 3M1D) + indel[index] = 1; + } + protected static double[] calculateFractionalErrorArray( final int[] errorArray, final byte[] baqArray ) { if(errorArray.length != baqArray.length ) { throw new ReviewedStingException("Array length mismatch detected. Malformed read?"); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 907046704..71c29fe0b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -111,7 +111,7 @@ public class BQSRIntegrationTest extends WalkerTest { {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "85a120b7d86b61597b86b9e93decbdfc")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "5248dc49aec0323c74b496bb4928c73c")}, {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "cb52f267e0010f849f50b0bf1de474a1")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "1425a5063ee757dbfc013df24e65a67a")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", 
"1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "fb372d0a8fc41b01ced1adab31546850")}, {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "c1c3cda8caceed619d3d439c3990cd26")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c9953f020a65c1603a6d71aeeb1b95f3")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5bfff0c699345cca12a9b33acf95588f")}, From 4b206a3540485a4a747df59ca127ef6d4305d4bd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 31 May 2013 13:54:33 -0400 Subject: [PATCH 062/116] Check that -compress arguments are within range 0-9 -- Although the original bug report was about SplitSamFile it actually was an engine wide error. 
The two places that provide compression to the BAM writer now check the validity of the compress argument via a static method in ReadUtils -- delivers #49531009 --- .../SAMFileWriterArgumentTypeDescriptor.java | 7 ++++--- .../sting/utils/sam/ReadUtils.java | 8 ++++++++ .../gatk/EngineFeaturesIntegrationTest.java | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 458846db0..3b89787ad 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.OutputStream; import java.lang.annotation.Annotation; @@ -132,9 +133,9 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor if (writerFileName != null && writerFileName.asFile() != null ) { stub = new SAMFileWriterStub(engine, writerFileName.asFile()); - if ( compressionLevel != null ) - stub.setCompressionLevel(compressionLevel); - if ( indexOnTheFly ) + if ( compressionLevel != null ) { + stub.setCompressionLevel(ReadUtils.validateCompressionLevel(compressionLevel)); + } if ( indexOnTheFly ) stub.setIndexOnTheFly(indexOnTheFly); if ( generateMD5 ) stub.setGenerateMD5(generateMD5); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 5b15fdd1b..cf1c9cb8e 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.util.*; @@ -152,11 +153,18 @@ public class ReadUtils { * @return a SAMFileWriter with the compression level if it is a bam. */ public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) { + validateCompressionLevel(compression); if (file.endsWith(".bam")) return new SAMFileWriterFactory().makeBAMWriter(header, presorted, new File(file), compression); return new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, new File(file)); } + public static int validateCompressionLevel(final int requestedCompressionLevel) { + if ( requestedCompressionLevel < 0 || requestedCompressionLevel > 9 ) + throw new UserException.BadArgumentValue("compress", "Compression level must be 0-9 but got " + requestedCompressionLevel); + return requestedCompressionLevel; + } + /** * is this base inside the adaptor of the read? 
* diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index c60c6430c..6cfa90d90 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -174,4 +174,20 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { 1, Arrays.asList("ecf27a776cdfc771defab1c5d19de9ab")); executeTest("testUserReadFilterAppliedBeforeWalker", spec); } + + @Test + public void testNegativeCompress() { + testBadCompressArgument(-1); + } + + @Test + public void testTooBigCompress() { + testBadCompressArgument(100); + } + + private void testBadCompressArgument(final int compress) { + WalkerTestSpec spec = new WalkerTestSpec("-T PrintReads -R " + b37KGReference + " -I private/testdata/NA12878.1_10mb_2_10mb.bam -o %s -compress " + compress, + 1, UserException.class); + executeTest("badCompress " + compress, spec); + } } \ No newline at end of file From 6555361742e64829183c9cb056795f0fc0b43443 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 31 May 2013 15:21:12 -0400 Subject: [PATCH 063/116] Fix error in merging code in HC -- Ultimately this was caused by an underlying bug in the reverting of soft clipped bases in the read clipper. The read clipper would fail to properly set the alignment start for reads that were 100% clipped before reverting, such as 10H2S5H => 10H2M5H. This has been fixed and unit tested. -- Update 1 ReduceReads MD5, which was due to cases where we were clipping away all of the MATCH part of the read, leaving a cigar like 50H11S and the revert soft clips was failing to properly revert the bases. 
-- delivers #50655421 --- .../ReduceReadsIntegrationTest.java | 2 +- .../sting/utils/clipping/ClippingOp.java | 47 +++++++++++-------- .../utils/clipping/ReadClipperUnitTest.java | 9 +++- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 405e616f1..4fbbe1d0c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -260,7 +260,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c459a6153a17c2cbf8441e1918fda9c8"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("4f0ef477c0417d1eb602b323474ef377"))); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index f51881e0b..2c2cbd98f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Iterator; +import java.util.List; import java.util.Stack; import java.util.Vector; @@ -559,26 
+560,34 @@ public class ClippingOp { return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd); } + /** + * Compute the offset of the first "real" position in the cigar on the genome + * + * This is defined as a first position after a run of Hs followed by a run of Ss + * + * @param cigar A non-null cigar + * @return the offset (from 0) of the first on-genome base + */ + private int calcHardSoftOffset(final Cigar cigar) { + final List elements = cigar.getCigarElements(); + + int size = 0; + int i = 0; + while ( i < elements.size() && elements.get(i).getOperator() == CigarOperator.HARD_CLIP ) { + size += elements.get(i).getLength(); + i++; + } + while ( i < elements.size() && elements.get(i).getOperator() == CigarOperator.SOFT_CLIP ) { + size += elements.get(i).getLength(); + i++; + } + + return size; + } + private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { - int newShift = 0; - int oldShift = 0; - - boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift - for (CigarElement cigarElement : newCigar.getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) - newShift += cigarElement.getLength(); - else { - readHasStarted = true; - break; - } - } - - for (CigarElement cigarElement : oldCigar.getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) - oldShift += cigarElement.getLength(); - else if (readHasStarted) - break; - } + final int newShift = calcHardSoftOffset(newCigar); + final int oldShift = calcHardSoftOffset(oldCigar); return newShift - oldShift; } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index 6ec4336b0..0b4153535 100644 --- 
a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -48,7 +48,7 @@ import java.util.List; public class ReadClipperUnitTest extends BaseTest { List cigarList; - int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 + int maximumCigarSize = 10; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 @BeforeClass public void init() { @@ -391,4 +391,11 @@ public class ReadClipperUnitTest extends BaseTest { } } + @Test(enabled = true) + public void testRevertEntirelySoftclippedReads() { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("2H1S3H"); + GATKSAMRecord clippedRead = ReadClipper.revertSoftClippedBases(read); + Assert.assertEquals(clippedRead.getAlignmentStart(), read.getSoftStart()); + } + } \ No newline at end of file From ab40f4af43a28dacc4b3a87d007c4b0b08b4cc83 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 3 Jun 2013 11:01:34 -0400 Subject: [PATCH 064/116] Break out the GGA kmers and the read kmers into separate functions for the DeBruijn assembler. -- Added unit test for new function. 
--- .../haplotypecaller/DeBruijnAssembler.java | 30 ++++++++++--- .../haplotypecaller/graphs/SeqGraph.java | 6 +-- .../readthreading/ReadThreadingAssembler.java | 2 + .../DeBruijnAssemblerUnitTest.java | 45 ++++++++++++++++++- .../broadinstitute/sting/utils/MathUtils.java | 8 +--- 5 files changed, 74 insertions(+), 17 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 3c0642f83..d876a403b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -143,8 +143,13 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { // something went wrong, so abort right now with a null graph return null; - // now go through the graph already seeded with the reference sequence and add the read kmers to it as well as the artificial GGA haplotypes - if ( ! addReadKmersToGraph(builder, reads, activeAlleleHaplotypes) ) + // add the artificial GGA haplotypes to the graph + if ( ! addGGAKmersToGraph(builder, activeAlleleHaplotypes) ) + // something went wrong, so abort right now with a null graph + return null; + + // now go through the graph already seeded with the reference sequence and add the read kmers to it + if ( ! 
addReadKmersToGraph(builder, reads) ) // some problem was detected adding the reads to the graph, return null to indicate we failed return null; @@ -153,17 +158,16 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } /** - * Add the high-quality kmers from the reads to the graph + * Add the high-quality kmers from the artificial GGA haplotypes to the graph * * @param builder a debruijn graph builder to add the read kmers to - * @param reads a non-null list of reads whose kmers we want to add to the graph * @param activeAlleleHaplotypes a list of haplotypes to add to the graph for GGA mode * @return true if we successfully added the read kmers to the graph without corrupting it in some way */ - protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads, final List activeAlleleHaplotypes) { + protected boolean addGGAKmersToGraph(final DeBruijnGraphBuilder builder, final List activeAlleleHaplotypes) { + final int kmerLength = builder.getKmerSize(); - // First pull kmers out of the artificial GGA haplotypes and throw them on the graph for( final Haplotype haplotype : activeAlleleHaplotypes ) { final int end = haplotype.length() - kmerLength; for( int start = 0; start < end; start++ ) { @@ -171,6 +175,20 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } } + // always returns true now, but it's possible that we'd add kmers and decide we don't like the graph in some way + return true; + } + + /** + * Add the high-quality kmers from the reads to the graph + * + * @param builder a debruijn graph builder to add the read kmers to + * @param reads a non-null list of reads whose kmers we want to add to the graph + * @return true if we successfully added the read kmers to the graph without corrupting it in some way + */ + protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads) { + final int kmerLength = builder.getKmerSize(); + // Next pull kmers out of every read and throw them on 
the graph for( final GATKSAMRecord read : reads ) { final byte[] sequence = read.getReadBases(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java index 20edcb39b..06c127a84 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java @@ -352,7 +352,7 @@ public final class SeqGraph extends BaseGraph { * Merge until the graph has no vertices that are candidates for merging */ public boolean transformUntilComplete() { - boolean didAtLeastOneTranform = false; + boolean didAtLeastOneTransform = false; boolean foundNodesToMerge = true; while( foundNodesToMerge ) { foundNodesToMerge = false; @@ -360,13 +360,13 @@ public final class SeqGraph extends BaseGraph { for( final SeqVertex v : vertexSet() ) { foundNodesToMerge = tryToTransform(v); if ( foundNodesToMerge ) { - didAtLeastOneTranform = true; + didAtLeastOneTransform = true; break; } } } - return didAtLeastOneTranform; + return didAtLeastOneTransform; } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 3d4d38d8e..bd24891bc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -97,6 +97,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // add the reference sequence to the graph rtgraph.addSequence("ref", refHaplotype.getBases(), null, true); + + // add the artificial GGA haplotypes to the graph int hapCount = 
0; for( final Haplotype h : activeAlleleHaplotypes ) { final int[] counts = new int[h.length()]; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java index 2ca78f306..95592241d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java @@ -147,7 +147,50 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { } } - assembler.addReadKmersToGraph(builder, Arrays.asList(read), Collections.emptyList()); + assembler.addReadKmersToGraph(builder, Arrays.asList(read)); + Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size()); + for ( final Kmer addedKmer : builder.addedPairs ) { + Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases); + } + } + + @DataProvider(name = "AddGGAKmersToGraph") + public Object[][] makeAddGGAKmersToGraphData() { + List tests = new ArrayList(); + + // this functionality can be adapted to provide input data for whatever you might want in your data + final String bases = "ACGTAACCGGTTAAACCCGGGTTT"; + final int readLen = bases.length(); + final List allBadStarts = new ArrayList(readLen); + for ( int i = 0; i < readLen; i++ ) allBadStarts.add(i); + + for ( final int kmerSize : Arrays.asList(3, 4, 5) ) { + tests.add(new Object[]{bases, kmerSize}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "AddGGAKmersToGraph", enabled = ! 
DEBUG) + public void testAddGGAKmersToGraph(final String bases, final int kmerSize) { + final int readLen = bases.length(); + final DeBruijnAssembler assembler = new DeBruijnAssembler(); + final MockBuilder builder = new MockBuilder(kmerSize); + + final Set expectedBases = new HashSet(); + final Set expectedStarts = new LinkedHashSet(); + for ( int i = 0; i < readLen; i++) { + boolean good = true; + for ( int j = 0; j < kmerSize + 1; j++ ) { // +1 is for pairing + good &= i + j < readLen; + } + if ( good ) { + expectedStarts.add(i); + expectedBases.add(bases.substring(i, i + kmerSize + 1)); + } + } + + assembler.addGGAKmersToGraph(builder, Arrays.asList(new Haplotype(bases.getBytes()))); Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size()); for ( final Kmer addedKmer : builder.addedPairs ) { Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases); diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index b158d1509..dfd3537da 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -244,9 +244,6 @@ public class MathUtils { public static double sumLog10(final double[] log10values) { return Math.pow(10.0, log10sumLog10(log10values)); - // double s = 0.0; - // for ( double v : log10values) s += Math.pow(10.0, v); - // return s; } public static double log10sumLog10(final double[] log10values) { @@ -859,11 +856,8 @@ public class MathUtils { break; sum += x; i++; - //System.out.printf(" %d/%d", sum, i); } - //System.out.printf("Sum = %d, n = %d, maxI = %d, avg = %f%n", sum, i, maxI, (1.0 * sum) / i); - return (1.0 * sum) / i; } @@ -1359,7 +1353,7 @@ public class MathUtils { } /** - * Compute in a numerical correct way the quanity log10(1-x) + * Compute in a numerical correct way the 
quantity log10(1-x) * * Uses the approximation log10(1-x) = log10(1/x - 1) + log10(x) to avoid very quick underflow * in 1-x when x is very small From c9f5b53efa8307add66b4f1fc1d689a0818db443 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 3 Jun 2013 14:36:54 -0400 Subject: [PATCH 065/116] Bugfix for HC can fail to assemble the correct reference sequence in some cases -- Ultimately this was caused by overly aggressive merging of CommonSuffixMerger. In the case where you have this graph: ACT [ref source] -> C G -> ACT -> C we would merge into G -> ACT -> C which would linearlize into GACTC Causing us to add bases to the reference source node that couldn't be recovered. The solution was to ensure that CommonSuffixMerger only operates when all nodes to be merged aren't source nodes themselves. -- Added a convenient argument to the haplotype caller (captureAssemblyFailureBAM) that will write out the exact reads to a BAM file that went into a failed assembly run (going to a file called AssemblyFailure.BAM). This can be used to rerun the haplotype caller to produce the exact error, which can be hard in regions of deep coverage where the downsampler state determines the exact reads going into assembly and therefore makes running with a sub-interval not reproduce the error -- Did some misc. 
cleanup of code while debugging -- [delivers #50917729] --- .../haplotypecaller/HaplotypeCaller.java | 30 ++++++--- .../haplotypecaller/LocalAssemblyEngine.java | 38 +++++++----- .../haplotypecaller/graphs/BaseGraph.java | 61 +++++++++++++++++++ .../haplotypecaller/graphs/SeqGraph.java | 17 ++++-- .../graphs/SharedSequenceMerger.java | 8 ++- .../readthreading/ReadThreadingAssembler.java | 6 +- .../graphs/CommonSuffixMergerUnitTest.java | 16 +++++ 7 files changed, 147 insertions(+), 29 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index e0a755c7b..73367f8c3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -50,6 +50,7 @@ import com.google.java.contract.Ensures; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; @@ -387,6 +388,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="dontUseSoftClippedBases", shortName="dontUseSoftClippedBases", doc="If specified, we will not analyze soft clipped bases in the reads", required = false) protected boolean dontUseSoftClippedBases = false; + @Hidden + @Argument(fullName="captureAssemblyFailureBAM", shortName="captureAssemblyFailureBAM", doc="If specified, we will write a BAM called assemblyFailure.bam capturing all of the reads that were in the active region when the assembler failed for any reason", required = false) + protected boolean captureAssemblyFailureBAM = false; + @Hidden 
@Argument(fullName="allowCyclesInKmerGraphToGeneratePaths", shortName="allowCyclesInKmerGraphToGeneratePaths", doc="If specified, we will allow cycles in the kmer graphs to generate paths with multiple copies of the path sequenece rather than just the shortest paths", required = false) protected boolean allowCyclesInKmerGraphToGeneratePaths = false; @@ -751,13 +756,24 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion); final Haplotype referenceHaplotype = createReferenceHaplotype(activeRegion, paddedReferenceLoc); - final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype ); - - if ( ! dontTrimActiveRegions ) { - return trimActiveRegion(activeRegion, haplotypes, activeAllelesToGenotype, fullReferenceWithPadding, paddedReferenceLoc); - } else { - // we don't want to trim active regions, so go ahead and use the old one - return new AssemblyResult(haplotypes, activeRegion, fullReferenceWithPadding, paddedReferenceLoc, true); + try { + final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype ); + if ( ! 
dontTrimActiveRegions ) { + return trimActiveRegion(activeRegion, haplotypes, activeAllelesToGenotype, fullReferenceWithPadding, paddedReferenceLoc); + } else { + // we don't want to trim active regions, so go ahead and use the old one + return new AssemblyResult(haplotypes, activeRegion, fullReferenceWithPadding, paddedReferenceLoc, true); + } + } catch ( Exception e ) { + // Capture any exception that might be thrown, and write out the assembly failure BAM if requested + if ( captureAssemblyFailureBAM ) { + final SAMFileWriter writer = ReadUtils.createSAMFileWriterWithCompression(getToolkit().getSAMFileHeader(), true, "assemblyFailure.bam", 5); + for ( final GATKSAMRecord read : activeRegion.getReads() ) { + writer.addAlignment(read); + } + writer.close(); + } + throw e; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 3a377409c..1a5f34bc3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -78,6 +78,10 @@ import java.util.*; public abstract class LocalAssemblyEngine { private final static Logger logger = Logger.getLogger(LocalAssemblyEngine.class); + /** + * If false, we will only write out a region around the reference source + */ + private final static boolean PRINT_FULL_GRAPH_FOR_DEBUGGING = true; public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 8; private static final int MIN_HAPLOTYPE_REFERENCE_LENGTH = 30; @@ -252,20 +256,26 @@ public abstract class LocalAssemblyEngine { return false; } - protected SeqGraph cleanupSeqGraph(final SeqGraph seqGraph) { - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), pruneFactor); + /** + * Print graph to file if debugGraphTransformations 
is enabled + * @param graph the graph to print + * @param file the destination file + */ + protected void printDebugGraphTransform(final BaseGraph graph, final File file) { + if ( debugGraphTransformations ) { + if ( PRINT_FULL_GRAPH_FOR_DEBUGGING ) + graph.printGraph(file, pruneFactor); + else + graph.subsetToRefSource().printGraph(file, pruneFactor); + } + } + + protected SeqGraph cleanupSeqGraph(final SeqGraph seqGraph) { + printDebugGraphTransform(seqGraph, new File("sequenceGraph.1.dot")); - // TODO -- we need to come up with a consistent pruning algorithm. The current pruning algorithm - // TODO -- works well but it doesn't differentiate between an isolated chain that doesn't connect - // TODO -- to anything from one that's actually has good support along the chain but just happens - // TODO -- to have a connection in the middle that has weight of < pruneFactor. Ultimately - // TODO -- the pruning algorithm really should be an error correction algorithm that knows more - // TODO -- about the structure of the data and can differentiate between an infrequent path but - // TODO -- without evidence against it (such as occurs when a region is hard to get any reads through) - // TODO -- from a error with lots of weight going along another similar path // the very first thing we need to do is zip up the graph, or pruneGraph will be too aggressive seqGraph.zipLinearChains(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.zipped.dot"), pruneFactor); + printDebugGraphTransform(seqGraph, new File("sequenceGraph.2.zipped.dot")); // now go through and prune the graph, removing vertices no longer connected to the reference chain // IMPORTANT: pruning must occur before we call simplifyGraph, as simplifyGraph adds 0 weight @@ -273,9 +283,9 @@ public abstract class LocalAssemblyEngine { seqGraph.pruneGraph(pruneFactor); seqGraph.removeVerticesNotConnectedToRefRegardlessOfEdgeDirection(); - if ( debugGraphTransformations ) 
seqGraph.printGraph(new File("sequenceGraph.3.pruned.dot"), pruneFactor); + printDebugGraphTransform(seqGraph, new File("sequenceGraph.3.pruned.dot")); seqGraph.simplifyGraph(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.4.merged.dot"), pruneFactor); + printDebugGraphTransform(seqGraph, new File("sequenceGraph.4.merged.dot")); // The graph has degenerated in some way, so the reference source and/or sink cannot be id'd. Can // happen in cases where for example the reference somehow manages to acquire a cycle, or @@ -294,7 +304,7 @@ public abstract class LocalAssemblyEngine { seqGraph.addVertex(dummy); seqGraph.addEdge(complete, dummy, new BaseEdge(true, 0)); } - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.5.final.dot"), pruneFactor); + printDebugGraphTransform(seqGraph, new File("sequenceGraph.5.final.dot")); return seqGraph; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java index 8938af7c2..c963fb6e5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java @@ -388,6 +388,17 @@ public class BaseGraph extends Default return s; } + /** + * Get the set of vertices connected to v by incoming or outgoing edges + * @param v a non-null vertex + * @return a set of vertices {X} connected X -> v or v -> Y + */ + public Set neighboringVerticesOf(final V v) { + final Set s = incomingVerticesOf(v); + s.addAll(outgoingVerticesOf(v)); + return s; + } + /** * Print out the graph in the dot language for visualization * @param destination File to write to @@ -664,4 +675,54 @@ public class BaseGraph extends Default "kmerSize=" + kmerSize + '}'; } + + /** + * Get the set of vertices within distance edges of source, 
regardless of edge direction + * + * @param source the source vertex to consider + * @param distance the distance + * @return a set of vertices within distance of source + */ + protected Set verticesWithinDistance(final V source, final int distance) { + if ( distance == 0 ) + return Collections.singleton(source); + + final Set found = new HashSet<>(); + found.add(source); + for ( final V v : neighboringVerticesOf(source) ) { + found.addAll(verticesWithinDistance(v, distance - 1)); + } + + return found; + } + + /** + * Get a graph containing only the vertices within distance edges of target + * @param target a vertex in graph + * @param distance the max distance + * @return a non-null graph + */ + public BaseGraph subsetToNeighbors(final V target, final int distance) { + if ( target == null ) throw new IllegalArgumentException("Target cannot be null"); + if ( ! containsVertex(target) ) throw new IllegalArgumentException("Graph doesn't contain vertex " + target); + if ( distance < 0 ) throw new IllegalArgumentException("Distance must be >= 0 but got " + distance); + + + final Set toKeep = verticesWithinDistance(target, distance); + final Set toRemove = new HashSet<>(vertexSet()); + toRemove.removeAll(toKeep); + + final BaseGraph result = (BaseGraph)clone(); + result.removeAllVertices(toRemove); + + return result; + } + + /** + * Get a subgraph of graph that contains only vertices within 10 edges of the ref source vertex + * @return a non-null subgraph of this graph + */ + public BaseGraph subsetToRefSource() { + return subsetToNeighbors(getReferenceSourceVertex(), 10); + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java index 06c127a84..36c515073 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java @@ -155,20 +155,29 @@ public final class SeqGraph extends BaseGraph { //logger.info("simplifyGraph iteration " + i); // iterate until we haven't don't anything useful boolean didSomeWork = false; - if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + iteration + ".1.dot"), 0); + printGraphSimplification(new File("simplifyGraph." + iteration + ".1.dot")); didSomeWork |= new MergeDiamonds().transformUntilComplete(); didSomeWork |= new MergeTails().transformUntilComplete(); - if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + iteration + ".2.diamonds_and_tails.dot"), 0); + printGraphSimplification(new File("simplifyGraph." + iteration + ".2.diamonds_and_tails.dot")); didSomeWork |= new SplitCommonSuffices().transformUntilComplete(); - if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + iteration + ".3.split_suffix.dot"), 0); + printGraphSimplification(new File("simplifyGraph." + iteration + ".3.split_suffix.dot")); didSomeWork |= new MergeCommonSuffices().transformUntilComplete(); - if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + iteration + ".4.merge_suffix.dot"), 0); + printGraphSimplification(new File("simplifyGraph." + iteration + ".4.merge_suffix.dot")); didSomeWork |= zipLinearChains(); return didSomeWork; } + /** + * Print simplication step of this graph, if PRINT_SIMPLIFY_GRAPHS is enabled + * @param file the destination for the graph DOT file + */ + private void printGraphSimplification(final File file) { + if ( PRINT_SIMPLIFY_GRAPHS ) + subsetToNeighbors(getReferenceSourceVertex(), 5).printGraph(file, 0); + } + /** * Zip up all of the simple linear chains present in this graph. 
* diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedSequenceMerger.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedSequenceMerger.java index 0babd8d56..5d725b1dd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedSequenceMerger.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedSequenceMerger.java @@ -81,7 +81,7 @@ public class SharedSequenceMerger { else { // graph.printGraph(new File("csm." + counter + "." + v.getSequenceString() + "_pre.dot"), 0); - final List edgesToRemove = new LinkedList(); + final List edgesToRemove = new LinkedList<>(); final byte[] prevSeq = prevs.iterator().next().getSequence(); final SeqVertex newV = new SeqVertex(ArrayUtils.addAll(prevSeq, v.getSequence())); graph.addVertex(newV); @@ -124,11 +124,17 @@ public class SharedSequenceMerger { final SeqVertex first = incomingVertices.iterator().next(); for ( final SeqVertex prev : incomingVertices) { if ( ! 
prev.seqEquals(first) ) + // cannot merge if our sequence isn't the same as the first sequence return false; final Collection prevOuts = graph.outgoingVerticesOf(prev); if ( prevOuts.size() != 1 ) + // prev -> v must be the only edge from prev return false; if ( prevOuts.iterator().next() != v ) + // don't allow cyles + return false; + if ( graph.inDegreeOf(prev) == 0 ) + // cannot merge when any of the incoming nodes are sources return false; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index bd24891bc..123b36640 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -113,7 +113,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // actually build the read threading graph rtgraph.buildGraphIfNecessary(); - if ( debugGraphTransformations ) rtgraph.printGraph(new File("sequenceGraph.0.0.raw_readthreading_graph.dot"), pruneFactor); + printDebugGraphTransform(rtgraph, new File("sequenceGraph.0.0.raw_readthreading_graph.dot")); // go through and prune all of the chains where all edges have <= pruneFactor. 
This must occur // before recoverDanglingTails in the graph, so that we don't spend a ton of time recovering @@ -128,7 +128,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // remove all heading and trailing paths if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef(); - if ( debugGraphTransformations ) rtgraph.printGraph(new File("sequenceGraph.0.1.cleaned_readthreading_graph.dot"), pruneFactor); + printDebugGraphTransform(rtgraph, new File("sequenceGraph.0.1.cleaned_readthreading_graph.dot")); final SeqGraph initialSeqGraph = rtgraph.convertToSequenceGraph(); @@ -136,7 +136,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { if ( justReturnRawGraph ) return Collections.singletonList(initialSeqGraph); if ( debug ) logger.info("Using kmer size of " + rtgraph.getKmerSize() + " in read threading assembler"); - if ( debugGraphTransformations ) initialSeqGraph.printGraph(new File("sequenceGraph.0.2.initial_seqgraph.dot"), pruneFactor); + printDebugGraphTransform(initialSeqGraph, new File("sequenceGraph.0.2.initial_seqgraph.dot")); initialSeqGraph.cleanNonRefPaths(); // TODO -- I don't this is possible by construction final SeqGraph seqGraph = cleanupSeqGraph(initialSeqGraph); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java index cfed2f0b8..e1398e119 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java @@ -166,4 +166,20 @@ public class CommonSuffixMergerUnitTest extends BaseTest { splitter.merge(data.graph, data.v); assertSameHaplotypes(String.format("suffixMerge.%s.%d", data.commonSuffix, data.graph.vertexSet().size()), data.graph, 
original); } + + @Test + public void testDoesntMergeSourceNodes() { + final SeqGraph g = new SeqGraph(); + final SeqVertex v1 = new SeqVertex("A"); + final SeqVertex v2 = new SeqVertex("A"); + final SeqVertex v3 = new SeqVertex("A"); + final SeqVertex top = new SeqVertex("T"); + final SeqVertex b = new SeqVertex("C"); + g.addVertices(top, v1, v2, v3, top, b); + g.addEdges(top, v1, b); + g.addEdges(v2, b); // v2 doesn't have previous node, cannot be merged + g.addEdges(top, v3, b); + final SharedSequenceMerger merger = new SharedSequenceMerger(); + Assert.assertFalse(merger.merge(g, b), "Shouldn't be able to merge shared vertices, when one is a source"); + } } From e19c24f3ee0bb8fc3146947f1f4ff59de6a9145f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 4 Jun 2013 09:35:12 -0400 Subject: [PATCH 066/116] Bugfix for HaplotypeCaller error: Only one of refStart or refStop must be < 0, not both -- This occurred because we were reverting reads with soft clips that would produce reads with negative (or 0) alignment starts. From such reads we could end up with adaptor starts that were negative and that would ultimately produce the "Only one of refStart or refStop must be < 0, not both" error in the FragmentUtils merging code (which would revert and adaptor clip reads). -- We now hard clip away bases soft clipped reverted bases that fall before the 1-based contig start in revertSoftClippedBases. 
-- Replace buggy cigarFromString with proper SAM-JDK call TextCigarCodec.getSingleton().decode(cigarString) -- Added unit tests for reverting soft clipped bases that create a read before the contig -- [delivers #50892431] --- .../sting/utils/clipping/ClippingOp.java | 34 ++++++-- .../utils/clipping/ReadClipperTestUtils.java | 83 +------------------ .../utils/clipping/ReadClipperUnitTest.java | 59 ++++++++++--- .../fragments/FragmentUtilsUnitTest.java | 48 +++++++++++ 4 files changed, 126 insertions(+), 98 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index 2c2cbd98f..836c16a7e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -194,9 +194,17 @@ public class ClippingOp { unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH)); unclipped.setCigar(unclippedCigar); - unclipped.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar)); + final int newStart = read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar); + unclipped.setAlignmentStart(newStart); - return unclipped; + if ( newStart <= 0 ) { + // if the start of the unclipped read occurs before the contig, + // we must hard clip away the bases since we cannot represent reads with + // negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned) + return hardClip(unclipped, 0, - newStart); + } else { + return unclipped; + } } /** @@ -335,7 +343,24 @@ public class ClippingOp { return newCigar; } - @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"}) + /** + * Hard clip bases from read, from start to stop in base coordinates + * + * If start == 0, then we will clip from the front of the read, otherwise we clip + * from the 
right. If start == 0 and stop == 10, this would clip out the first + * 10 bases of the read. + * + * Note that this function works with reads with negative alignment starts, in order to + * allow us to hardClip reads that have had their soft clips reverted and so might have + * negative alignment starts + * + * Works properly with reduced reads and insertion/deletion base qualities + * + * @param read a non-null read + * @param start a start >= 0 and < read.length + * @param stop a stop >= 0 and < read.length. + * @return a cloned version of read that has been properly trimmed down + */ private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) { final int firstBaseAfterSoftClips = read.getAlignmentStart() - read.getSoftStart(); final int lastBaseBeforeSoftClips = read.getSoftEnd() - read.getSoftStart(); @@ -343,7 +368,6 @@ public class ClippingOp { if (start == firstBaseAfterSoftClips && stop == lastBaseBeforeSoftClips) // note that if the read has no soft clips, these constants will be 0 and read length - 1 (beauty of math). return GATKSAMRecord.emptyRead(read); - // If the read is unmapped there is no Cigar string and neither should we create a new cigar string CigarShift cigarShift = (read.getReadUnmappedFlag()) ? 
new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop); @@ -357,7 +381,7 @@ public class ClippingOp { System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength); System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); - GATKSAMRecord hardClippedRead; + final GATKSAMRecord hardClippedRead; try { hardClippedRead = (GATKSAMRecord) read.clone(); } catch (CloneNotSupportedException e) { diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java index 0e0f6322e..cbbc8252b 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java @@ -28,8 +28,8 @@ package org.broadinstitute.sting.utils.clipping; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import net.sf.samtools.TextCigarCodec; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; @@ -38,13 +38,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Stack; -/** - * Created by IntelliJ IDEA. - * User: roger - * Date: 11/27/11 - * Time: 6:45 AM - * To change this template use File | Settings | File Templates. 
- */ public class ReadClipperTestUtils { //Should contain all the utils needed for tests to mass produce //reads, cigars, and other needed classes @@ -236,78 +229,6 @@ public class ReadClipperTestUtils { } public static Cigar cigarFromString(String cigarString) { - Cigar cigar = new Cigar(); - - boolean isNumber = false; - int number = 0; - for (int i = 0; i < cigarString.length(); i++) { - char x = cigarString.charAt(i); - - if (x >= '0' && x <='9') { - if (isNumber) { - number *= 10; - } - else { - isNumber = true; - } - number += x - '0'; - } - - else { - CigarElement e; - switch (x) { - case 'M': - case 'm': - e = new CigarElement(number, CigarOperator.M); - break; - - case 'I': - case 'i': - e = new CigarElement(number, CigarOperator.I); - break; - - case 'D': - case 'd': - e = new CigarElement(number, CigarOperator.D); - break; - - case 'S': - case 's': - e = new CigarElement(number, CigarOperator.S); - break; - - case 'N': - case 'n': - e = new CigarElement(number, CigarOperator.N); - break; - - case 'H': - case 'h': - e = new CigarElement(number, CigarOperator.H); - break; - - case 'P': - case 'p': - e = new CigarElement(number, CigarOperator.P); - break; - - case '=': - e = new CigarElement(number, CigarOperator.EQ); - break; - - case 'X': - case 'x': - e = new CigarElement(number, CigarOperator.X); - break; - - default: - throw new ReviewedStingException("Unrecognized cigar operator: " + x + " (number: " + number + ")"); - } - cigar.add(e); - } - } - return cigar; + return TextCigarCodec.getSingleton().decode(cigarString); } - - } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index 0b4153535..d6bd0d4d2 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -46,6 +46,7 @@ import java.util.List; * 
Date: 9/28/11 */ public class ReadClipperUnitTest extends BaseTest { + private final static boolean DEBUG = false; List cigarList; int maximumCigarSize = 10; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 @@ -55,7 +56,7 @@ public class ReadClipperUnitTest extends BaseTest { cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize); } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testHardClipBothEndsByReferenceCoordinates() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); @@ -71,7 +72,7 @@ public class ReadClipperUnitTest extends BaseTest { } } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testHardClipByReadCoordinates() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); @@ -101,7 +102,7 @@ public class ReadClipperUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "ClippedReadLengthData", enabled = true) + @Test(dataProvider = "ClippedReadLengthData", enabled = !DEBUG) public void testHardClipReadLengthIsRight(final int originalReadLength, final int nToClip) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(originalReadLength + "M"); read.getReadLength(); // provoke the caching of the read length @@ -112,7 +113,7 @@ public class ReadClipperUnitTest extends BaseTest { clipped.getReadLength(), clipped.getCigar(), expectedReadLength, nToClip, read.getReadLength(), read.getCigar())); } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinates() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); @@ -135,7 +136,7 @@ public class ReadClipperUnitTest extends BaseTest { } } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesLeftTail() { for (Cigar cigar : 
cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); @@ -154,7 +155,7 @@ public class ReadClipperUnitTest extends BaseTest { } } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesRightTail() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); @@ -172,7 +173,7 @@ public class ReadClipperUnitTest extends BaseTest { } } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testHardClipLowQualEnds() { final byte LOW_QUAL = 2; final byte HIGH_QUAL = 30; @@ -216,7 +217,7 @@ public class ReadClipperUnitTest extends BaseTest { } } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testHardClipSoftClippedBases() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); @@ -251,7 +252,7 @@ public class ReadClipperUnitTest extends BaseTest { } } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testRevertSoftClippedBases() { for (Cigar cigar : cigarList) { final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP); @@ -273,7 +274,7 @@ public class ReadClipperUnitTest extends BaseTest { } } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testRevertSoftClippedBasesWithThreshold() { for (Cigar cigar : cigarList) { final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP); @@ -292,6 +293,40 @@ public class ReadClipperUnitTest extends BaseTest { } } + @DataProvider(name = "RevertSoftClipsBeforeContig") + public Object[][] makeRevertSoftClipsBeforeContig() { + List tests = new ArrayList<>(); + + // this functionality can be adapted to provide input data for whatever you might want in your data + for ( int softStart : Arrays.asList(-10, -1, 0) ) { + for ( int alignmentStart : Arrays.asList(1, 10) ) { + tests.add(new Object[]{softStart, alignmentStart}); + } + } + + return tests.toArray(new Object[][]{}); + } + + 
@Test(enabled = true, dataProvider = "RevertSoftClipsBeforeContig") + public void testRevertSoftClippedBasesBeforeStartOfContig(final int softStart, final int alignmentStart) { + final int nMatches = 10; + final int nSoft = -1 * (softStart - alignmentStart); + final String cigar = nSoft + "S" + nMatches + "M"; + final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + read.setAlignmentStart(alignmentStart); + + Assert.assertEquals(read.getSoftStart(), softStart); + Assert.assertEquals(read.getAlignmentStart(), alignmentStart); + Assert.assertEquals(read.getCigarString(), cigar); + + final GATKSAMRecord reverted = ReadClipper.revertSoftClippedBases(read); + + final int expectedAlignmentStart = 1; + final String expectedCigar = (1 - softStart) + "H" + read.getAlignmentEnd() + "M"; + Assert.assertEquals(reverted.getSoftStart(), expectedAlignmentStart); + Assert.assertEquals(reverted.getAlignmentStart(), expectedAlignmentStart); + Assert.assertEquals(reverted.getCigarString(), expectedCigar); + } private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) { if (!read.isEmpty()) { @@ -375,7 +410,7 @@ public class ReadClipperUnitTest extends BaseTest { } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testHardClipReducedRead() { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("10M"); final int[] counts = new int[read.getReadLength()]; @@ -391,7 +426,7 @@ public class ReadClipperUnitTest extends BaseTest { } } - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testRevertEntirelySoftclippedReads() { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("2H1S3H"); GATKSAMRecord clippedRead = ReadClipper.revertSoftClippedBases(read); diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java index e9600480a..0886427ca 100644 --- 
a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.fragments; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.TextCigarCodec; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -296,4 +297,51 @@ public class FragmentUtilsUnitTest extends BaseTest { final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); Assert.assertNull(actual); } + + @DataProvider(name = "MergeFragmentsOffContig") + public Object[][] makeMergeFragmentsOffContig() throws Exception { + List tests = new ArrayList<>(); + + for ( final int pre1 : Arrays.asList(0, 50)) { + for ( final int post1 : Arrays.asList(0, 50)) { + for ( final int pre2 : Arrays.asList(0, 50)) { + for ( final int post2 : Arrays.asList(0, 50)) { + tests.add(new Object[]{pre1, post1, pre2, post2}); + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "MergeFragmentsOffContig") + public void testMergeFragmentsOffContig(final int pre1, final int post1, final int pre2, final int post2) { + final int contigSize = 10; + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 0, contigSize); + + final GATKSAMRecord read1 = createReadOffContig(header, false, pre1, post1); + final GATKSAMRecord read2 = createReadOffContig(header, true, pre2, post2); + + final GATKSAMRecord merged = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); + } + + private GATKSAMRecord createReadOffContig(final SAMFileHeader header, final boolean negStrand, final int pre, final int post) { + final int contigLen = header.getSequence(0).getSequenceLength(); + final int readLen = pre + contigLen + post; + final GATKSAMRecord read = 
ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, readLen); + read.setAlignmentStart(1); + read.setCigar(TextCigarCodec.getSingleton().decode(pre + "S" + contigLen + "M" + post + "S")); + read.setBaseQualities(Utils.dupBytes((byte) 30, readLen)); + read.setReadBases(Utils.dupBytes((byte)'A', readLen)); + read.setMappingQuality(60); + read.setMateAlignmentStart(1); + read.setProperPairFlag(true); + read.setReadPairedFlag(true); + read.setInferredInsertSize(30); + read.setReadNegativeStrandFlag(negStrand); + read.setMateNegativeStrandFlag(! negStrand); + read.setReadGroup(new GATKSAMReadGroupRecord("foo")); + return read; + } } From 34bdf20132baebb1567a09605d3928b9841130be Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 5 Jun 2013 16:37:31 -0400 Subject: [PATCH 070/116] Bugfix for bad AD values in UG/HC -- In the case where we have multiple potential alternative alleles *and* we weren't calling all of them (so that n potential values < n called) we could end up trimming the alleles down which would result in the mismatch between the PerReadAlleleLikelihoodMap alleles and the VariantContext trimmed alleles. -- Fixed by doing two things (1) moving the trimming code after the annotation call and (2) updating AD annotation to check that the alleles in the VariantContext and the PerReadAlleleLikelihoodMap are concordant, which will stop us from degenerating in the future. 
-- delivers [#50897077] --- .../annotator/DepthPerAlleleBySample.java | 29 ++++++++++--------- .../genotyper/UnifiedGenotyperEngine.java | 10 +++---- .../haplotypecaller/GenotypingEngine.java | 7 ++--- ...perGeneralPloidySuite1IntegrationTest.java | 2 +- ...perGeneralPloidySuite2IntegrationTest.java | 2 +- ...GenotyperNormalCallingIntegrationTest.java | 4 +-- ...dGenotyperReducedReadsIntegrationTest.java | 2 +- .../HaplotypeCallerIntegrationTest.java | 2 +- .../genotyper/PerReadAlleleLikelihoodMap.java | 8 +++++ 9 files changed, 37 insertions(+), 29 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 1cf91f181..b22ea7931 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -66,10 +66,7 @@ import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.GenotypeBuilder; import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** @@ -135,20 +132,24 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa } private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc, final GenotypeBuilder gb) { - final HashMap alleleCounts = new HashMap(); + final Set alleles = new HashSet<>(vc.getAlleles()); + + // make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext + if ( ! 
perReadAlleleLikelihoodMap.getAllelesSet().containsAll(alleles) ) + throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + perReadAlleleLikelihoodMap.getAllelesSet()); + + final HashMap alleleCounts = new HashMap<>(); + for ( final Allele allele : vc.getAlleles() ) { alleleCounts.put(allele, 0); } - for ( final Allele allele : vc.getAlleles() ) { - alleleCounts.put(allele, 0); - } for (Map.Entry> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) { + final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles); + if (! a.isInformative() ) continue; // read is non-informative final GATKSAMRecord read = el.getKey(); - final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); - if (! a.isInformative() ) - continue; // read is non-informative - if (!vc.getAlleles().contains(a.getMostLikelyAllele())) - continue; // sanity check - shouldn't be needed - alleleCounts.put(a.getMostLikelyAllele(), alleleCounts.get(a.getMostLikelyAllele()) + (read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1)); + final int prevCount = alleleCounts.get(a.getMostLikelyAllele()); + final int incCount = read.isReducedRead() ? 
read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1; + alleleCounts.put(a.getMostLikelyAllele(), prevCount + incCount); } + final int[] counts = new int[alleleCounts.size()]; counts[0] = alleleCounts.get(vc.getReference()); for (int i = 0; i < vc.getAlternateAlleles().size(); i++) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index fc11706e5..3d9f75d45 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -543,11 +543,6 @@ public class UnifiedGenotyperEngine { builder.attributes(attributes); VariantContext vcCall = builder.make(); - // if we are subsetting alleles (either because there were too many or because some were not polymorphic) - // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). 
- if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync - vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall); - if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations final ReadBackedPileup pileup = rawContext.getBasePileup(); @@ -556,6 +551,11 @@ public class UnifiedGenotyperEngine { vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap); } + // if we are subsetting alleles (either because there were too many or because some were not polymorphic) + // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). + if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync + vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall); + return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0)); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 9bb456230..cbcba28fd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -204,13 +204,12 @@ public class GenotypingEngine { convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) ); final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, 
perSampleFilteredReadList, call ); - VariantContext annotatedCall = call; - if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! + VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call); + + if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall); } - annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, annotatedCall); - // maintain the set of all called haplotypes for ( final Allele calledAllele : call.getAlleles() ) calledHaplotypes.addAll(alleleMapper.get(calledAllele)); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 1cfc41a27..c791d08ae 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "66a5a3eb657fac5c621bc0c228ea9caf"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "353c97bfb05a939b3838dc8eee50326b"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 64568d714..1022b6e15 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","7e4e1397d5cff68aeba3595e671574fc"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 907af0f34..a52176a08 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("1ab95513a3abb5b760578831c61ef94b")); + Arrays.asList("f576d86656cc37c0a869c7ac911f4c7c")); executeTest("test Multiple SNP 
alleles", spec); } @@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("314b99eb146de1fdafed872ecbe1cfc2")); + Arrays.asList("94d7a907fdca7e8c9fd6bb8a87b2bab2")); executeTest("test reverse trim", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index 5f9667cca..b9830de8e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -74,7 +74,7 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "19bc6a74250ec19efc4e1b4ee6515ac0"); + testReducedCalling("INDEL", "22110b001e2d3dd45d7872334086b2b9"); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 5fc0f4f52..d0c7228ae 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void 
testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "ffd69c410dca0d2f9fe75f3cb5d08179"); + "627b5a12f2f02a874fb39982171a3982"); } @Test diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index b309ef633..8067d67bc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -366,4 +366,12 @@ public class PerReadAlleleLikelihoodMap { return true; } + + /** + * Get an unmodifiable set of the unique alleles in this PerReadAlleleLikelihoodMap + * @return a non-null unmodifiable set + */ + public Set getAllelesSet() { + return Collections.unmodifiableSet(allelesSet); + } } From 209dd64268b208ece6020045b720cb677d949487 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 5 Jun 2013 17:43:31 -0400 Subject: [PATCH 071/116] HaplotypeCaller now emits per-sample DP -- Created a new annotation DepthPerSampleHC that is by default on in the HaplotypeCaller -- The depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot differentiate between reads that align over the event but aren't informative vs. those that aren't even close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP). 
-- Update MD5s -- delivers [#48240601] --- .../walkers/annotator/DepthPerSampleHC.java | 126 ++++++++++++++++++ .../haplotypecaller/HaplotypeCaller.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 6 +- .../HaplotypeCallerIntegrationTest.java | 18 +-- 4 files changed, 139 insertions(+), 13 deletions(-) create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerSampleHC.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerSampleHC.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerSampleHC.java new file mode 100644 index 000000000..9bd641011 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerSampleHC.java @@ -0,0 +1,126 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. 
+* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. 
The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFFormatHeaderLine; +import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; + +import java.util.*; + + +/** + * The depth of coverage of each allele per sample + * + * the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot + * differentiate between reads that align over the event but aren't informative vs. those that aren't even + * close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP). 
+ */ +public class DepthPerSampleHC extends GenotypeAnnotation { + public void annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final AlignmentContext stratifiedContext, + final VariantContext vc, + final Genotype g, + final GenotypeBuilder gb, + final PerReadAlleleLikelihoodMap alleleLikelihoodMap) { + if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) ) + return; + + if (alleleLikelihoodMap == null ) + throw new IllegalStateException("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller"); + + // the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot + // differentiate between reads that align over the event but aren't informative vs. those that aren't even + // close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP). + int dp = 0; + + if ( alleleLikelihoodMap.isEmpty() ) { + // there are no reads + } else { + final Set alleles = new HashSet<>(vc.getAlleles()); + + // make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext + if ( ! alleleLikelihoodMap.getAllelesSet().containsAll(alleles) ) + throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet()); + + for (Map.Entry> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) { + final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles); + if ( a.isInformative() ) { + final GATKSAMRecord read = el.getKey(); + final int incCount = read.isReducedRead() ? 
read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1; + dp += incCount; + } + } + + gb.DP(dp); + } + } + + public List getKeyNames() { + return Collections.singletonList(VCFConstants.DEPTH_KEY); + } + + public List getDescriptions() { + return Collections.singletonList(VCFStandardHeaderLines.getFormatLine(getKeyNames().get(0))); + } +} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 73367f8c3..182e59493 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -223,7 +223,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In */ @Advanced @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false) - protected List annotationsToUse = new ArrayList(Arrays.asList(new String[]{"ClippingRankSumTest"})); + protected List annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"ClippingRankSumTest", "DepthPerSampleHC"})); /** * Which annotations to exclude from output in the VCF file. 
Note that this argument has higher priority than the -A or -G arguments, diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 3f3b295f8..fba294c3d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fc11b553fbf16beac0da04a69f419365"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "8d7728909b1b8eb3f30f2f1583f054a8"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "38b4596c3910fdde51ea59aa1a8f848f"); + "db71826dc798ff1cdf0c5d05b0ede976"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "08147870d73d9749ced8cfc7cdd4714f"); + "42831d5463552911b7da9de0b4a27289"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index d0c7228ae..77be9fba2 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -80,12 +80,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "37e462379de17bc6c8aeeed6e9735dd3"); + HCTest(CEUTRIO_BAM, "", "1b15e4647013ab2c3ce7073c420d8640"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "983a0d122714d4aa0ff7af20cc686703"); + HCTest(NA12878_BAM, "", "423be27dc2cf7fd10baf465cf93e18e2"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "627b5a12f2f02a874fb39982171a3982"); + "a28e6f14e28708283d61c1e423bbdcb1"); } @Test @@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "ce602282e80cca6d4272f940e20e90c3"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "8344d86751b707c53b296c297eba4bfa"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -149,7 +149,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "09335c01d2e90714af7f4c91156da0b1"); + HCTestNearbySmallIntervals(NA12878_BAM, "", "dea98f257d39fa1447a12c36a6bbf4a3"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -159,14 
+159,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b34ddc93a7b9919e05da499508f44dd9")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("7cd1c5e2642ae8ddf38932aba1f51d69")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("98a78b9f58ab197b827ef2ce3ab043d3")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ee55ff4c6ec1bbef88e21cc0f45d4c47")); executeTest("HCTestStructuralIndels: ", spec); } @@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("6e6ef6e0326bee6d20d9fd37349fdb8c")); + Arrays.asList("4886a98bf699f4e7f4491160749ada6a")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -196,7 +196,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " 
--no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("5e535983b2f7e5fb6c84fecffa092324")); + Arrays.asList("86bdd07a3ac4f6ce239c30efea8bf5ba")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } From 00c06e9e52f416599bb9b906c32857848a9abd39 Mon Sep 17 00:00:00 2001 From: Michael McCowan Date: Tue, 4 Jun 2013 10:08:24 -0400 Subject: [PATCH 072/116] Performance improvements: - Memoized MathUtil's cumulative binomial probability function. - Reduced the default size of the read name map in reduced reads and handle its resets more efficiently. --- .../compression/reducereads/ReduceReads.java | 17 +++- .../broadinstitute/sting/utils/MathUtils.java | 77 +++++++++++++++---- .../sting/utils/MathUtilsUnitTest.java | 45 +++++++++-- 3 files changed, 112 insertions(+), 27 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index eb55701ae..e636f8f17 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -273,8 +273,9 @@ public class ReduceReads extends ReadWalker, Redu int nCompressedReads = 0; - Object2LongOpenHashMap readNameHash; // This hash will keep the name of the original read the new compressed name (a number). + private static int READ_NAME_HASH_DEFAULT_SIZE = 1000; Long nextReadNumber = 1L; // The next number to use for the compressed read name. + Object2LongOpenHashMap readNameHash; // This hash will keep the name of the original read the new compressed name (a number). 
ObjectSortedSet intervalList; @@ -313,7 +314,7 @@ public class ReduceReads extends ReadWalker, Redu knownSnpPositions = new ObjectAVLTreeSet(); GenomeAnalysisEngine toolkit = getToolkit(); - readNameHash = new Object2LongOpenHashMap(100000); // prepare the read name hash to keep track of what reads have had their read names compressed + this.resetReadNameHash(); // prepare the read name hash to keep track of what reads have had their read names compressed intervalList = new ObjectAVLTreeSet(); // get the interval list from the engine. If no interval list was provided, the walker will work in WGS mode if (toolkit.getIntervals() != null) @@ -335,6 +336,16 @@ public class ReduceReads extends ReadWalker, Redu } } + /** Initializer for {@link #readNameHash}. */ + private void resetReadNameHash() { + // If the hash grows large, subsequent clear operations can be very expensive, so trim the hash down if it grows beyond its default. + if (readNameHash == null || readNameHash.size() > READ_NAME_HASH_DEFAULT_SIZE) { + readNameHash = new Object2LongOpenHashMap(READ_NAME_HASH_DEFAULT_SIZE); + } else { + readNameHash.clear(); + } + } + /** * Takes in a read and prepares it for the SlidingWindow machinery by performing the * following optional clipping operations: @@ -471,7 +482,7 @@ public class ReduceReads extends ReadWalker, Redu // stash.compress(), the readNameHash can be cleared after the for() loop above. // The advantage of clearing the hash is that otherwise it holds all reads that have been encountered, // which can use a lot of memory and cause RR to slow to a crawl and/or run out of memory. 
- readNameHash.clear(); + this.resetReadNameHash(); } } else diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index dfd3537da..07aff5983 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -29,9 +29,8 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import java.lang.IllegalArgumentException; +import javax.annotation.Nullable; import java.math.BigDecimal; import java.util.*; @@ -417,9 +416,35 @@ public class MathUtils { return log10BinomialCoefficient(n, k) + (n * FAIR_BINOMIAL_PROB_LOG10_0_5); } + /** A memoization container for {@link #binomialCumulativeProbability(int, int, int)}. Synchronized to accomodate multithreading. */ + private static final Map BINOMIAL_CUMULATIVE_PROBABILITY_MEMOIZATION_CACHE = + Collections.synchronizedMap(new LRUCache(10_000)); + + /** + * Primitive integer-triplet bijection into long. Returns null when the bijection function fails (in lieu of an exception), which will + * happen when: any value is negative or larger than a short. This method is optimized for speed; it is not intended to serve as a + * utility function. 
+ */ + @Nullable + static Long fastGenerateUniqueHashFromThreeIntegers(final int one, final int two, final int three) { + if (one < 0 || two < 0 || three < 0 || Short.MAX_VALUE < one || Short.MAX_VALUE < two || Short.MAX_VALUE < three) { + return null; + } else { + long result = 0; + result += (short) one; + result <<= 16; + result += (short) two; + result <<= 16; + result += (short) three; + return result; + } + } + /** * Performs the cumulative sum of binomial probabilities, where the probability calculation is done in log space. * Assumes that the probability of a successful hit is fair (i.e. 0.5). + * + * This pure function is memoized because of its expensive BigDecimal calculations. * * @param n number of attempts for the number of hits * @param k_start start (inclusive) of the cumulant sum (over hits) @@ -430,23 +455,41 @@ public class MathUtils { if ( k_end > n ) throw new IllegalArgumentException(String.format("Value for k_end (%d) is greater than n (%d)", k_end, n)); - double cumProb = 0.0; - double prevProb; - BigDecimal probCache = BigDecimal.ZERO; - - for (int hits = k_start; hits <= k_end; hits++) { - prevProb = cumProb; - final double probability = binomialProbability(n, hits); - cumProb += probability; - if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision - probCache = probCache.add(new BigDecimal(prevProb)); - cumProb = 0.0; - hits--; // repeat loop - // prevProb changes at start of loop - } + // Fetch cached value, if applicable. 
+ final Long memoizationKey = fastGenerateUniqueHashFromThreeIntegers(n, k_start, k_end); + final Double memoizationCacheResult; + if (memoizationKey != null) { + memoizationCacheResult = BINOMIAL_CUMULATIVE_PROBABILITY_MEMOIZATION_CACHE.get(memoizationKey); + } else { + memoizationCacheResult = null; } - return probCache.add(new BigDecimal(cumProb)).doubleValue(); + final double result; + if (memoizationCacheResult != null) { + result = memoizationCacheResult; + } else { + double cumProb = 0.0; + double prevProb; + BigDecimal probCache = BigDecimal.ZERO; + + for (int hits = k_start; hits <= k_end; hits++) { + prevProb = cumProb; + final double probability = binomialProbability(n, hits); + cumProb += probability; + if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision + probCache = probCache.add(new BigDecimal(prevProb)); + cumProb = 0.0; + hits--; // repeat loop + // prevProb changes at start of loop + } + } + + result = probCache.add(new BigDecimal(cumProb)).doubleValue(); + if (memoizationKey != null) { + BINOMIAL_CUMULATIVE_PROBABILITY_MEMOIZATION_CACHE.put(memoizationKey, result); + } + } + return result; } /** diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index e4c74a0ad..3933b3830 100644 --- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -41,6 +41,35 @@ public class MathUtilsUnitTest extends BaseTest { public void init() { } + /** + * Tests that we get unqiue values for the valid (non-null-producing) input space for {@link MathUtils#fastGenerateUniqueHashFromThreeIntegers(int, int, int)}. 
+ */ + @Test + public void testGenerateUniqueHashFromThreePositiveIntegers() { + logger.warn("Executing testGenerateUniqueHashFromThreePositiveIntegers"); + + final Set observedLongs = new HashSet(); + for (short i = 0; i < Byte.MAX_VALUE; i++) { + for (short j = 0; j < Byte.MAX_VALUE; j++) { + for (short k = 0; k < Byte.MAX_VALUE; k++) { + final Long aLong = MathUtils.fastGenerateUniqueHashFromThreeIntegers(i, j, k); + //System.out.println(String.format("%s, %s, %s: %s", i, j, k, aLong)); + Assert.assertTrue(observedLongs.add(aLong)); + } + } + } + + for (short i = Byte.MAX_VALUE; i <= Short.MAX_VALUE && i > 0; i += 128) { + for (short j = Byte.MAX_VALUE; j <= Short.MAX_VALUE && j > 0; j += 128) { + for (short k = Byte.MAX_VALUE; k <= Short.MAX_VALUE && k > 0; k += 128) { + final Long aLong = MathUtils.fastGenerateUniqueHashFromThreeIntegers(i, j, k); + // System.out.println(String.format("%s, %s, %s: %s", i, j, k, aLong)); + Assert.assertTrue(observedLongs.add(aLong)); + } + } + } + } + /** * Tests that we get the right values from the binomial distribution */ @@ -64,13 +93,15 @@ public class MathUtilsUnitTest extends BaseTest { public void testCumulativeBinomialProbability() { logger.warn("Executing testCumulativeBinomialProbability"); - final int numTrials = 10; - for ( int i = 0; i < numTrials; i++ ) - Assert.assertEquals(MathUtils.binomialCumulativeProbability(numTrials, i, i), MathUtils.binomialProbability(numTrials, i), 1e-10, String.format("k=%d, n=%d", i, numTrials)); - - Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 2), 0.05468750, 1e-7); - Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 5), 0.62304687, 1e-7); - Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 10), 1.0, 1e-7); + for (int j = 0; j < 2; j++) { // Test memoizing functionality, as well. 
+ final int numTrials = 10; + for ( int i = 0; i < numTrials; i++ ) + Assert.assertEquals(MathUtils.binomialCumulativeProbability(numTrials, i, i), MathUtils.binomialProbability(numTrials, i), 1e-10, String.format("k=%d, n=%d", i, numTrials)); + + Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 2), 0.05468750, 1e-7); + Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 5), 0.62304687, 1e-7); + Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 10), 1.0, 1e-7); + } } /** From 96073c30587a4061eed5950897790d32a178aef6 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Thu, 23 May 2013 20:39:32 -0400 Subject: [PATCH 073/116] This commit addresses JIRA issue GSA-948: Prevent users from doing the wrong thing with RNA-Seq data and the GATK. The previous behavior is to process reads with N CIGAR operators as they are, even though many of the tools do not actually support that operator and results become unpredictable. Now if there is some read with the N operator, the engine returns a user exception. The error message indicates what the problem is (including the offending read and mapping position) and gives a couple of alternatives that the user can take in order to move forward: a) ask for those reads to be filtered out (with --filter_reads_with_N_cigar or -filterRNC) b) keep them in as before (with -U ALLOW_N_CIGAR_READS or -U ALL) Notice that (b) does not have any effect if (a) is enacted; i.e. filtering overrides ignoring. Implementation: * Added filterReadsWithNCigar argument to MalformedReadFilter with the corresponding changes in the code to get it to work. * Added ALLOW_N_CIGAR_READS unsafe flag so that N cigar containing reads can be processed as they are if that is what the user wants. * Added ReadFilterTest class, a common parent for ReadFilter test cases. * Refactor ReadGroupBlackListFilterUnitTest to extend ReadFilterTest and push up some functionality to that class. 
* Modified MalformedReadFilterUnitTest to extend ReadFilterTest and to test the new filter functionality. * Added AllowNCigarMalformedReadFilterUnittest to check on the behavior when the unsafe ALLOW_N_CIGAR_READS flag is used. * Added UnsafeNCigarMalformedReadFilterUnittest to check on the behavior when the unsafe ALL flag is used. * Updated a broken test case in UnifiedGenotyperIntegrationTest resulting from the new behavior. * Updated EngineFeaturesIntegrationTest testdata to be compliant with new behavior --- .../UnifiedGenotyperIntegrationTest.java | 5 +- .../gatk/arguments/ValidationExclusion.java | 2 + .../gatk/filters/MalformedReadFilter.java | 118 +++++- .../sting/utils/exceptions/UserException.java | 14 + .../gatk/EngineFeaturesIntegrationTest.java | 1 + ...llowNCigarMalformedReadFilterUnitTest.java | 77 ++++ .../filters/MalformedReadFilterUnitTest.java | 190 ++++++++- .../sting/gatk/filters/ReadFilterTest.java | 370 ++++++++++++++++++ .../ReadGroupBlackListFilterUnitTest.java | 88 ++--- .../UnsafeMalformedReadFilterUnitTest.java | 50 +++ 10 files changed, 836 insertions(+), 79 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/filters/AllowNCigarMalformedReadFilterUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/filters/ReadFilterTest.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/filters/UnsafeMalformedReadFilterUnitTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index d55a923dc..300d7f5da 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -288,9 +288,10 @@ public class UnifiedGenotyperIntegrationTest extends 
WalkerTest { @Test public void testNsInCigar() { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1, - Arrays.asList("2ae3fd39c53a6954d32faed8703adfe8")); + UserException.UnsupportedCigarOperatorException.class); + executeTest("test calling on reads with Ns in CIGAR", spec); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java index f8f56f89e..75a68d978 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java @@ -36,6 +36,8 @@ public class ValidationExclusion { // our validation options public enum TYPE { + ALLOW_N_CIGAR_READS, // ignore the presence of N operators in CIGARs: do not blow up and process reads that contain one or more N operators. + // This exclusion does not have effect on reads that get filtered {@see MalformedReadFilter}. 
ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index f7d1d0297..a15870a22 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -25,14 +25,16 @@ package org.broadinstitute.sting.gatk.filters; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMTagUtil; +import net.sf.samtools.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.util.Collections; + /** * Filter out malformed reads. 
* @@ -40,20 +42,46 @@ import org.broadinstitute.sting.utils.exceptions.UserException; * @version 0.1 */ public class MalformedReadFilter extends ReadFilter { + + + private static final String FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME = "filter_reads_with_N_cigar" ; + private SAMFileHeader header; + @Argument(fullName = FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME, shortName = "filterRNC", doc = "filter out reads with CIGAR containing the N operator, instead of stop processing and report an error.", required = false) + boolean filterReadsWithNCigar = false; + + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false) boolean filterMismatchingBaseAndQuals = false; @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "if a read has no stored bases (i.e. a '*'), filter out the read instead of blowing up.", required = false) boolean filterBasesNotStored = false; + /** + * Indicates the applicable validation exclusions + */ + private boolean allowNCigars; + @Override - public void initialize(GenomeAnalysisEngine engine) { - this.header = engine.getSAMFileHeader(); + public void initialize(final GenomeAnalysisEngine engine) { + header = engine.getSAMFileHeader(); + ValidationExclusion validationExclusions = null; + final SAMDataSource rds = engine.getReadsDataSource(); + if (rds != null) { + final ReadProperties rps = rds.getReadsInfo(); + if (rps != null) { + validationExclusions = rps.getValidationExclusionList(); + } + } + if (validationExclusions == null) { + allowNCigars = false; + } else { + allowNCigars = validationExclusions.contains(ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS); + } } - public boolean filterOut(SAMRecord read) { + public boolean filterOut(final SAMRecord read) { // slowly changing the behavior to blow up first and filtering out if a parameter is explicitly 
provided return !checkInvalidAlignmentStart(read) || !checkInvalidAlignmentEnd(read) || @@ -61,7 +89,8 @@ public class MalformedReadFilter extends ReadFilter { !checkHasReadGroup(read) || !checkMismatchingBasesAndQuals(read, filterMismatchingBaseAndQuals) || !checkCigarDisagreesWithAlignment(read) || - !checkSeqStored(read, filterBasesNotStored); + !checkSeqStored(read, filterBasesNotStored) || + !checkCigarIsSupported(read,filterReadsWithNCigar,allowNCigars); } private static boolean checkHasReadGroup(final SAMRecord read) { @@ -80,7 +109,7 @@ public class MalformedReadFilter extends ReadFilter { * @param read The read to validate. * @return true if read start is valid, false otherwise. */ - private static boolean checkInvalidAlignmentStart( SAMRecord read ) { + private static boolean checkInvalidAlignmentStart(final SAMRecord read ) { // read is not flagged as 'unmapped', but alignment start is NO_ALIGNMENT_START if( !read.getReadUnmappedFlag() && read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ) return false; @@ -95,7 +124,7 @@ public class MalformedReadFilter extends ReadFilter { * @param read The read to validate. * @return true if read end is valid, false otherwise. */ - private static boolean checkInvalidAlignmentEnd( SAMRecord read ) { + private static boolean checkInvalidAlignmentEnd(final SAMRecord read ) { // Alignment aligns to negative number of bases in the reference. if( !read.getReadUnmappedFlag() && read.getAlignmentEnd() != -1 && (read.getAlignmentEnd()-read.getAlignmentStart()+1)<0 ) return false; @@ -108,11 +137,11 @@ public class MalformedReadFilter extends ReadFilter { * @param read The read to verify. * @return true if alignment agrees with header, false othrewise. 
*/ - private static boolean checkAlignmentDisagreesWithHeader( SAMFileHeader header, SAMRecord read ) { + private static boolean checkAlignmentDisagreesWithHeader(final SAMFileHeader header, final SAMRecord read ) { // Read is aligned to nonexistent contig if( read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START ) return false; - SAMSequenceRecord contigHeader = header.getSequence( read.getReferenceIndex() ); + final SAMSequenceRecord contigHeader = header.getSequence( read.getReferenceIndex() ); // Read is aligned to a point after the end of the contig if( !read.getReadUnmappedFlag() && read.getAlignmentStart() > contigHeader.getSequenceLength() ) return false; @@ -124,7 +153,7 @@ public class MalformedReadFilter extends ReadFilter { * @param read The read to validate. * @return true if cigar agrees with alignment, false otherwise. */ - private static boolean checkCigarDisagreesWithAlignment(SAMRecord read) { + private static boolean checkCigarDisagreesWithAlignment(final SAMRecord read) { // Read has a valid alignment start, but the CIGAR string is empty if( !read.getReadUnmappedFlag() && read.getAlignmentStart() != -1 && @@ -134,13 +163,72 @@ public class MalformedReadFilter extends ReadFilter { return true; } + /** + * Check for unsupported CIGAR operators. + * Currently the N operator is not supported. + * @param read The read to validate. + * @param filterReadsWithNCigar whether the offending read should just + * be silently filtered or not. + * @param allowNCigars whether reads that contain N operators in their CIGARs + * can be processed or an exception should be thrown instead. + * @throws UserException.UnsupportedCigarOperatorException + * if {@link #filterReadsWithNCigar} is false and + * the input read has some unsupported operation. + * @return true if the read CIGAR operations are + * fully supported, otherwise false, as long as + * no exception has been thrown. 
+ */ + private static boolean checkCigarIsSupported(final SAMRecord read, final boolean filterReadsWithNCigar, final boolean allowNCigars) { + if( containsNOperator(read)) { + if (! filterReadsWithNCigar && !allowNCigars) { + throw new UserException.UnsupportedCigarOperatorException( + CigarOperator.N,read, + "Perhaps you are" + + " trying to use RNA-Seq data?" + + " While we are currently actively working to" + + " support this data type unfortunately the" + + " GATK cannot be used with this data in its" + + " current form. You have the option of either" + + " filtering out all reads with operator " + + CigarOperator.N + " in their CIGAR string" + + " (please add --" + + FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME + + " to your command line) or" + + " assume the risk of processing those reads as they" + + " are including the pertinent unsafe flag (please add -U" + + ' ' + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS + + " to your command line). Notice however that if you were" + + " to choose the latter, an unspecified subset of the" + + " analytical outputs of an unspecified subset of the tools" + + " will become unpredictable. Consequently the GATK team" + + " might well not be able to provide you with the usual support" + + " with any issue regarding any output"); + } + return ! filterReadsWithNCigar; + } + return true; + } + + private static boolean containsNOperator(final SAMRecord read) { + final Cigar cigar = read.getCigar(); + if (cigar == null) { + return false; + } + for (final CigarElement ce : cigar.getCigarElements()) { + if (ce.getOperator() == CigarOperator.N) { + return true; + } + } + return false; + } + /** * Check if the read has the same number of bases and base qualities * @param read the read to validate * @return true if they have the same number. False otherwise. 
*/ - private static boolean checkMismatchingBasesAndQuals(SAMRecord read, boolean filterMismatchingBaseAndQuals) { - boolean result; + private static boolean checkMismatchingBasesAndQuals(final SAMRecord read, final boolean filterMismatchingBaseAndQuals) { + final boolean result; if (read.getReadLength() == read.getBaseQualities().length) result = true; else if (filterMismatchingBaseAndQuals) diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 3abe5a7f4..0e95fd158 100644 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils.exceptions; +import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; @@ -87,6 +88,19 @@ public class UserException extends ReviewedStingException { } } + public static class UnsupportedCigarOperatorException extends UserException { + public UnsupportedCigarOperatorException(final CigarOperator co, final SAMRecord read, final String message) { + super(String.format( + "Unsupported CIGAR operator %s in read %s at %s:%d. 
%s", + co, + read.getReadName(), + read.getReferenceName(), + read.getAlignmentStart(), + message)); + } + } + + public static class MalformedGenomeLoc extends UserException { public MalformedGenomeLoc(String message, GenomeLoc loc) { super(String.format("Badly formed genome loc: %s: %s", message, loc)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 6cfa90d90..b5b82f869 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -131,6 +131,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { final String root = "-T ErrorThrowing -R " + exampleFASTA; final String args = root + cfg.args + " -E " + cfg.expectedException.getSimpleName(); WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException); + executeTest(cfg.toString(), spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/filters/AllowNCigarMalformedReadFilterUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/filters/AllowNCigarMalformedReadFilterUnitTest.java new file mode 100644 index 000000000..d169bf7e9 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/filters/AllowNCigarMalformedReadFilterUnitTest.java @@ -0,0 +1,77 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission 
notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.filters; + + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.Collections; + + +/** + * Tests for the {@link MalformedReadFilter} when the unsafe flag + * {@link ValidationExclusion.TYPE#ALLOW_N_CIGAR_READS} is set. + * + * @author Valentin Ruano-Rubio + * @since 6/6/13 + */ +public class AllowNCigarMalformedReadFilterUnitTest extends MalformedReadFilterUnitTest { + + + @Override + protected ValidationExclusion composeValidationExclusion() { + return new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS)); + } + + + @Test(enabled = true, + dataProvider= "UnsupportedCigarOperatorDataProvider") + @CigarOperatorTest(CigarOperatorTest.Outcome.IGNORE) + public void testCigarNOperatorFilterIgnore(final String cigarString) { + + final MalformedReadFilter filter = buildMalformedReadFilter(false); + final SAMRecord nContainingCigarRead = buildSAMRecord(cigarString); + Assert.assertFalse(filter.filterOut(nContainingCigarRead), + "filters out N containing Cigar when it should ignore the fact"); + } + + @Test(enabled = false) + @Override + public void testCigarNOperatorFilterException(final String cigarString) { + // Nothing to do here. + // Just deactivates the parents test case. 
+ } + + + + + + + +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/filters/MalformedReadFilterUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/filters/MalformedReadFilterUnitTest.java index 981d54d54..0d8515dde 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/filters/MalformedReadFilterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/filters/MalformedReadFilterUnitTest.java @@ -25,11 +25,25 @@ package org.broadinstitute.sting.gatk.filters; -import org.broadinstitute.sting.utils.exceptions.UserException; + +import net.sf.samtools.Cigar; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.TextCigarCodec; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.exceptions.UserException.UnsupportedCigarOperatorException; + +import java.lang.annotation.*; +import java.lang.reflect.Method; +import java.util.*; /** @@ -38,14 +52,14 @@ import org.testng.annotations.Test; * @author Eric Banks * @since 3/14/13 */ -public class MalformedReadFilterUnitTest { +public class MalformedReadFilterUnitTest extends ReadFilterTest { ////////////////////////////////////// // Test the checkSeqStored() method // ////////////////////////////////////// @Test(enabled = true) - public void testcheckSeqStored () { + public void testCheckSeqStored () { final GATKSAMRecord goodRead = ArtificialSAMUtils.createArtificialRead(new byte[]{(byte)'A'}, new byte[]{(byte)'A'}, "1M"); final GATKSAMRecord badRead = 
ArtificialSAMUtils.createArtificialRead(new byte[]{}, new byte[]{}, "1M"); @@ -59,4 +73,174 @@ public class MalformedReadFilterUnitTest { Assert.assertTrue(false, "We should have exceptioned out in the previous line"); } catch (UserException e) { } } + + @Test(enabled = true, dataProvider= "UnsupportedCigarOperatorDataProvider") + @CigarOperatorTest(CigarOperatorTest.Outcome.FILTER) + public void testCigarNOperatorFilterTruePositive(String cigarString) { + + final MalformedReadFilter filter = buildMalformedReadFilter(true); + final SAMRecord nContainingCigarRead = buildSAMRecord(cigarString); + Assert.assertTrue(filter.filterOut(nContainingCigarRead), + " Did not filtered out a N containing CIGAR read"); + } + + @Test(enabled = true, dataProvider= "UnsupportedCigarOperatorDataProvider") + @CigarOperatorTest(CigarOperatorTest.Outcome.ACCEPT) + public void testCigarNOperatorFilterTrueNegative(String cigarString) { + + final MalformedReadFilter filter = buildMalformedReadFilter(true); + final SAMRecord nonNContainingCigarRead = buildSAMRecord(cigarString); + Assert.assertFalse(filter.filterOut(nonNContainingCigarRead), + " Filtered out a non-N containing CIGAR read"); + } + + @Test(enabled = true, + expectedExceptions = UnsupportedCigarOperatorException.class, + dataProvider= "UnsupportedCigarOperatorDataProvider") + @CigarOperatorTest(CigarOperatorTest.Outcome.EXCEPTION) + public void testCigarNOperatorFilterException(final String cigarString) { + + final MalformedReadFilter filter = buildMalformedReadFilter(false); + final SAMRecord nContainingCigarRead = buildSAMRecord(cigarString); + + filter.filterOut(nContainingCigarRead); + } + + @Test(enabled = true, dataProvider="UnsupportedCigarOperatorDataProvider") + @CigarOperatorTest(CigarOperatorTest.Outcome.ACCEPT) + public void testCigarNOperatorFilterControl(final String cigarString) { + + final MalformedReadFilter filter = buildMalformedReadFilter(false); + final SAMRecord nonNContainingCigarRead = 
buildSAMRecord(cigarString); + + Assert.assertFalse(filter.filterOut(nonNContainingCigarRead)); + } + + protected SAMRecord buildSAMRecord(final String cigarString) { + final Cigar nContainingCigar = TextCigarCodec.getSingleton().decode(cigarString); + return this.createRead(nContainingCigar, 1, 0, 10); + } + + protected MalformedReadFilter buildMalformedReadFilter(final boolean filterRNO) { + return buildMalformedReadFiter(filterRNO,new ValidationExclusion.TYPE[] {}); + } + + protected MalformedReadFilter buildMalformedReadFiter(boolean filterRNO, final ValidationExclusion.TYPE... excl) { + final ValidationExclusion ve = new ValidationExclusion(Arrays.asList(excl)); + + final MalformedReadFilter filter = new MalformedReadFilter(); + + final SAMFileHeader h = getHeader(); + final SAMDataSource ds = getDataSource(); + + final GenomeAnalysisEngine gae = new GenomeAnalysisEngine() { + @Override + public SAMFileHeader getSAMFileHeader() { + return h; + } + + @Override + public SAMDataSource getReadsDataSource() { + return ds; + } + }; + filter.initialize(gae); + filter.filterReadsWithNCigar = filterRNO; + return filter; + } + + @Retention(RetentionPolicy.RUNTIME) + @Target(ElementType.METHOD) + @Inherited + protected @interface CigarOperatorTest { + + enum Outcome { + ANY,ACCEPT,FILTER,EXCEPTION,IGNORE; + + public boolean appliesTo (String cigar) { + boolean hasN = cigar.indexOf('N') != -1; + switch (this) { + case ANY: return true; + case ACCEPT: return !hasN; + case IGNORE: return hasN; + case FILTER: + case EXCEPTION: + default: + return hasN; + + } + } + } + + Outcome value() default Outcome.ANY; + } + + /** + * Cigar test data for unsupported operator test. + * Each element of this array corresponds to a test case. 
In turn the first element of the test case array is the + * Cigar string for that test case and the second indicates whether it should be filtered due to the presence of a + * unsupported operator + */ + private static final String[] TEST_CIGARS = { + "101M10D20I10M", + "6M14N5M", + "1N", + "101M", + "110N", + "2N4M", + "4M2N", + "3M1I1M", + "1M2I2M", + "1M10N1I1M", + "1M1I1D", + "11N12M1I34M12N" + }; + + @DataProvider(name= "UnsupportedCigarOperatorDataProvider") + public Iterator unsupportedOperatorDataProvider(final Method testMethod) { + final CigarOperatorTest a = resolveCigarOperatorTestAnnotation(testMethod); + final List result = new LinkedList(); + for (final String cigarString : TEST_CIGARS) { + if (a == null || a.value().appliesTo(cigarString)) { + result.add(new Object[] { cigarString }); + } + } + return result.iterator(); + } + + /** + * Gets the most specific {@link CigarOperatorTest} annotation for the + * signature of the test method provided. + *

+ * This in-house implementation is required due to the fact that method + * annotations do not have inheritance. + * + * @param m targeted test method. + * @return null if there is no {@link CigarOperatorTest} + * annotation in this or overridden methods. + */ + private CigarOperatorTest resolveCigarOperatorTestAnnotation(final Method m) { + CigarOperatorTest res = m.getAnnotation(CigarOperatorTest.class); + if (res != null) { + return res; + } + Class c = this.getClass(); + Class p = c.getSuperclass(); + while (p != null && p != Object.class) { + try { + final Method met = p.getDeclaredMethod(m.getName(), + m.getParameterTypes()); + res = met.getAnnotation(CigarOperatorTest.class); + if (res != null) { + break; + } + } catch (NoSuchMethodException e) { + // Its ok; nothing to do here, just keep looking. + } + c = p; + p = c.getSuperclass(); + } + return res; + } + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/filters/ReadFilterTest.java b/public/java/test/org/broadinstitute/sting/gatk/filters/ReadFilterTest.java new file mode 100644 index 000000000..5b6f67c42 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/filters/ReadFilterTest.java @@ -0,0 +1,370 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.*; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; +import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; + +import java.util.*; + +/** + * Class ReadBaseTest + *

+ * This is the base test class for read filter test classes. All read + * filter test cases should extend this + * class; it sets up a mock-up header to test read filtering. + * + * Feel free to override non-final methods to modify the behavior + * (e.g. change how read group ids are formatted, or complete a header). + * + *

+ * You can statically determine the number of read groups involved + * in the test by calling {@link #ReadFilterTest(int)} in your constructor. + *

+ * + * Notice that the same header object is shared by all test and + * it is initialized by Junit (calling {@link #beforeClass()}. + * + * @author Valentin Ruano Rubio + * @date May 23, 2013 + */ +public class ReadFilterTest extends BaseTest { + + private static final int DEFAULT_READ_GROUP_COUNT = 5; + private static final int DEFAULT_READER_COUNT = 1; + private static final String DEFAULT_READ_GROUP_PREFIX = "ReadGroup"; + private static final String DEFAULT_PLATFORM_UNIT_PREFIX = "Lane"; + private static final String DEFAULT_SAMPLE_NAME_PREFIX = "Sample"; + private static final String DEFAULT_PLATFORM_PREFIX = "Platform"; + private static final int DEFAULT_CHROMOSOME_COUNT = 1; + private static final int DEFAULT_CHROMOSOME_START_INDEX = 1; + private static final int DEFAULT_CHROMOSOME_SIZE = 1000; + private static final String DEFAULT_SAM_FILE_FORMAT = "readfile-%3d.bam"; + + private final int groupCount; + + private SAMFileHeader header; + + private SAMDataSource dataSource; + + /** + * Constructs a new read-filter test providing the number of read + * groups in the file. + * + * @param groupCount number of read-group in the fictional SAM file, + * must be equal or greater than 1. + */ + protected ReadFilterTest(final int groupCount) { + if (groupCount < 1) { + throw new IllegalArgumentException( + "the read group count must at least be 1"); + } + this.groupCount = groupCount; + } + + + /** + * Gets the data source. + * + * @throws IllegalStateException if the data source was not initialized + * invoking {@link #beforeClass()} + * @return never null + */ + protected final SAMDataSource getDataSource() { + checkDataSourceExists(); + return dataSource; + } + + /** + * Returns the mock-up SAM file header for testing. 
+ * + * @throws IllegalStateException if the header was not initialized + * invoking {@link #beforeClass()} + * @return never null + */ + protected final SAMFileHeader getHeader() { + checkHeaderExists(); + return header; + } + + /** + * Construct a read filter test with the default number of groups + * ({@link #DEFAULT_READ_GROUP_COUNT}. + */ + public ReadFilterTest() { + this(DEFAULT_READ_GROUP_COUNT); + } + + /** + * Return the number of read groups involved in the test + * @return 1 or greater. + */ + protected final int getReadGroupCount() { + return groupCount; + } + + /** + * Composes the Id for the read group given its index. + * + * This methods must return a unique distinct ID for each possible index and + * it must be the same value each time it is invoked. + * + * @param index the index of the targeted read group in the range + * [1,{@link #getReadGroupCount()}] + * @return never null and must be unique to each possible + * read group index. + */ + protected String composeReadGroupId(final int index) { + checkReadGroupIndex(index); + return DEFAULT_READ_GROUP_PREFIX + index; + } + + /** + * Composes the Platform name for the read group given its index. + * + * This method must always return the same value give an index. + * + * @param index the index of the targeted read group in the range + * [1,{@link #getReadGroupCount()}] + * @return never null. + */ + protected String composePlatformName(final int index) { + checkReadGroupIndex(index); + return DEFAULT_PLATFORM_PREFIX + (((index-1)%2)+1); + } + + + /** + * Composes the Platform unit name for the read group given its index. + * + * @param index the index of the targeted read group in the range + * [1,{@link #getReadGroupCount()}] + * @return never null. + */ + protected String composePlatformUnitName(final int index) { + checkReadGroupIndex(index); + return DEFAULT_PLATFORM_UNIT_PREFIX + (((index-1)%3)+1); + } + + + + /** + * Checks the correctness of a given read group index. 
+ * + * A correct index is any value in the range [1,{@link #getReadGroupCount()}]. + * + * @param index the target index. + * @throws IllegalArgumentException if the input index is not correct. + */ + protected final void checkReadGroupIndex(final int index) { + checkIndex(index,groupCount,"read group"); + } + + + private void checkIndex(final int index, final int max, CharSequence name) { + if (index < 1 || index > max) { + throw new IllegalArgumentException( + name + " index (" + + index + + ") is out of bounds [1," + max + "]"); + } + } + + + /** + * Checks whether the header was initialized. + * + * @throws IllegalArgumentException if the header was not yet initialized. + */ + protected final void checkHeaderExists() { + if (header == null) { + throw new IllegalArgumentException( + "header has not been initialized;" + + " beforeClass() was not invoked"); + } + } + + /** + * Checks whether the data source was initialized. + * + * @throws IllegalArgumentException if the data source was not yet initialized. + */ + protected final void checkDataSourceExists() { + if (dataSource == null) { + throw new IllegalArgumentException( + "data source has not been initialized;" + + " beforeClass() was not invoked"); + } + } + + /** + * Returns the ID for a read group given its index. + * + * @param index the index of the targeted read group in the range + * [1,{@link #getReadGroupCount()}] + * @return never null and must be unique to each + * possible read group index. + */ + protected final String getReadGroupId(final int index) { + checkReadGroupIndex(index); + return getHeader().getReadGroups().get(index - 1).getReadGroupId(); + } + + /** + * Returns the platform name for a read group given its index. + * + * @param group the index of the targeted read group in the range + * [1,{@link #getReadGroupCount()}] + * @return never null.
+ */ + protected final String getPlatformName(final int group) { + checkReadGroupIndex(group); + return getHeader().getReadGroups().get(group - 1).getPlatform(); + } + + /** + * Returns the platform unit for a read group given its index. + * + * @param group the index of the targeted read group in the range + * [1,{@link #getReadGroupCount()}] + * @return never null. + */ + protected final String getPlatformUnit(final int group) { + checkReadGroupIndex(group); + return getHeader().getReadGroups().get(group - 1).getPlatformUnit(); + } + + + /** + * Composes the mock up SAM file header. + * + * It must return an equivalent (equal) value each time it is invoked. + * + * @return never null. + */ + protected SAMFileHeader composeHeader() { + + return ArtificialSAMUtils.createArtificialSamHeader( + DEFAULT_CHROMOSOME_COUNT, DEFAULT_CHROMOSOME_START_INDEX, + DEFAULT_CHROMOSOME_SIZE); + } + + @BeforeClass + public void beforeClass() { + + header = composeHeader(); + dataSource = composeDataSource(); + final List readGroupIDs = new ArrayList(); + final List sampleNames = new ArrayList(); + + for (int i = 1; i <= getReadGroupCount(); i++) { + final String readGroupId = composeReadGroupId(i); + readGroupIDs.add(readGroupId); + sampleNames.add(readGroupId); + } + + ArtificialSAMUtils.createEnumeratedReadGroups( + header, readGroupIDs, sampleNames); + + for (int i = 1; i <= getReadGroupCount(); i++) { + final String readGroupId = readGroupIDs.get(i-1); + final SAMReadGroupRecord groupRecord = header.getReadGroup(readGroupId); + groupRecord.setAttribute("PL", composePlatformName(i)); + groupRecord.setAttribute("PU", composePlatformUnitName(i)); + } + + } + + protected ValidationExclusion composeValidationExclusion() { + return new ValidationExclusion(); + } + + protected SAMDataSource composeDataSource() { + checkHeaderExists(); + final Set readerIDs = new HashSet<>(1); + final ThreadAllocation ta = new ThreadAllocation(); + final Integer numFileHandles = 1; // I believe that 
any value would do but need to confirm. + final boolean useOriginalBaseQualities = true; + final SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.LENIENT; + final Integer readBufferSize = 1; // not relevant. + final DownsamplingMethod downsamplingMethod = DownsamplingMethod.NONE; + final ValidationExclusion exclusionList = composeValidationExclusion(); + final Collection supplementalFilters = Collections.EMPTY_SET; + final boolean includeReadsWithDeletionAtLoci = true; + + final GenomeLocParser glp = new GenomeLocParser(header.getSequenceDictionary()); + final SAMDataSource res = new SAMDataSource( + readerIDs, + ta, + numFileHandles, + glp, + useOriginalBaseQualities, + strictness, + readBufferSize, + downsamplingMethod, + exclusionList, + supplementalFilters, + includeReadsWithDeletionAtLoci); + + return res; + } + + @AfterClass + public void afterClass() { + header = null; + dataSource = null; + } + + /** + * Creates a read record. + * + * @param cigar the new record CIGAR. 
+ * @param group the new record group index that must be in the range \ + * [1,{@link #getReadGroupCount()}] + * @param reference the reference sequence index (0-based) + * @param start the start position of the read alignment in the reference + * (1-based) + * @return never null + */ + protected SAMRecord createRead(final Cigar cigar, final int group, final int reference, final int start) { + final SAMRecord record = ArtificialSAMUtils.createArtificialRead(cigar); + record.setHeader(getHeader()); + record.setAlignmentStart(start); + record.setReferenceIndex(reference); + record.setAttribute(SAMTag.RG.toString(), getReadGroupId(group)); + return record; + + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java index 1370aeb50..1be31b293 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java @@ -26,13 +26,10 @@ package org.broadinstitute.sting.gatk.filters; import org.testng.Assert; -import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; @@ -40,34 +37,7 @@ import java.util.List; import java.util.ArrayList; import java.util.Collections; -public class ReadGroupBlackListFilterUnitTest extends BaseTest { - private static final int READ_GROUP_COUNT = 5; - private static final String READ_GROUP_PREFIX = "ReadGroup"; - private static final String SAMPLE_NAME_PREFIX = "Sample"; - private static final String PLATFORM_PREFIX = "Platform"; - private static final String 
PLATFORM_UNIT_PREFIX = "Lane"; - private static SAMFileHeader header; - - @BeforeClass - public void beforeClass() { - header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - - List readGroupIDs = new ArrayList(); - List sampleNames = new ArrayList(); - - for (int i = 1; i <= READ_GROUP_COUNT; i++) { - readGroupIDs.add(READ_GROUP_PREFIX + i); - sampleNames.add(SAMPLE_NAME_PREFIX + i); - } - - ArtificialSAMUtils.createEnumeratedReadGroups(header, readGroupIDs, sampleNames); - - for (int i = 1; i <= READ_GROUP_COUNT; i++) { - SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + i); - groupRecord.setAttribute("PL", PLATFORM_PREFIX + (((i-1)%2)+1)); - groupRecord.setAttribute("PU", PLATFORM_UNIT_PREFIX + (((i-1)%3)+1)); - } - } +public class ReadGroupBlackListFilterUnitTest extends ReadFilterTest { @Test(expectedExceptions=ReviewedStingException.class) public void testBadFilter() { @@ -88,14 +58,14 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { @Test public void testFilterReadGroup() { - SAMRecord filteredRecord = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 1, 20); - filteredRecord.setAttribute("RG", READ_GROUP_PREFIX + "1"); + SAMRecord filteredRecord = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, 1, 20); + filteredRecord.setAttribute("RG", getReadGroupId(1)); - SAMRecord unfilteredRecord = ArtificialSAMUtils.createArtificialRead(header, "readDos", 0, 2, 20); - unfilteredRecord.setAttribute("RG", READ_GROUP_PREFIX + "2"); + SAMRecord unfilteredRecord = ArtificialSAMUtils.createArtificialRead(getHeader(), "readDos", 0, 2, 20); + unfilteredRecord.setAttribute("RG", getReadGroupId(2)); List filterList = new ArrayList(); - filterList.add("RG:" + READ_GROUP_PREFIX + "1"); + filterList.add("RG:" + getReadGroupId(1)); ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList); Assert.assertTrue(filter.filterOut(filteredRecord)); @@ -104,14 +74,14 @@ public class 
ReadGroupBlackListFilterUnitTest extends BaseTest { @Test public void testFilterPlatformUnit() { - SAMRecord filteredRecord = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 1, 20); - filteredRecord.setAttribute("RG", READ_GROUP_PREFIX + "1"); + SAMRecord filteredRecord = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, 1, 20); + filteredRecord.setAttribute("RG", getReadGroupId(1)); - SAMRecord unfilteredRecord = ArtificialSAMUtils.createArtificialRead(header, "readDos", 0, 2, 20); - unfilteredRecord.setAttribute("RG", READ_GROUP_PREFIX + "2"); + SAMRecord unfilteredRecord = ArtificialSAMUtils.createArtificialRead(getHeader(), "readDos", 0, 2, 20); + unfilteredRecord.setAttribute("RG", getReadGroupId(2)); List filterList = new ArrayList(); - filterList.add("PU:" + PLATFORM_UNIT_PREFIX + "1"); + filterList.add("PU:" + getPlatformUnit(1)); ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList); Assert.assertTrue(filter.filterOut(filteredRecord)); @@ -123,18 +93,18 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { int recordsPerGroup = 3; List records = new ArrayList(); int alignmentStart = 0; - for (int x = 1; x <= READ_GROUP_COUNT; x++) { - SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + x); + for (int x = 1; x <= getReadGroupCount(); x++) { + SAMReadGroupRecord groupRecord = getHeader().getReadGroup(getReadGroupId(x)); for (int y = 1; y <= recordsPerGroup; y++) { - SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, ++alignmentStart, 20); + SAMRecord record = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, ++alignmentStart, 20); record.setAttribute("RG", groupRecord.getReadGroupId()); records.add(record); } } List filterList = new ArrayList(); - filterList.add("RG:" + READ_GROUP_PREFIX + "1"); - filterList.add("RG:" + READ_GROUP_PREFIX + "3"); + filterList.add("RG:" + getReadGroupId(1)); + filterList.add("RG:" + 
getReadGroupId(3)); ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList); int filtered = 0; @@ -153,7 +123,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { } int filteredExpected = recordsPerGroup * 2; - int unfilteredExpected = recordsPerGroup * (READ_GROUP_COUNT - 2); + int unfilteredExpected = recordsPerGroup * (getReadGroupCount() - 2); Assert.assertEquals(filtered, filteredExpected, "Filtered"); Assert.assertEquals(unfiltered, unfilteredExpected, "Uniltered"); } @@ -163,17 +133,17 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { int recordsPerGroup = 3; List records = new ArrayList(); int alignmentStart = 0; - for (int x = 1; x <= READ_GROUP_COUNT; x++) { - SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + x); + for (int x = 1; x <= getReadGroupCount(); x++) { + SAMReadGroupRecord groupRecord = getHeader().getReadGroup(getReadGroupId(x)); for (int y = 1; y <= recordsPerGroup; y++) { - SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, ++alignmentStart, 20); + SAMRecord record = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, ++alignmentStart, 20); record.setAttribute("RG", groupRecord.getReadGroupId()); records.add(record); } } List filterList = new ArrayList(); - filterList.add("PU:" + PLATFORM_UNIT_PREFIX + "1"); + filterList.add("PU:" + getPlatformUnit(1)); ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList); int filtered = 0; @@ -202,10 +172,10 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { int recordsPerGroup = 3; List records = new ArrayList(); int alignmentStart = 0; - for (int x = 1; x <= READ_GROUP_COUNT; x++) { - SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + x); + for (int x = 1; x <= getReadGroupCount(); x++) { + SAMReadGroupRecord groupRecord = getHeader().getReadGroup(getReadGroupId(x)); for (int y = 1; y <= recordsPerGroup; y++) { - SAMRecord 
record = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, ++alignmentStart, 20); + SAMRecord record = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, ++alignmentStart, 20); record.setAttribute("RG", groupRecord.getReadGroupId()); records.add(record); } @@ -231,7 +201,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { } int filteredExpected = recordsPerGroup * 2; - int unfilteredExpected = recordsPerGroup * (READ_GROUP_COUNT - 2); + int unfilteredExpected = recordsPerGroup * (getReadGroupCount() - 2); Assert.assertEquals(filtered, filteredExpected, "Filtered"); Assert.assertEquals(unfiltered, unfilteredExpected, "Uniltered"); } @@ -241,10 +211,10 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { int recordsPerGroup = 3; List records = new ArrayList(); int alignmentStart = 0; - for (int x = 1; x <= READ_GROUP_COUNT; x++) { - SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + x); + for (int x = 1; x <= getReadGroupCount(); x++) { + SAMReadGroupRecord groupRecord = getHeader().getReadGroup(getReadGroupId(x)); for (int y = 1; y <= recordsPerGroup; y++) { - SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, ++alignmentStart, 20); + SAMRecord record = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, ++alignmentStart, 20); record.setAttribute("RG", groupRecord.getReadGroupId()); records.add(record); } @@ -270,7 +240,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { } int filteredExpected = recordsPerGroup * 2; - int unfilteredExpected = recordsPerGroup * (READ_GROUP_COUNT - 2); + int unfilteredExpected = recordsPerGroup * (getReadGroupCount() - 2); Assert.assertEquals(filtered, filteredExpected, "Filtered"); Assert.assertEquals(unfiltered, unfilteredExpected, "Uniltered"); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/filters/UnsafeMalformedReadFilterUnitTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/filters/UnsafeMalformedReadFilterUnitTest.java new file mode 100644 index 000000000..30e2f0f1b --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/filters/UnsafeMalformedReadFilterUnitTest.java @@ -0,0 +1,50 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.filters; + + +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; + +import java.util.Collections; + + +/** + * Tests for the {@link MalformedReadFilter} when the unsafe flag + * {@link ValidationExclusion.TYPE#ALL} is set. 
+ * + * @author Valentin Ruano-Rubio + * @since 6/6/13 + */ +public class UnsafeMalformedReadFilterUnitTest extends AllowNCigarMalformedReadFilterUnitTest { + + + @Override + protected ValidationExclusion composeValidationExclusion() { + return new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)); + } + + +} From a95fbd48e5712b4785ed3a54a8daadd21729c22f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 10 Jun 2013 13:10:32 -0400 Subject: [PATCH 074/116] Moving QualifyMissingIntervals to protected Making this walker available so we can share it with the CSER group for CLIA analysis. --- .../walkers/diagnostics/missing/Metrics.java | 110 +++++++++ .../missing/QualifyMissingIntervals.java | 226 ++++++++++++++++++ 2 files changed, 336 insertions(+) create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java new file mode 100644 index 000000000..5e3da5f4f --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java @@ -0,0 +1,110 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.diagnostics.missing; + +/** + * Short one line description of the walker. + *

+ *

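+ * Accumulates running sums of GC content, base quality and mapping quality together with the read and reference-base counts used to average them. + *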

+ *

+ *

+ *

Input

+ *

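+ * Totals are supplied through the one-argument methods (reads, refs, gccontent, baseQual, mapQual) or merged in from another instance with combine. + *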

+ *

+ *

Output

+ *

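+ * The no-argument gccontent(), baseQual() and mapQual() return the corresponding sum divided by its count (refs for GC content, reads for the qualities), or 0.0 when that count is not positive. + *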

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T [walker name]
+ *  
+ * + * @author Mauricio Carneiro + * @since 5/1/13 + */ +final class Metrics { + private double gccontent; + private double baseQual; + private double mapQual; + private int reads; + private int refs; + + void reads(int reads) {this.reads = reads;} + void refs(int refs) {this.refs = refs;} + + void gccontent(double gccontent) {this.gccontent = gccontent;} + void baseQual(double baseQual) {this.baseQual = baseQual;} + void mapQual(double mapQual) {this.mapQual = mapQual;} + + double gccontent() {return refs > 0 ? gccontent/refs : 0.0;} + double baseQual() {return reads > 0 ? baseQual/reads : 0.0;} + double mapQual() {return reads > 0 ? mapQual/reads : 0.0;} + + /** + * Combines two metrics + * + * @param value the other metric to combine + * @return itself, for simple reduce + */ + public Metrics combine(Metrics value) { + this.gccontent += value.gccontent; + this.baseQual += value.baseQual; + this.mapQual += value.mapQual; + this.reads += value.reads; + this.refs += value.refs; + + return this; + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java new file mode 100644 index 000000000..62716d6d2 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java @@ -0,0 +1,226 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.diagnostics.missing; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.walkers.By; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.NanoSchedulable; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.HelpConstants; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.text.XReadLines; + +import java.io.File; +import 
java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.List; + +/** + * Walks along reference and calculates a few metrics for each interval. + * + * Metrics: + *
    + *
  • Average Base Quality
  • + *
  • Average Mapping Quality
  • + *
  • GC Content
  • + *
  • Position in the target
  • + *
  • Coding Sequence / Intron
  • + *
  • Length of the uncovered area
  • + *
+ * + *

Input

+ *

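+ * A reference file, the intervals to qualify (-L), a file of target intervals (-targets, required) and a file of coding-sequence intervals (-cds, declared as not required); each line of the targets and cds files is read as contig:start-stop, and empty lines are skipped.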

+ * + *

Output

+ *

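+ * A GATKReport with one row per interval: the interval (IN), GC content (GC), average base quality (BQ), average mapping quality (MQ), position in the target (TP), whether it overlaps the coding-sequence intervals (CD) and the interval length (LN).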

+ * + *

Example

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -T QualifyMissingIntervals \
+ *   -R ref.fasta \
+ *   -o output.grp \
+ *   -L input.intervals \
+ *   -cds cds.intervals \
+ *   -targets targets.intervals
+ * 
+ * + */ +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) +@By(DataSource.REFERENCE) +public final class QualifyMissingIntervals extends LocusWalker implements NanoSchedulable { + @Output + protected PrintStream out; + + @Argument(shortName = "targets", required = true) + public File targetsFile; + + @Argument(shortName = "cds", required = false) + public File cdsFile; + + GATKReport simpleReport; + GenomeLocSortedSet target; + GenomeLocSortedSet cds; + + public boolean isReduceByInterval() { + return true; + } + + public void initialize() { + simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "TP", "CD", "LN"); + final GenomeLocParser parser = getToolkit().getGenomeLocParser(); + target = new GenomeLocSortedSet(parser); + cds = new GenomeLocSortedSet(parser); + parseFile(targetsFile, target, parser); + parseFile(cdsFile, cds, parser); + } + + public Metrics reduceInit() { + return new Metrics(); + } + + public Metrics map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (tracker == null) + return null; + + final Metrics metrics = new Metrics(); + final byte baseIndex = ref.getBase(); + final ReadBackedPileup pileup = context.getBasePileup(); + final int nBases = pileup.getNumberOfElements(); + + double baseQual = 0.0; + for (byte qual : pileup.getQuals()) { + baseQual += qual; + } + double mapQual = 0.0; + for (byte qual : pileup.getMappingQuals()) { + mapQual += qual; + } + + metrics.baseQual(baseQual); + metrics.mapQual(mapQual); + metrics.gccontent(baseIndex == 'C' || baseIndex == 'G' ? 
1.0 : 0.0); + metrics.reads(nBases); + metrics.refs(1); + + return metrics; + } + + @Override + public Metrics reduce(Metrics value, Metrics sum) { + return sum.combine(value); + } + + public void onTraversalDone(List> results) { + for (Pair r : results) { + GenomeLoc interval = r.getFirst(); + Metrics metrics = r.getSecond(); + simpleReport.addRow( + interval.toString(), + metrics.gccontent(), + metrics.baseQual(), + metrics.mapQual(), + getPositionInTarget(interval), + cds.overlaps(interval), + interval.size() + ); + } + simpleReport.print(out); + out.close(); + } + + private static GenomeLoc parseInterval(String s, GenomeLocParser parser) { + if (s.isEmpty()) { + return null; + } + String[] first = s.split(":"); + if (first.length == 2) { + String[] second = first[1].split("\\-"); + return parser.createGenomeLoc(first[0], Integer.decode(second[0]), Integer.decode(second[1])); + } else { + throw new UserException.BadInput("Interval doesn't parse correctly: " + s); + } + } + + private void parseFile(File file, GenomeLocSortedSet set, GenomeLocParser parser) { + try { + for (String s : new XReadLines(file) ) { + GenomeLoc interval = parseInterval(s, parser); + if (interval != null) + set.add(interval, true); + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } + + private int getPositionInTarget(GenomeLoc interval) { + final List hits = target.getOverlapping(interval); + int result = 0; + for (GenomeLoc hit : hits) { + result = interval.getStart() - hit.getStart(); // if there are multiple hits, we'll get the last one. + } + return result; + } +} From 0d593cff70ece0fae75ed149f6819a5eeeaf9a2b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 6 Jun 2013 14:32:47 -0400 Subject: [PATCH 075/116] Refactor rsID and overlap detection in VariantOverlapAnnotator utility class -- Variants will be considered matching if they have the same reference allele and at least 1 common alternative allele. 
This matching algorithm determines how rsID are added back into the VariantContext we want to annotate, and as well determining the overlap FLAG attribute field. -- Updated VariantAnnotator and VariantsToVCF to use this class, removing its old stale implementation -- Added unit tests for this VariantOverlapAnnotator class -- Removed GATKVCFUtils.rsIDOfFirstRealVariant as this is now better to use VariantOverlapAnnotator -- Now requires strict allele matching, without any option to just use site annotation. --- .../VariantOverlapAnnotatorUnitTest.java | 164 +++++++++++++ ...dGenotyperIndelCallingIntegrationTest.java | 12 +- ...GenotyperNormalCallingIntegrationTest.java | 2 +- .../walkers/annotator/VariantAnnotator.java | 4 - .../annotator/VariantAnnotatorEngine.java | 115 ++------- .../annotator/VariantOverlapAnnotator.java | 224 ++++++++++++++++++ .../walkers/variantutils/VariantsToVCF.java | 11 +- .../sting/utils/variant/GATKVCFUtils.java | 15 -- 8 files changed, 425 insertions(+), 122 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotatorUnitTest.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotator.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotatorUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotatorUnitTest.java new file mode 100644 index 000000000..6d6761f1c --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotatorUnitTest.java @@ -0,0 +1,164 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. 
with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + +public class VariantOverlapAnnotatorUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; + private IndexedFastaSequenceFile seq; + + @BeforeClass + public void setup() throws FileNotFoundException { + // sequence + seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); + genomeLocParser = new GenomeLocParser(seq); + } + + private VariantContext makeVC(final String source, final String id, final List 
alleles) { + final VariantContext vc = GATKVariantContextUtils.makeFromAlleles(source, "20", 10, alleles); + return new VariantContextBuilder(vc).id(id).make(); + } + + private VariantOverlapAnnotator makeAnnotator(final String dbSNP, final String ... overlaps) { + final RodBinding dbSNPBinding = dbSNP == null ? null : new RodBinding<>(VariantContext.class, dbSNP); + final Map, String> overlapBinding = new LinkedHashMap<>(); + for ( final String overlap : overlaps ) overlapBinding.put(new RodBinding<>(VariantContext.class, overlap), overlap); + return new VariantOverlapAnnotator(dbSNPBinding, overlapBinding, genomeLocParser); + } + + @Test + public void testCreateWithSpecialNames() { + final List names = Arrays.asList("X", "Y", "Z"); + final Map, String> overlapBinding = new LinkedHashMap<>(); + for ( final String overlap : names ) overlapBinding.put(new RodBinding<>(VariantContext.class, overlap + "Binding"), overlap); + final VariantOverlapAnnotator annotator = new VariantOverlapAnnotator(null, overlapBinding, genomeLocParser); + Assert.assertEquals(annotator.getOverlapNames(), names); + } + + @DataProvider(name = "AnnotateRsIDData") + public Object[][] makeAnnotateRsIDData() { + List tests = new ArrayList<>(); + + // this functionality can be adapted to provide input data for whatever you might want in your data + final VariantContext callNoIDAC = makeVC("call", VCFConstants.EMPTY_ID_FIELD, Arrays.asList("A", "C")); + final VariantContext callNoIDAT = makeVC("call", VCFConstants.EMPTY_ID_FIELD, Arrays.asList("A", "T")); + final VariantContext callIDAC = makeVC("call", "foo", Arrays.asList("A", "C")); + final VariantContext callExistingIDAC = makeVC("call", "rsID1", Arrays.asList("A", "C")); + + final VariantContext dbSNP_AC = makeVC("DBSNP", "rsID1", Arrays.asList("A", "C")); + final VariantContext dbSNP_AT = makeVC("DBSNP", "rsID2", Arrays.asList("A", "T")); + final VariantContext dbSNP_AG = makeVC("DBSNP", "rsID3", Arrays.asList("A", "G")); + final 
VariantContext dbSNP_AC_AT = makeVC("DBSNP", "rsID1;rsID2", Arrays.asList("A", "C", "T")); + final VariantContext dbSNP_AC_AG = makeVC("DBSNP", "rsID1;rsID3", Arrays.asList("A", "C", "G")); + + tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC), dbSNP_AC.getID(), true}); + tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AT), VCFConstants.EMPTY_ID_FIELD, false}); + tests.add(new Object[]{callIDAC, Arrays.asList(dbSNP_AC), "foo" + ";" + dbSNP_AC.getID(), true}); + tests.add(new Object[]{callIDAC, Arrays.asList(dbSNP_AT), "foo", false}); + tests.add(new Object[]{callExistingIDAC, Arrays.asList(dbSNP_AC), "rsID1", true}); + tests.add(new Object[]{callExistingIDAC, Arrays.asList(dbSNP_AT), "rsID1", false}); + + final VariantContext callNoIDACT = makeVC("call", VCFConstants.EMPTY_ID_FIELD, Arrays.asList("A", "C", "T")); + tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AC), dbSNP_AC.getID(), true}); + tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AT), dbSNP_AT.getID(), true}); + tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AG), VCFConstants.EMPTY_ID_FIELD, false}); + tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AC_AT), dbSNP_AC_AT.getID(), true}); + tests.add(new Object[]{callNoIDACT, Arrays.asList(dbSNP_AC_AG), dbSNP_AC_AG.getID(), true}); + + // multiple options + tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC, dbSNP_AT), "rsID1", true}); + tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AT, dbSNP_AC), "rsID1", true}); + tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC_AT), "rsID1;rsID2", true}); + tests.add(new Object[]{callNoIDAT, Arrays.asList(dbSNP_AC_AT), "rsID1;rsID2", true}); + tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC_AG), "rsID1;rsID3", true}); + tests.add(new Object[]{callNoIDAT, Arrays.asList(dbSNP_AC_AG), VCFConstants.EMPTY_ID_FIELD, false}); + + final VariantContext dbSNP_AC_FAIL = new VariantContextBuilder(makeVC("DBSNP", "rsID1", Arrays.asList("A", 
"C"))).filter("FAIL").make(); + tests.add(new Object[]{callNoIDAC, Arrays.asList(dbSNP_AC_FAIL), VCFConstants.EMPTY_ID_FIELD, false}); + + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "AnnotateRsIDData") + public void testAnnotateRsID(final VariantContext toAnnotate, final List dbSNPRecords, final String expectedID, final boolean expectOverlap) throws Exception { + final VariantOverlapAnnotator annotator = makeAnnotator("dbnsp"); + final VariantContext annotated = annotator.annotateRsID(dbSNPRecords, toAnnotate); + Assert.assertNotNull(annotated); + Assert.assertEquals(annotated.getID(), expectedID); + } + + @Test(dataProvider = "AnnotateRsIDData") + public void testAnnotateOverlaps(final VariantContext toAnnotate, final List records, final String expectedID, final boolean expectOverlap) throws Exception { + final String name = "binding"; + final VariantOverlapAnnotator annotator = makeAnnotator(null, name); + final VariantContext annotated = annotator.annotateOverlap(records, name, toAnnotate); + Assert.assertNotNull(annotated); + Assert.assertEquals(annotated.getID(), toAnnotate.getID(), "Shouldn't modify annotation"); + Assert.assertEquals(annotated.hasAttribute(name), expectOverlap); + if ( expectOverlap ) { + Assert.assertEquals(annotated.getAttribute(name), true); + } + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 856e97ebe..98a482c6f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - 
Arrays.asList("19f77f557150905ef3fa4713f611a1b9")); + Arrays.asList("14ad6eeed46e9b6f4757370267b1a1cc")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("bb3dbad9666ebf38d338f0c9c211a42e")); + Arrays.asList("cd184a2a5a1932dcf3e8f0424652176b")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("8052390ca2b6a57c3ddf379a51225d64")); + Arrays.asList("e8d98996eb81ece8cfb52437920ae2e0")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("b6b9dba97fbabaeeb458a41051983e7b")); + Arrays.asList("23a78c16f64bffe1dea3a5587fcabdad")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + 
result.get(0).getAbsolutePath(), 1, - Arrays.asList("38730c7030271f5d0ca0b59365d57814")); + Arrays.asList("294183823d678d3668f4fa98b4de6e06")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("264325878b988acc11d8e5d9d2ba0b7f")); + Arrays.asList("e90256acfc360fc4bf377094732a673a")); executeTest("test minIndelFraction 0.0", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index a52176a08..bf4316415 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -80,7 +80,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("698e54aeae3130779d246b9480a4052c")); + Arrays.asList("60115af273fde49c76d4df6c9c0f6501")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index f2bd6c14c..10ba4ca17 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -180,9 +180,6 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality threshold in order to annotate mendelian violation ratio") public double minGenotypeQualityP = 0.0; - @Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp tracks that exactly match both reference and alternate alleles will be counted as concordant", required=false) - protected boolean requireStrictAlleleMatch = false; - private VariantAnnotatorEngine engine; /** @@ -204,7 +201,6 @@ public class VariantAnnotator extends RodWalker implements Ann else engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit()); engine.initializeExpressions(expressionsToUse); - engine.setRequireStrictAlleleMatch(requireStrictAlleleMatch); // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 695868bb1..90050a10a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -34,26 +34,23 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.variant.GATKVCFUtils; -import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.vcf.*; import java.util.*; public class VariantAnnotatorEngine { - private List requestedInfoAnnotations = Collections.emptyList(); private List requestedGenotypeAnnotations = Collections.emptyList(); - private List requestedExpressions = new ArrayList(); + private List requestedExpressions = new ArrayList<>(); - private final HashMap, String> dbAnnotations = new HashMap, String>(); private final AnnotatorCompatible walker; private final GenomeAnalysisEngine toolkit; - private boolean requireStrictAlleleMatch = false; + VariantOverlapAnnotator variantOverlapAnnotator = null; protected static class VAExpression { @@ -85,7 +82,7 @@ public class VariantAnnotatorEngine { requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations(); requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations(); excludeAnnotations(annotationsToExclude); - initializeDBs(); + initializeDBs(toolkit); } // use this constructor if you want to select specific annotations (and/or interfaces) @@ -93,14 +90,7 @@ public class VariantAnnotatorEngine { this.walker = walker; this.toolkit = toolkit; initializeAnnotations(annotationGroupsToUse, annotationsToUse, annotationsToExclude); - initializeDBs(); - } - - // experimental constructor for active region traversal - public VariantAnnotatorEngine(GenomeAnalysisEngine toolkit) { - this.walker = null; - this.toolkit = toolkit; - requestedInfoAnnotations = 
AnnotationInterfaceManager.createInfoFieldAnnotations(Arrays.asList("ActiveRegionBasedAnnotation"), Collections.emptyList()); + initializeDBs(toolkit); } // select specific expressions to use @@ -138,16 +128,19 @@ public class VariantAnnotatorEngine { requestedGenotypeAnnotations = tempRequestedGenotypeAnnotations; } - private void initializeDBs() { - + private void initializeDBs(final GenomeAnalysisEngine engine) { // check to see whether comp rods were included - final RodBinding dbsnp = walker.getDbsnpRodBinding(); - if ( dbsnp != null && dbsnp.isBound() ) - dbAnnotations.put(dbsnp, VCFConstants.DBSNP_KEY); + RodBinding dbSNPBinding = walker.getDbsnpRodBinding(); + if ( dbSNPBinding != null && ! dbSNPBinding.isBound() ) + dbSNPBinding = null; - final List> comps = walker.getCompRodBindings(); - for ( RodBinding rod : comps ) - dbAnnotations.put(rod, rod.getName()); + final Map, String> overlapBindings = new LinkedHashMap<>(); + for ( final RodBinding b : walker.getCompRodBindings()) + if ( b.isBound() ) overlapBindings.put(b, b.getName()); + if ( dbSNPBinding != null && ! 
overlapBindings.keySet().contains(VCFConstants.DBSNP_KEY) ) + overlapBindings.put(dbSNPBinding, VCFConstants.DBSNP_KEY); // add overlap detection with DBSNP by default + + variantOverlapAnnotator = new VariantOverlapAnnotator(dbSNPBinding, overlapBindings, engine.getGenomeLocParser()); } public void invokeAnnotationInitializationMethods( Set headerLines ) { @@ -161,14 +154,13 @@ public class VariantAnnotatorEngine { } public Set getVCFAnnotationDescriptions() { - Set descriptions = new HashSet(); for ( InfoFieldAnnotation annotation : requestedInfoAnnotations ) descriptions.addAll(annotation.getDescriptions()); for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) descriptions.addAll(annotation.getDescriptions()); - for ( String db : dbAnnotations.values() ) { + for ( String db : variantOverlapAnnotator.getOverlapNames() ) { if ( VCFStandardHeaderLines.getInfoLine(db, false) != null ) descriptions.add(VCFStandardHeaderLines.getInfoLine(db)); else @@ -178,10 +170,6 @@ public class VariantAnnotatorEngine { return descriptions; } - public void setRequireStrictAlleleMatch( final boolean requireStrictAlleleMatch ) { - this.requireStrictAlleleMatch = requireStrictAlleleMatch; - } - public VariantContext annotateContext(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map stratifiedContexts, @@ -192,13 +180,10 @@ public class VariantAnnotatorEngine { public VariantContext annotateContext(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map stratifiedContexts, - VariantContext vc, + final VariantContext vc, final Map perReadAlleleLikelihoodMap) { Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); - // annotate db occurrences - vc = annotateDBs(tracker, ref.getLocus(), vc, infoAnnotations); - // annotate expressions where available annotateExpressions(tracker, ref.getLocus(), infoAnnotations); @@ -213,7 +198,10 @@ public class VariantAnnotatorEngine { VariantContextBuilder builder = new 
VariantContextBuilder(vc).attributes(infoAnnotations); // annotate genotypes, creating another new VC in the process - return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap)).make(); + final VariantContext annotated = builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap)).make(); + + // annotate db occurrences + return annotateDBs(tracker, annotated); } public VariantContext annotateContext(final Map perReadAlleleLikelihoodMap, VariantContext vc) { @@ -241,66 +229,13 @@ public class VariantAnnotatorEngine { * Annotate the ID field and other DBs for the given Variant Context * * @param tracker ref meta data tracker (cannot be null) - * @param loc location of the vc * @param vc variant context to annotate * @return non-null annotated version of vc */ - @Requires({"tracker != null && loc != null && vc != null"}) - @Ensures("result != null") - public VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc) { - final Map newInfoAnnotations = new HashMap(0); - vc = annotateDBs(tracker, loc, vc, newInfoAnnotations); - - if ( !newInfoAnnotations.isEmpty() ) { - final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(newInfoAnnotations); - vc = builder.make(); - } - - return vc; - } - - /** - * Annotate the ID field and other DBs for the given Variant Context - * - * @param tracker ref meta data tracker (cannot be null) - * @param loc location of the vc - * @param vc variant context to annotate - * @param infoAnnotations info annotation map to populate - * @return non-null annotated version of vc - */ @Requires({"tracker != null && loc != null && vc != null && infoAnnotations != null"}) @Ensures("result != null") - private VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc, final Map infoAnnotations) { - for ( Map.Entry, String> dbSet : 
dbAnnotations.entrySet() ) { - if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) { - final String rsID = GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType()); - - // add the ID if appropriate - if ( rsID != null ) { - // put the DB key into the INFO field - infoAnnotations.put(VCFConstants.DBSNP_KEY, true); - - if ( vc.emptyID() ) { - vc = new VariantContextBuilder(vc).id(rsID).make(); - } else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) { - final String newRsID = vc.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID; - vc = new VariantContextBuilder(vc).id(newRsID).make(); - } - } - } else { - boolean overlapsComp = false; - for ( VariantContext comp : tracker.getValues(dbSet.getKey(), loc) ) { - if ( !comp.isFiltered() && ( !requireStrictAlleleMatch || comp.getAlleles().equals(vc.getAlleles()) ) ) { - overlapsComp = true; - break; - } - } - if ( overlapsComp ) - infoAnnotations.put(dbSet.getValue(), overlapsComp); - } - } - - return vc; + private VariantContext annotateDBs(final RefMetaDataTracker tracker, VariantContext vc) { + return variantOverlapAnnotator.annotateOverlaps(tracker, variantOverlapAnnotator.annotateRsID(tracker, vc)); } private void annotateExpressions(final RefMetaDataTracker tracker, final GenomeLoc loc, final Map infoAnnotations) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotator.java new file mode 100644 index 000000000..0efabba3c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotator.java @@ -0,0 +1,224 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without 
limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.vcf.VCFConstants; + +import java.util.*; + +/** + * Annotate the ID field and attribute overlap FLAGs for a VariantContext against a RefMetaDataTracker or a list + * of VariantContexts + */ +public final class VariantOverlapAnnotator { + final RodBinding dbSNPBinding; + final Map, String> overlapBindings; + final GenomeLocParser genomeLocParser; + + /** + * Create a new VariantOverlapAnnotator without overall bindings + * + * @see #VariantOverlapAnnotator(org.broadinstitute.sting.commandline.RodBinding, java.util.Map, org.broadinstitute.sting.utils.GenomeLocParser) + */ + public 
VariantOverlapAnnotator(RodBinding dbSNPBinding, GenomeLocParser genomeLocParser) { + this(dbSNPBinding, Collections., String>emptyMap(), genomeLocParser); + } + + /** + * Create a new VariantOverlapAnnotator + * + * @param dbSNPBinding the RodBinding to use for updating ID field values, or null if that behavior isn't desired + * @param overlapBindings a map of RodBindings / name to use for overlap annotation. Each binding will be used to + * add name => true for variants that overlap with variants found to a + * RefMetaDataTracker at each location. Can be empty but not null + * @param genomeLocParser the genome loc parser we'll use to create GenomeLocs for VariantContexts + */ + public VariantOverlapAnnotator(RodBinding dbSNPBinding, Map, String> overlapBindings, GenomeLocParser genomeLocParser) { + if ( overlapBindings == null ) throw new IllegalArgumentException("overlapBindings cannot be null"); + if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null"); + + this.dbSNPBinding = dbSNPBinding; + this.overlapBindings = overlapBindings; + this.genomeLocParser = genomeLocParser; + } + + /** + * Update rsID in vcToAnnotate with rsIDs from dbSNPBinding fetched from tracker + * @see #annotateOverlap(java.util.List, String, org.broadinstitute.variant.variantcontext.VariantContext) + * + * @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate + * for VariantContexts bound to dbSNPBinding that start at vcToAnnotate + * @param vcToAnnotate a variant context to annotate + * @return a VariantContext (may be == to vcToAnnotate) with updated rsID value + */ + public VariantContext annotateRsID(final RefMetaDataTracker tracker, final VariantContext vcToAnnotate) { + if ( dbSNPBinding != null ) { + final GenomeLoc loc = getLoc(vcToAnnotate); + return annotateRsID(tracker.getValues(dbSNPBinding, loc), vcToAnnotate); + } else { + return vcToAnnotate; + } + } + + /** + * Update rsID of vcToAnnotate with rsID 
match found in vcsAtLoc, if one exists + * + * @param vcsAtLoc a list of variant contexts starting at this location to use as sources for rsID values + * @param vcToAnnotate a variant context to annotate + * @return a VariantContext (may be == to vcToAnnotate) with updated rsID value + */ + public VariantContext annotateRsID(final List vcsAtLoc, final VariantContext vcToAnnotate ) { + final String rsID = getRsID(vcsAtLoc, vcToAnnotate); + + // add the ID if appropriate + if ( rsID != null ) { + final VariantContextBuilder vcb = new VariantContextBuilder(vcToAnnotate); + + if ( ! vcToAnnotate.hasID() ) { + return vcb.id(rsID).make(); + } else if ( ! vcToAnnotate.getID().contains(rsID) ) { + return vcb.id(vcToAnnotate.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID).make(); + } // falling through to return VC lower down + } + + // nothing to do, just return vc + return vcToAnnotate; + } + + private GenomeLoc getLoc(final VariantContext vc) { + return genomeLocParser.createGenomeLoc(vc); + } + + /** + * Add overlap attributes to vcToAnnotate against all overlapBindings in tracker + * + * @see #annotateOverlap(java.util.List, , String, org.broadinstitute.variant.variantcontext.VariantContext) + * for more information + * + * @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate + * for VariantContexts bound to dbSNPBinding that start at vcToAnnotate + * @param vcToAnnotate a variant context to annotate + * @return a VariantContext (may be == to vcToAnnotate) with updated overlaps update fields value + */ + public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, VariantContext vcToAnnotate) { + if ( overlapBindings.isEmpty() ) return vcToAnnotate; + + VariantContext annotated = vcToAnnotate; + final GenomeLoc loc = getLoc(vcToAnnotate); + for ( Map.Entry, String> overlapBinding : overlapBindings.entrySet() ) { + annotated = annotateOverlap(tracker.getValues(overlapBinding.getKey(), loc), overlapBinding.getValue(), 
annotated); // pass the running result so flags added for earlier bindings are kept + } + + return annotated; + } + + /** + * Add overlaps flag attributes to vcToAnnotate binding attributeKey => true if + * an overlapping variant context can be found in overlapTestVCs with vcToAnnotate + * + * Overlaps here means that the reference alleles are the same and at least one alt + * allele in vcToAnnotate is equal to one of the alt alleles in overlapTestVCs + * + * @param overlapTestVCs a non-null list of potential overlaps that start at vcToAnnotate + * @param attributeKey the key to set to true in the attribute map for vcToAnnotate if it overlaps + * @param vcToAnnotate a non-null VariantContext to annotate + * @return vcToAnnotate itself when nothing overlaps (or no overlap bindings are configured), otherwise a copy with attributeKey => true + */ + public VariantContext annotateOverlap(final List overlapTestVCs, final String attributeKey, VariantContext vcToAnnotate) { + if ( overlapBindings.isEmpty() ) return vcToAnnotate; + + final boolean overlaps = overlaps(overlapTestVCs, vcToAnnotate); + if ( overlaps ) { + return new VariantContextBuilder(vcToAnnotate).attribute(attributeKey, true).make(); + } else { + return vcToAnnotate; + } + } + + /** + * Returns the ID field of the first VariantContext in rsIDSourceVCs that has the same reference allele + * as vcToAnnotate and at least one of the alternative alleles in vcToAnnotate. + * + * Doesn't require vcToAnnotate to be a complete match, so + * + * A/C/G in VC in rsIDSourceVCs + * + * would match a VC with A/C but not A/T. Also we don't require all alleles to match + * so we would also match A/C/T to A/C/G. + * + * Will only match rsIDSourceVCs that aren't failing filters. 
+ * + * @param rsIDSourceVCs a non-null list of potential overlaps that start at vcToAnnotate + * @param vcToAnnotate a non-null VariantContext to annotate + * @return a String to use for the rsID from rsIDSourceVCs if one matches, or null if none matches + */ + private String getRsID(final List rsIDSourceVCs, final VariantContext vcToAnnotate) { + if ( rsIDSourceVCs == null ) throw new IllegalArgumentException("rsIDSourceVCs cannot be null"); + if ( vcToAnnotate == null ) throw new IllegalArgumentException("vcToAnnotate cannot be null"); + + for ( VariantContext vcComp : rsIDSourceVCs ) { + if ( vcComp.isFiltered() ) continue; // don't process any failed VCs + + if ( ! vcComp.getChr().equals(vcToAnnotate.getChr()) || vcComp.getStart() != vcToAnnotate.getStart() ) + throw new IllegalArgumentException("source rsID VariantContext " + vcComp + " doesn't start at the same position as vcToAnnotate " + vcToAnnotate); + + if ( vcToAnnotate.getReference().equals(vcComp.getReference()) ) { + for ( final Allele allele : vcToAnnotate.getAlternateAlleles() ) { + if ( vcComp.getAlternateAlleles().contains(allele) ) + return vcComp.getID(); + } + } + } + + return null; + } + + /** + * Does vcToAnnotate overlap with any of the records in potentialOverlaps? 
+ * + * @param potentialOverlaps a non-null list of potential overlaps that start at vcToAnnotate + * @param vcToAnnotate a non-null VariantContext to annotate + * @return true if vcToAnnotate overlaps (position and all alt alleles) with some variant in potentialOverlaps + */ + private boolean overlaps(final List potentialOverlaps, final VariantContext vcToAnnotate) { + return getRsID(potentialOverlaps, vcToAnnotate) != null; + } + + /** + * Get the collection of the RodBinding names for those being used for overlap detection + * @return a non-null collection of Strings + */ + public Collection getOverlapNames() { + return overlapBindings.values(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 60809134a..dbb68961f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.walkers.annotator.VariantOverlapAnnotator; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; @@ -112,24 +113,21 @@ public class VariantsToVCF extends RodWalker { // for dealing with indels in hapmap CloseableIterator dbsnpIterator = null; + VariantOverlapAnnotator variantOverlapAnnotator = null; public void initialize() { vcfwriter = VariantContextWriterFactory.sortOnTheFly(baseWriter, 40, false); + variantOverlapAnnotator = new VariantOverlapAnnotator(dbsnp.dbsnp, getToolkit().getGenomeLocParser()); } public Integer map(RefMetaDataTracker 
tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) return 0; - String rsID = dbsnp == null ? null : GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP); - Collection contexts = getVariantContexts(tracker, ref); for ( VariantContext vc : contexts ) { VariantContextBuilder builder = new VariantContextBuilder(vc); - if ( rsID != null && vc.emptyID() ) { - builder.id(rsID).make(); - } // set the appropriate sample name if necessary if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) { @@ -137,7 +135,8 @@ public class VariantsToVCF extends RodWalker { builder.genotypes(g); } - writeRecord(builder.make(), tracker, ref.getLocus()); + final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make()); + writeRecord(withID, tracker, ref.getLocus()); } return 1; diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java index 0fba432e7..aa2e92559 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java @@ -149,21 +149,6 @@ public class GATKVCFUtils { return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary()); } - public static String rsIDOfFirstRealVariant(List VCs, VariantContext.Type type) { - if ( VCs == null ) - return null; - - String rsID = null; - for ( VariantContext vc : VCs ) { - if ( vc.getType() == type ) { - rsID = vc.getID(); - break; - } - } - - return rsID; - } - /** * Utility class to read all of the VC records from a file * From 1c03ebc82d3b67a78d947aec39ffc28a552244b3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 6 Jun 2013 15:38:06 -0400 Subject: [PATCH 076/116] Implement 
ActiveRegionTraversal RefMetaDataTracker for map call; HaplotypeCaller now annotates ID from dbSNP -- Reuse infrastructure for RODs for reads to implement general IntervalReferenceOrderedView so that both TraverseReads and TraverseActiveRegions can use the same underlying infrastructure -- TraverseActiveRegions now provides a meaningful RefMetaDataTracker to ActiveRegionWalker.map -- Cleanup misc. code as it came up -- Resolves GSA-808: Write general utility code to do rsID allele matching, hook up to UG and HC --- .../haplotypecaller/GenotypingEngine.java | 4 +- .../haplotypecaller/HaplotypeCaller.java | 10 +- .../HaplotypeCallerIntegrationTest.java | 27 ++- .../IntervalOverlappingRODsFromStream.java | 8 +- .../IntervalReferenceOrderedView.java | 184 ++++++++++++++++++ .../ManagingReferenceOrderedView.java | 3 +- .../ReadBasedReferenceOrderedView.java | 104 +--------- .../providers/ReferenceOrderedView.java | 3 +- .../datasources/providers/RodLocusView.java | 3 +- .../gatk/executive/LinearMicroScheduler.java | 7 - .../traversals/TraverseActiveRegions.java | 157 +++++++++------ .../gatk/traversals/TraverseLociNano.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 13 +- .../annotator/VariantOverlapAnnotator.java | 8 +- .../sting/gatk/ReadMetricsUnitTest.java | 1 - ...IntervalReferenceOrderedViewUnitTest.java} | 4 +- .../ReferenceOrderedViewUnitTest.java | 6 +- .../TraverseActiveRegionsUnitTest.java | 2 - .../traversals/TraverseReadsUnitTest.java | 4 +- 19 files changed, 352 insertions(+), 198 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalReferenceOrderedView.java rename public/java/test/org/broadinstitute/sting/gatk/datasources/providers/{ReadBasedReferenceOrderedViewUnitTest.java => IntervalReferenceOrderedViewUnitTest.java} (98%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index cbcba28fd..04173b64f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -49,6 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; @@ -146,6 +147,7 @@ public class GenotypingEngine { final GenomeLoc refLoc, final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, + final RefMetaDataTracker tracker, final List activeAllelesToGenotype ) { // sanity check input arguments if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine); @@ -204,7 +206,7 @@ public class GenotypingEngine { convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) ); final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call ); - VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call); + VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call); if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! 
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 182e59493..e55413649 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -441,7 +441,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In private final static int MIN_READ_LENGTH = 10; private List samplesList = new ArrayList(); - private final List allelesToGenotype = new ArrayList(); private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file private final static Allele FAKE_ALT_ALLELE = Allele.create("", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file @@ -596,7 +595,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { final VariantContext vcFromAllelesRod = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), false, logger, UG_engine.getUAC().alleles); if( vcFromAllelesRod != null ) { - allelesToGenotype.add(vcFromAllelesRod); // save for later for processing during the ActiveRegion's map call. 
Should be folded into a RefMetaDataTracker object return new ActivityProfileState(ref.getLocus(), 1.0); } } @@ -664,12 +662,11 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final List activeAllelesToGenotype = new ArrayList<>(); if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - for( final VariantContext vc : allelesToGenotype ) { - if( originalActiveRegion.getLocation().overlapsP( getToolkit().getGenomeLocParser().createGenomeLoc(vc) ) ) { + for ( final VariantContext vc : metaDataTracker.getValues(UG_engine.getUAC().alleles) ) { + if ( vc.isNotFiltered() ) { activeAllelesToGenotype.add(vc); // do something with these VCs during GGA mode } } - allelesToGenotype.removeAll( activeAllelesToGenotype ); // No alleles found in this region so nothing to do! if ( activeAllelesToGenotype.isEmpty() ) { return NO_CALLS; } } else { @@ -704,6 +701,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In assemblyResult.paddedReferenceLoc, assemblyResult.regionForGenotyping.getLocation(), getToolkit().getGenomeLocParser(), + metaDataTracker, activeAllelesToGenotype ); // TODO -- must disable if we are doing NCT, or set the output type of ! presorted @@ -890,8 +888,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Override public Integer reduce(List callsInRegion, Integer numCalledRegions) { for( final VariantContext call : callsInRegion ) { - // TODO -- uncomment this line once ART-based walkers have a proper RefMetaDataTracker. - // annotationEngine.annotateDBs(metaDataTracker, getToolkit().getGenomeLocParser().createGenomeLoc(call), call); vcfWriter.add( call ); } return (callsInRegion.isEmpty() ? 
0 : 1) + numCalledRegions; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 77be9fba2..904f15728 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -47,15 +47,12 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broad.tribble.TribbleIndexedFeatureReader; import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.variantcontext.VariantContext; -import org.broadinstitute.variant.vcf.VCFCodec; import org.testng.annotations.Test; import java.io.File; @@ -69,6 +66,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam"; final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam"; final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam"; + final static String NA12878_PCRFREE = privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam"; final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam"; final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals"; @@ -199,4 +197,27 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest 
{ Arrays.asList("86bdd07a3ac4f6ce239c30efea8bf5ba")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } + + // -------------------------------------------------------------------------------------------------------------- + // + // test dbSNP annotation + // + // -------------------------------------------------------------------------------------------------------------- + + @Test + public void HCTestDBSNPAnnotationWGS() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, + Arrays.asList("7b23a288a31cafca3946f14f2381e7cb")); + executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); + } + + @Test + public void HCTestDBSNPAnnotationWEx() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 + + " -L " + hg19Intervals + " -isr INTERSECTION", 1, + Arrays.asList("9587029b702bb59bd4dfec69eac4c210")); + executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalOverlappingRODsFromStream.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalOverlappingRODsFromStream.java index fe3a0c6ce..3aff745fa 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalOverlappingRODsFromStream.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalOverlappingRODsFromStream.java @@ -72,8 +72,6 @@ class IntervalOverlappingRODsFromStream { /** * Get the list of RODs overlapping loc from this stream of RODs. 
* - * Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart - * * @param loc the interval to query * @return a non-null RODRecordList containing the overlapping RODs, which may be empty */ @@ -84,7 +82,6 @@ class IntervalOverlappingRODsFromStream { if ( lastQuery != null && loc.getStart() < lastQuery.getStart() ) throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery)); - trimCurrentFeaturesToLoc(loc); readOverlappingFutureFeatures(loc); return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc); } @@ -128,11 +125,14 @@ class IntervalOverlappingRODsFromStream { /** * Update function. Remove all elements of currentFeatures that end before loc * + * Must be called by clients periodically when they know they will never ask for data before + * loc, so that the running cache of RODs doesn't grow out of control. + * * @param loc the location to use */ @Requires("loc != null") @Ensures("currentFeatures.size() <= old(currentFeatures.size())") - private void trimCurrentFeaturesToLoc(final GenomeLoc loc) { + public void trimCurrentFeaturesToLoc(final GenomeLoc loc) { final ListIterator it = currentFeatures.listIterator(); while ( it.hasNext() ) { final GATKFeature feature = it.next(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalReferenceOrderedView.java new file mode 100644 index 000000000..5e884ce53 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/IntervalReferenceOrderedView.java @@ -0,0 +1,184 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, 
including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.datasources.providers; + +import net.sf.picard.util.PeekableIterator; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.reads.ReadShard; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * a ROD view that allows for requests for RODs that overlap intervals on the genome to produce a RefMetaDataTracker + */ +public class IntervalReferenceOrderedView implements ReferenceOrderedView { + /** a list of the RMDDataState (location->iterators) */ + private final List states = new ArrayList<>(1); + + /** + * Used to get genome locs for reads + */ + protected final 
GenomeLocParser genomeLocParser; + + /** + * The total extent of all reads in this span. We create iterators from our RODs + * from the start of this span, to the end. + */ + private final GenomeLoc shardSpan; + + /** + * Create a new IntervalReferenceOrderedView taking data from provider and capable of + * servicing ROD overlap requests within the genomic interval span + * + * @param provider a ShardDataProvider to give us data + * @param span a GenomeLoc span, or null indicating take the entire genome + */ + public IntervalReferenceOrderedView(final ShardDataProvider provider, final GenomeLoc span) { + if ( provider == null ) throw new IllegalArgumentException("provider cannot be null"); + if ( provider.hasReferenceOrderedData() && span == null ) throw new IllegalArgumentException("span cannot be null when provider has reference ordered data"); + + this.genomeLocParser = provider.getGenomeLocParser(); + this.shardSpan = span; + provider.register(this); + + // conditional to optimize the case where we don't have any ROD data + if ( provider.hasReferenceOrderedData() && ! 
shardSpan.isUnmapped() ) { + for (final ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) + states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan))); + } + } + + /** + * Testing constructor + */ + protected IntervalReferenceOrderedView(final GenomeLocParser genomeLocParser, + final GenomeLoc shardSpan, + final List names, + final List> featureSources) { + this.genomeLocParser = genomeLocParser; + this.shardSpan = shardSpan; + for ( int i = 0; i < names.size(); i++ ) + states.add(new RMDDataState(names.get(i), featureSources.get(i))); + } + + public Collection> getConflictingViews() { + List> classes = new ArrayList<>(); + classes.add(ManagingReferenceOrderedView.class); + return classes; + } + + /** + * Get a RefMetaDataTracker containing bindings for all RODs overlapping the start position of loc + * @param loc a GenomeLoc of size == 1 + * @return a non-null RefMetaDataTracker + */ + @Override + public RefMetaDataTracker getReferenceOrderedDataAtLocus(GenomeLoc loc) { + if ( loc == null ) throw new IllegalArgumentException("loc cannot be null"); + if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc); + return getReferenceOrderedDataForInterval(loc); + } + + /** + * Get a RefMetaDataTracker containing bindings for all RODs overlapping interval + * + * @param interval a non=null interval + * @return a non-null RefMetaDataTracker + */ + public RefMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) { + if ( interval == null ) throw new IllegalArgumentException("Interval cannot be null"); + + if ( states.isEmpty() || shardSpan.isUnmapped() ) // optimization for no bindings (common for read walkers) + return RefMetaDataTracker.EMPTY_TRACKER; + else { + final List bindings = new ArrayList<>(states.size()); + for ( final RMDDataState state : states ) + bindings.add(state.stream.getOverlapping(interval)); + return new RefMetaDataTracker(bindings); + } + } + + 
/** + * Trim down all of the ROD managers so that they only hold ROD bindings with start >= startOfDataToKeep.getStart() + * + * @param startOfDataToKeep a non-null genome loc + */ + public void trimCurrentFeaturesToLoc(final GenomeLoc startOfDataToKeep) { + if ( startOfDataToKeep == null ) throw new IllegalArgumentException("startOfDataToKeep cannot be null"); + + for ( final RMDDataState state : states ) + state.stream.trimCurrentFeaturesToLoc(startOfDataToKeep); + } + + /** + * Closes the current view. + */ + public void close() { + for (final RMDDataState state : states) + state.close(); + + // Clear out the existing data so that post-close() accesses to this data will fail-fast. + states.clear(); + } + + /** + * Models the traversal state of a given ROD lane. + */ + private static class RMDDataState { + public final ReferenceOrderedDataSource dataSource; + public final IntervalOverlappingRODsFromStream stream; + private final LocationAwareSeekableRODIterator iterator; + + public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) { + this.dataSource = dataSource; + this.iterator = iterator; + this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<>(iterator)); + } + + /** + * For testing + */ + public RMDDataState(final String name, final PeekableIterator iterator) { + this.dataSource = null; + this.iterator = null; + this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<>(iterator)); + } + + public void close() { + if ( dataSource != null ) + dataSource.close( iterator ); + } + } +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index 09b72f5eb..50f2369cb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -76,7 +76,8 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { * @param loc Locus at which to track. * @return A tracker containing information about this locus. */ - public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) { + @Override + public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { if ( states.isEmpty() ) return RefMetaDataTracker.EMPTY_TRACKER; else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index 52f490972..84e27c953 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -42,52 +42,9 @@ import java.util.Collection; import java.util.List; /** a ROD view for reads. This provides the Read traversals a way of getting a RefMetaDataTracker */ -public class ReadBasedReferenceOrderedView implements View { - // a list of the RMDDataState (location->iterators) - private final List states = new ArrayList(1); - private final static RefMetaDataTracker EMPTY_TRACKER = new RefMetaDataTracker(); - - /** - * Used to get genome locs for reads - */ - private final GenomeLocParser genomeLocParser; - - /** - * The total extent of all reads in this span. We create iterators from our RODs - * from the start of this span, to the end. 
- */ - private final GenomeLoc shardSpan; - +public class ReadBasedReferenceOrderedView extends IntervalReferenceOrderedView { public ReadBasedReferenceOrderedView(final ShardDataProvider provider) { - this.genomeLocParser = provider.getGenomeLocParser(); - // conditional to optimize the case where we don't have any ROD data - this.shardSpan = provider.getReferenceOrderedData() != null ? ((ReadShard)provider.getShard()).getReadsSpan() : null; - provider.register(this); - - if ( provider.getReferenceOrderedData() != null && ! shardSpan.isUnmapped() ) { - for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) - states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan))); - } - } - - - /** - * Testing constructor - */ - protected ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser, - final GenomeLoc shardSpan, - final List names, - final List> featureSources) { - this.genomeLocParser = genomeLocParser; - this.shardSpan = shardSpan; - for ( int i = 0; i < names.size(); i++ ) - states.add(new RMDDataState(names.get(i), featureSources.get(i))); - } - - public Collection> getConflictingViews() { - List> classes = new ArrayList>(); - classes.add(ManagingReferenceOrderedView.class); - return classes; + super(provider, provider.hasReferenceOrderedData() ? 
((ReadShard)provider.getShard()).getReadsSpan() : null); } /** @@ -101,60 +58,11 @@ public class ReadBasedReferenceOrderedView implements View { @Ensures("result != null") public RefMetaDataTracker getReferenceOrderedDataForRead(final SAMRecord rec) { if ( rec.getReadUnmappedFlag() ) - // empty RODs for unmapped reads - return new RefMetaDataTracker(); - else - return getReferenceOrderedDataForInterval(genomeLocParser.createGenomeLoc(rec)); - } - - @Requires({"interval != null", "shardSpan == null || shardSpan.isUnmapped() || shardSpan.containsP(interval)"}) - @Ensures("result != null") - public RefMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) { - if ( states.isEmpty() || shardSpan.isUnmapped() ) // optimization for no bindings (common for read walkers) - return EMPTY_TRACKER; + return RefMetaDataTracker.EMPTY_TRACKER; else { - final List bindings = new ArrayList(states.size()); - for ( final RMDDataState state : states ) - bindings.add(state.stream.getOverlapping(interval)); - return new RefMetaDataTracker(bindings); - } - } - - /** - * Closes the current view. - */ - public void close() { - for (final RMDDataState state : states) - state.close(); - - // Clear out the existing data so that post-close() accesses to this data will fail-fast. - states.clear(); - } - - /** Models the traversal state of a given ROD lane. 
*/ - private static class RMDDataState { - public final ReferenceOrderedDataSource dataSource; - public final IntervalOverlappingRODsFromStream stream; - private final LocationAwareSeekableRODIterator iterator; - - public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) { - this.dataSource = dataSource; - this.iterator = iterator; - this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator(iterator)); - } - - /** - * For testing - */ - public RMDDataState(final String name, final PeekableIterator iterator) { - this.dataSource = null; - this.iterator = null; - this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator(iterator)); - } - - public void close() { - if ( dataSource != null ) - dataSource.close( iterator ); + final GenomeLoc readSpan = genomeLocParser.createGenomeLoc(rec); + trimCurrentFeaturesToLoc(readSpan); + return getReferenceOrderedDataForInterval(readSpan); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java index fa83dff82..85c20a6c3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java @@ -25,10 +25,9 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; public interface ReferenceOrderedView extends View { - RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext ); + RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ); } diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 3fb4c7352..1b6c14628 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -98,7 +98,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { rodQueue = new RODMergingIterator(iterators); } - public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) { + @Override + public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { // special case the interval again -- add it into the ROD if ( interval != null ) { allTracksHere.add(interval); } return new RefMetaDataTracker(allTracksHere); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 415049228..dc46849df 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; -import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; @@ -114,12 +113,6 @@ public class LinearMicroScheduler extends MicroScheduler { done = walker.isDone(); } - // Special function call to empty out the work queue. 
Ugly for now but will be cleaned up when we eventually push this functionality more into the engine - if( traversalEngine instanceof TraverseActiveRegions) { - final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit()); - accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator - } - Object result = accumulator.finishTraversal(); outputTracker.close(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index cac93cb07..b85365366 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -29,14 +29,12 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.providers.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ActiveRegionTraversalParameters; import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; -import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; @@ -92,12 +90,26 @@ public final class TraverseActiveRegions extends TraversalEngine walker; - final NanoScheduler nanoScheduler; + final NanoScheduler nanoScheduler; + + /** + * Data to use in the ActiveRegionWalker.map function produced by the NanoScheduler input iterator + */ + private static class 
MapData { + public ActiveRegion activeRegion; + public RefMetaDataTracker tracker; + + private MapData(ActiveRegion activeRegion, RefMetaDataTracker tracker) { + this.activeRegion = activeRegion; + this.tracker = tracker; + } + } /** * Create a single threaded active region traverser @@ -112,12 +124,12 @@ public final class TraverseActiveRegions extends TraversalEngine(nThreads); - nanoScheduler.setProgressFunction(new NSProgressFunction() { + nanoScheduler.setProgressFunction(new NSProgressFunction() { @Override - public void progress(ActiveRegion lastActiveRegion) { + public void progress(MapData lastActiveRegion) { if ( lastActiveRegion != null ) // note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon - printProgress(lastActiveRegion.getLocation().getStopLocation()); + printProgress(lastActiveRegion.activeRegion.getLocation().getStopLocation()); } }); } @@ -149,13 +161,6 @@ public final class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine walker, - final LocusShardDataProvider dataProvider, - final LocusView locusView) { - if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) - return new ManagingReferenceOrderedView( dataProvider ); - else - return (RodLocusView)locusView; - } - - // ------------------------------------------------------------------------------------- // // Actual traverse function @@ -254,7 +267,7 @@ public final class TraverseActiveRegions extends TraversalEngine activeRegionIterator = new ActiveRegionIterator(dataProvider); + final Iterator activeRegionIterator = new ActiveRegionIterator(dataProvider); final TraverseActiveRegionMap myMap = new TraverseActiveRegionMap(); final TraverseActiveRegionReduce myReduce = new TraverseActiveRegionReduce(); final T result = nanoScheduler.execute(activeRegionIterator, myMap, sum, myReduce); @@ -262,29 +275,53 @@ public final class TraverseActiveRegions extends TraversalEngine { + private class 
ActiveRegionIterator implements Iterator { private final LocusShardDataProvider dataProvider; - private LinkedList readyActiveRegions = new LinkedList(); + private LinkedList readyActiveRegions = new LinkedList<>(); private boolean done = false; private final LocusView locusView; private final LocusReferenceView referenceView; - private final ReferenceOrderedView referenceOrderedDataView; private final GenomeLoc locOfLastReadAtTraversalStart; + private final IntervalReferenceOrderedView referenceOrderedDataView; + private final GenomeLoc currentWindow; + private final boolean processRemainingActiveRegions; public ActiveRegionIterator( final LocusShardDataProvider dataProvider ) { this.dataProvider = dataProvider; locusView = new AllLocusView(dataProvider); referenceView = new LocusReferenceView( walker, dataProvider ); - referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); + + // The data shard may carry a number of locations to process (due to being indexed together). + // This value is just the interval we are processing within the entire provider + currentWindow = dataProvider.getLocus(); + final int currentWindowPos = dataProvider.getShard().getGenomeLocs().indexOf(currentWindow); + if ( currentWindowPos == -1 ) throw new IllegalStateException("Data provider " + dataProvider + " didn't have our current window in it " + currentWindow); + processRemainingActiveRegions = currentWindowPos == dataProvider.getShard().getGenomeLocs().size() - 1; + + // the rodSpan covers all of the bases in the activity profile, including all of the bases + // through the current window interval. This is because we may issue a query to get data for an + // active region spanning before the current interval as far back as the start of the current profile, + // if we have pending work to do that finalizes in this interval. + final GenomeLoc rodSpan = activityProfile.getSpan() == null ? 
currentWindow : activityProfile.getSpan().endpointSpan(currentWindow); + if ( ! dataProvider.getShard().getLocation().containsP(rodSpan) ) throw new IllegalStateException("Rod span " + rodSpan + " isn't contained within the data shard " + dataProvider.getShard().getLocation() + ", meaning we wouldn't get all of the data we need"); + referenceOrderedDataView = new IntervalReferenceOrderedView( dataProvider, rodSpan ); // We keep processing while the next reference location is within the interval locOfLastReadAtTraversalStart = spanOfLastSeenRead(); + + // load in the workQueue the preset regions that span the current contig, if it's different from the last one + if ( walkerHasPresetRegions && ( lastRegionProcessed == null || ! currentWindow.onSameContig(lastRegionProcessed)) ) { + loadPresetRegionsForContigToWorkQueue(currentWindow.getContig()); + } + + // remember the last region we processed for sanity checking later + lastRegionProcessed = currentWindow; } @Override public void remove() { throw new UnsupportedOperationException("Cannot remove from ActiveRegionIterator"); } @Override - public ActiveRegion next() { + public MapData next() { return readyActiveRegions.pop(); } @Override @@ -326,7 +363,7 @@ public final class TraverseActiveRegions extends TraversalEngine newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false); + final List newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false, referenceOrderedDataView); dataProvider.getShard().getReadMetrics().incrementNumIterations(); @@ -335,7 +372,7 @@ public final class TraverseActiveRegions extends TraversalEngine extends TraversalEngine walker, T sum) { - for ( final ActiveRegion region : prepActiveRegionsForProcessing((ActiveRegionWalker)walker, true, true) ) { - final M x = ((ActiveRegionWalker) walker).map(region, null); - sum = walker.reduce( x, sum ); - } - return sum; - } - // 
------------------------------------------------------------------------------------- // // Functions to manage and interact with the live / dead zone @@ -594,7 +627,10 @@ public final class TraverseActiveRegions extends TraversalEngine prepActiveRegionsForProcessing(final ActiveRegionWalker walker, final boolean flushActivityProfile, final boolean forceAllRegionsToBeActive) { + private List prepActiveRegionsForProcessing(final ActiveRegionWalker walker, + final boolean flushActivityProfile, + final boolean forceAllRegionsToBeActive, + final IntervalReferenceOrderedView referenceOrderedDataView) { if ( ! walkerHasPresetRegions ) { // We don't have preset regions, so we get our regions from the activity profile final Collection activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMinRegionSize(), getMaxRegionSize(), flushActivityProfile); @@ -603,13 +639,13 @@ public final class TraverseActiveRegions extends TraversalEngine readyRegions = new LinkedList(); + final LinkedList readyRegions = new LinkedList<>(); while( workQueue.peek() != null ) { final ActiveRegion activeRegion = workQueue.peek(); if ( forceAllRegionsToBeActive || regionCompletelyWithinDeadZone(activeRegion) ) { writeActivityProfile(activeRegion.getSupportingStates()); writeActiveRegion(activeRegion); - readyRegions.add(prepActiveRegionForProcessing(workQueue.remove(), walker)); + readyRegions.add(prepActiveRegionForProcessing(workQueue.remove(), walker, referenceOrderedDataView)); } else { break; } @@ -619,8 +655,10 @@ public final class TraverseActiveRegions extends TraversalEngine walker) { - final List stillLive = new LinkedList(); + private MapData prepActiveRegionForProcessing(final ActiveRegion activeRegion, + final ActiveRegionWalker walker, + final IntervalReferenceOrderedView referenceOrderedDataView) { + final List stillLive = new LinkedList<>(); for ( final GATKSAMRecord read : myReads.popCurrentReads() ) { boolean killed = false; final GenomeLoc readLoc = 
this.engine.getGenomeLocParser().createGenomeLoc( read ); @@ -653,14 +691,21 @@ public final class TraverseActiveRegions extends TraversalEngine { + private class TraverseActiveRegionMap implements NSMapFunction { @Override - public M apply(final ActiveRegion activeRegion) { - if ( DEBUG ) logger.info("Executing walker.map for " + activeRegion + " in thread " + Thread.currentThread().getName()); - return walker.map(activeRegion, null); + public M apply(final MapData mapData) { + if ( DEBUG ) logger.info("Executing walker.map for " + mapData.activeRegion + " in thread " + Thread.currentThread().getName()); + return walker.map(mapData.activeRegion, mapData.tracker); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java index 8e67963c1..627f98d69 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java @@ -179,7 +179,7 @@ public class TraverseLociNano extends TraversalEngine, final ReferenceContext refContext = referenceView.getReferenceContext(location); // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location, refContext); + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location); numIterations++; return new MapData(locus, refContext, tracker); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 90050a10a..078a36dd9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java 
@@ -204,8 +204,10 @@ public class VariantAnnotatorEngine { return annotateDBs(tracker, annotated); } - public VariantContext annotateContext(final Map perReadAlleleLikelihoodMap, VariantContext vc) { - Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); + public VariantContext annotateContextForActiveRegion(final RefMetaDataTracker tracker, + final Map perReadAlleleLikelihoodMap, + final VariantContext vc) { + final Map infoAnnotations = new LinkedHashMap<>(vc.getAttributes()); // go through all the requested info annotationTypes for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) { @@ -219,10 +221,13 @@ public class VariantAnnotatorEngine { } // generate a new annotated VC - VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations); + final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations); // annotate genotypes, creating another new VC in the process - return builder.genotypes(annotateGenotypes(null, null, null, vc, perReadAlleleLikelihoodMap)).make(); + final VariantContext annotated = builder.genotypes(annotateGenotypes(null, null, null, vc, perReadAlleleLikelihoodMap)).make(); + + // annotate db occurrences + return annotateDBs(tracker, annotated); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotator.java index 0efabba3c..07af4bd74 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantOverlapAnnotator.java @@ -122,7 +122,7 @@ public final class VariantOverlapAnnotator { /** * Add overlap attributes to vcToAnnotate against all overlapBindings in tracker * - * @see #annotateOverlap(java.util.List, , String, org.broadinstitute.variant.variantcontext.VariantContext) + * @see 
#annotateOverlap(java.util.List, String, org.broadinstitute.variant.variantcontext.VariantContext) * for more information * * @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate @@ -130,12 +130,12 @@ public final class VariantOverlapAnnotator { * @param vcToAnnotate a variant context to annotate * @return a VariantContext (may be == to vcToAnnotate) with updated overlaps update fields value */ - public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, VariantContext vcToAnnotate) { + public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, final VariantContext vcToAnnotate) { if ( overlapBindings.isEmpty() ) return vcToAnnotate; VariantContext annotated = vcToAnnotate; final GenomeLoc loc = getLoc(vcToAnnotate); - for ( Map.Entry, String> overlapBinding : overlapBindings.entrySet() ) { + for ( final Map.Entry, String> overlapBinding : overlapBindings.entrySet() ) { annotated = annotateOverlap(tracker.getValues(overlapBinding.getKey(), loc), overlapBinding.getValue(), vcToAnnotate); } @@ -186,7 +186,7 @@ public final class VariantOverlapAnnotator { if ( rsIDSourceVCs == null ) throw new IllegalArgumentException("rsIDSourceVCs cannot be null"); if ( vcToAnnotate == null ) throw new IllegalArgumentException("vcToAnnotate cannot be null"); - for ( VariantContext vcComp : rsIDSourceVCs ) { + for ( final VariantContext vcComp : rsIDSourceVCs ) { if ( vcComp.isFiltered() ) continue; // don't process any failed VCs if ( ! 
vcComp.getChr().equals(vcToAnnotate.getChr()) || vcComp.getStart() != vcToAnnotate.getStart() ) diff --git a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java index 3225a128c..56725147e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java @@ -256,7 +256,6 @@ public class ReadMetricsUnitTest extends BaseTest { } windowMaker.close(); } - traverseActiveRegions.endTraversal(walker, 0); Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/IntervalReferenceOrderedViewUnitTest.java similarity index 98% rename from public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java rename to public/java/test/org/broadinstitute/sting/gatk/datasources/providers/IntervalReferenceOrderedViewUnitTest.java index bf4d36d92..784bd727e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/IntervalReferenceOrderedViewUnitTest.java @@ -49,7 +49,7 @@ import java.util.*; /** * @author depristo */ -public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { +public class IntervalReferenceOrderedViewUnitTest extends BaseTest { private static int startingChr = 1; private static int endingChr = 2; private static int readCount = 100; @@ -285,7 +285,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { 
Collections.sort(intervals); final GenomeLoc span = span(intervals); - final ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(genomeLocParser, span, names, iterators); + final IntervalReferenceOrderedView view = new IntervalReferenceOrderedView(genomeLocParser, span, names, iterators); if ( testStateless ) { // test each tracker is well formed, as each is created diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index fad632cfd..1d39f43c6 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -97,7 +97,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10), null); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10)); Assert.assertEquals(tracker.getValues(Feature.class).size(), 0, "The tracker should not have produced any data"); } @@ -115,7 +115,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null); + RefMetaDataTracker tracker = 
view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); TableFeature datum = tracker.getFirstValue(new RodBinding(TableFeature.class, "tableTest")); Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect"); @@ -141,7 +141,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); TableFeature datum1 = tracker.getFirstValue(new RodBinding(TableFeature.class, "tableTest1")); Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index 1f5cd6d0e..e4b6c37cc 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -405,8 +405,6 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { for (LocusShardDataProvider dataProvider : createDataProviders(t, walker, intervals, bam)) t.traverse(walker, dataProvider, 0); - t.endTraversal(walker, 0); - return walker.mappedActiveRegions; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index e8840c39f..5b52d4e33 100644 --- 
a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.reads.*; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.qc.CountReads; @@ -47,6 +48,7 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import static org.testng.Assert.fail; @@ -146,7 +148,7 @@ public class TraverseReadsUnitTest extends BaseTest { fail("Shard == null"); } - ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard,genomeLocParser,dataSource.seek(shard),null,null); + ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard,genomeLocParser,dataSource.seek(shard),null, Collections.emptyList()); accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator); dataProvider.close(); } From 58e354176e14e4175940f6783962661d33f18774 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 11 Jun 2013 10:33:22 -0400 Subject: [PATCH 077/116] Minor changes to docs in the graph pruning. 
--- .../haplotypecaller/graphs/LowWeightChainPruner.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java index 7327b5736..27b6bd902 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java @@ -80,7 +80,7 @@ public class LowWeightChainPruner { final Set edgesToKeep = new LinkedHashSet<>(); for ( final Path linearChain : getLinearChains(graph) ) { - if( mustBeKeep(linearChain, pruneFactor) ) { + if( mustBeKept(linearChain, pruneFactor) ) { // we must keep edges in any path that contains a reference edge or an edge with weight > pruneFactor edgesToKeep.addAll(linearChain.getEdges()); } @@ -96,10 +96,14 @@ public class LowWeightChainPruner { } /** - * Get the maximum pruning multiplicity seen on any edge in this graph - * @return an integer > 0 + * Traverse the edges in the path and determine if any are either ref edges or have weight above + * the pruning factor and should therefore not be pruned away. + * + * @param path the path in question + * @param pruneFactor the integer pruning factor + * @return true if any edge in the path must be kept */ - private boolean mustBeKeep(final Path path, final int pruneFactor) { + private boolean mustBeKept(final Path path, final int pruneFactor) { for ( final E edge : path.getEdges() ) { if ( edge.getPruningMultiplicity() >= pruneFactor || edge.isRef() ) return true; From e4e7d39e2c8e9cb6a21f5152f46e20d334f81df0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 23 May 2013 12:02:19 -0400 Subject: [PATCH 078/116] Fix FN problem stemming from sequence graphs that contain cycles. 
Problem: The sequence graphs can get very complex and it's not enough just to test that any given read has non-unique kmers. Reads with variants can have kmers that match unique regions of the reference, and this causes cycles in the final sequence graph. Ultimately the problem is that kmers of 10/25 may not be large enough for these complex regions. Solution: We continue to try kmers of 10/25 but detect whether cycles exist; if so, we do not use them. If (and only if) we can't get usable graphs from the 10/25 kmers, then we start iterating over larger kmers until we either can generate a graph without cycles or attempt too many iterations. --- .../haplotypecaller/HaplotypeCaller.java | 6 +- .../readthreading/ReadThreadingAssembler.java | 145 +++++++++++------- .../readthreading/ReadThreadingGraph.java | 18 ++- .../ReadThreadingGraphUnitTest.java | 35 +++++ 4 files changed, 147 insertions(+), 57 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index e55413649..a41b68e2c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -266,6 +266,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="kmerSize", shortName="kmerSize", doc="Kmer size to use in the read threading assembler", required = false) protected List kmerSizes = Arrays.asList(10, 25); + @Advanced + @Argument(fullName="dontIncreaseKmerSizesForCycles", shortName="dontIncreaseKmerSizesForCycles", doc="Should we disable the iterating over kmer sizes when graph cycles are detected?", required = false) + protected boolean dontIncreaseKmerSizesForCycles = false; + /** * Assembly graph can be quite complex, and could imply a very large number of possible haplotypes. 
Each haplotype * considered requires N PairHMM evaluations if there are N reads across all samples. In order to control the @@ -520,7 +524,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final int maxAllowedPathsForReadThreadingAssembler = Math.max(maxPathsPerSample * nSamples, MIN_PATHS_PER_GRAPH); assemblyEngine = useDebruijnAssembler ? new DeBruijnAssembler(minKmerForDebruijnAssembler, onlyUseKmerSizeForDebruijnAssembler) - : new ReadThreadingAssembler(maxAllowedPathsForReadThreadingAssembler, kmerSizes); + : new ReadThreadingAssembler(maxAllowedPathsForReadThreadingAssembler, kmerSizes, dontIncreaseKmerSizesForCycles); assemblyEngine.setErrorCorrectKmers(errorCorrectKmers); assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 123b36640..0887929ab 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -49,6 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.LocalAssemblyEngine; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -63,11 +64,14 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { private final static int DEFAULT_NUM_PATHS_PER_GRAPH = 128; private final static int GGA_MODE_ARTIFICIAL_COUNTS = 1000; + private final static int KMER_SIZE_ITERATION_INCREASE = 10; + private 
final static int MAX_KMER_ITERATIONS_TO_ATTEMPT = 6; /** The min and max kmer sizes to try when building the graph. */ private final List kmerSizes; private final int maxAllowedPathsForReadThreadingAssembler; + private final boolean dontIncreaseKmerSizesForCycles; private boolean requireReasonableNumberOfPaths = false; protected boolean removePathsNotConnectedToRef = true; private boolean justReturnRawGraph = false; @@ -77,10 +81,15 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { this(DEFAULT_NUM_PATHS_PER_GRAPH, Arrays.asList(25)); } - public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes) { + public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes, final boolean dontIncreaseKmerSizesForCycles) { super(maxAllowedPathsForReadThreadingAssembler); this.kmerSizes = kmerSizes; this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler; + this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles; + } + + public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes) { + this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, false); } /** for testing purposes */ @@ -89,67 +98,99 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { } @Override - public List assemble( final List reads, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { + public List assemble(final List reads, final Haplotype refHaplotype, final List activeAlleleHaplotypes) { final List graphs = new LinkedList<>(); + // first, try using the requested kmer sizes for ( final int kmerSize : kmerSizes ) { - final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly); + final SeqGraph graph = createGraph(reads, refHaplotype, kmerSize, activeAlleleHaplotypes); + if ( graph != null ) + graphs.add(graph); + } - // add 
the reference sequence to the graph - rtgraph.addSequence("ref", refHaplotype.getBases(), null, true); - - // add the artificial GGA haplotypes to the graph - int hapCount = 0; - for( final Haplotype h : activeAlleleHaplotypes ) { - final int[] counts = new int[h.length()]; - Arrays.fill(counts, GGA_MODE_ARTIFICIAL_COUNTS); - rtgraph.addSequence("activeAllele" + hapCount++, h.getBases(), counts, false); - } - - // Next pull kmers out of every read and throw them on the graph - for( final GATKSAMRecord read : reads ) { - rtgraph.addRead(read); - } - - // actually build the read threading graph - rtgraph.buildGraphIfNecessary(); - printDebugGraphTransform(rtgraph, new File("sequenceGraph.0.0.raw_readthreading_graph.dot")); - - // go through and prune all of the chains where all edges have <= pruneFactor. This must occur - // before recoverDanglingTails in the graph, so that we don't spend a ton of time recovering - // tails that we'll ultimately just trim away anyway, as the dangling tail edges have weight of 1 - rtgraph.pruneLowWeightChains(pruneFactor); - - // look at all chains in the graph that terminate in a non-ref node (dangling sinks) and see if - // we can recover them by merging some N bases from the chain back into the reference uniquely, for - // N < kmerSize - if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(); - - // remove all heading and trailing paths - if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef(); - - printDebugGraphTransform(rtgraph, new File("sequenceGraph.0.1.cleaned_readthreading_graph.dot")); - - final SeqGraph initialSeqGraph = rtgraph.convertToSequenceGraph(); - - // if the unit tests don't want us to cleanup the graph, just return the raw sequence graph - if ( justReturnRawGraph ) return Collections.singletonList(initialSeqGraph); - - if ( debug ) logger.info("Using kmer size of " + rtgraph.getKmerSize() + " in read threading assembler"); - printDebugGraphTransform(initialSeqGraph, new 
File("sequenceGraph.0.2.initial_seqgraph.dot")); - initialSeqGraph.cleanNonRefPaths(); // TODO -- I don't this is possible by construction - - final SeqGraph seqGraph = cleanupSeqGraph(initialSeqGraph); - if ( seqGraph != null ) { - if ( ! requireReasonableNumberOfPaths || reasonableNumberOfPaths(seqGraph) ) { - graphs.add(seqGraph); - } + // if none of those worked, iterate over larger sizes if allowed to do so + if ( graphs.isEmpty() && !dontIncreaseKmerSizesForCycles ) { + int kmerSize = MathUtils.arrayMaxInt(kmerSizes) + KMER_SIZE_ITERATION_INCREASE; + int numIterations = 1; + while ( graphs.isEmpty() && numIterations <= MAX_KMER_ITERATIONS_TO_ATTEMPT ) { + final SeqGraph graph = createGraph(reads, refHaplotype, kmerSize, activeAlleleHaplotypes); + if ( graph != null ) + graphs.add(graph); + kmerSize += KMER_SIZE_ITERATION_INCREASE; + numIterations++; } } return graphs; } + /** + * Creates the sequence graph for the given kmerSize + * + * @param reads reads to use + * @param refHaplotype reference haplotype + * @param kmerSize kmer size + * @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph + * @return sequence graph or null if one could not be created (e.g. 
because it contains cycles or too many paths) + */ + protected SeqGraph createGraph(final List reads, final Haplotype refHaplotype, final int kmerSize, final List activeAlleleHaplotypes) { + final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly); + + // add the reference sequence to the graph + rtgraph.addSequence("ref", refHaplotype.getBases(), null, true); + + // add the artificial GGA haplotypes to the graph + int hapCount = 0; + for ( final Haplotype h : activeAlleleHaplotypes ) { + final int[] counts = new int[h.length()]; + Arrays.fill(counts, GGA_MODE_ARTIFICIAL_COUNTS); + rtgraph.addSequence("activeAllele" + hapCount++, h.getBases(), counts, false); + } + + // Next pull kmers out of every read and throw them on the graph + for( final GATKSAMRecord read : reads ) { + rtgraph.addRead(read); + } + + // actually build the read threading graph + rtgraph.buildGraphIfNecessary(); + + // sanity check: make sure there are no cycles in the graph + if ( rtgraph.hasCycles() ) { + if ( debug ) logger.info("Not using kmer size of " + rtgraph.getKmerSize() + " in read threading assembler because it contains a cycle"); + return null; + } + + printDebugGraphTransform(rtgraph, new File("sequenceGraph.0.0.raw_readthreading_graph.dot")); + + // go through and prune all of the chains where all edges have <= pruneFactor. 
This must occur + // before recoverDanglingTails in the graph, so that we don't spend a ton of time recovering + // tails that we'll ultimately just trim away anyway, as the dangling tail edges have weight of 1 + rtgraph.pruneLowWeightChains(pruneFactor); + + // look at all chains in the graph that terminate in a non-ref node (dangling sinks) and see if + // we can recover them by merging some N bases from the chain back into the reference uniquely, for + // N < kmerSize + if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(); + + // remove all heading and trailing paths + if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef(); + + printDebugGraphTransform(rtgraph, new File("sequenceGraph.0.1.cleaned_readthreading_graph.dot")); + + final SeqGraph initialSeqGraph = rtgraph.convertToSequenceGraph(); + + // if the unit tests don't want us to cleanup the graph, just return the raw sequence graph + if ( justReturnRawGraph ) return initialSeqGraph; + + if ( debug ) logger.info("Using kmer size of " + rtgraph.getKmerSize() + " in read threading assembler"); + printDebugGraphTransform(initialSeqGraph, new File("sequenceGraph.0.2.initial_seqgraph.dot")); + initialSeqGraph.cleanNonRefPaths(); // TODO -- I don't this is possible by construction + + final SeqGraph seqGraph = cleanupSeqGraph(initialSeqGraph); + return ( seqGraph != null && requireReasonableNumberOfPaths && !reasonableNumberOfPaths(seqGraph) ) ? null : seqGraph; + } + /** * Did we find a reasonable number of paths in this graph? 
* @param graph diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index 8e879377f..bbc1618ac 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -54,6 +54,7 @@ import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.jgrapht.EdgeFactory; +import org.jgrapht.alg.CycleDetector; import java.io.File; import java.util.*; @@ -297,7 +298,7 @@ public class ReadThreadingGraph extends BaseGraph(this).detectCycles(); + } + public void recoverDanglingTails() { if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built"); @@ -409,7 +417,8 @@ public class ReadThreadingGraph extends BaseGraph determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) { // count up occurrences of kmers within each read final KMerCounter counter = new KMerCounter(kmerSize); - for ( int i = 0; i <= seqForKmers.stop - kmerSize; i++ ) { + final int stopPosition = seqForKmers.stop - kmerSize; + for ( int i = 0; i <= stopPosition; i++ ) { final Kmer kmer = new Kmer(seqForKmers.sequence, i, kmerSize); counter.addKmer(kmer, 1); } @@ -578,7 +587,7 @@ public class ReadThreadingGraph extends BaseGraph " + uniqueMergeVertex); @@ -590,7 +599,8 @@ public class ReadThreadingGraph extends BaseGraph reads = new ArrayList<>(); + for ( int index = 0; index < alt.length() - 100; index += 20 ) + reads.add(ArtificialSAMUtils.createArtificialRead(Arrays.copyOfRange(alt.getBytes(), index, index + 100), Utils.dupBytes((byte) 30, 100), 100 + "M")); + 
+ // test that there are cycles detected for small kmer + final ReadThreadingGraph rtgraph25 = new ReadThreadingGraph(25); + rtgraph25.addSequence("ref", ref.getBytes(), null, true); + for ( final GATKSAMRecord read : reads ) + rtgraph25.addRead(read); + rtgraph25.buildGraphIfNecessary(); + Assert.assertTrue(rtgraph25.hasCycles()); + + // test that there are no cycles detected for large kmer + final ReadThreadingGraph rtgraph75 = new ReadThreadingGraph(75); + rtgraph75.addSequence("ref", ref.getBytes(), null, true); + for ( final GATKSAMRecord read : reads ) + rtgraph75.addRead(read); + rtgraph75.buildGraphIfNecessary(); + Assert.assertFalse(rtgraph75.hasCycles()); + } + // TODO -- update to use determineKmerSizeAndNonUniques directly // @DataProvider(name = "KmerSizeData") // public Object[][] makeKmerSizeDataProvider() { From 77868d034f4e006b8e33d2e9bf39447b88790ba7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 30 May 2013 14:00:43 -0400 Subject: [PATCH 079/116] Do not allow the use of Ns in reads for graph construction. Ns are treated as wildcards in the PairHMM so creating haplotypes with Ns gives them artificial advantages over other ones. This was the cause of at least one FN where there were Ns at a SNP position. 
--- .../readthreading/ReadThreadingGraph.java | 15 +++++++++- .../LocalAssemblyEngineUnitTest.java | 2 +- .../ReadThreadingGraphUnitTest.java | 30 ++++++++++++++++--- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index bbc1618ac..ab6b17c35 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -50,6 +50,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.KMerCounter; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -611,7 +612,7 @@ public class ReadThreadingGraph extends BaseGraph= minBaseQualityToUseInAssembly; + } + /** * Get the set of non-unique kmers in this graph. 
For debugging purposes * @return a non-null set of kmers diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java index 74361de1b..a74ce1c75 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java @@ -251,7 +251,7 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { for ( int snpPos = 0; snpPos < windowSize; snpPos++) { if ( snpPos > excludeVariantsWithXbp && (windowSize - snpPos) >= excludeVariantsWithXbp ) { final byte[] altBases = ref.getBytes(); - altBases[snpPos] = 'N'; + altBases[snpPos] = altBases[snpPos] == 'A' ? (byte)'C' : (byte)'A'; final String alt = new String(altBases); tests.add(new Object[]{"SNP at " + snpPos, assembler, refLoc, ref, alt}); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java index 340777513..67ee52734 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java @@ -48,10 +48,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer; -import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.MultiSampleEdge; -import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.SeqGraph; +import 
org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; @@ -180,7 +178,31 @@ public class ReadThreadingGraphUnitTest extends BaseTest { Assert.assertFalse(rtgraph75.hasCycles()); } - // TODO -- update to use determineKmerSizeAndNonUniques directly + @Test(enabled = !DEBUG) + public void testNsInReadsAreNotUsedForGraph() { + + final int length = 100; + final byte[] ref = Utils.dupBytes((byte)'A', length); + + final ReadThreadingGraph rtgraph = new ReadThreadingGraph(25); + rtgraph.addSequence("ref", ref, null, true); + + // add reads with Ns at any position + for ( int i = 0; i < length; i++ ) { + final byte[] bases = ref.clone(); + bases[i] = 'N'; + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, Utils.dupBytes((byte) 30, length), length + "M"); + rtgraph.addRead(read); + } + rtgraph.buildGraphIfNecessary(); + + final SeqGraph graph = rtgraph.convertToSequenceGraph(); + final KBestPaths pathFinder = new KBestPaths<>(false); + Assert.assertEquals(pathFinder.getKBestPaths(graph, length, graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex()).size(), 1); + } + + +// TODO -- update to use determineKmerSizeAndNonUniques directly // @DataProvider(name = "KmerSizeData") // public Object[][] makeKmerSizeDataProvider() { // List tests = new ArrayList(); From c0e3874db095836e389d4ff7c277fb778a54c7b8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 3 Jun 2013 14:34:29 -0400 Subject: [PATCH 080/116] Change the HC's phredScaledGlobalReadMismappingRate from 60 to 45, because Ryan and Mark told me to. 
--- .../sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index a41b68e2c..24fd5901f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -336,7 +336,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In */ @Advanced @Argument(fullName="phredScaledGlobalReadMismappingRate", shortName="globalMAPQ", doc="The global assumed mismapping rate for reads", required = false) - protected int phredScaledGlobalReadMismappingRate = 60; + protected int phredScaledGlobalReadMismappingRate = 45; @Advanced @Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false) From c0030f3f2dd36d29d70b1fa06daf2e399e99169a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 4 Jun 2013 09:26:50 -0400 Subject: [PATCH 081/116] We no longer subset down to the best N haplotypes for the GL calculation. I explain in comments within the code that this was causing problems with the marginalization over events. 
--- .../haplotypecaller/HaplotypeCaller.java | 26 +++++-------------- .../haplotypecaller/LocalAssemblyEngine.java | 2 +- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 24fd5901f..3e411ae33 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -694,11 +694,14 @@ public class HaplotypeCaller extends ActiveRegionWalker, In //logger.info("Computing read likelihoods with " + assemblyResult.regionForGenotyping.size() + " reads"); final Map stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( assemblyResult.haplotypes, splitReadsBySample( assemblyResult.regionForGenotyping.getReads() ) ); - // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final List bestHaplotypes = selectBestHaplotypesForGenotyping(assemblyResult.haplotypes, stratifiedReadMap); + // Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there + // was a bad interaction between that selection and the marginalization that happens over each event when computing + // GLs. In particular, for samples that are heterozygous non-reference (B/C) the marginalization for B treats the + // haplotype containing C as reference (and vice versa). Now this is fine if all possible haplotypes are included + // in the genotyping, but we lose information if we select down to a few haplotypes. 
[EB] final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine, - bestHaplotypes, + assemblyResult.haplotypes, stratifiedReadMap, perSampleFilteredReadList, assemblyResult.fullReferenceWithPadding, @@ -711,7 +714,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // TODO -- must disable if we are doing NCT, or set the output type of ! presorted if ( bamWriter != null ) { haplotypeBAMWriter.writeReadsAlignedToHaplotypes(assemblyResult.haplotypes, assemblyResult.paddedReferenceLoc, - bestHaplotypes, + assemblyResult.haplotypes, calledHaplotypes.getCalledHaplotypes(), stratifiedReadMap); } @@ -863,21 +866,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In return new AssemblyResult(trimmedHaplotypes, trimmedActiveRegion, fullReferenceWithPadding, paddedReferenceLoc, true); } - /** - * Select the best N haplotypes according to their likelihoods, if appropriate - * - * @param haplotypes a list of haplotypes to consider - * @param stratifiedReadMap a map from samples -> read likelihoods - * @return the list of haplotypes to genotype - */ - protected List selectBestHaplotypesForGenotyping(final List haplotypes, final Map stratifiedReadMap) { - if ( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - return haplotypes; - } else { - return likelihoodCalculationEngine.selectBestHaplotypesFromEachSample(haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation); - } - } - //--------------------------------------------------------------------------------------------------------------- // // reduce diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 1a5f34bc3..2a74e9dd0 100644 --- 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -215,7 +215,7 @@ public abstract class LocalAssemblyEngine { returnHaplotypes.add(h); if ( debug ) - logger.info("Adding haplotype " + h.getCigar() + " from debruijn graph with kmer " + graph.getKmerSize()); + logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + graph.getKmerSize()); } } } From 55d5f2194cd4b75f2a2f2e03c1057daa67fe6ade Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 4 Jun 2013 14:25:26 -0400 Subject: [PATCH 082/116] Read Error Corrector for haplotype assembly Principle is simple: when coverage is deep enough, any single-base read error will look like a rare k-mer but correct sequence will be supported by many reads, so correct sequences will look like common k-mers. So, algorithm has 3 main steps: 1. K-mer graph buildup. For each read in an active region, a map from k-mers to the number of times they have been seen is built. 2. Building correction map. All "rare" k-mers that are sparse (by default, seen only once), get mapped to k-mers that are good (by default, seen at least 20 times but this is a CL argument), and that lie within a given Hamming distance (by default, =1). This map can be empty (i.e. k-mers can be uncorrectable). 3. Correction proposal For each constituent k-mer of each read, if this k-mer is rare and maps to a good k-mer, get differing base positions in k-mer and add these to a list of corrections for each base in each read. Then, correct read at positions where correction proposal is unanimous and non-empty. The algorithm defaults are chosen to be very stringent and conservative in the correction: we only try to correct singleton k-mers, we only look for good k-mers lying at Hamming distance = 1 from them, and we only correct a base in read if all correction proposals are congruent.
By default, algorithm is disabled but can be enabled in HaplotypeCaller via the -errorCorrectReads CL option. However, at this point it's about 3x-10x more expensive so it needs to be optimized if it's to be used. --- .../haplotypecaller/HaplotypeCaller.java | 30 +- .../gatk/walkers/haplotypecaller/Kmer.java | 52 ++ .../haplotypecaller/LocalAssemblyEngine.java | 22 +- .../haplotypecaller/ReadErrorCorrector.java | 526 ++++++++++++++++++ .../walkers/haplotypecaller/KmerUnitTest.java | 41 +- .../LocalAssemblyEngineUnitTest.java | 2 +- .../ReadErrorCorrectorUnitTest.java | 190 +++++++ 7 files changed, 855 insertions(+), 8 deletions(-) create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadErrorCorrector.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadErrorCorrectorUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index e55413649..680ae06e1 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -396,6 +396,20 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="allowCyclesInKmerGraphToGeneratePaths", shortName="allowCyclesInKmerGraphToGeneratePaths", doc="If specified, we will allow cycles in the kmer graphs to generate paths with multiple copies of the path sequenece rather than just the shortest paths", required = false) protected boolean allowCyclesInKmerGraphToGeneratePaths = false; + // Parameters to control read error correction + @Hidden + @Argument(fullName="errorCorrectReads", shortName="errorCorrectReads", doc = "Use an exploratory algorithm to error correct the kmers used during assembly.
May cause fundamental problems with the assembly graph itself", required=false) + protected boolean errorCorrectReads = false; + + @Hidden + @Argument(fullName="kmerLengthForReadErrorCorrection", shortName="kmerLengthForReadErrorCorrection", doc = "Use an exploratory algorithm to error correct the kmers used during assembly. May cause fundamental problems with the assembly graph itself", required=false) + protected int kmerLengthForReadErrorCorrection = 25; + + @Hidden + @Argument(fullName="minObservationsForKmerToBeSolid", shortName="minObservationsForKmerToBeSolid", doc = "A k-mer must be seen at least these times for it considered to be solid", required=false) + protected int minObservationsForKmerToBeSolid = 20; + + // ----------------------------------------------------------------------------------------------- // done with Haplotype caller parameters // ----------------------------------------------------------------------------------------------- @@ -437,6 +451,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // bases with quality less than or equal to this value are trimmed off the tails of the reads private static final byte MIN_TAIL_QUALITY = 20; + private static final byte MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION = 6; // the minimum length of a read we'd consider using for genotyping private final static int MIN_READ_LENGTH = 10; @@ -754,8 +769,13 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion); final Haplotype referenceHaplotype = createReferenceHaplotype(activeRegion, paddedReferenceLoc); + // Create ReadErrorCorrector object if requested - will be used within assembly engine. 
+ ReadErrorCorrector readErrorCorrector = null; + if (errorCorrectReads) + readErrorCorrector = new ReadErrorCorrector(kmerLengthForReadErrorCorrection, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION, minObservationsForKmerToBeSolid, DEBUG,fullReferenceWithPadding); + try { - final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype ); + final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype,readErrorCorrector ); if ( ! dontTrimActiveRegions ) { return trimActiveRegion(activeRegion, haplotypes, activeAllelesToGenotype, fullReferenceWithPadding, paddedReferenceLoc); } else { @@ -922,7 +942,13 @@ public class HaplotypeCaller extends ActiveRegionWalker, In for( final GATKSAMRecord myRead : finalizedReadList ) { final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) ); if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { - GATKSAMRecord clippedRead = useLowQualityBasesForAssembly ? 
postAdapterRead : ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY ); + GATKSAMRecord clippedRead; + if (errorCorrectReads) + clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION ); + else if (useLowQualityBasesForAssembly) + clippedRead = postAdapterRead; + else // default case: clip low qual ends of reads + clippedRead= ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY ); if ( dontUseSoftClippedBases ) { // uncomment to remove hard clips from consideration at all diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Kmer.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Kmer.java index 745d4de06..2e757722b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Kmer.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Kmer.java @@ -46,7 +46,11 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; +import com.google.java.contract.Requires; + import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; /** * Fast wrapper for byte[] kmers @@ -149,6 +153,15 @@ public class Kmer { return bases; } + /** + * Backdoor method for fast base peeking: avoids copying like bases() and doesn't modify internal state. + * Intended to be used for fast computation of neighboring kmers + * @return Reference to complete bases stores in this kmer + * WARNING: UNSAFE, caller should NEVER modify bases. Speed/safety tradeoff!! + */ + private byte[] unsafePeekAtBases() { + return bases; + } /** * Get a string representation of the bases of this kmer * @return a non-null string @@ -165,6 +178,45 @@ public class Kmer { return length; } + /** + * Gets a set of differing positions and bases from another k-mer, limiting up to a max distance. 
+ * For example, if this = "ACATT" and other = "ACGGT": + * - if maxDistance < 2 then -1 will be returned, since distance between kmers is 2. + * - If maxDistance >=2, then 2 will be returned, and arrays will be filled as follows: + * differingIndeces = {2,3} + * differingBases = {'G','G'} + * @param other Other k-mer to test + * @param maxDistance Maximum distance to search. If this and other k-mers are beyond this Hamming distance, + * search is aborted and a null is returned + * @param differingIndeces Array with indices of differing bytes in array + * @param differingBases Actual differing bases + * @return Set of mappings of form (int->byte), where each elements represents index + * of k-mer array where bases mismatch, and the byte is the base from other kmer. + * If both k-mers differ by more than maxDistance, returns null + */ + @Requires({"other != null","differingIndeces != null","differingBases != null", + "differingIndeces.size>=maxDistance","differingBases.size>=maxDistance"}) + public int getDifferingPositions(final Kmer other, + final int maxDistance, + final int[] differingIndeces, + final byte[] differingBases) { + + + int dist = 0; + if (length == other.length()) { + final byte[] f2 = other.unsafePeekAtBases(); + for (int i=0; i < length; i++) + if(bases[start+i] != f2[i]) { + differingIndeces[dist] = i; + differingBases[dist++] = f2[i]; + if (dist > maxDistance) + return -1; + } + + } + return dist; + } + @Override public String toString() { return "Kmer{" + new String(bases()) + "}"; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 1a5f34bc3..9f2197a84 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -128,9 +128,15 
@@ public abstract class LocalAssemblyEngine { * @param fullReferenceWithPadding byte array holding the reference sequence with padding * @param refLoc GenomeLoc object corresponding to the reference sequence with padding * @param activeAllelesToGenotype the alleles to inject into the haplotypes during GGA mode + * @param readErrorCorrector a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used. * @return a non-empty list of all the haplotypes that are produced during assembly */ - public List runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, List activeAllelesToGenotype) { + public List runLocalAssembly(final ActiveRegion activeRegion, + final Haplotype refHaplotype, + final byte[] fullReferenceWithPadding, + final GenomeLoc refLoc, + final List activeAllelesToGenotype, + final ReadErrorCorrector readErrorCorrector) { if( activeRegion == null ) { throw new IllegalArgumentException("Assembly engine cannot be used with a null ActiveRegion."); } if( refHaplotype == null ) { throw new IllegalArgumentException("Reference haplotype cannot be null."); } if( fullReferenceWithPadding.length != refLoc.size() ) { throw new IllegalArgumentException("Reference bases and reference loc must be the same size."); } @@ -139,8 +145,20 @@ public abstract class LocalAssemblyEngine { // create the list of artificial haplotypes that should be added to the graph for GGA mode final List activeAlleleHaplotypes = createActiveAlleleHaplotypes(refHaplotype, activeAllelesToGenotype, activeRegion.getExtendedLoc()); + + // error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them + final List correctedReads; + if (readErrorCorrector != null) { + // now correct all reads in active region after filtering/downsampling + // Note that original reads in active region are NOT modified by default, since they 
will be used later for GL computation, + // and we only want the read-error corrected reads for graph building. + readErrorCorrector.addReadsToKmers(activeRegion.getReads()); + correctedReads = new ArrayList<>(readErrorCorrector.correctReads(activeRegion.getReads())); + } + else correctedReads = activeRegion.getReads(); + // create the graphs by calling our subclass assemble method - final List graphs = assemble(activeRegion.getReads(), refHaplotype, activeAlleleHaplotypes); + final List graphs = assemble(correctedReads, refHaplotype, activeAlleleHaplotypes); // do some QC on the graphs for ( final SeqGraph graph : graphs ) { sanityCheckGraph(graph, refHaplotype); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadErrorCorrector.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadErrorCorrector.java new file mode 100644 index 000000000..e1471ab33 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadErrorCorrector.java @@ -0,0 +1,526 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. 
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. 
NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import com.google.java.contract.Requires; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.clipping.ReadClipper; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.util.*; + +/** + * Utility class that error-corrects reads. + * Main idea: An error in a read will appear as a bubble in a k-mer (de Bruijn) graph and such bubble will have very low multiplicity. + * Hence, read errors will appear as "sparse" kmers with very little support. + * Historically, the most common approach to error-correct reads before assembly has been to first compute the kmer spectrum of the reads, + * defined as the kmer composition of a set of reads along with the multiplicity of each kmer. + * First-generation correctors like the Euler corrector (Pevzner 2001) mapped low frequency kmers (kmers appearing say below N times) + * into high frequency ones that lied within a certain Hamming or edit distance. + * This is doable, but has some drawbacks: + * - Kmers used for error correction become tied to kmers used for graph building. + * - Hence, large kmers (desireable for graph building because they can resolve repeats better) are a hindrance for error correction, + * because they are seen less often. + * - After error correction, there is no guarantee that a sequence of kmers corresponds to an "actual" read. + * + * An error-corrected set of reads also makes a much smoother graph without the need to resolving so many bubbles. + * + * Idea hence is to correct reads based on their kmer content, but in a context independent from graph building. + * In order to do this, the following steps are taken: + * - The k-mer spectrum of a set of reads in computed. 
However, we are at freedom to choose the most convenient k-mer size (typicially around + * read length /2). + * - We partition the set of observed k-mers into "solid" kmers which have multiplicity > M, and "insolid" ones otherwise (Pevzner 2001). + * + * - Main idea of the algorithm is to try to substitute a sequence of bases in a read by a sequence better supported by kmers. + * - For each "unsolid" kmer observed in reads, we try to find a "solid" kmer within a maximum Hamming distance. + * - If such solid kmer exists, then this unsolid kmer is "correctable", otherwise, uncorrectable. + * - For each read, then: + * -- Walk through read and visit all kmers. + * -- If kmer is solid, continue to next kmer. + * -- If not, and if it's correctable (i.e. there exists a mapping from an unsolid kmer to a solid kmer within a given Hamming distance), + * add the bases and offsets corresponding to differing positions between unsolid and solid kmer to correction list. + * -- At the end, each base in read will have a list of corrections associated with it. We can then choose to correct or not. + * If read has only consistent corrections, then we can correct base to common base in corrections. + * + * TODO: + * todo Q: WHAT QUALITY TO USE?? + * todo how do we deal with mate pairs? 
+ * + * + + + */ +public class ReadErrorCorrector { + private final static Logger logger = Logger.getLogger(ReadErrorCorrector.class); + /** + * A map of for each kmer to its num occurrences in addKmers + */ + KMerCounter countsByKMer; + + Map kmerCorrectionMap = new HashMap<>(); + Map> kmerDifferingBases = new HashMap<>(); + private final int kmerLength; + private final boolean debug; + private final boolean trimLowQualityBases; + private final byte minTailQuality; + private final int maxMismatchesToCorrect; + private final byte qualityOfCorrectedBases; + private final int maxObservationsForKmerToBeCorrectable; + private final int maxHomopolymerLengthInRegion; + private final int minObservationsForKmerToBeSolid; + + // default values, for debugging + private final static boolean doInplaceErrorCorrection = false; // currently not used, since we want corrected reads to be used only for assembly + private final static int MAX_MISMATCHES_TO_CORRECT = 2; + private final static byte QUALITY_OF_CORRECTED_BASES = 30; // what's a reasonable value here? 
+ private final static int MAX_OBSERVATIONS_FOR_KMER_TO_BE_CORRECTABLE = 1; + private final static boolean TRIM_LOW_QUAL_TAILS = false; + private final static boolean DONT_CORRECT_IN_LONG_HOMOPOLYMERS = false; + private final static int MAX_HOMOPOLYMER_THRESHOLD = 12; + + // debug counter structure + private final ReadErrorCorrectionStats readErrorCorrectionStats = new ReadErrorCorrectionStats(); + + /** + * Create a new kmer corrector + * + * @param kmerLength the length of kmers we'll be counting to error correct, must be >= 1 + * @param maxMismatchesToCorrect e >= 0 + * @param qualityOfCorrectedBases Bases to be corrected will be assigned this quality + */ + public ReadErrorCorrector(final int kmerLength, + final int maxMismatchesToCorrect, + final int maxObservationsForKmerToBeCorrectable, + final byte qualityOfCorrectedBases, + final int minObservationsForKmerToBeSolid, + final boolean trimLowQualityBases, + final byte minTailQuality, + final boolean debug, + final byte[] fullReferenceWithPadding) { + if ( kmerLength < 1 ) throw new IllegalArgumentException("kmerLength must be > 0 but got " + kmerLength); + if ( maxMismatchesToCorrect < 1 ) + throw new IllegalArgumentException("maxMismatchesToCorrect must be >= 1 but got " + maxMismatchesToCorrect); + if ( qualityOfCorrectedBases < 2 || qualityOfCorrectedBases > QualityUtils.MAX_REASONABLE_Q_SCORE) + throw new IllegalArgumentException("qualityOfCorrectedBases must be >= 2 and <= MAX_REASONABLE_Q_SCORE but got " + qualityOfCorrectedBases); + + countsByKMer = new KMerCounter(kmerLength); + this.kmerLength = kmerLength; + this.maxMismatchesToCorrect = maxMismatchesToCorrect; + this.qualityOfCorrectedBases = qualityOfCorrectedBases; + this.minObservationsForKmerToBeSolid = minObservationsForKmerToBeSolid; + this.trimLowQualityBases = trimLowQualityBases; + this.minTailQuality = minTailQuality; + this.debug = debug; + this.maxObservationsForKmerToBeCorrectable = maxObservationsForKmerToBeCorrectable; + + // when 
region has long homopolymers, we may want not to correct reads, since assessment is complicated, + // so we may decide to skip error correction in these regions + maxHomopolymerLengthInRegion = computeMaxHLen(fullReferenceWithPadding); + } + + /** + * Simple constructor with sensible defaults + * @param kmerLength K-mer length for error correction (not necessarily the same as for assembly graph) + * @param minTailQuality Minimum tail quality: remaining bases with Q's below this value are hard-clipped after correction + * @param debug Output debug information + */ + public ReadErrorCorrector(final int kmerLength, final byte minTailQuality, final int minObservationsForKmerToBeSolid, final boolean debug,final byte[] fullReferenceWithPadding) { + this(kmerLength, MAX_MISMATCHES_TO_CORRECT, MAX_OBSERVATIONS_FOR_KMER_TO_BE_CORRECTABLE, QUALITY_OF_CORRECTED_BASES, minObservationsForKmerToBeSolid, TRIM_LOW_QUAL_TAILS, minTailQuality, debug,fullReferenceWithPadding); + } + + /** + * Main entry routine to add all kmers in a read to the read map counter + * @param read Read to add bases + */ + @Requires("read != null") + protected void addReadKmers(final GATKSAMRecord read) { + if (DONT_CORRECT_IN_LONG_HOMOPOLYMERS && maxHomopolymerLengthInRegion > MAX_HOMOPOLYMER_THRESHOLD) + return; + + final byte[] readBases = read.getReadBases(); + for (int offset = 0; offset <= readBases.length-kmerLength; offset++ ) { + countsByKMer.addKmer(new Kmer(readBases,offset,kmerLength),1); + + } + } + + /** + * Correct a collection of reads based on stored k-mer counts + * @param reads + */ + public final List correctReads(final Collection reads) { + + final List correctedReads = new ArrayList<>(reads.size()); + if (DONT_CORRECT_IN_LONG_HOMOPOLYMERS && maxHomopolymerLengthInRegion > MAX_HOMOPOLYMER_THRESHOLD) { + // just copy reads into output and exit + correctedReads.addAll(reads); + } + else { + computeKmerCorrectionMap(); + for (final GATKSAMRecord read: reads) { + final GATKSAMRecord 
correctedRead = correctRead(read); + if (trimLowQualityBases) + correctedReads.add(ReadClipper.hardClipLowQualEnds(correctedRead, minTailQuality)); + else + correctedReads.add(correctedRead); + } + if (debug) { + logger.info("Number of corrected bases:"+readErrorCorrectionStats.numBasesCorrected); + logger.info("Number of corrected reads:"+readErrorCorrectionStats.numReadsCorrected); + logger.info("Number of skipped reads:"+readErrorCorrectionStats.numReadsUncorrected); + logger.info("Number of solid kmers:"+readErrorCorrectionStats.numSolidKmers); + logger.info("Number of corrected kmers:"+readErrorCorrectionStats.numCorrectedKmers); + logger.info("Number of uncorrectable kmers:"+readErrorCorrectionStats.numUncorrectableKmers); + } + } + return correctedReads; + } + + + /** + * Do actual read correction based on k-mer map. First, loop through stored k-mers to get a list of possible corrections + * for each position in the read. Then correct read based on all possible consistent corrections. + * @param inputRead Read to correct + * @return Corrected read (can be same reference as input if doInplaceErrorCorrection is set) + */ + @Requires("inputRead != null") + private GATKSAMRecord correctRead(final GATKSAMRecord inputRead) { + // no support for reduced reads (which shouldn't need to be error-corrected anyway!) 
+ if (inputRead.isReducedRead()) + return inputRead; + + // do actual correction + boolean corrected = false; + final byte[] correctedBases = inputRead.getReadBases(); + final byte[] correctedQuals = inputRead.getBaseQualities(); + + // array to store list of possible corrections for read + final CorrectionSet correctionSet = buildCorrectionMap(correctedBases); + + for (int offset = 0; offset < correctedBases.length; offset++) { + final Byte b = correctionSet.getConsensusCorrection(offset); + if (b != null && b != correctedBases[offset]) { + correctedBases[offset] = b; + correctedQuals[offset] = qualityOfCorrectedBases; + corrected = true; + } + readErrorCorrectionStats.numBasesCorrected++; + } + + if (corrected) { + readErrorCorrectionStats.numReadsCorrected++; + if (doInplaceErrorCorrection) { + inputRead.setReadBases(correctedBases); + inputRead.setBaseQualities(correctedQuals); + return inputRead; + } + else { + GATKSAMRecord correctedRead = new GATKSAMRecord(inputRead); + + // do the actual correction + // todo - do we need to clone anything else from read? + correctedRead.setBaseQualities(inputRead.getBaseQualities()); + correctedRead.setIsStrandless(inputRead.isStrandless()); + correctedRead.setReadBases(inputRead.getReadBases()); + correctedRead.setReadString(inputRead.getReadString()); + correctedRead.setReadGroup(inputRead.getReadGroup()); + return correctedRead; + } + } + else { + readErrorCorrectionStats.numReadsUncorrected++; + return inputRead; + } + } + + /** + * Build correction map for each of the bases in read. + * For each of the constituent kmers in read: + * a) See whether the kmer has been mapped to a corrected kmer. + * b) If so, get list of differing positions and corresponding bases. + * c) Add then list of new bases to index in correction list. + * Correction list is of read size, and holds a list of bases to correct. + * @param correctedBases Bases to attempt to correct + * @return CorrectionSet object. 
+ */ + @Requires("correctedBases != null") + private CorrectionSet buildCorrectionMap(final byte[] correctedBases) { + // array to store list of possible corrections for read + final CorrectionSet correctionSet = new CorrectionSet(correctedBases.length); + + for (int offset = 0; offset <= correctedBases.length-kmerLength; offset++ ) { + final Kmer kmer = new Kmer(correctedBases,offset,kmerLength); + final Kmer newKmer = kmerCorrectionMap.get(kmer); + if (newKmer != null && !newKmer.equals(kmer)){ + final Pair differingPositions = kmerDifferingBases.get(kmer); + final int[] differingIndeces = differingPositions.first; + final byte[] differingBases = differingPositions.second; + + for (int k=0; k < differingIndeces.length; k++) { + // get list of differing positions for corrected kmer + // for each of these, add correction candidate to correction set + correctionSet.add(offset + differingIndeces[k],differingBases[k]); + } + } + } + return correctionSet; + } + + + /** + * Top-level entry point that adds a collection of reads to our kmer list. + * For each read in list, its constituent kmers will be logged in our kmer table. + * @param reads + */ + @Requires("reads != null") + public void addReadsToKmers(final Collection reads) { + for (final GATKSAMRecord read: reads) + addReadKmers(read); + + if (debug) + for ( final KMerCounter.CountedKmer countedKmer: countsByKMer.getCountedKmers() ) + logger.info(String.format("%s\t%d\n", countedKmer.kmer, countedKmer.count)); + } + + + /** + * For each kmer we've seen, do the following: + * a) If kmer count > threshold1, this kmer is good, so correction map will be to itself. + * b) If kmer count <= threshold2, this kmer is bad. + * In that case, loop through all other kmers. If kmer is good, compute distance, and get minimal distance. + * If such distance is < some threshold, map to this kmer, and record differing positions and bases. 
+ * + */ + private void computeKmerCorrectionMap() { + for (final KMerCounter.CountedKmer storedKmer : countsByKMer.getCountedKmers()) { + if (storedKmer.getCount() >= minObservationsForKmerToBeSolid) { + // this kmer is good: map to itself + kmerCorrectionMap.put(storedKmer.getKmer(),storedKmer.getKmer()); + kmerDifferingBases.put(storedKmer.getKmer(),new Pair<>(new int[0],new byte[0])); // dummy empty array + readErrorCorrectionStats.numSolidKmers++; + } + else if (storedKmer.getCount() <= maxObservationsForKmerToBeCorrectable) { + // loop now thru all other kmers to find nearest neighbor + final Pair> nearestNeighbor = findNearestNeighbor(storedKmer.getKmer(),countsByKMer,maxMismatchesToCorrect); + + // check if nearest neighbor lies in a close vicinity. If so, log the new bases and the correction map + if (nearestNeighbor != null) { // ok, found close neighbor + kmerCorrectionMap.put(storedKmer.getKmer(), nearestNeighbor.first); + kmerDifferingBases.put(storedKmer.getKmer(), nearestNeighbor.second); + readErrorCorrectionStats.numCorrectedKmers++; +// if (debug) +// logger.info("Original kmer:"+storedKmer + "\tCorrected kmer:"+nearestNeighbor.first+"\tDistance:"+dist); + } + else + readErrorCorrectionStats.numUncorrectableKmers++; + + } + } + } + + /** + * Finds nearest neighbor of a given k-mer, among a list of counted K-mers, up to a given distance. + * If many k-mers share same closest distance, an arbitrary k-mer is picked + * @param kmer K-mer of interest + * @param countsByKMer KMerCounter storing set of counted k-mers (may include kmer of interest) + * @param maxDistance Maximum distance to search + * @return Pair of values: closest K-mer in Hamming distance and list of differing bases. 
+ * If no neighbor can be found up to given distance, returns null + */ + @Requires({"kmer != null", "countsByKMer != null","maxDistance >= 1"}) + private Pair> findNearestNeighbor(final Kmer kmer, + final KMerCounter countsByKMer, + final int maxDistance) { + int minimumDistance = Integer.MAX_VALUE; + Kmer closestKmer = null; + + final int[] differingIndeces = new int[maxDistance+1]; + final byte[] differingBases = new byte[maxDistance+1]; + + final int[] closestDifferingIndices = new int[maxDistance+1]; + final byte[] closestDifferingBases = new byte[maxDistance+1]; + + for (final KMerCounter.CountedKmer candidateKmer : countsByKMer.getCountedKmers()) { + // skip if candidate set includes test kmer + if (candidateKmer.getKmer().equals(kmer)) + continue; + + final int hammingDistance = kmer.getDifferingPositions(candidateKmer.getKmer(), maxDistance, differingIndeces, differingBases); + if (hammingDistance < 0) // can't compare kmer? skip + continue; + + if (hammingDistance < minimumDistance) { + minimumDistance = hammingDistance; + closestKmer = candidateKmer.getKmer(); + System.arraycopy(differingBases,0,closestDifferingBases,0,differingBases.length); + System.arraycopy(differingIndeces,0,closestDifferingIndices,0,differingIndeces.length); + } + } + return new Pair<>(closestKmer, new Pair<>(closestDifferingIndices,closestDifferingBases)); + } + + + /** + * experimental function to compute max homopolymer length in a given reference context + * @param fullReferenceWithPadding Reference context of interest + * @return Max homopolymer length in region + */ + @Requires("fullReferenceWithPadding != null") + private static int computeMaxHLen(final byte[] fullReferenceWithPadding) { + + int leftRun = 1; + int maxRun = 1; + for ( int i = 1; i < fullReferenceWithPadding.length; i++) { + if ( fullReferenceWithPadding[i] == fullReferenceWithPadding[i-1] ) + leftRun++; + else + leftRun = 1; + } + if (leftRun > maxRun) + maxRun = leftRun; + + + return maxRun; + } + + private 
static final class ReadErrorCorrectionStats { + public int numReadsCorrected; + public int numReadsUncorrected; + public int numBasesCorrected; + public int numSolidKmers; + public int numUncorrectableKmers; + public int numCorrectedKmers; + } + + /** + * Wrapper utility class that holds, for each position in read, a list of bytes representing candidate corrections. + * So, a read ACAGT where the middle A has found to be errorful might look like: + * 0: {} + * 1: {} + * 2: {'C','C','C'} + * 3: {} + * 4: {} + * + * It's up to the method getConsensusCorrection() to decide how to use the correction sets for each position. + * By default, only strict consensus is allowed right now. + * + */ + protected static class CorrectionSet { + private final int size; + private ArrayList> corrections; + + /** + * Main class constructor. + * @param size Size of correction set, needs to be set equal to the read being corrected + */ + public CorrectionSet(final int size) { + this.size = size; + corrections = new ArrayList<>(size); + for (int k=0; k < size; k++) + corrections.add(k,new ArrayList()); + } + + /** + * Add a base to this correction set at a particular offset, measured from the start of the read + * @param offset Offset from start of read + * @param base base to be added to list of corrections at this offset + */ + public void add(final int offset, final byte base) { + if (offset >= size || offset < 0) + throw new IllegalStateException("Bad entry into CorrectionSet: offset > size"); + if (!BaseUtils.isRegularBase(base)) + return; // no irregular base correction + + final List storedBytes = corrections.get(offset); + storedBytes.add(base); + } + + /** + * Get list of corrections for a particular offset + * @param offset Offset of interest + * @return List of bases representing possible corrections at this offset + */ + public List get(final int offset) { + if (offset >= size || offset < 0) + throw new IllegalArgumentException("Illegal call of CorrectionSet.get(): offset 
must be < size"); + return corrections.get(offset); + } + + /** + * Get consensus correction for a particular offset. In this implementation, it just boils down to seeing if + * byte list associated with offset has identical values. If so, return this base, otherwise return null. + * @param offset + * @return Consensus base, or null if no consensus possible. + */ + public Byte getConsensusCorrection(final int offset) { + if (offset >= size || offset < 0) + throw new IllegalArgumentException("Illegal call of CorrectionSet.getConsensusCorrection(): offset must be < size"); + final List storedBytes = corrections.get(offset); + if (storedBytes.isEmpty()) + return null; + + // todo - is there a cheaper/nicer way to compare if all elements in list are identical?? + final byte lastBase = storedBytes.remove(storedBytes.size()-1); + for (final Byte b: storedBytes) { + // strict correction rule: all bases must match + if (b != lastBase) + return null; + } + + // all bytes then are equal: + return lastBase; + + } + + + + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KmerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KmerUnitTest.java index 989c38628..116c987a6 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KmerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KmerUnitTest.java @@ -47,13 +47,12 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.util.*; public class KmerUnitTest extends BaseTest { @DataProvider(name = "KMerCreationData") @@ -130,4 +129,40 @@ public class KmerUnitTest extends 
BaseTest { } } } + + @Test + public void testDifferingPositions() { + final String bases = "ACGTCAGACGTACGTTTGACGTCAGACGTACGT"; + final Kmer baseKmer = new Kmer(bases.getBytes()); + + + final int NUM_TEST_CASES = 30; + + for (int test = 0; test < NUM_TEST_CASES; test++) { + + final int numBasesToChange = test % bases.length(); + + // changes numBasesToChange bases - spread regularly through read string + final int step = (numBasesToChange > 0?Math.min(bases.length() / numBasesToChange,1) : 1); + + final byte[] newBases = bases.getBytes().clone(); + int actualChangedBases =0; // could be different from numBasesToChange due to roundoff + for (int idx=0; idx < numBasesToChange; idx+=step) { + // now change given positions + newBases[idx] = (newBases[idx] == (byte)'A'? (byte)'T':(byte)'A'); + actualChangedBases++; + } + + // compute changed positions + final int[] differingIndices = new int[newBases.length]; + final byte[] differingBases = new byte[newBases.length]; + final int numDiffs = baseKmer.getDifferingPositions(new Kmer(newBases),newBases.length,differingIndices,differingBases); + Assert.assertEquals(numDiffs,actualChangedBases); + for (int k=0; k < numDiffs; k++) { + final int idx = differingIndices[k]; + Assert.assertTrue(newBases[idx] != bases.getBytes()[idx]); + Assert.assertEquals(differingBases[idx],newBases[idx]); + } + } + } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java index 74361de1b..9f6013235 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java @@ -227,7 +227,7 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { activeRegion.addAll(reads); final LocalAssemblyEngine engine = 
createAssembler(assembler); // logger.warn("Assembling " + activeRegion + " with " + engine); - return engine.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.emptyList()); + return engine.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.emptyList(), null); } @DataProvider(name = "SimpleAssemblyTestData") diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadErrorCorrectorUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadErrorCorrectorUnitTest.java new file mode 100644 index 000000000..e201b24fc --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadErrorCorrectorUnitTest.java @@ -0,0 +1,190 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. 
LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. 
The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.haplotypecaller; + +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class ReadErrorCorrectorUnitTest { + private static final boolean debug = true; + final String refChunk = "GCATAAACATGGCTCACTGC"; + final String refChunkHard = "AGCCTTGAACTCCTGGGCTCAAGTGATCCTCCTGCCTCAGTTTCCCATGTAGCTGGGACCACAGGTGGGGGCTCCACCCCTGGCTGATTTTTTTTTTTTTTTTTTTTTGAGATAGGGT"; + + @Test + public void TestBasicCorrectionSet() { + + final byte[] trueBases = refChunk.getBytes(); + final int numCorrections = 50; + final ReadErrorCorrector.CorrectionSet correctionSet = new ReadErrorCorrector.CorrectionSet(trueBases.length); + + int offset = 2; + for (int k=0; k < numCorrections; k++) { + // introduce one correction at a random offset in array. To make testing easier, we will replicate corrrection + final byte base = trueBases[offset]; + correctionSet.add(offset, base); + // skip to some other offset + offset += 7; + if (offset >= trueBases.length) + offset -= trueBases.length; + } + + for (int k=0; k < trueBases.length; k++) { + final byte corr = correctionSet.getConsensusCorrection(k); + Assert.assertEquals(corr, trueBases[k]); + } + } + + @Test + public void TestExtendedCorrectionSet() { + + final byte[] trueBases = refChunk.getBytes(); + final int numCorrections = 50; + final ReadErrorCorrector.CorrectionSet correctionSet = new ReadErrorCorrector.CorrectionSet(trueBases.length); + + for (int offset=0; offset < trueBases.length; offset++) { + // insert k corrections at offset k and make sure we get exactly k bases back + for (int k=0; k < offset; k++) + correctionSet.add(offset,trueBases[offset]); + + } + + for (int offset=0; offset < trueBases.length; offset++) { + 
Assert.assertEquals(correctionSet.get(offset).size(),offset); + } + } + + @Test + public void TestAddReadsToKmers() { + final int NUM_GOOD_READS = 500; + + final String bases = "AAAAAAAAAAAAAAA"; + final int READ_LENGTH = bases.length(); + final int kmerLengthForReadErrorCorrection = READ_LENGTH; + final List finalizedReadList = new ArrayList(NUM_GOOD_READS); + int offset = 0; + final byte[] quals = new byte[READ_LENGTH]; + + Arrays.fill(quals,(byte)30); + + for (int k=0; k < NUM_GOOD_READS; k++) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases.getBytes(), quals,READ_LENGTH+"M"); + finalizedReadList.add(read); + } + + ReadErrorCorrector readErrorCorrector = new ReadErrorCorrector(kmerLengthForReadErrorCorrection,(byte)6,10, debug,refChunkHard.getBytes()); + readErrorCorrector.addReadsToKmers(finalizedReadList); + + // special trivial case: kmer length is equal to read length. + // K-mer counter should hold then exactly one kmer + Assert.assertEquals(readErrorCorrector.countsByKMer.getCountedKmers().size(), 1); + for (final KMerCounter.CountedKmer kmer : readErrorCorrector.countsByKMer.getCountedKmers()) { + Assert.assertTrue(Arrays.equals( kmer.getKmer().bases(),bases.getBytes())); + Assert.assertEquals(kmer.getCount(),NUM_GOOD_READS); + } + + // special case 2: kmers are all the same but length < read length. 
+ // Each kmer is added then readLength-kmerLength+1 times + final int KMER_LENGTH = 10; + readErrorCorrector = new ReadErrorCorrector(KMER_LENGTH,(byte)6,10, debug,refChunkHard.getBytes()); + readErrorCorrector.addReadsToKmers(finalizedReadList); + Assert.assertEquals(readErrorCorrector.countsByKMer.getCountedKmers().size(), 1); + for (final KMerCounter.CountedKmer kmer : readErrorCorrector.countsByKMer.getCountedKmers()) { + Assert.assertEquals(kmer.getCount(),NUM_GOOD_READS*(READ_LENGTH-KMER_LENGTH+1)); + } + + } + @Test + public void TestBasicErrorCorrection() { + final int NUM_GOOD_READS = 500; + final int NUM_BAD_READS = 10; + final int READ_LENGTH = 15; + final int kmerLengthForReadErrorCorrection = 10; + final List finalizedReadList = new ArrayList(NUM_GOOD_READS); + int offset = 0; + final byte[] quals = new byte[READ_LENGTH]; + + Arrays.fill(quals,(byte)30); + + for (int k=0; k < NUM_GOOD_READS; k++) { + final byte[] bases = Arrays.copyOfRange(refChunk.getBytes(),offset,offset+READ_LENGTH); + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals,READ_LENGTH+"M"); + finalizedReadList.add(read); + offset++; + if (offset >= refChunk.length()-READ_LENGTH) + offset = 0; + } + offset = 2; + // coverage profile is now perfectly triangular with "good" bases. Inject now bad bases with errors in them. 
+ for (int k=0; k < NUM_BAD_READS; k++) { + final byte[] bases = finalizedReadList.get(k).getReadBases().clone(); + bases[offset] = 'N'; + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, READ_LENGTH + "M"); + finalizedReadList.add(read); + offset += 7; + if (offset >= READ_LENGTH) + offset = 4; // just some randomly circulating offset for error position + } + + // now correct all reads + final ReadErrorCorrector readErrorCorrector = new ReadErrorCorrector(kmerLengthForReadErrorCorrection,(byte)6,10, debug,refChunkHard.getBytes()); + readErrorCorrector.addReadsToKmers(finalizedReadList); + readErrorCorrector.correctReads(finalizedReadList); + + // check that corrected reads have exactly same content as original reads + for (int k=0; k < NUM_BAD_READS; k++) { + final byte[] badBases = finalizedReadList.get(k).getReadBases(); + final byte[] originalBases = finalizedReadList.get(k).getReadBases(); + Assert.assertTrue(Arrays.equals(badBases,originalBases)); + } + } +} From dadcfe296dff64dae226aad1da57da2c512c3870 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 5 Jun 2013 14:26:23 -0400 Subject: [PATCH 083/116] Reworking of the dangling tails merging code. We now run Smith-Waterman on the dangling tail against the corresponding reference tail. If we can generate a reasonable, low entropy alignment then we trigger the merge to the reference path; otherwise we abort. Also, we put in a check for low-complexity of graphs and don't let those pass through. Added tests for this implementation that checks exact SW results and correct edges added. 
--- .../haplotypecaller/graphs/BaseGraph.java | 18 ++ .../haplotypecaller/graphs/GraphUtils.java | 10 +- .../walkers/haplotypecaller/graphs/Path.java | 3 +- .../readthreading/ReadThreadingAssembler.java | 28 ++- .../readthreading/ReadThreadingGraph.java | 207 ++++++++++++++---- .../graphs/BaseGraphUnitTest.java | 15 ++ .../ReadThreadingAssemblerUnitTest.java | 3 +- .../ReadThreadingGraphUnitTest.java | 76 +++++++ .../sting/utils/sam/AlignmentUtils.java | 17 ++ .../smithwaterman/SWPairwiseAlignment.java | 15 ++ .../utils/sam/AlignmentUtilsUnitTest.java | 7 + 11 files changed, 339 insertions(+), 60 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java index c963fb6e5..70ef539f3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java @@ -676,6 +676,24 @@ public class BaseGraph extends Default '}'; } + /** + * The base sequence for the given path. + * Note, this assumes that the path does not start with a source node. 
+ * + * @param path the list of vertexes that make up the path + * @return non-null sequence of bases corresponding to the given path + */ + @Ensures({"result != null"}) + public byte[] getBasesForPath(final List path) { + if ( path == null ) throw new IllegalArgumentException("Path cannot be null"); + + final StringBuffer sb = new StringBuffer(); + for ( final DeBruijnVertex v : path ) + sb.append((char)v.getSuffix()); + + return sb.toString().getBytes(); + } + /** * Get the set of vertices within distance edges of source, regardless of edge direction * diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtils.java index 4aa6047a9..73a1daa3e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/GraphUtils.java @@ -171,7 +171,15 @@ final public class GraphUtils { return foundDup ? 
null : new PrimitivePair.Int(longestPos, length); } - private static int longestSuffixMatch(final byte[] seq, final byte[] kmer, final int seqStart) { + /** + * calculates the longest suffix match between a sequence and a smaller kmer + * + * @param seq the (reference) sequence + * @param kmer the smaller kmer sequence + * @param seqStart the index (inclusive) on seq to start looking backwards from + * @return the longest matching suffix + */ + public static int longestSuffixMatch(final byte[] seq, final byte[] kmer, final int seqStart) { for ( int len = 1; len <= kmer.length; len++ ) { final int seqI = seqStart - len + 1; final int kmerI = kmer.length - len; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/Path.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/Path.java index a07b98bb6..2e84e1d22 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/Path.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/Path.java @@ -47,7 +47,6 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; @@ -92,7 +91,7 @@ public class Path { /** * Create a new Path containing no edges and starting at initialVertex * @param initialVertex the starting vertex of the path - * @param graph the graph this path with follow through + * @param graph the graph this path will follow through */ public Path(final T initialVertex, final BaseGraph graph) { if ( initialVertex == null ) throw new IllegalArgumentException("initialVertex cannot be null"); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 0887929ab..f4290f2bb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -55,7 +55,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; import java.util.Arrays; -import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -89,7 +88,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { } public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes) { - this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, false); + this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true); } /** for testing purposes */ @@ -103,7 +102,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // first, try using the requested kmer sizes for ( final int kmerSize : kmerSizes ) { - final SeqGraph graph = createGraph(reads, refHaplotype, kmerSize, activeAlleleHaplotypes); + final SeqGraph graph = createGraph(reads, refHaplotype, kmerSize, activeAlleleHaplotypes, dontIncreaseKmerSizesForCycles); if ( graph != null ) graphs.add(graph); } @@ -113,7 +112,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { int kmerSize = MathUtils.arrayMaxInt(kmerSizes) + KMER_SIZE_ITERATION_INCREASE; int numIterations = 1; while ( graphs.isEmpty() && numIterations <= MAX_KMER_ITERATIONS_TO_ATTEMPT ) { - final SeqGraph graph = createGraph(reads, refHaplotype, kmerSize, activeAlleleHaplotypes); + // on the last attempt we will allow low complexity graphs + final SeqGraph graph = createGraph(reads, refHaplotype, kmerSize, activeAlleleHaplotypes, numIterations == MAX_KMER_ITERATIONS_TO_ATTEMPT); if ( graph != null ) graphs.add(graph); kmerSize 
+= KMER_SIZE_ITERATION_INCREASE; @@ -131,9 +131,14 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { * @param refHaplotype reference haplotype * @param kmerSize kmer size * @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph - * @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths) + * @param allowLowComplexityGraphs if true, do not check for low-complexity graphs + * @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths or is low complexity) */ - protected SeqGraph createGraph(final List reads, final Haplotype refHaplotype, final int kmerSize, final List activeAlleleHaplotypes) { + protected SeqGraph createGraph(final List reads, + final Haplotype refHaplotype, + final int kmerSize, + final List activeAlleleHaplotypes, + final boolean allowLowComplexityGraphs) { final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly); // add the reference sequence to the graph @@ -157,7 +162,13 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // sanity check: make sure there are no cycles in the graph if ( rtgraph.hasCycles() ) { - if ( debug ) logger.info("Not using kmer size of " + rtgraph.getKmerSize() + " in read threading assembler because it contains a cycle"); + if ( debug ) logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because it contains a cycle"); + return null; + } + + // sanity check: make sure the graph had enough complexity with the given kmer + if ( ! 
allowLowComplexityGraphs && rtgraph.isLowComplexity() ) { + if ( debug ) logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because it does not produce a graph with enough complexity"); return null; } @@ -169,8 +180,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { rtgraph.pruneLowWeightChains(pruneFactor); // look at all chains in the graph that terminate in a non-ref node (dangling sinks) and see if - // we can recover them by merging some N bases from the chain back into the reference uniquely, for - // N < kmerSize + // we can recover them by merging some N bases from the chain back into the reference if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(); // remove all heading and trailing paths diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index ab6b17c35..8d8cb83f6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -46,14 +46,19 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.KMerCounter; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.collections.PrimitivePair; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import 
org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment; +import org.broadinstitute.sting.utils.smithwaterman.SmithWaterman; import org.jgrapht.EdgeFactory; import org.jgrapht.alg.CycleDetector; @@ -80,9 +85,6 @@ public class ReadThreadingGraph extends BaseGraph their corresponding vertex in the graph */ - private Map uniqueKmers = new LinkedHashMap(); + private Map uniqueKmers = new LinkedHashMap<>(); /** * @@ -113,8 +115,6 @@ public class ReadThreadingGraph extends BaseGraph danglingPath, referencePath; + final byte[] danglingPathString, referencePathString; + final Cigar cigar; + + public DanglingTailMergeResult(final List danglingPath, + final List referencePath, + final byte[] danglingPathString, + final byte[] referencePathString, + final Cigar cigar) { + this.danglingPath = danglingPath; + this.referencePath = referencePath; + this.danglingPathString = danglingPathString; + this.referencePathString = referencePathString; + this.cigar = cigar; + } + } + + /** + * Attempt to attach vertex with out-degree == 0 to the graph + * * @param vertex the vertex to recover + * @return 1 if we successfully recovered the vertex and 0 otherwise */ protected int recoverDanglingChain(final MultiDeBruijnVertex vertex) { if ( outDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling tail for " + vertex + " but it has out-degree > 0"); - final byte[] kmer = vertex.getSequence(); - if ( ! nonUniqueKmers.contains(new Kmer(kmer)) ) { - // don't attempt to fix non-unique kmers! 
- final MultiDeBruijnVertex uniqueMergePoint = danglingTailMergePoint(kmer); - if ( uniqueMergePoint != null ) { - addEdge(vertex, uniqueMergePoint, new MultiSampleEdge(false, 1)); - return 1; - } - } + // generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths + final DanglingTailMergeResult danglingTailMergeResult = generateCigarAgainstReferencePath(vertex); - return 0; + // if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path + if ( danglingTailMergeResult == null || ! cigarIsOkayToMerge(danglingTailMergeResult.cigar) ) + return 0; + + // merge + return mergeDanglingTail(danglingTailMergeResult); } /** - * Find a unique merge point for kmer in the reference sequence - * @param kmer the full kmer of the dangling tail - * @return a vertex appropriate to merge kmer into, or null if none could be found + * Determine whether the provided cigar is okay to merge into the reference path + * + * @param cigar the cigar to analyze + * @return true if it's okay to merge, false otherwise */ - private MultiDeBruijnVertex danglingTailMergePoint(final byte[] kmer) { - final PrimitivePair.Int endAndLength = GraphUtils.findLongestUniqueSuffixMatch(refSeq, kmer); - if ( endAndLength != null && endAndLength.second >= MIN_MATCH_LENGTH_TO_RECOVER_DANGLING_TAIL && endAndLength.first + 1 < refKmers.length) { - final int len = endAndLength.second; - final MultiDeBruijnVertex mergePoint = refKmers[endAndLength.first + 1]; -// logger.info("recoverDanglingChain of kmer " + new String(kmer) + " merged to " + mergePoint + " with match size " + len); - final Set nonUniquesAtLength = determineKmerSizeAndNonUniques(len, len).nonUniques; - final Kmer matchedKmer = new Kmer(kmer, kmer.length - len, len); - if ( nonUniquesAtLength.contains(matchedKmer) ) { -// logger.info("Rejecting merge " + new String(kmer) + " because match kmer " + matchedKmer + " isn't unique across all reads"); - return null; - } else 
{ - return mergePoint; - } + protected boolean cigarIsOkayToMerge(final Cigar cigar) { + + final List elements = cigar.getCigarElements(); + + // don't allow more than a couple of different ops + if ( elements.size() > 3 ) + return false; + + // the last element must be an M + if ( elements.get(elements.size() - 1).getOperator() != CigarOperator.M ) + return false; + + // TODO -- do we want to check whether the Ms mismatch too much also? + + return true; + } + + /** + * Actually merge the dangling tail if possible + * + * @param danglingTailMergeResult the result from generating a Cigar for the dangling tail against the reference + * @return 1 if merge was successful, 0 otherwise + */ + protected int mergeDanglingTail(final DanglingTailMergeResult danglingTailMergeResult) { + + final List elements = danglingTailMergeResult.cigar.getCigarElements(); + final CigarElement lastElement = elements.get(elements.size() - 1); + if ( lastElement.getOperator() != CigarOperator.M ) + throw new IllegalArgumentException("The last Cigar element must be an M"); + + final int lastRefIndex = danglingTailMergeResult.cigar.getReferenceLength() - 1; + final int matchingSuffix = Math.min(GraphUtils.longestSuffixMatch(danglingTailMergeResult.referencePathString, danglingTailMergeResult.danglingPathString, lastRefIndex), lastElement.getLength()); + if ( matchingSuffix == 0 ) + return 0; + + final int altIndexToMerge = Math.max(danglingTailMergeResult.cigar.getReadLength() - matchingSuffix - 1, 0); + final int refIndexToMerge = lastRefIndex - matchingSuffix + 1; + addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), new MultiSampleEdge(false, 1)); + return 1; + } + + /** + * Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the + * provided vertex is the sink) and the reference path. 
+ * + * @param vertex the sink of the dangling tail + * @return a SmithWaterman object which can be null if no proper alignment could be generated + */ + protected DanglingTailMergeResult generateCigarAgainstReferencePath(final MultiDeBruijnVertex vertex) { + + // find the lowest common ancestor path between vertex and the reference sink if available + final List altPath = findPathToLowestCommonAncestorOfReference(vertex); + if ( altPath == null ) + return null; + + // now get the reference path from the LCA + final List refPath = getReferencePath(altPath.get(0)); + + // create the Smith-Waterman strings to use + final byte[] refBases = getBasesForPath(refPath); + final byte[] altBases = getBasesForPath(altPath); + + // run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting) + final SmithWaterman alignment = new SWPairwiseAlignment(refBases, altBases, SWPairwiseAlignment.OVERHANG_STRATEGY.INDEL); + return new DanglingTailMergeResult(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar())); + } + + /** + * Finds the path upwards in the graph from this vertex to the reference sequence, including the lowest common ancestor vertex + * + * @param vertex the original vertex + * @return the path if it can be determined or null if this vertex either doesn't merge onto the reference path or + * has an ancestor with multiple incoming edges before hitting the reference path + */ + protected List findPathToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex) { + final LinkedList path = new LinkedList<>(); + + MultiDeBruijnVertex v = vertex; + while ( ! isReferenceNode(v) && inDegreeOf(v) == 1 ) { + path.addFirst(v); + v = getEdgeSource(incomingEdgeOf(v)); + } + path.addFirst(v); + + return isReferenceNode(v) ? 
path : null; + } + + /** + * Finds the path downwards in the graph from this vertex to the reference sink, including this vertex + * + * @param start the reference vertex to start from + * @return the path (non-null, non-empty) + */ + protected List getReferencePath(final MultiDeBruijnVertex start) { + if ( ! isReferenceNode(start) ) throw new IllegalArgumentException("Cannot construct the reference path from a vertex that is not on that path"); + + final List path = new ArrayList<>(); + + MultiDeBruijnVertex v = start; + while ( v != null ) { + path.add(v); + v = getNextReferenceVertex(v); } - return null; + return path; } /** @@ -330,6 +432,16 @@ public class ReadThreadingGraph extends BaseGraph(this).detectCycles(); } + /** + * Does the graph not have enough complexity? We define low complexity as a situation where the number + * of non-unique kmers is more than 20% of the total number of kmers. + * + * @return true if the graph has low complexity, false otherwise + */ + public boolean isLowComplexity() { + return nonUniqueKmers.size() * 4 > uniqueKmers.size(); + } + public void recoverDanglingTails() { if ( ! 
alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built"); @@ -341,7 +453,8 @@ public class ReadThreadingGraph extends BaseGraph vertexes = new ArrayList<>(); + for ( int i = 0; i <= testString.length() - kmerSize; i++ ) { + vertexes.add(new DeBruijnVertex(testString.substring(i, i + kmerSize))); + } + + final String result = new String(new DeBruijnGraph().getBasesForPath(vertexes)); + Assert.assertEquals(result, testString.substring(kmerSize - 1)); + } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java index 3f10fc72c..8269b9c20 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java @@ -83,7 +83,8 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest { } public SeqGraph assemble() { - assembler.removePathsNotConnectedToRef = false; // need to pass some of the tests + assembler.removePathsNotConnectedToRef = false; // needed to pass some of the tests + assembler.setRecoverDanglingTails(false); // needed to pass some of the tests assembler.setDebugGraphTransformations(true); final SeqGraph graph = assembler.assemble(reads, refHaplotype, Collections.emptyList()).get(0); if ( DEBUG ) graph.printGraph(new File("test.dot"), 0); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java index 67ee52734..ed91cccb3 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java @@ -53,6 +53,7 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -201,6 +202,81 @@ public class ReadThreadingGraphUnitTest extends BaseTest { Assert.assertEquals(pathFinder.getKBestPaths(graph, length, graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex()).size(), 1); } + @DataProvider(name = "DanglingTails") + public Object[][] makeDanglingTailsData() { + List tests = new ArrayList(); + + // add 1M to the expected CIGAR because it includes the previous (common) base too + tests.add(new Object[]{"AAAAAAAAAA", "CAAA", "5M", true, 3}); // incomplete haplotype + tests.add(new Object[]{"AAAAAAAAAA", "CAAAAAAAAAA", "1M1I10M", true, 10}); // insertion + tests.add(new Object[]{"CCAAAAAAAAAA", "AAAAAAAAAA", "1M2D10M", true, 10}); // deletion + tests.add(new Object[]{"AAAAAAAA", "CAAAAAAA", "9M", true, 7}); // 1 snp + tests.add(new Object[]{"AAAAAAAA", "CAAGATAA", "9M", true, 2}); // several snps + tests.add(new Object[]{"AAAAA", "C", "1M4D1M", true, -1}); // funky SW alignment + tests.add(new Object[]{"AAAAA", "CA", "1M3D2M", true, 1}); // very little data + tests.add(new Object[]{"AAAAAAA", "CAAAAAC", "8M", true, -1}); // ends in mismatch + tests.add(new Object[]{"AAAAAA", "CGAAAACGAA", "1M2I4M2I2M", false, 0}); // alignment is too complex + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "DanglingTails", enabled = !DEBUG) + public void testDanglingTails(final String refEnd, + final String altEnd, + final String cigar, + final boolean cigarIsGood, + 
final int mergePointDistanceFromSink) { + + final int kmerSize = 15; + + // construct the haplotypes + final String commonPrefix = "AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT"; + final String ref = commonPrefix + refEnd; + final String alt = commonPrefix + altEnd; + + // create the graph and populate it + final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize); + rtgraph.addSequence("ref", ref.getBytes(), null, true); + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M"); + rtgraph.addRead(read); + rtgraph.buildGraphIfNecessary(); + + // confirm that we have just a single dangling tail + MultiDeBruijnVertex altSink = null; + for ( final MultiDeBruijnVertex v : rtgraph.vertexSet() ) { + if ( rtgraph.isSink(v) && !rtgraph.isReferenceNode(v) ) { + Assert.assertTrue(altSink == null, "We found more than one non-reference sink"); + altSink = v; + } + } + + Assert.assertTrue(altSink != null, "We did not find a non-reference sink"); + + // confirm that the SW alignment agrees with our expectations + final ReadThreadingGraph.DanglingTailMergeResult result = rtgraph.generateCigarAgainstReferencePath(altSink); + Assert.assertTrue(cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString()); + + // confirm that the goodness of the cigar agrees with our expectations + Assert.assertEquals(rtgraph.cigarIsOkayToMerge(result.cigar), cigarIsGood); + + // confirm that the tail merging works as expected + if ( cigarIsGood ) { + final int mergeResult = rtgraph.mergeDanglingTail(result); + Assert.assertTrue(mergeResult == 1 || mergePointDistanceFromSink == -1); + + // confirm that we created the appropriate edge + if ( mergePointDistanceFromSink >= 0 ) { + MultiDeBruijnVertex v = altSink; + for ( int i = 0; i < mergePointDistanceFromSink; i++ ) { + if ( rtgraph.inDegreeOf(v) != 1 ) + Assert.fail("Encountered vertex with multiple sources"); + v = 
rtgraph.getEdgeSource(rtgraph.incomingEdgeOf(v)); + } + Assert.assertTrue(rtgraph.outDegreeOf(v) > 1); + } + } + } + // TODO -- update to use determineKmerSizeAndNonUniques directly // @DataProvider(name = "KmerSizeData") diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index fa35e3f53..762ce4858 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -800,6 +800,23 @@ public final class AlignmentUtils { return new Cigar(elements); } + /** + * Removing a trailing deletion from the incoming cigar if present + * + * @param c the cigar we want to update + * @return a non-null Cigar + */ + @Requires("c != null") + @Ensures("result != null") + public static Cigar removeTrailingDeletions(final Cigar c) { + + final List elements = c.getCigarElements(); + if ( elements.get(elements.size() - 1).getOperator() != CigarOperator.D ) + return c; + + return new Cigar(elements.subList(0, elements.size() - 1)); + } + /** * Move the indel in a given cigar string one base to the left * diff --git a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java index 84c33d4a5..1abf9f836 100644 --- a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java @@ -118,6 +118,21 @@ public class SWPairwiseAlignment implements SmithWaterman { align(seq1,seq2); } + /** + * Create a new SW pairwise aligner + * + * After creating the object the two sequences are aligned with an internal call to align(seq1, seq2) + * + * @param seq1 the first sequence we want to align + * @param seq2 the second sequence we want to align + * @param strategy the overhang 
strategy to use + */ + public SWPairwiseAlignment(final byte[] seq1, final byte[] seq2, final OVERHANG_STRATEGY strategy) { + this(SWParameterSet.ORIGINAL_DEFAULT.parameters); + overhang_strategy = strategy; + align(seq1, seq2); + } + /** * Create a new SW pairwise aligner, without actually doing any alignment yet * diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java index e7d54c460..fbf0242a3 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java @@ -1033,5 +1033,12 @@ public class AlignmentUtilsUnitTest { Assert.assertEquals(AlignmentUtils.startsOrEndsWithInsertionOrDeletion(TextCigarCodec.getSingleton().decode(cigar)), expected); } + @Test(dataProvider = "StartsOrEndsWithInsertionOrDeletionData", enabled = true) + public void testRemoveTrailingDeletions(final String cigar, final boolean expected) { + final Cigar originalCigar = TextCigarCodec.getSingleton().decode(cigar); + final Cigar newCigar = AlignmentUtils.removeTrailingDeletions(originalCigar); + + Assert.assertEquals(originalCigar.equals(newCigar), !cigar.endsWith("D")); + } } From 2c3c680eb704348cb8e20572f5b9d2fb3a5a986c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 5 Jun 2013 12:22:14 -0400 Subject: [PATCH 084/116] Misc changes and cleanup from all previous commits in this push. 1. By default, do not include the UG CEU callset for assessment. 2. Updated md5s that are different now with all the HC changes. 
--- ...lexAndSymbolicVariantsIntegrationTest.java | 6 +++--- .../HaplotypeCallerIntegrationTest.java | 20 +++++++++---------- ...aplotypeCallerParallelIntegrationTest.java | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index fba294c3d..073d54ec5 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "8d7728909b1b8eb3f30f2f1583f054a8"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "d21f15a5809fe5259af41ae6774af6f1"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "db71826dc798ff1cdf0c5d05b0ede976"); + "d4a0797c2fd4c103bf9a137633376156"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "42831d5463552911b7da9de0b4a27289"); + "a9872228d0275a30f5a1f7e070a9c9f4"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 904f15728..dbdd0afcd 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -78,12 +78,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "1b15e4647013ab2c3ce7073c420d8640"); + HCTest(CEUTRIO_BAM, "", "e9167a1bfc0fc276586788d1ce1be408"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "423be27dc2cf7fd10baf465cf93e18e2"); + HCTest(NA12878_BAM, "", "b1d46afb9659ac3b92a3d131b58924ef"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -94,7 +94,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "a28e6f14e28708283d61c1e423bbdcb1"); + "d83856b8136776bd731a8037c16b71fa"); } @Test @@ -110,7 +110,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "8344d86751b707c53b296c297eba4bfa"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "70c4476816f5d35c9978c378dbeac09b"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -147,7 +147,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "dea98f257d39fa1447a12c36a6bbf4a3"); + HCTestNearbySmallIntervals(NA12878_BAM, "", 
"947aae309ecab7cd3f17ff9810884924"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -157,14 +157,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("7cd1c5e2642ae8ddf38932aba1f51d69")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("0689d2c202849fd05617648eaf429b9a")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ee55ff4c6ec1bbef88e21cc0f45d4c47")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("91717e5e271742c2c9b67223e58f1320")); executeTest("HCTestStructuralIndels: ", spec); } @@ -186,7 +186,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("4886a98bf699f4e7f4491160749ada6a")); + Arrays.asList("0124c4923d96ec0f8222b596dd4ef534")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -194,7 +194,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void 
testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("86bdd07a3ac4f6ce239c30efea8bf5ba")); + Arrays.asList("0e020dcfdf249225714f5cd86ed3869f")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } @@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("7b23a288a31cafca3946f14f2381e7cb")); + Arrays.asList("446a786bb539f3ec2084dd75167568aa")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index ff5a501cc..62e685eab 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "c277fd65365d59b734260dd8423313bb"}); + tests.add(new Object[]{nct, "ef42a438b82681d1c0f921c57e16ff12"}); } return tests.toArray(new Object[][]{}); From 95b5f99feb89e06ac0e09ae490552b8de926007b Mon Sep 17 
00:00:00 2001 From: David Roazen Date: Wed, 5 Jun 2013 15:55:43 -0400 Subject: [PATCH 085/116] Exclude reduced reads from elimination during downsampling Problem: -Downsamplers were treating reduced reads the same as normal reads, with occasionally catastrophic results on variant calling when an entire reduced read happened to get eliminated. Solution: -Since reduced reads lack the information we need to do position-based downsampling on them, best available option for now is to simply exempt all reduced reads from elimination during downsampling. Details: -Add generic capability of exempting items from elimination to the Downsampler interface via new doNotDiscardItem() method. Default inherited version of this method exempts all reduced reads (or objects encapsulating reduced reads) from elimination. -Switch from interfaces to abstract classes to facilitate this change, and do some minor refactoring of the Downsampler interface (push implementation of some methods into the abstract classes, improve names of the confusing clear() and reset() methods). -Rewrite TAROrderedReadCache. This class was incorrectly relying on the ReservoirDownsampler to preserve the relative ordering of items in some circumstances, which was behavior not guaranteed by the API and only happened to work due to implementation details which no longer apply. Restructured this class around the assumption that the ReservoirDownsampler will not preserve relative ordering at all. -Add disclaimer to description of -dcov argument explaining that coverage targets are approximate goals that will not always be precisely met. 
-Unit tests for all individual downsamplers to verify that reduced reads are exempted from elimination --- .../arguments/GATKArgumentCollection.java | 9 +- .../sting/gatk/downsampling/Downsampler.java | 69 +++++++++--- .../downsampling/FractionalDownsampler.java | 47 ++++---- .../downsampling/LevelingDownsampler.java | 66 ++++++------ .../downsampling/PassThroughDownsampler.java | 35 +++--- .../gatk/downsampling/ReadsDownsampler.java | 6 +- .../downsampling/ReservoirDownsampler.java | 102 +++++++++++------- .../SimplePositionalDownsampler.java | 60 +++++------ .../gatk/traversals/TAROrderedReadCache.java | 92 +++++++++++----- .../locusiterator/AlignmentStateMachine.java | 9 ++ .../PerSampleReadStateManager.java | 2 +- .../locusiterator/SamplePartitioner.java | 4 +- .../sting/utils/sam/ArtificialSAMUtils.java | 25 +++++ .../FractionalDownsamplerUnitTest.java | 35 +++++- .../LevelingDownsamplerUnitTest.java | 48 ++++++++- .../ReservoirDownsamplerUnitTest.java | 45 +++++++- .../SimplePositionalDownsamplerUnitTest.java | 46 +++++++- .../TAROrderedReadCacheUnitTest.java | 50 ++++++++- 18 files changed, 545 insertions(+), 205 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 8d1fa4638..dc3d67283 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -125,7 +125,14 @@ public class GATKArgumentCollection { @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false) public Double downsampleFraction = null; - @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a 
locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false) + @Argument(fullName = "downsample_to_coverage", shortName = "dcov", + doc = "Coverage [integer] to downsample to. For locus-based traversals (eg., LocusWalkers and ActiveRegionWalkers)," + + "this controls the maximum depth of coverage at each locus. For non-locus-based traversals (eg., ReadWalkers), " + + "this controls the maximum number of reads sharing the same alignment start position. Note that the " + + "coverage target is an approximate goal that is not guaranteed to be met exactly: the GATK's approach " + + "to downsampling is based on even representation of reads from all alignment start positions, and the " + + "downsampling algorithm will under some circumstances retain slightly more coverage than requested.", + required = false) public Integer downsampleCoverage = null; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java index 23b16cff2..466ade1ed 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java @@ -25,19 +25,27 @@ package org.broadinstitute.sting.gatk.downsampling; +import org.broadinstitute.sting.utils.locusiterator.AlignmentStateMachine; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + import java.util.Collection; import java.util.List; /** * The basic downsampler API, with no reads-specific operations. * - * Downsamplers that extend this interface rather than the ReadsDownsampler interface can handle + * Downsamplers that extend this class rather than the ReadsDownsampler class can handle * any kind of item, however they cannot be wrapped within a DownsamplingReadsIterator or a * PerSampleDownsamplingReadsIterator. 
* * @author David Roazen */ -public interface Downsampler { +public abstract class Downsampler { + + /** + * Number of items discarded by this downsampler since the last call to resetStats() + */ + protected int numDiscardedItems = 0; /** * Submit one item to the downsampler for consideration. Some downsamplers will be able to determine @@ -46,7 +54,7 @@ public interface Downsampler { * * @param item the individual item to submit to the downsampler for consideration */ - public void submit( T item ); + public abstract void submit( final T item ); /** * Submit a collection of items to the downsampler for consideration. Should be equivalent to calling @@ -54,21 +62,29 @@ public interface Downsampler { * * @param items the collection of items to submit to the downsampler for consideration */ - public void submit( Collection items ); + public void submit( final Collection items ) { + if ( items == null ) { + throw new IllegalArgumentException("submitted items must not be null"); + } + + for ( final T item : items ) { + submit(item); + } + } /** * Are there items that have survived the downsampling process waiting to be retrieved? * * @return true if this downsampler has > 0 finalized items, otherwise false */ - public boolean hasFinalizedItems(); + public abstract boolean hasFinalizedItems(); /** * Return (and *remove*) all items that have survived downsampling and are waiting to be retrieved. 
* * @return a list of all finalized items this downsampler contains, or an empty list if there are none */ - public List consumeFinalizedItems(); + public abstract List consumeFinalizedItems(); /** * Are there items stored in this downsampler that it doesn't yet know whether they will @@ -76,7 +92,7 @@ public interface Downsampler { * * @return true if this downsampler has > 0 pending items, otherwise false */ - public boolean hasPendingItems(); + public abstract boolean hasPendingItems(); /** * Peek at the first finalized item stored in this downsampler (or null if there are no finalized items) @@ -84,7 +100,7 @@ public interface Downsampler { * @return the first finalized item in this downsampler (the item is not removed from the downsampler by this call), * or null if there are none */ - public T peekFinalized(); + public abstract T peekFinalized(); /** * Peek at the first pending item stored in this downsampler (or null if there are no pending items) @@ -92,7 +108,7 @@ public interface Downsampler { * @return the first pending item stored in this downsampler (the item is not removed from the downsampler by this call), * or null if there are none */ - public T peekPending(); + public abstract T peekPending(); /** * Get the current number of items in this downsampler @@ -103,7 +119,7 @@ public interface Downsampler { * * @return a positive integer */ - public int size(); + public abstract int size(); /** * Returns the number of items discarded (so far) during the downsampling process @@ -111,21 +127,46 @@ public interface Downsampler { * @return the number of items that have been submitted to this downsampler and discarded in the process of * downsampling */ - public int getNumberOfDiscardedItems(); + public int getNumberOfDiscardedItems() { + return numDiscardedItems; + } /** * Used to tell the downsampler that no more items will be submitted to it, and that it should * finalize any pending items. 
*/ - public void signalEndOfInput(); + public abstract void signalEndOfInput(); /** * Empty the downsampler of all finalized/pending items */ - public void clear(); + public abstract void clearItems(); /** * Reset stats in the downsampler such as the number of discarded items *without* clearing the downsampler of items */ - public void reset(); + public void resetStats() { + numDiscardedItems = 0; + } + + /** + * Indicates whether an item should be excluded from elimination during downsampling. By default, + * all items representing reduced reads are excluded from downsampling, but individual downsamplers + * may override if they are able to handle reduced reads correctly. Downsamplers should check + * the return value of this method before discarding an item. + * + * @param item The item to test + * @return true if the item should not be subject to elimination during downsampling, otherwise false + */ + protected boolean doNotDiscardItem( final Object item ) { + // Use getClass() rather than instanceof for performance reasons. Ugly but fast. 
+ if ( item.getClass() == GATKSAMRecord.class ) { + return ((GATKSAMRecord)item).isReducedRead(); + } + else if ( item.getClass() == AlignmentStateMachine.class ) { + return ((AlignmentStateMachine)item).isReducedRead(); + } + + return false; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java index 1cede9c33..c40f8019e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java @@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.Collection; import java.util.List; /** @@ -41,13 +40,11 @@ import java.util.List; * * @author David Roazen */ -public class FractionalDownsampler implements ReadsDownsampler { +public class FractionalDownsampler extends ReadsDownsampler { private ArrayList selectedReads; - private int cutoffForInclusion; - - private int numDiscardedItems; + private final int cutoffForInclusion; private static final int RANDOM_POOL_SIZE = 10000; @@ -57,18 +54,19 @@ public class FractionalDownsampler implements ReadsDownsamp * @param fraction Fraction of reads to preserve, between 0.0 (inclusive) and 1.0 (inclusive). * Actual number of reads preserved may differ randomly. 
*/ - public FractionalDownsampler( double fraction ) { + public FractionalDownsampler( final double fraction ) { if ( fraction < 0.0 || fraction > 1.0 ) { throw new ReviewedStingException("Fraction of reads to include must be between 0.0 and 1.0, inclusive"); } cutoffForInclusion = (int)(fraction * RANDOM_POOL_SIZE); - clear(); - reset(); + clearItems(); + resetStats(); } - public void submit( T newRead ) { - if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoffForInclusion ) { + @Override + public void submit( final T newRead ) { + if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoffForInclusion || doNotDiscardItem(newRead) ) { selectedReads.add(newRead); } else { @@ -76,61 +74,56 @@ public class FractionalDownsampler implements ReadsDownsamp } } - public void submit( Collection newReads ) { - for ( T read : newReads ) { - submit(read); - } - } - + @Override public boolean hasFinalizedItems() { return selectedReads.size() > 0; } + @Override public List consumeFinalizedItems() { // pass by reference rather than make a copy, for speed List downsampledItems = selectedReads; - clear(); + clearItems(); return downsampledItems; } + @Override public boolean hasPendingItems() { return false; } + @Override public T peekFinalized() { return selectedReads.isEmpty() ? 
null : selectedReads.get(0); } + @Override public T peekPending() { return null; } - public int getNumberOfDiscardedItems() { - return numDiscardedItems; - } - @Override public int size() { return selectedReads.size(); } + @Override public void signalEndOfInput() { // NO-OP } - public void clear() { + @Override + public void clearItems() { selectedReads = new ArrayList(); } - public void reset() { - numDiscardedItems = 0; - } - + @Override public boolean requiresCoordinateSortOrder() { return false; } - public void signalNoMoreReadsBefore( T read ) { + @Override + public void signalNoMoreReadsBefore( final T read ) { // NO-OP } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java index 4ff729537..3ce4d09d6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java @@ -46,16 +46,15 @@ import java.util.*; * * @author David Roazen */ -public class LevelingDownsampler, E> implements Downsampler { +public class LevelingDownsampler, E> extends Downsampler { private final int minElementsPerStack; + private final int targetSize; private List groups; private boolean groupsAreFinalized; - private int numDiscardedItems; - /** * Construct a LevelingDownsampler * @@ -65,7 +64,7 @@ public class LevelingDownsampler, E> implements Downsampler * this value -- if it does, items are removed from Lists evenly until the total size * is <= this value */ - public LevelingDownsampler( int targetSize ) { + public LevelingDownsampler( final int targetSize ) { this(targetSize, 1); } @@ -79,55 +78,58 @@ public class LevelingDownsampler, E> implements Downsampler * if a stack has only 3 elements and minElementsPerStack is 3, no matter what * we'll not reduce this stack below 3. 
*/ - public LevelingDownsampler(final int targetSize, final int minElementsPerStack) { + public LevelingDownsampler( final int targetSize, final int minElementsPerStack ) { if ( targetSize < 0 ) throw new IllegalArgumentException("targetSize must be >= 0 but got " + targetSize); if ( minElementsPerStack < 0 ) throw new IllegalArgumentException("minElementsPerStack must be >= 0 but got " + minElementsPerStack); this.targetSize = targetSize; this.minElementsPerStack = minElementsPerStack; - clear(); - reset(); + clearItems(); + resetStats(); } - public void submit( T item ) { + @Override + public void submit( final T item ) { groups.add(item); } - public void submit( Collection items ){ + @Override + public void submit( final Collection items ){ groups.addAll(items); } + @Override public boolean hasFinalizedItems() { return groupsAreFinalized && groups.size() > 0; } + @Override public List consumeFinalizedItems() { if ( ! hasFinalizedItems() ) { return new ArrayList(); } // pass by reference rather than make a copy, for speed - List toReturn = groups; - clear(); + final List toReturn = groups; + clearItems(); return toReturn; } + @Override public boolean hasPendingItems() { return ! groupsAreFinalized && groups.size() > 0; } + @Override public T peekFinalized() { return hasFinalizedItems() ? groups.get(0) : null; } + @Override public T peekPending() { return hasPendingItems() ? 
groups.get(0) : null; } - public int getNumberOfDiscardedItems() { - return numDiscardedItems; - } - @Override public int size() { int s = 0; @@ -137,26 +139,24 @@ public class LevelingDownsampler, E> implements Downsampler return s; } + @Override public void signalEndOfInput() { levelGroups(); groupsAreFinalized = true; } - public void clear() { + @Override + public void clearItems() { groups = new ArrayList(); groupsAreFinalized = false; } - public void reset() { - numDiscardedItems = 0; - } - private void levelGroups() { + final int[] groupSizes = new int[groups.size()]; int totalSize = 0; - int[] groupSizes = new int[groups.size()]; int currentGroupIndex = 0; - for ( T group : groups ) { + for ( final T group : groups ) { groupSizes[currentGroupIndex] = group.size(); totalSize += groupSizes[currentGroupIndex]; currentGroupIndex++; @@ -191,20 +191,18 @@ public class LevelingDownsampler, E> implements Downsampler // Now we actually go through and reduce each group to its new count as specified in groupSizes currentGroupIndex = 0; - for ( T group : groups ) { + for ( final T group : groups ) { downsampleOneGroup(group, groupSizes[currentGroupIndex]); currentGroupIndex++; } } - private void downsampleOneGroup( T group, int numItemsToKeep ) { + private void downsampleOneGroup( final T group, final int numItemsToKeep ) { if ( numItemsToKeep >= group.size() ) { return; } - numDiscardedItems += group.size() - numItemsToKeep; - - BitSet itemsToKeep = new BitSet(group.size()); + final BitSet itemsToKeep = new BitSet(group.size()); for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(group.size(), numItemsToKeep) ) { itemsToKeep.set(selectedIndex); } @@ -213,12 +211,13 @@ public class LevelingDownsampler, E> implements Downsampler // If our group is a linked list, we can remove the desired items in a single O(n) pass with an iterator if ( group instanceof LinkedList ) { - Iterator iter = group.iterator(); + final Iterator iter = group.iterator(); while 
( iter.hasNext() ) { - iter.next(); + final E item = iter.next(); - if ( ! itemsToKeep.get(currentIndex) ) { + if ( ! itemsToKeep.get(currentIndex) && ! doNotDiscardItem(item) ) { iter.remove(); + numDiscardedItems++; } currentIndex++; @@ -227,14 +226,15 @@ public class LevelingDownsampler, E> implements Downsampler // If it's not a linked list, it's more efficient to copy the desired items into a new list and back rather // than suffer O(n^2) of item shifting else { - List keptItems = new ArrayList(numItemsToKeep); + final List keptItems = new ArrayList(group.size()); - for ( E item : group ) { - if ( itemsToKeep.get(currentIndex) ) { + for ( final E item : group ) { + if ( itemsToKeep.get(currentIndex) || doNotDiscardItem(item) ) { keptItems.add(item); } currentIndex++; } + numDiscardedItems += group.size() - keptItems.size(); group.clear(); group.addAll(keptItems); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java index 3aaed6c73..1eabf5038 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.downsampling; import net.sf.samtools.SAMRecord; -import java.util.Collection; import java.util.LinkedList; import java.util.List; @@ -39,25 +38,21 @@ import java.util.List; * * @author David Roazen */ -public class PassThroughDownsampler implements ReadsDownsampler { +public class PassThroughDownsampler extends ReadsDownsampler { private LinkedList selectedReads; public PassThroughDownsampler() { - clear(); + clearItems(); } + @Override public void submit( T newRead ) { // All reads pass-through, no reads get downsampled selectedReads.add(newRead); } - public void submit( Collection newReads ) { - for ( T read : newReads ) { - submit(read); - 
} - } - + @Override public boolean hasFinalizedItems() { return ! selectedReads.isEmpty(); } @@ -66,50 +61,50 @@ public class PassThroughDownsampler implements ReadsDownsam * Note that this list is a linked list and so doesn't support fast random access * @return */ + @Override public List consumeFinalizedItems() { // pass by reference rather than make a copy, for speed - List downsampledItems = selectedReads; - clear(); + final List downsampledItems = selectedReads; + clearItems(); return downsampledItems; } + @Override public boolean hasPendingItems() { return false; } + @Override public T peekFinalized() { return selectedReads.isEmpty() ? null : selectedReads.getFirst(); } + @Override public T peekPending() { return null; } - public int getNumberOfDiscardedItems() { - return 0; - } - @Override public int size() { return selectedReads.size(); } + @Override public void signalEndOfInput() { // NO-OP } - public void clear() { + @Override + public void clearItems() { selectedReads = new LinkedList(); } - public void reset() { - // NO-OP - } - + @Override public boolean requiresCoordinateSortOrder() { return false; } + @Override public void signalNoMoreReadsBefore( T read ) { // NO-OP } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsampler.java index a878d7553..a8df014e5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsampler.java @@ -32,14 +32,14 @@ import net.sf.samtools.SAMRecord; * * @author David Roazen */ -public interface ReadsDownsampler extends Downsampler { +public abstract class ReadsDownsampler extends Downsampler { /** * Does this downsampler require that reads be fed to it in coordinate order? 
* * @return true if reads must be submitted to this downsampler in coordinate order, otherwise false */ - public boolean requiresCoordinateSortOrder(); + public abstract boolean requiresCoordinateSortOrder(); /** * Tell this downsampler that no more reads located before the provided read (according to @@ -52,5 +52,5 @@ public interface ReadsDownsampler extends Downsampler { * @param read the downsampler will assume that no reads located before this read will ever * be submitted to it in the future */ - public void signalNoMoreReadsBefore( T read ); + public abstract void signalNoMoreReadsBefore( final T read ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java index 0e6bbfcb6..ff085d17b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java @@ -39,7 +39,12 @@ import java.util.*; * * @author David Roazen */ -public class ReservoirDownsampler implements ReadsDownsampler { +public class ReservoirDownsampler extends ReadsDownsampler { + + /** + * size of our reservoir -- ie., the maximum number of reads from the stream that will be retained + * (not including any undiscardable items) + */ private final int targetSampleSize; /** @@ -58,17 +63,33 @@ public class ReservoirDownsampler implements ReadsDownsampl */ private List reservoir; + /** + * Certain items (eg., reduced reads) cannot be discarded at all during downsampling. We store + * these items separately so as not to impact the fair selection of items for inclusion in the + * reservoir. These items are returned (and cleared) along with any items in the reservoir in + * calls to consumeFinalizedItems(). + */ + private List undiscardableItems; + + /** + * Are we currently using a linked list for the reservoir? 
+ */ private boolean isLinkedList; - private int totalReadsSeen; + /** + * Count of the number of reads seen that were actually eligible for discarding. Used by the reservoir downsampling + * algorithm to ensure that all discardable reads have an equal chance of making it into the reservoir. + */ + private int totalDiscardableReadsSeen; - private int numDiscardedItems; /** * Construct a ReservoirDownsampler * * @param targetSampleSize Size of the reservoir used by this downsampler. Number of items retained - * after downsampling will be min(totalReads, targetSampleSize) + * after downsampling will be min(totalDiscardableReads, targetSampleSize) + any + * undiscardable reads (eg., reduced reads). + * * @param expectFewOverflows if true, this downsampler will be optimized for the case * where most of the time we won't fill up anything like the * targetSampleSize elements. If this is false, we will allocate @@ -76,15 +97,15 @@ public class ReservoirDownsampler implements ReadsDownsampl * the cost of allocation if we often use targetSampleSize or more * elements. */ - public ReservoirDownsampler ( final int targetSampleSize, final boolean expectFewOverflows) { + public ReservoirDownsampler ( final int targetSampleSize, final boolean expectFewOverflows ) { if ( targetSampleSize <= 0 ) { throw new ReviewedStingException("Cannot do reservoir downsampling with a sample size <= 0"); } this.targetSampleSize = targetSampleSize; this.expectFewOverflows = expectFewOverflows; - clear(); - reset(); + clearItems(); + resetStats(); } /** @@ -93,15 +114,21 @@ public class ReservoirDownsampler implements ReadsDownsampl * @param targetSampleSize Size of the reservoir used by this downsampler. 
Number of items retained * after downsampling will be min(totalReads, targetSampleSize) */ - public ReservoirDownsampler ( int targetSampleSize ) { + public ReservoirDownsampler ( final int targetSampleSize ) { this(targetSampleSize, false); } + @Override + public void submit ( final T newRead ) { + if ( doNotDiscardItem(newRead) ) { + undiscardableItems.add(newRead); + return; + } - public void submit ( T newRead ) { - totalReadsSeen++; + // Only count reads that are actually eligible for discarding for the purposes of the reservoir downsampling algorithm + totalDiscardableReadsSeen++; - if ( totalReadsSeen <= targetSampleSize ) { + if ( totalDiscardableReadsSeen <= targetSampleSize ) { reservoir.add(newRead); } else { @@ -110,7 +137,7 @@ public class ReservoirDownsampler implements ReadsDownsampl isLinkedList = false; } - final int randomSlot = GenomeAnalysisEngine.getRandomGenerator().nextInt(totalReadsSeen); + final int randomSlot = GenomeAnalysisEngine.getRandomGenerator().nextInt(totalDiscardableReadsSeen); if ( randomSlot < targetSampleSize ) { reservoir.set(randomSlot, newRead); } @@ -118,49 +145,46 @@ public class ReservoirDownsampler implements ReadsDownsampl } } - public void submit ( Collection newReads ) { - for ( T read : newReads ) { - submit(read); - } - } - + @Override public boolean hasFinalizedItems() { - return reservoir.size() > 0; + return ! reservoir.isEmpty() || ! undiscardableItems.isEmpty(); } + @Override public List consumeFinalizedItems() { - if ( reservoir.isEmpty() ) { - // if there's nothing here, don't both allocating a new list completely + if ( ! 
hasFinalizedItems() ) { + // if there's nothing here, don't bother allocating a new list return Collections.emptyList(); } else { - // pass by reference rather than make a copy, for speed - List downsampledItems = reservoir; - clear(); + // pass reservoir by reference rather than make a copy, for speed + final List downsampledItems = reservoir; + downsampledItems.addAll(undiscardableItems); + clearItems(); return downsampledItems; } } + @Override public boolean hasPendingItems() { return false; } + @Override public T peekFinalized() { - return reservoir.isEmpty() ? null : reservoir.get(0); + return ! reservoir.isEmpty() ? reservoir.get(0) : (! undiscardableItems.isEmpty() ? undiscardableItems.get(0) : null); } + @Override public T peekPending() { return null; } - public int getNumberOfDiscardedItems() { - return numDiscardedItems; + @Override + public int size() { + return reservoir.size() + undiscardableItems.size(); } @Override - public int size() { - return reservoir.size(); - } - public void signalEndOfInput() { // NO-OP } @@ -168,25 +192,27 @@ public class ReservoirDownsampler implements ReadsDownsampl /** * Clear the data structures used to hold information */ - public void clear() { + @Override + public void clearItems() { // if we aren't expecting many overflows, allocate a linked list not an arraylist reservoir = expectFewOverflows ? 
new LinkedList() : new ArrayList(targetSampleSize); + // there's no possibility of overflow with the undiscardable items, so we always use a linked list for them + undiscardableItems = new LinkedList<>(); + // it's a linked list if we allocate one isLinkedList = expectFewOverflows; - // an internal stat used by the downsampling process, so not cleared by reset() below - totalReadsSeen = 0; - } - - public void reset() { - numDiscardedItems = 0; + // an internal stat used by the downsampling process, so not cleared by resetStats() below + totalDiscardableReadsSeen = 0; } + @Override public boolean requiresCoordinateSortOrder() { return false; } + @Override public void signalNoMoreReadsBefore( T read ) { // NO-OP } diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java index 7c6c043c2..897e2c05e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java @@ -35,11 +35,11 @@ import java.util.*; * * @author David Roazen */ -public class SimplePositionalDownsampler implements ReadsDownsampler { +public class SimplePositionalDownsampler extends ReadsDownsampler { - private int targetCoverage; + private final int targetCoverage; - private ReservoirDownsampler reservoir; + private final ReservoirDownsampler reservoir; private int currentContigIndex; @@ -51,97 +51,93 @@ public class SimplePositionalDownsampler implements ReadsDo private ArrayList finalizedReads; - private int numDiscardedItems; /** * Construct a SimplePositionalDownsampler * * @param targetCoverage Maximum number of reads that may share any given alignment start position */ - public SimplePositionalDownsampler( int targetCoverage ) { + public SimplePositionalDownsampler( final int targetCoverage ) { this.targetCoverage = 
targetCoverage; reservoir = new ReservoirDownsampler(targetCoverage); finalizedReads = new ArrayList(); - clear(); - reset(); + clearItems(); + resetStats(); } - public void submit( T newRead ) { + @Override + public void submit( final T newRead ) { updatePositionalState(newRead); if ( unmappedReadsReached ) { // don't downsample the unmapped reads at the end of the stream finalizedReads.add(newRead); } else { - int reservoirPreviouslyDiscardedItems = reservoir.getNumberOfDiscardedItems(); + final int reservoirPreviouslyDiscardedItems = reservoir.getNumberOfDiscardedItems(); + // our reservoir downsampler will call doNotDiscardItem() for us to exclude items from elimination as appropriate reservoir.submit(newRead); numDiscardedItems += reservoir.getNumberOfDiscardedItems() - reservoirPreviouslyDiscardedItems; } } - public void submit( Collection newReads ) { - for ( T read : newReads ) { - submit(read); - } - } - + @Override public boolean hasFinalizedItems() { return finalizedReads.size() > 0; } + @Override public List consumeFinalizedItems() { // pass by reference rather than make a copy, for speed - List toReturn = finalizedReads; + final List toReturn = finalizedReads; finalizedReads = new ArrayList(); return toReturn; } + @Override public boolean hasPendingItems() { return reservoir.hasFinalizedItems(); } + @Override public T peekFinalized() { return finalizedReads.isEmpty() ? 
null : finalizedReads.get(0); } + @Override public T peekPending() { return reservoir.peekFinalized(); } - public int getNumberOfDiscardedItems() { - return numDiscardedItems; - } - @Override public int size() { return finalizedReads.size() + reservoir.size(); } + @Override public void signalEndOfInput() { finalizeReservoir(); } - public void clear() { - reservoir.clear(); - reservoir.reset(); + @Override + public void clearItems() { + reservoir.clearItems(); + reservoir.resetStats(); finalizedReads.clear(); positionEstablished = false; unmappedReadsReached = false; } - public void reset() { - numDiscardedItems = 0; - } - + @Override public boolean requiresCoordinateSortOrder() { return true; } - public void signalNoMoreReadsBefore( T read ) { + @Override + public void signalNoMoreReadsBefore( final T read ) { updatePositionalState(read); } - private void updatePositionalState( T newRead ) { + private void updatePositionalState( final T newRead ) { if ( readIsPastCurrentPosition(newRead) ) { if ( reservoir.hasFinalizedItems() ) { finalizeReservoir(); @@ -155,13 +151,13 @@ public class SimplePositionalDownsampler implements ReadsDo } } - private void setCurrentPosition( T read ) { + private void setCurrentPosition( final T read ) { currentContigIndex = read.getReferenceIndex(); currentAlignmentStart = read.getAlignmentStart(); positionEstablished = true; } - private boolean readIsPastCurrentPosition( T read ) { + private boolean readIsPastCurrentPosition( final T read ) { return ! 
positionEstablished || read.getReferenceIndex() > currentContigIndex || read.getAlignmentStart() > currentAlignmentStart || @@ -170,6 +166,6 @@ public class SimplePositionalDownsampler implements ReadsDo private void finalizeReservoir() { finalizedReads.addAll(reservoir.consumeFinalizedItems()); - reservoir.reset(); + reservoir.resetStats(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TAROrderedReadCache.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TAROrderedReadCache.java index 80da8f8eb..424bd489e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TAROrderedReadCache.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TAROrderedReadCache.java @@ -43,17 +43,42 @@ import java.util.List; * Time: 11:23 AM */ public class TAROrderedReadCache { - final int maxCapacity; - final Downsampler downsampler; + private final int maxCapacity; + private ArrayList undownsampledCache; + private Downsampler downsampler; + + private static final int UNDOWNSAMPLED_CACHE_MAX_INITIAL_SIZE = 10000; /** * Create a new empty ReadCache * @param maxCapacity the max capacity of the read cache. */ - public TAROrderedReadCache(int maxCapacity) { + public TAROrderedReadCache( final int maxCapacity ) { if ( maxCapacity < 0 ) throw new IllegalArgumentException("maxCapacity must be >= 0 but got " + maxCapacity); this.maxCapacity = maxCapacity; - this.downsampler = new ReservoirDownsampler(maxCapacity); + + // The one we're not currently using will always be null: + initializeUndownsampledCache(); + this.downsampler = null; + } + + /** + * Moves all reads over to the downsampler, causing it to be used from this point on. Should be called + * when the undownsampledCache fills up and we need to start discarding reads. Since the + * ReservoirDownsampler doesn't preserve relative ordering, pop operations become expensive + * after this point, as they require a O(n log n) sort. 
+ */ + private void activateDownsampler() { + downsampler = new ReservoirDownsampler<>(maxCapacity, false); + downsampler.submit(undownsampledCache); + undownsampledCache = null; // preferable to the O(n) clear() method + } + + /** + * Allocate the undownsampled cache used when we have fewer than maxCapacity items + */ + private void initializeUndownsampledCache() { + undownsampledCache = new ArrayList<>(Math.min(maxCapacity + 1, UNDOWNSAMPLED_CACHE_MAX_INITIAL_SIZE)); } /** @@ -68,18 +93,31 @@ public class TAROrderedReadCache { * Add a single read to this cache. Assumed to be in sorted order w.r.t. the previously added reads * @param read a read to add */ - public void add(final GATKSAMRecord read) { + public void add( final GATKSAMRecord read ) { if ( read == null ) throw new IllegalArgumentException("Read cannot be null"); - downsampler.submit(read); + + if ( downsampler != null ) { + downsampler.submit(read); + } + else { + undownsampledCache.add(read); + + // No more room in the undownsampledCache? Time to start downsampling + if ( undownsampledCache.size() > maxCapacity ) { + activateDownsampler(); + } + } } /** * Add a collection of reads to this cache. Assumed to be in sorted order w.r.t. the previously added reads and each other * @param reads a collection of reads to add */ - public void addAll(final List reads) { + public void addAll( final List reads ) { if ( reads == null ) throw new IllegalArgumentException("Reads cannot be null"); - downsampler.submit(reads); + for ( final GATKSAMRecord read : reads ) { + add(read); + } } /** @@ -87,40 +125,44 @@ public class TAROrderedReadCache { * @return a positive integer */ public int size() { - return downsampler.size(); + return downsampler != null ? 
downsampler.size() : undownsampledCache.size(); } /** * How many reads were discarded since the last call to popCurrentReads - * @return + * + * @return number of items discarded during downsampling since last pop operation */ public int getNumDiscarded() { - return downsampler.getNumberOfDiscardedItems(); + return downsampler != null ? downsampler.getNumberOfDiscardedItems() : 0; } /** * Removes all reads currently in the cache, and returns them in sorted order (w.r.t. alignmentStart) * - * Flushes this cache, so after this call the cache will contain no reads and all downsampling stats will - * be reset. + * Flushes this cache, so after this call the cache will contain no reads, and we'll be in the same + * initial state as the constructor would put us in, with a non-null undownsampledCache and a null + * downsampler. * * @return a list of GATKSAMRecords in this cache */ public List popCurrentReads() { - final List maybeUnordered = downsampler.consumeFinalizedItems(); + final List poppedReads; - final List ordered; - if ( downsampler.getNumberOfDiscardedItems() == 0 ) { - // haven't discarded anything, so the reads are ordered properly - ordered = maybeUnordered; - } else { - // we need to sort these damn things: O(n log n) - ordered = new ArrayList(maybeUnordered); - Collections.sort(ordered, new AlignmentStartComparator()); + if ( downsampler == null ) { + poppedReads = undownsampledCache; // avoid making a copy here, since we're going to allocate a new cache + } + else { + // If we triggered the downsampler, we need to sort the reads before returning them, + // since the ReservoirDownsampler is not guaranteed to preserve relative ordering of items. + // After consuming the downsampled items in this call to popCurrentReads(), we switch back + // to using the undownsampledCache until we fill up again. 
+ poppedReads = downsampler.consumeFinalizedItems(); // avoid making a copy here + Collections.sort(poppedReads, new AlignmentStartComparator()); + downsampler = null; } - // reset the downsampler stats so getNumberOfDiscardedItems is 0 - downsampler.reset(); - return ordered; + initializeUndownsampledCache(); + return poppedReads; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java index c4b566582..86f3500be 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java @@ -123,6 +123,15 @@ public class AlignmentStateMachine { return getRead().getReferenceIndex(); } + /** + * Is our read a reduced read? + * + * @return true if the read we encapsulate is a reduced read, otherwise false + */ + public boolean isReducedRead() { + return read.isReducedRead(); + } + /** * Is this the left edge state? I.e., one that is before or after the current read? 
* @return true if this state is an edge state, false otherwise diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java index 2caaf9d27..669e76adc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java @@ -167,7 +167,7 @@ final class PerSampleReadStateManager implements Iterable // use returned List directly rather than make a copy, for efficiency's sake readStatesByAlignmentStart = flattenByAlignmentStart(levelingDownsampler.consumeFinalizedItems()); - levelingDownsampler.reset(); + levelingDownsampler.resetStats(); } return nStatesAdded; diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/SamplePartitioner.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/SamplePartitioner.java index 49a8d10aa..9122beebb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/SamplePartitioner.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/SamplePartitioner.java @@ -164,8 +164,8 @@ class SamplePartitioner { @Ensures("doneSubmittingReads == false") public void reset() { for ( final Downsampler downsampler : readsBySample.values() ) { - downsampler.clear(); - downsampler.reset(); + downsampler.clearItems(); + downsampler.resetStats(); } doneSubmittingReads = false; } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index b8367a7df..055f8630b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -324,6 +324,31 @@ public class ArtificialSAMUtils { return Arrays.asList(left, right); } + 
/** + * Create an artificial reduced read based on the parameters. The cigar string will be *M, where * is the + * length of the read. The base counts specified in the baseCounts array will be stored fully encoded in + * the RR attribute. + * + * @param header the SAM header to associate the read with + * @param name the name of the read + * @param refIndex the reference index, i.e. what chromosome to associate it with + * @param alignmentStart where to start the alignment + * @param length the length of the read + * @param baseCounts reduced base counts to encode in the RR attribute; length must match the read length + * @return the artificial reduced read + */ + public static GATKSAMRecord createArtificialReducedRead( final SAMFileHeader header, + final String name, + final int refIndex, + final int alignmentStart, + final int length, + final int[] baseCounts ) { + final GATKSAMRecord read = createArtificialRead(header, name, refIndex, alignmentStart, length); + read.setReducedReadCounts(baseCounts); + read.setReducedReadCountsTag(); + return read; + } + /** * Create a collection of identical artificial reads based on the parameters. The cigar string for each * read will be *M, where * is the length of the read. 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerUnitTest.java index 6f18d794f..8f0eee069 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerUnitTest.java @@ -30,6 +30,7 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import org.testng.Assert; @@ -152,7 +153,39 @@ public class FractionalDownsamplerUnitTest extends BaseTest { Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), test.totalReads - downsampledReads.size()); - downsampler.reset(); + downsampler.resetStats(); Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0); } + + @Test + public void testDoNotDiscardReducedReads() { + GenomeAnalysisEngine.resetRandomGenerator(); + final ReadsDownsampler downsampler = new FractionalDownsampler(0.0); + + final Collection reads = new ArrayList(); + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); + final int[] baseCounts = { 10, 10, 10, 10, 10 }; + + for ( int i = 1; i <= 10; i++ ) { + reads.add(ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, 1, 5, baseCounts)); + } + for ( int i = 1; i <= 5; i++ ) { + reads.add(ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5)); + } + + downsampler.submit(reads); + downsampler.signalEndOfInput(); + + Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 5, "wrong number of items discarded by the downsampler"); + Assert.assertTrue(downsampler.hasFinalizedItems(), "downsampler 
should have finalized items but doesn't"); + Assert.assertEquals(downsampler.size(), 10, "downsampler size() reports wrong number of items"); + + final Collection readsReturned = downsampler.consumeFinalizedItems(); + + Assert.assertEquals(readsReturned.size(), 10, "wrong number of items returned by the downsampler"); + + for ( GATKSAMRecord readReturned : readsReturned ) { + Assert.assertTrue(readReturned.isReducedRead(), "non-reduced read survived the downsampling process, but shouldn't have"); + } + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java index 972e51dcd..8cf0fd2a1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java @@ -25,16 +25,17 @@ package org.broadinstitute.sting.gatk.downsampling; +import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.locusiterator.AlignmentStateMachine; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; import org.testng.Assert; -import java.util.ArrayList; -import java.util.Collection; -import java.util.LinkedList; -import java.util.List; +import java.util.*; public class LevelingDownsamplerUnitTest extends BaseTest { @@ -158,9 +159,46 @@ public class LevelingDownsamplerUnitTest extends BaseTest { Assert.assertEquals(numItemsReportedDiscarded, numItemsActuallyDiscarded); - downsampler.reset(); + downsampler.resetStats(); Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0); Assert.assertTrue(totalRemainingItems <= Math.max(test.targetSize, 
test.numStacks)); } + + @Test + public void testDoNotDiscardReducedReads() { + GenomeAnalysisEngine.resetRandomGenerator(); + final Downsampler> downsampler = new LevelingDownsampler, AlignmentStateMachine>(1); + + final Collection> groups = new LinkedList>(); + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); + final int[] baseCounts = { 10, 10, 10, 10, 10 }; + + for ( int alignmentStart : Arrays.asList(1, 2, 3) ) { + final LinkedList group = new LinkedList(); + for ( int i = 1; i <= 10; i++ ) { + group.add(new AlignmentStateMachine(ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, alignmentStart, 5, baseCounts))); + } + groups.add(group); + } + + downsampler.submit(groups); + downsampler.signalEndOfInput(); + + Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0, "wrong number of items discarded by the downsampler"); + Assert.assertTrue(downsampler.hasFinalizedItems(), "downsampler should have finalized items but doesn't"); + Assert.assertEquals(downsampler.size(), 30, "downsampler size() reports wrong number of items"); + + final Collection> groupsReturned = downsampler.consumeFinalizedItems(); + + Assert.assertEquals(groupsReturned.size(), 3, "wrong number of groups returned by the downsampler"); + + for ( LinkedList group : groupsReturned ) { + Assert.assertEquals(group.size(), 10, "group has wrong size after downsampling"); + + for ( AlignmentStateMachine state : group ) { + Assert.assertTrue(state.isReducedRead()); + } + } + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java index 022eb02d2..a50201efd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java @@ -30,6 +30,7 @@ import 
net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import org.testng.Assert; @@ -125,7 +126,49 @@ public class ReservoirDownsamplerUnitTest extends BaseTest { Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), test.expectedNumDiscardedItems); Assert.assertEquals(test.totalReads - downsampledReads.size(), test.expectedNumDiscardedItems); - downsampler.reset(); + downsampler.resetStats(); Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0); } + + @Test + public void testDoNotDiscardReducedReads() { + GenomeAnalysisEngine.resetRandomGenerator(); + final ReadsDownsampler downsampler = new ReservoirDownsampler(1); + + final Collection reads = new ArrayList(); + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); + final int[] baseCounts = { 10, 10, 10, 10, 10 }; + + for ( int i = 1; i <= 10; i++ ) { + reads.add(ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, 1, 5, baseCounts)); + } + for ( int i = 1; i <= 5; i++ ) { + reads.add(ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5)); + } + + downsampler.submit(reads); + downsampler.signalEndOfInput(); + + Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 4, "wrong number of items discarded by the downsampler"); + Assert.assertTrue(downsampler.hasFinalizedItems(), "downsampler should have finalized items but doesn't"); + Assert.assertEquals(downsampler.size(), 11, "downsampler size() reports wrong number of items"); + + final Collection readsReturned = downsampler.consumeFinalizedItems(); + + Assert.assertEquals(readsReturned.size(), 11, "wrong number of items returned by the downsampler"); + + int numReducedReadsReturned = 0; + int 
numNormalReadsReturned = 0; + for ( GATKSAMRecord readReturned : readsReturned ) { + if ( readReturned.isReducedRead() ) { + numReducedReadsReturned++; + } + else { + numNormalReadsReturned++; + } + } + + Assert.assertEquals(numReducedReadsReturned, 10, "wrong number of reduced reads returned by the downsampler"); + Assert.assertEquals(numNormalReadsReturned, 1, "wrong number of non-reduced reads returned by the downsampler"); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java index c6b0dea29..bec0030d0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java @@ -177,7 +177,7 @@ public class SimplePositionalDownsamplerUnitTest extends BaseTest { Assert.assertEquals(numReadsActuallyEliminated, numReadsReportedEliminated); } - downsampler.reset(); + downsampler.resetStats(); Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0); } @@ -328,4 +328,48 @@ public class SimplePositionalDownsamplerUnitTest extends BaseTest { Assert.assertEquals(downsampledReads.size(), 10); } + + @Test + public void testDoNotDiscardReducedReads() { + GenomeAnalysisEngine.resetRandomGenerator(); + final ReadsDownsampler downsampler = new SimplePositionalDownsampler(1); + + final Collection reads = new ArrayList(); + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); + final int[] baseCounts = { 10, 10, 10, 10, 10 }; + + for ( int alignmentStart : Arrays.asList(1, 2, 3) ) { + for ( int i = 1; i <= 10; i++ ) { + reads.add(ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, alignmentStart, 5, baseCounts)); + } + for ( int i = 1; i <= 5; i++ ) { + reads.add(ArtificialSAMUtils.createArtificialRead(header, 
"foo", 0, alignmentStart, 5)); + } + } + + downsampler.submit(reads); + downsampler.signalEndOfInput(); + + Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 12, "wrong number of items discarded by the downsampler"); + Assert.assertTrue(downsampler.hasFinalizedItems(), "downsampler should have finalized items but doesn't"); + Assert.assertEquals(downsampler.size(), 33, "downsampler size() reports wrong number of items"); + + final Collection readsReturned = downsampler.consumeFinalizedItems(); + + Assert.assertEquals(readsReturned.size(), 33, "wrong number of items returned by the downsampler"); + + int numReducedReadsReturned = 0; + int numNormalReadsReturned = 0; + for ( GATKSAMRecord readReturned : readsReturned ) { + if ( readReturned.isReducedRead() ) { + numReducedReadsReturned++; + } + else { + numNormalReadsReturned++; + } + } + + Assert.assertEquals(numReducedReadsReturned, 30, "wrong number of reduced reads returned by the downsampler"); + Assert.assertEquals(numNormalReadsReturned, 3, "wrong number of non-reduced reads returned by the downsampler"); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TAROrderedReadCacheUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TAROrderedReadCacheUnitTest.java index f3e1ce44b..4d85997b3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TAROrderedReadCacheUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TAROrderedReadCacheUnitTest.java @@ -26,9 +26,11 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialBAMBuilder; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import 
org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -39,6 +41,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; public class TAROrderedReadCacheUnitTest extends BaseTest { @@ -98,8 +101,53 @@ public class TAROrderedReadCacheUnitTest extends BaseTest { Assert.assertEquals(cache.getNumDiscarded(), 0, "should have reset stats"); Assert.assertEquals(cacheReads.size(), nExpectedToKeep, "should have 1 read for every read we expected to keep"); + verifySortednessOfReads(cacheReads); + } + + @Test + public void testReadCacheWithReducedReads() { + final List reads = new ArrayList(); + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); + final int[] baseCounts = { 10, 10, 10, 10, 10 }; + + for ( int i = 1; i <= 100; i++ ) { + reads.add(ArtificialSAMUtils.createArtificialReducedRead(header, "foo", 0, i, 5, baseCounts)); + reads.add(ArtificialSAMUtils.createArtificialRead(header, "foo", 0, i, 5)); + } + + final TAROrderedReadCache cache = new TAROrderedReadCache(50); + + cache.addAll(reads); + + // Our cache should have kept all of the reduced reads (which are retained unconditionally and do not count + // towards the capacity limit), and discarded half of the 100 non-reduced reads due to the cache capacity + // limit of 50. 
+ Assert.assertEquals(cache.size(), 150, "wrong number of reads in the cache at the end"); + Assert.assertEquals(cache.getNumDiscarded(), 50, "wrong number of reads discarded from the cache"); + + final List cacheReads = cache.popCurrentReads(); + + int numReducedReadsRetained = 0; + int numNormalReadsRetained = 0; + + for ( GATKSAMRecord read : cacheReads ) { + if ( read.isReducedRead() ) { + numReducedReadsRetained++; + } + else { + numNormalReadsRetained++; + } + } + + Assert.assertEquals(numReducedReadsRetained, 100, "wrong number of reduced reads retained in the cache"); + Assert.assertEquals(numNormalReadsRetained, 50, "wrong number of non-reduced reads retained in the cache"); + + verifySortednessOfReads(cacheReads); + } + + private void verifySortednessOfReads( final List reads) { int lastStart = -1; - for ( final GATKSAMRecord read : cacheReads ) { + for ( GATKSAMRecord read : reads ) { Assert.assertTrue(lastStart <= read.getAlignmentStart(), "Reads should be sorted but weren't. Found read with start " + read.getAlignmentStart() + " while last was " + lastStart); lastStart = read.getAlignmentStart(); } From d1f397c7115e37dd4b54091a60b71fdb57260d89 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 12 Jun 2013 12:22:36 -0400 Subject: [PATCH 086/116] Fixing bug with dangling tails in which the tail connects all the way back to the reference source node. -- List of vertices can't contain a source node. 
--- .../sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java | 4 ++-- .../haplotypecaller/readthreading/ReadThreadingAssembler.java | 2 +- .../haplotypecaller/readthreading/ReadThreadingGraph.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java index 70ef539f3..2b37d90c2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java @@ -309,7 +309,7 @@ public class BaseGraph extends Default } v = getNextReferenceVertex(v); // advance along the reference path while( v != null && !v.equals(toVertex) ) { - bytes = ArrayUtils.addAll( bytes, getAdditionalSequence(v) ); + bytes = ArrayUtils.addAll(bytes, getAdditionalSequence(v)); v = getNextReferenceVertex(v); // advance along the reference path } if( includeStop && v != null && v.equals(toVertex)) { @@ -561,7 +561,7 @@ public class BaseGraph extends Default verticesToRemove.removeAll(onPathFromRefSource); removeAllVertices(verticesToRemove); - // simple santity checks that this algorithm is working. + // simple sanity checks that this algorithm is working. 
if ( getSinks().size() > 1 ) { throw new IllegalStateException("Should have eliminated all but the reference sink, but found " + getSinks()); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index f4290f2bb..fc0f781c5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -207,7 +207,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { * @return */ private boolean reasonableNumberOfPaths(final SeqGraph graph) { - final KBestPaths pathFinder = new KBestPaths(false); + final KBestPaths pathFinder = new KBestPaths<>(false); final List> allPaths = pathFinder.getKBestPaths(graph, 100000); logger.info("Found " + allPaths.size() + " paths through " + graph + " with maximum " + maxAllowedPathsForReadThreadingAssembler); return allPaths.size() <= maxAllowedPathsForReadThreadingAssembler; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index 8d8cb83f6..0844f979b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -339,7 +339,7 @@ public class ReadThreadingGraph extends BaseGraph altPath = findPathToLowestCommonAncestorOfReference(vertex); - if ( altPath == null ) + if ( altPath == null || isRefSource(altPath.get(0)) ) return null; // now get the reference path from the LCA From 
d5f0848bd50d698e5509ffcdd20b4de6e39c684c Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 13 Jun 2013 09:59:16 -0400 Subject: [PATCH 087/116] HC bam writer now sets the read to MQ0 if it isn't informative -- Makes visualization of read evidence easier in IGV. --- .../haplotypeBAMWriter/AllHaplotypeBAMWriter.java | 8 ++++---- .../CalledHaplotypeBAMWriter.java | 6 +++--- .../haplotypeBAMWriter/HaplotypeBAMWriter.java | 14 +++++++++++--- .../HaplotypeBAMWriterUnitTest.java | 6 +++--- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/AllHaplotypeBAMWriter.java b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/AllHaplotypeBAMWriter.java index 54061c781..e7e5cf0e1 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/AllHaplotypeBAMWriter.java +++ b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/AllHaplotypeBAMWriter.java @@ -80,18 +80,18 @@ class AllHaplotypeBAMWriter extends HaplotypeBAMWriter { final List bestHaplotypes, final Set calledHaplotypes, final Map stratifiedReadMap) { - writeHaplotypesAsReads(haplotypes, new HashSet(bestHaplotypes), paddedReferenceLoc); + writeHaplotypesAsReads(haplotypes, new HashSet<>(bestHaplotypes), paddedReferenceLoc); // we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently - final Map alleleToHaplotypeMap = new HashMap(haplotypes.size()); + final Map alleleToHaplotypeMap = new HashMap<>(haplotypes.size()); for ( final Haplotype haplotype : haplotypes ) alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype); // next, output the interesting reads for each sample aligned against the appropriate haplotype for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) { - for ( Map.Entry> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) { + for ( final 
Map.Entry> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) { final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue()); - writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart()); + writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative()); } } } diff --git a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/CalledHaplotypeBAMWriter.java b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/CalledHaplotypeBAMWriter.java index d63cf65fc..7206dd674 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/CalledHaplotypeBAMWriter.java +++ b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/CalledHaplotypeBAMWriter.java @@ -87,7 +87,7 @@ class CalledHaplotypeBAMWriter extends HaplotypeBAMWriter { writeHaplotypesAsReads(calledHaplotypes, calledHaplotypes, paddedReferenceLoc); // we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently - final Map alleleToHaplotypeMap = new HashMap(haplotypes.size()); + final Map alleleToHaplotypeMap = new HashMap<>(haplotypes.size()); for ( final Haplotype haplotype : calledHaplotypes ) { alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype); } @@ -97,10 +97,10 @@ class CalledHaplotypeBAMWriter extends HaplotypeBAMWriter { // next, output the interesting reads for each sample aligned against one of the called haplotypes for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) { - for ( Map.Entry> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) { + for ( final Map.Entry> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) { if ( entry.getKey().getMappingQuality() > 0 ) { 
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue(), allelesOfCalledHaplotypes); - writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart()); + writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative()); } } } diff --git a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriter.java b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriter.java index 2eea664d9..1afbeed63 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriter.java +++ b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriter.java @@ -185,11 +185,13 @@ public abstract class HaplotypeBAMWriter { * @param originalRead the read we want to write aligned to the reference genome * @param haplotype the haplotype that the read should be aligned to, before aligning to the reference * @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame. 
+ * @param isInformative true if the read is differentially informative for one of the haplotypes */ protected void writeReadAgainstHaplotype(final GATKSAMRecord originalRead, final Haplotype haplotype, - final int referenceStart) { - final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart); + final int referenceStart, + final boolean isInformative) { + final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart, isInformative); if ( alignedToRef != null ) bamWriter.addAlignment(alignedToRef); } @@ -201,11 +203,13 @@ public abstract class HaplotypeBAMWriter { * @param originalRead the read we want to write aligned to the reference genome * @param haplotype the haplotype that the read should be aligned to, before aligning to the reference * @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame. + * @param isInformative true if the read is differentially informative for one of the haplotypes * @return a GATKSAMRecord aligned to reference, or null if no meaningful alignment is possible */ protected GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead, final Haplotype haplotype, - final int referenceStart) { + final int referenceStart, + final boolean isInformative) { if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null"); if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null"); if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype); @@ -225,6 +229,10 @@ public abstract class HaplotypeBAMWriter { addHaplotypeTag(read, haplotype); + // uninformative reads are set to zero mapping quality to enhance visualization + if ( !isInformative ) + read.setMappingQuality(0); + // compute here the read starts w.r.t. 
the reference from the SW result and the hap -> ref cigar final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000); final int readStartOnHaplotype = AlignmentUtils.calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1()); diff --git a/protected/java/test/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java index 91a2988aa..0c76ad338 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java @@ -177,10 +177,10 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest { final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone(); if ( expectedReadCigar == null ) { - Assert.assertNull(writer.createReadAlignedToRef(read, haplotype, refStart)); + Assert.assertNull(writer.createReadAlignedToRef(read, haplotype, refStart, true)); } else { final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar); - final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, refStart); + final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, refStart, true); Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName()); Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart); @@ -290,7 +290,7 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest { @Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG) public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception { final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockBAMWriter()); - final 
GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, 1); + final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, 1, true); if ( alignedRead != null ) { final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches; Assert.assertTrue(mismatches <= expectedMaxMismatches, From f44efc27ae1c79c1d376d930014ce0d22dda6f8b Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 13 Jun 2013 10:05:53 -0400 Subject: [PATCH 088/116] Relaxing the constraints on the readIsPoorlyModelled function. -- Turns out we were aggressively throwing out borderline-good reads. --- .../haplotypecaller/HaplotypeCaller.java | 2 +- ...plexAndSymbolicVariantsIntegrationTest.java | 6 +++--- .../HaplotypeCallerIntegrationTest.java | 18 +++++++++--------- ...HaplotypeCallerParallelIntegrationTest.java | 2 +- .../PerReadAlleleLikelihoodMapUnitTest.java | 6 +++--- .../genotyper/PerReadAlleleLikelihoodMap.java | 6 +++--- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index fb7fb652c..f3f54060f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -459,7 +459,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // the minimum length of a read we'd consider using for genotyping private final static int MIN_READ_LENGTH = 10; - private List samplesList = new ArrayList(); + private List samplesList = new ArrayList<>(); private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. 
Should never appear anywhere in a VCF file private final static Allele FAKE_ALT_ALLELE = Allele.create("", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 073d54ec5..8394baa72 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "d21f15a5809fe5259af41ae6774af6f1"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "e7b28ea087e8624f1e596c9d65381fea"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "d4a0797c2fd4c103bf9a137633376156"); + "321dc9f3d330790bac7981ffae00cb0c"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "a9872228d0275a30f5a1f7e070a9c9f4"); + "2a72a9b5c6778b99bf155a7c5e90d11e"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index dbdd0afcd..f9bab8ea7 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -78,12 +78,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "e9167a1bfc0fc276586788d1ce1be408"); + HCTest(CEUTRIO_BAM, "", "f25b9cfc85995cbe8eb6ba5a126d713d"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "b1d46afb9659ac3b92a3d131b58924ef"); + HCTest(NA12878_BAM, "", "19d685727ec60b3568f313bc44f79b49"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -94,7 +94,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "d83856b8136776bd731a8037c16b71fa"); + "6da65f1d396b9c709eb6246cf3f615c1"); } @Test @@ -110,7 +110,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "70c4476816f5d35c9978c378dbeac09b"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e3db7d56154e36eeb887259bea4b241d"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -147,7 +147,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "947aae309ecab7cd3f17ff9810884924"); + HCTestNearbySmallIntervals(NA12878_BAM, "", 
"6e170d03047caefc2fba3f1c1f8de132"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -186,7 +186,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("0124c4923d96ec0f8222b596dd4ef534")); + Arrays.asList("40416433baf96f4e84a058459717060b")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -194,7 +194,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("0e020dcfdf249225714f5cd86ed3869f")); + Arrays.asList("cf1461ce829023ea9920fbfeb534eb97")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } @@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("446a786bb539f3ec2084dd75167568aa")); + Arrays.asList("45ca324be3917655e645d6c290c9280f")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -217,7 +217,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller 
--disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 + " -L " + hg19Intervals + " -isr INTERSECTION", 1, - Arrays.asList("9587029b702bb59bd4dfec69eac4c210")); + Arrays.asList("b7037770b7953cdf858764b99fa243ed")); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index 62e685eab..857d0fc9e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "ef42a438b82681d1c0f921c57e16ff12"}); + tests.add(new Object[]{nct, "bd2a57e6b0cffb4cbdba609a6c1683dc"}); } return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java index 9530ea41f..651beffc8 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java @@ -233,7 +233,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest { tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0)}); tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -3.0)}); tests.add(new Object[]{100, 
0.01, false, Arrays.asList(-5.0, -10.0, -2.0)}); - tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0, -4.0)}); + tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0, -4.2)}); tests.add(new Object[]{100, 0.001, true, Arrays.asList(-5.0, -10.0)}); tests.add(new Object[]{100, 0.001, false, Arrays.asList(-5.0, -10.0, 0.0)}); @@ -243,7 +243,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest { @Test(dataProvider = "PoorlyModelledReadData") public void testPoorlyModelledRead(final int readLen, final double maxErrorRatePerBase, final boolean expected, final List log10likelihoods) { final byte[] bases = Utils.dupBytes((byte)'A', readLen); - final byte[] quals = Utils.dupBytes((byte) 30, readLen); + final byte[] quals = Utils.dupBytes((byte) 40, readLen); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, readLen + "M"); @@ -279,7 +279,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest { final double likelihood = bad ? 
-100.0 : 0.0; final byte[] bases = Utils.dupBytes((byte)'A', readLen); - final byte[] quals = Utils.dupBytes((byte) 30, readLen); + final byte[] quals = Utils.dupBytes((byte) 40, readLen); final Allele allele = Allele.create(Utils.dupString("A", readI+1)); diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index 8067d67bc..70be85f54 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -321,7 +321,7 @@ public class PerReadAlleleLikelihoodMap { * @return the list of reads removed from this map because they are poorly modelled */ public List filterPoorlyModelledReads(final double maxErrorRatePerBase) { - final List removedReads = new LinkedList(); + final List removedReads = new LinkedList<>(); final Iterator>> it = likelihoodReadMap.entrySet().iterator(); while ( it.hasNext() ) { final Map.Entry> record = it.next(); @@ -356,8 +356,8 @@ public class PerReadAlleleLikelihoodMap { * @return true if none of the log10 likelihoods imply that the read truly originated from one of the haplotypes */ protected boolean readIsPoorlyModelled(final GATKSAMRecord read, final Collection log10Likelihoods, final double maxErrorRatePerBase) { - final double maxErrorsForRead = Math.ceil(read.getReadLength() * maxErrorRatePerBase); - final double log10QualPerBase = -3.0; + final double maxErrorsForRead = Math.min(2.0, Math.ceil(read.getReadLength() * maxErrorRatePerBase)); + final double log10QualPerBase = -4.0; final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase; for ( final double log10Likelihood : log10Likelihoods ) From dd6e2523731bd829e3df5658bbb2ba8505346a54 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 12 Jun 2013 17:47:27 -0400 Subject: [PATCH 089/116] 
GATKRunReport no longer tries to use the Broad filesystem destination, rather it goes unconditionally to S3 --- .../org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index de84809bd..9704454c9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -419,7 +419,7 @@ public class GATKRunReport { * @return true if and only if the common run report repository is available and online to receive reports */ private boolean repositoryIsOnline() { - return REPORT_SENTINEL.exists(); + return false; // REPORT_SENTINEL.exists(); } From 33720b83ebd093dea536b43428f937994cfc9bc4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 10 Jun 2013 14:52:41 -0400 Subject: [PATCH 090/116] No longer merge overlapping fragments from HaplotypeCaller -- Merging overlapping fragments turns out to be a bad idea. In the case where you can safely merge the reads you only gain a small amount of overlapping kmers, so the potential gains are relatively small. That's in contrast to the very large danger of merging reads inappropriately, such as when the reads only overlap in a repetitive region, and you artificially construct reads that look like the reference but actually may carry a larger true insertion w.r.t. the reference. Because this problem isn't limited to repetitive sequence, but in principle could occur in any sequence, it's just not safe to do this merging. Best to leave haplotype construction to the assembly graph.
--- .../walkers/haplotypecaller/HaplotypeCaller.java | 14 +++----------- ...omplexAndSymbolicVariantsIntegrationTest.java | 4 ++-- .../HaplotypeCallerIntegrationTest.java | 16 ++++++++-------- .../HaplotypeCallerParallelIntegrationTest.java | 2 +- 4 files changed, 14 insertions(+), 22 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index f3f54060f..b94b74748 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -919,19 +919,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In private void finalizeActiveRegion( final ActiveRegion activeRegion ) { if( DEBUG ) { logger.info("Assembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } - final List finalizedReadList = new ArrayList<>(); - final FragmentCollection fragmentCollection = FragmentUtils.create( activeRegion.getReads() ); - activeRegion.clearReads(); - - // Join overlapping paired reads to create a single longer read - finalizedReadList.addAll( fragmentCollection.getSingletonReads() ); - for( final List overlappingPair : fragmentCollection.getOverlappingPairs() ) { - finalizedReadList.addAll( FragmentUtils.mergeOverlappingPairedFragments(overlappingPair) ); - } // Loop through the reads hard clipping the adaptor and low quality tails - final List readsToUse = new ArrayList<>(finalizedReadList.size()); - for( final GATKSAMRecord myRead : finalizedReadList ) { + final List readsToUse = new ArrayList<>(activeRegion.getReads().size()); + for( final GATKSAMRecord myRead : activeRegion.getReads() ) { final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? 
myRead : ReadClipper.hardClipAdaptorSequence( myRead ) ); if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { GATKSAMRecord clippedRead; @@ -962,6 +953,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } } + activeRegion.clearReads(); activeRegion.addAll(DownsamplingUtils.levelCoverageByPosition(ReadUtils.sortReadsByCoordinate(readsToUse), maxReadsInRegionPerSample, minReadsPerAlignmentStart)); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 8394baa72..c1b8f8a70 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "e7b28ea087e8624f1e596c9d65381fea"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "03944bbedb012e2ac2026a84baa0560c"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -94,6 +94,6 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "2a72a9b5c6778b99bf155a7c5e90d11e"); + "7e9f99d4cba8087dac66ea871b910d7e"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index f9bab8ea7..da92f39fc 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -78,12 +78,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "f25b9cfc85995cbe8eb6ba5a126d713d"); + HCTest(CEUTRIO_BAM, "", "09d84bc1aef2dd9c185934752172b794"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "19d685727ec60b3568f313bc44f79b49"); + HCTest(NA12878_BAM, "", "5c074930b27d1f5c942fe755c2a8be27"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -94,7 +94,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "6da65f1d396b9c709eb6246cf3f615c1"); + "005a6d1933913a5d96fc56d01303fa95"); } @Test @@ -110,7 +110,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e3db7d56154e36eeb887259bea4b241d"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "9b6f667ad87e19c38d16fefe63c37484"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -186,7 +186,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + 
privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("40416433baf96f4e84a058459717060b")); + Arrays.asList("a47ef09a8701128cfb301a83b7bb0728")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -194,7 +194,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("cf1461ce829023ea9920fbfeb534eb97")); + Arrays.asList("0cb99f6bb3e630add4b3486c496fa508")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } @@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("45ca324be3917655e645d6c290c9280f")); + Arrays.asList("92f947cc89e4f50cf2ef3121d2fe308d")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -217,7 +217,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 + " -L " + hg19Intervals + " -isr INTERSECTION", 1, - Arrays.asList("b7037770b7953cdf858764b99fa243ed")); + Arrays.asList("91877c8ea3eb0e0316d9ad11fdcc1a87")); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); } } diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index 857d0fc9e..d009550f4 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "bd2a57e6b0cffb4cbdba609a6c1683dc"}); + tests.add(new Object[]{nct, "9da4cc89590c4c64a36f4a9c820f8609"}); } return tests.toArray(new Object[][]{}); From dd5674b3b8c088e7dbb0b7e1822f4e55d02f7315 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 12 Jun 2013 12:43:19 -0400 Subject: [PATCH 091/116] Add genotyping accuracy assessment to AssessNA12878 -- Now table looks like: Name VariantType AssessmentType Count variant SNPS TRUE_POSITIVE 1220 variant SNPS FALSE_POSITIVE 0 variant SNPS FALSE_NEGATIVE 1 variant SNPS TRUE_NEGATIVE 150 variant SNPS CALLED_NOT_IN_DB_AT_ALL 0 variant SNPS HET_CONCORDANCE 100.00 variant SNPS HOMVAR_CONCORDANCE 99.63 variant INDELS TRUE_POSITIVE 273 variant INDELS FALSE_POSITIVE 0 variant INDELS FALSE_NEGATIVE 15 variant INDELS TRUE_NEGATIVE 79 variant INDELS CALLED_NOT_IN_DB_AT_ALL 2 variant INDELS HET_CONCORDANCE 98.67 variant INDELS HOMVAR_CONCORDANCE 89.58 -- Rewrote / refactored parts of subsetDiploidAlleles in GATKVariantContextUtils to have a BEST_MATCH assignment method that does its best to simply match the genotype after subsetting to a set of alleles. So if the original GT was A/B and you subset to A/B it remains A/B but if you subset to A/C you get A/A.
This means that het-alt B/C genotypes become A/B and A/C when subsetting to bi-allelics which is the convention in the KB. Add lots of unit tests for these functions (from 0 previously) -- BadSites in Assessment now emits TP sites with discordant genotypes with the type GENOTYPE_DISCORDANCE and tags the expected genotype in the info field as ExpectedGenotype, such as this record: 20 10769255 . A ATGTG 165.73 . ExpectedGenotype=HOM_VAR;SupportingCallsets=ebanks,depristo,CEUTrio_best_practices;WHY=GENOTYPE_DISCORDANCE GT:AD:DP:GQ:PL 0/1:1,9:10:6:360,0,6 Indicating that the call was a HET but the expected result was HOM_VAR -- Forbid subsetting of diploid genotypes to just a single allele. -- Added subsetToRef as a separate specific function. Use that in the DiploidExactAFCalc in the case that you need to reduce yourself to ref only. Preserves DP in the genotype field when this is possible, so a few integration tests have changed for the UG --- .../genotyper/afcalc/DiploidExactAFCalc.java | 7 +- ...dGenotyperIndelCallingIntegrationTest.java | 2 +- ...GenotyperNormalCallingIntegrationTest.java | 4 +- .../variant/GATKVariantContextUtils.java | 206 ++++++++++++---- .../GATKVariantContextUtilsUnitTest.java | 233 ++++++++++++++++-- 5 files changed, 380 insertions(+), 72 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 170b6e250..2ece18002 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -106,7 +106,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { alleles.add(vc.getReference()); alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles())); builder.alleles(alleles); -
builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); + builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL)); return builder.make(); } else { return vc; @@ -352,6 +352,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { final List allelesToUse, final boolean assignGenotypes, final int ploidy) { - return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes); + return allelesToUse.size() == 1 + ? GATKVariantContextUtils.subsetToRefOnly(vc, ploidy) + : GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, + assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 98a482c6f..64a27c4c3 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("294183823d678d3668f4fa98b4de6e06")); + Arrays.asList("facac578891a4f2be63ddd5ba6b9096b")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index bf4316415..f7c5e6fd5 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("5e8f1fa88dc93320cc0e75e9fe6e153b")); + Arrays.asList("474dfb943a307c86cabe2043970c58f3")); executeTest("test MultiSample Pilot1", spec); } @@ -80,7 +80,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("60115af273fde49c76d4df6c9c0f6501")); + Arrays.asList("3e646003c5b93da80c7d8e5d0ff2ee4e")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index b5a6e82a0..3bc5da82f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -45,7 +45,7 @@ public class GATKVariantContextUtils { public static final int DEFAULT_PLOIDY = 2; public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + protected static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); public final static String MERGE_FILTER_PREFIX = "filterIn"; public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; @@ -421,6 +421,37 @@ public class GATKVariantContextUtils { return true; // we passed all tests, we matched } + public enum GenotypeAssignmentMethod { + /** + * set all of the genotype GT values to NO_CALL + */ + SET_TO_NO_CALL, + + /** + * Use the subsetted PLs to greedily assigned genotypes + */ + USE_PLS_TO_ASSIGN, + + /** + * Try to match the original GT calls, if at all possible + * + * Suppose I have 3 alleles: A/B/C and the following samples: + * + * original_GT best_match to A/B best_match to A/C + * S1 => A/A A/A A/A + * S2 => A/B A/B A/A + * S3 => B/B B/B A/A + * S4 => B/C A/B A/C + * S5 => C/C A/A C/C + * + * Basically, all alleles not in the subset map to ref. It means that het-alt genotypes + * when split into 2 bi-allelic variants will be het in each, which is good in some cases, + * rather than the undetermined behavior when using the PLs to assign, which could result + * in hom-var or hom-ref for each, depending on the exact PL values. 
+ */ + BEST_MATCH_TO_ORIGINAL + } + /** * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately) * @@ -430,22 +461,23 @@ public class GATKVariantContextUtils { * @return genotypes */ public static GenotypesContext subsetDiploidAlleles(final VariantContext vc, - final List allelesToUse, - final boolean assignGenotypes) { + final List allelesToUse, + final GenotypeAssignmentMethod assignGenotypes) { + if ( allelesToUse.get(0).isNonReference() ) throw new IllegalArgumentException("First allele must be the reference allele"); + if ( allelesToUse.size() == 1 ) throw new IllegalArgumentException("Cannot subset to only 1 alt allele"); // the genotypes with PLs final GenotypesContext oldGTs = vc.getGenotypes(); // the new genotypes to create final GenotypesContext newGTs = GenotypesContext.create(); + // optimization: if no input genotypes, just exit - if (oldGTs.isEmpty()) - return newGTs; + if (oldGTs.isEmpty()) return newGTs; // samples final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); - // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2); @@ -456,8 +488,8 @@ public class GATKVariantContextUtils { // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, // then we can keep the PLs as is; otherwise, we determine which ones to keep - if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { - likelihoodIndexesToUse = new ArrayList(30); + if ( numNewAltAlleles != numOriginalAltAlleles ) { + likelihoodIndexesToUse = new ArrayList<>(30); final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; for ( int i = 0; i < numOriginalAltAlleles; i++ ) { @@ -478,55 +510,127 @@ public class GATKVariantContextUtils { // create the new 
genotypes for ( int k = 0; k < oldGTs.size(); k++ ) { final Genotype g = oldGTs.get(sampleIndices.get(k)); - if ( !g.hasLikelihoods() ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); - continue; - } + final GenotypeBuilder gb = new GenotypeBuilder(g); // create the new likelihoods array from the alleles we are allowed to use - final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); double[] newLikelihoods; - if ( likelihoodIndexesToUse == null ) { - newLikelihoods = originalLikelihoods; - } else if ( originalLikelihoods.length != expectedNumLikelihoods ) { - logger.warn("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + vc + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods); + if ( !g.hasLikelihoods() ) { + // we don't have any likelihoods, so we null out PLs and make G ./. newLikelihoods = null; + gb.noPL(); } else { - newLikelihoods = new double[likelihoodIndexesToUse.size()]; - int newIndex = 0; - for ( int oldIndex : likelihoodIndexesToUse ) - newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; + final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); + if ( likelihoodIndexesToUse == null ) { + newLikelihoods = originalLikelihoods; + } else if ( originalLikelihoods.length != expectedNumLikelihoods ) { + logger.warn("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + vc + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods); + newLikelihoods = null; + } else { + newLikelihoods = new double[likelihoodIndexesToUse.size()]; + int newIndex = 0; + for ( int oldIndex : likelihoodIndexesToUse ) + newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; - // might need to re-normalize - newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); - } + // might need to re-normalize + newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); + } - // if there is no mass on 
the (new) likelihoods, then just no-call the sample - if ( newLikelihoods != null && MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); - } - else { - final GenotypeBuilder gb = new GenotypeBuilder(g); - - if ( newLikelihoods == null || numNewAltAlleles == 0 ) + if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) gb.noPL(); else gb.PL(newLikelihoods); - - // if we weren't asked to assign a genotype, then just no-call the sample - if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { - gb.alleles(NO_CALL_ALLELES); - } - else { - // find the genotype with maximum likelihoods - int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); - GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); - - gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2))); - if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); - } - newGTs.add(gb.make()); } + + updateGenotypeAfterSubsetting(g.getAlleles(), gb, assignGenotypes, newLikelihoods, allelesToUse); + newGTs.add(gb.make()); + } + + return newGTs; + } + + private static boolean likelihoodsAreUninformative(final double[] likelihoods) { + return MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL; + } + + /** + * Add the genotype call (GT) field to GenotypeBuilder using the requested algorithm assignmentMethod + * + * @param originalGT the original genotype calls, cannot be null + * @param gb the builder where we should put our newly called alleles, cannot be null + * @param assignmentMethod the method to use to do the assignment, cannot be null + * @param newLikelihoods a vector of likelihoods to use if the method requires PLs, should be log10 likelihoods, cannot be null + * @param allelesToUse the alleles we are using 
for our subsetting + */ + protected static void updateGenotypeAfterSubsetting(final List originalGT, + final GenotypeBuilder gb, + final GenotypeAssignmentMethod assignmentMethod, + final double[] newLikelihoods, + final List allelesToUse) { + gb.noAD(); + switch ( assignmentMethod ) { + case SET_TO_NO_CALL: + gb.alleles(NO_CALL_ALLELES); + gb.noGQ(); + break; + case USE_PLS_TO_ASSIGN: + if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) { + // if there is no mass on the (new) likelihoods, then just no-call the sample + gb.alleles(NO_CALL_ALLELES); + gb.noGQ(); + } else { + // find the genotype with maximum likelihoods + final int PLindex = MathUtils.maxElementIndex(newLikelihoods); + GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); + gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2))); + gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); + } + break; + case BEST_MATCH_TO_ORIGINAL: + final List best = new LinkedList<>(); + final Allele ref = allelesToUse.get(0); // WARNING -- should be checked in input argument + for ( final Allele originalAllele : originalGT ) { + best.add(allelesToUse.contains(originalAllele) ? 
originalAllele : ref); + } + gb.noGQ(); + gb.noPL(); + gb.alleles(best); + break; + } + } + + /** + * Subset the samples in VC to reference only information with ref call alleles + * + * Preserves DP if present + * + * @param vc the variant context to subset down to + * @param ploidy ploidy to use if a genotype doesn't have any alleles + * @return a GenotypesContext + */ + public static GenotypesContext subsetToRefOnly(final VariantContext vc, final int ploidy) { + if ( vc == null ) throw new IllegalArgumentException("vc cannot be null"); + if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be >= 1 but got " + ploidy); + + // the genotypes with PLs + final GenotypesContext oldGTs = vc.getGenotypes(); + + // optimization: if no input genotypes, just exit + if (oldGTs.isEmpty()) return oldGTs; + + // the new genotypes to create + final GenotypesContext newGTs = GenotypesContext.create(); + + final Allele ref = vc.getReference(); + final List diploidRefAlleles = Arrays.asList(ref, ref); + + // create the new genotypes + for ( final Genotype g : vc.getGenotypes() ) { + final int gPloidy = g.getPloidy() == 0 ? ploidy : g.getPloidy(); + final List refAlleles = gPloidy == 2 ? 
diploidRefAlleles : Collections.nCopies(gPloidy, ref); + final GenotypeBuilder gb = new GenotypeBuilder(g.getSampleName(), refAlleles); + if ( g.hasDP() ) gb.DP(g.getDP()); + if ( g.hasGQ() ) gb.GQ(g.getGQ()); + newGTs.add(gb.make()); } return newGTs; @@ -539,7 +643,7 @@ public class GATKVariantContextUtils { * @return genotypes context */ public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) { - return subsetDiploidAlleles(vc, vc.getAlleles(), true); + return subsetDiploidAlleles(vc, vc.getAlleles(), GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN); } /** @@ -557,7 +661,7 @@ public class GATKVariantContextUtils { * @return a list of bi-allelic (or monomorphic) variant context */ public static List splitVariantContextToBiallelics(final VariantContext vc) { - return splitVariantContextToBiallelics(vc, false); + return splitVariantContextToBiallelics(vc, false, GenotypeAssignmentMethod.SET_TO_NO_CALL); } /** @@ -575,18 +679,18 @@ public class GATKVariantContextUtils { * @param trimLeft if true, we will also left trim alleles, potentially moving the resulting vcs forward on the genome * @return a list of bi-allelic (or monomorphic) variant context */ - public static List splitVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft) { + public static List splitVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft, final GenotypeAssignmentMethod genotypeAssignmentMethod) { if ( ! 
vc.isVariant() || vc.isBiallelic() ) // non variant or biallelics already satisfy the contract return Collections.singletonList(vc); else { - final List biallelics = new LinkedList(); + final List biallelics = new LinkedList<>(); for ( final Allele alt : vc.getAlternateAlleles() ) { VariantContextBuilder builder = new VariantContextBuilder(vc); final List alleles = Arrays.asList(vc.getReference(), alt); builder.alleles(alleles); - builder.genotypes(subsetDiploidAlleles(vc, alleles, false)); + builder.genotypes(subsetDiploidAlleles(vc, alleles, genotypeAssignmentMethod)); VariantContextUtils.calculateChromosomeCounts(builder, true); final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true); biallelics.add(trimmed); diff --git a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java index fcc7c7998..937698d82 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.variant; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.variantcontext.*; @@ -39,6 +40,7 @@ import org.testng.annotations.Test; import java.util.*; public class GATKVariantContextUtilsUnitTest extends BaseTest { + private final static boolean DEBUG = false; Allele Aref, T, C, G, Cref, ATC, ATCATC; @@ -168,7 +170,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return MergeAllelesTest.getTests(MergeAllelesTest.class); } - @Test(dataProvider = 
"mergeAlleles") + @Test(enabled = !DEBUG, dataProvider = "mergeAlleles") public void testMergeAlleles(MergeAllelesTest cfg) { final List inputs = new ArrayList(); @@ -229,7 +231,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return SimpleMergeRSIDTest.getTests(SimpleMergeRSIDTest.class); } - @Test(dataProvider = "simplemergersiddata") + @Test(enabled = !DEBUG, dataProvider = "simplemergersiddata") public void testRSIDMerge(SimpleMergeRSIDTest cfg) { VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T)); final List inputs = new ArrayList(); @@ -352,7 +354,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return MergeFilteredTest.getTests(MergeFilteredTest.class); } - @Test(dataProvider = "mergeFiltered") + @Test(enabled = !DEBUG, dataProvider = "mergeFiltered") public void testMergeFiltered(MergeFilteredTest cfg) { final List priority = vcs2priority(cfg.inputs); final VariantContext merged = GATKVariantContextUtils.simpleMerge( @@ -479,7 +481,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return MergeGenotypesTest.getTests(MergeGenotypesTest.class); } - @Test(dataProvider = "mergeGenotypes") + @Test(enabled = !DEBUG, dataProvider = "mergeGenotypes") public void testMergeGenotypes(MergeGenotypesTest cfg) { final VariantContext merged = GATKVariantContextUtils.simpleMerge( cfg.inputs, cfg.priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, @@ -517,7 +519,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { } } - @Test + @Test(enabled = !DEBUG) public void testMergeGenotypesUniquify() { final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, -1)); final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2)); @@ -547,7 +549,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { // // -------------------------------------------------------------------------------- - @Test + 
@Test(enabled = !DEBUG) public void testAnnotationSet() { for ( final boolean annotate : Arrays.asList(true, false)) { for ( final String set : Arrays.asList("set", "combine", "x")) { @@ -618,7 +620,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); } - @Test(dataProvider = "ReverseClippingPositionTestProvider") + @Test(enabled = !DEBUG, dataProvider = "ReverseClippingPositionTestProvider") public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes()); Assert.assertEquals(result, cfg.expectedClip); @@ -706,7 +708,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "SplitBiallelics") + @Test(enabled = !DEBUG, dataProvider = "SplitBiallelics") public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List expectedBiallelics) { final List biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vc); Assert.assertEquals(biallelics.size(), expectedBiallelics.size()); @@ -717,7 +719,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { } } - @Test(dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes") + @Test(enabled = !DEBUG, dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes") public void testSplitBiallelicsGenotypes(final VariantContext vc, final List expectedBiallelics) { final List genotypes = new ArrayList(); @@ -745,7 +747,6 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { } } - // -------------------------------------------------------------------------------- // // Test repeats @@ -810,14 +811,14 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return 
RepeatDetectorTest.getTests(RepeatDetectorTest.class); } - @Test(dataProvider = "RepeatDetectorTest") + @Test(enabled = !DEBUG, dataProvider = "RepeatDetectorTest") public void testRepeatDetectorTest(RepeatDetectorTest cfg) { // test alleles are equal Assert.assertEquals(GATKVariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); } - @Test + @Test(enabled = !DEBUG) public void testRepeatAllele() { Allele nullR = Allele.create("A", true); Allele nullA = Allele.create("A", false); @@ -940,7 +941,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "ForwardClippingData") + @Test(enabled = !DEBUG, dataProvider = "ForwardClippingData") public void testForwardClipping(final List alleleStrings, final int expectedClip) { final List alleles = new LinkedList(); for ( final String alleleString : alleleStrings ) @@ -975,7 +976,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "ClipAlleleTest") + @Test(enabled = !DEBUG, dataProvider = "ClipAlleleTest") public void testClipAlleles(final List alleleStrings, final List expected, final int numLeftClipped) { final int start = 10; final VariantContext unclipped = GATKVariantContextUtils.makeFromAlleles("test", "20", start, alleleStrings); @@ -1019,7 +1020,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "PrimitiveAlleleSplittingData") + @Test(enabled = !DEBUG, dataProvider = "PrimitiveAlleleSplittingData") public void testPrimitiveAlleleSplitting(final String ref, final String alt, final int expectedSplit, final List variantPositions) { final int start = 10; @@ -1066,7 +1067,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "AlleleRemappingData") + @Test(enabled = !DEBUG, 
dataProvider = "AlleleRemappingData") public void testAlleleRemapping(final Map alleleMap, final int numGenotypes) { final GATKVariantContextUtils.AlleleMapper alleleMapper = new GATKVariantContextUtils.AlleleMapper(alleleMap); @@ -1102,4 +1103,204 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return gc; } + + // -------------------------------------------------------------------------------- + // + // Test subsetDiploidAlleles + // + // -------------------------------------------------------------------------------- + + @DataProvider(name = "subsetDiploidAllelesData") + public Object[][] makesubsetDiploidAllelesData() { + List tests = new ArrayList<>(); + + final Allele A = Allele.create("A", true); + final Allele C = Allele.create("C"); + final Allele G = Allele.create("G"); + + final List AA = Arrays.asList(A,A); + final List AC = Arrays.asList(A,C); + final List CC = Arrays.asList(C,C); + final List AG = Arrays.asList(A,G); + final List CG = Arrays.asList(C,G); + final List GG = Arrays.asList(G,G); + final List ACG = Arrays.asList(A,C,G); + + final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make(); + + final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01}); + final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01}); + final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9}); + final double[] uninformative = new double[]{0, 0, 0}; + + final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(50).make(); + + // make sure we don't screw up the simple case + final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).GQ(8).make(); + final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10,2}).PL(hetPL).GQ(8).make(); + final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10,2}).PL(homVarPL).GQ(8).make(); + + tests.add(new Object[]{new 
VariantContextBuilder(vcBase).genotypes(aaGT).make(), AC, Arrays.asList(new GenotypeBuilder(aaGT).noAD().make())}); + tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), AC, Arrays.asList(new GenotypeBuilder(acGT).noAD().make())}); + tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), AC, Arrays.asList(new GenotypeBuilder(ccGT).noAD().make())}); + + // uninformative test case + final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make(); + final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noAD().noPL().noGQ().make(); + tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), AC, Arrays.asList(emptyGT)}); + + // actually subsetting down from multiple alt values + final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50}; + final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50}; + final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50}; + final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50}; + final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG + final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).noAD().PL(homRef3AllelesPL).make()).make(), + AC, + Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).noAD().GQ(100).make())}); + + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).noAD().PL(hetRefC3AllelesPL).make()).make(), + AC, + Arrays.asList(new GenotypeBuilder(base).alleles(AC).PL(new double[]{-10, 0, -20}).noAD().GQ(100).make())}); + + tests.add(new Object[]{ + new 
VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).noAD().PL(homC3AllelesPL).make()).make(), + AC, + Arrays.asList(new GenotypeBuilder(base).alleles(CC).PL(new double[]{-20, -10, 0}).noAD().GQ(100).make())}); + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).noAD().PL(hetRefG3AllelesPL).make()).make(), + AG, + Arrays.asList(new GenotypeBuilder(base).alleles(AG).PL(new double[]{-20, 0, -50}).noAD().GQ(200).make())}); + + // wow, scary -- bad output but discussed with Eric and we think this is the only thing that can be done + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).noAD().PL(hetCG3AllelesPL).make()).make(), + AG, + Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -20, -30}).noAD().GQ(200).make())}); + + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).noAD().PL(homG3AllelesPL).make()).make(), + AG, + Arrays.asList(new GenotypeBuilder(base).alleles(GG).PL(new double[]{-20, -40, 0}).noAD().GQ(200).make())}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "subsetDiploidAllelesData") + public void testsubsetDiploidAllelesData(final VariantContext inputVC, + final List allelesToUse, + final List expectedGenotypes) { + final GenotypesContext actual = GATKVariantContextUtils.subsetDiploidAlleles(inputVC, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN); + + Assert.assertEquals(actual.size(), expectedGenotypes.size()); + for ( final Genotype expected : expectedGenotypes ) { + final Genotype actualGT = actual.get(expected.getSampleName()); + Assert.assertNotNull(actualGT); + assertGenotypesAreEqual(actualGT, expected); + } + } + + @DataProvider(name = "UpdateGenotypeAfterSubsettingData") + public Object[][] 
makeUpdateGenotypeAfterSubsettingData() { + List tests = new ArrayList(); + + final Allele A = Allele.create("A", true); + final Allele C = Allele.create("C"); + final Allele G = Allele.create("G"); + + final List AA = Arrays.asList(A,A); + final List AC = Arrays.asList(A,C); + final List CC = Arrays.asList(C,C); + final List AG = Arrays.asList(A,G); + final List CG = Arrays.asList(C,G); + final List GG = Arrays.asList(G,G); + final List ACG = Arrays.asList(A,C,G); + final List> allSubsetAlleles = Arrays.asList(AC,AG,ACG); + + final double[] homRefPL = new double[]{0.9, 0.09, 0.01}; + final double[] hetPL = new double[]{0.09, 0.9, 0.01}; + final double[] homVarPL = new double[]{0.01, 0.09, 0.9}; + final double[] uninformative = new double[]{0.33, 0.33, 0.33}; + final List allPLs = Arrays.asList(homRefPL, hetPL, homVarPL, uninformative); + + for ( final List alleles : allSubsetAlleles ) { + for ( final double[] pls : allPLs ) { + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, pls, AA, alleles, GATKVariantContextUtils.NO_CALL_ALLELES}); + } + } + + for ( final List originalGT : Arrays.asList(AA, AC, CC, AG, CG, GG) ) { + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homRefPL, originalGT, AC, AA}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, hetPL, originalGT, AC, AC}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homVarPL, originalGT, AC, CC}); +// tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, uninformative, AA, AC, GATKVariantContextUtils.NO_CALL_ALLELES}); + } + + for ( final double[] pls : allPLs ) { + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, AC, AA}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, AC, AC}); + 
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, AC, CC}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, AC, AC}); + + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, AG, AA}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, AG, AA}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, AG, AA}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, AG, AG}); + + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, ACG, AA}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, ACG, AC}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, ACG, CC}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AG, ACG, AG}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, ACG, CG}); + tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, GG, ACG, GG}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = !DEBUG, dataProvider = "UpdateGenotypeAfterSubsettingData") + public void testUpdateGenotypeAfterSubsetting(final GATKVariantContextUtils.GenotypeAssignmentMethod mode, + final double[] likelihoods, + final List originalGT, + final List allelesToUse, + final List expectedAlleles) { + final GenotypeBuilder gb = new GenotypeBuilder("test"); + final double[] log10Likelhoods = MathUtils.normalizeFromLog10(likelihoods, true, false); + GATKVariantContextUtils.updateGenotypeAfterSubsetting(originalGT, gb, mode, 
log10Likelhoods, allelesToUse); + final Genotype g = gb.make(); + Assert.assertEquals(new HashSet<>(g.getAlleles()), new HashSet<>(expectedAlleles)); + } + + @Test(enabled = !DEBUG) + public void testSubsetToRef() { + final Map tests = new LinkedHashMap<>(); + + for ( final List alleles : Arrays.asList(Arrays.asList(Aref), Arrays.asList(C), Arrays.asList(Aref, C), Arrays.asList(Aref, C, C) ) ) { + for ( final String name : Arrays.asList("test1", "test2") ) { + final GenotypeBuilder builder = new GenotypeBuilder(name, alleles); + builder.DP(10); + builder.GQ(30); + builder.AD(alleles.size() == 1 ? new int[]{1} : (alleles.size() == 2 ? new int[]{1, 2} : new int[]{1, 2, 3})); + builder.PL(alleles.size() == 1 ? new int[]{1} : (alleles.size() == 2 ? new int[]{1,2} : new int[]{1,2,3})); + final List refs = Collections.nCopies(alleles.size(), Aref); + tests.put(builder.make(), builder.alleles(refs).noAD().noPL().make()); + } + } + + for ( final int n : Arrays.asList(1, 2, 3) ) { + for ( final List genotypes : Utils.makePermutations(new ArrayList<>(tests.keySet()), n, false) ) { + final VariantContext vc = new VariantContextBuilder("test", "20", 1, 1, Arrays.asList(Aref, C)).genotypes(genotypes).make(); + final GenotypesContext gc = GATKVariantContextUtils.subsetToRefOnly(vc, 2); + + Assert.assertEquals(gc.size(), genotypes.size()); + for ( int i = 0; i < genotypes.size(); i++ ) { +// logger.warn("Testing " + genotypes.get(i) + " => " + gc.get(i) + " " + tests.get(genotypes.get(i))); + assertGenotypesAreEqual(gc.get(i), tests.get(genotypes.get(i))); + } + } + } + } } \ No newline at end of file From 6232db3157acdc3a27ba7a21401d605010baf19a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 13 Jun 2013 15:18:28 -0400 Subject: [PATCH 092/116] Remove STANDARD option from GATKRunReport -- AWS is now the default. Removed old code the referred to the STANDARD type. Deleted unused variables and functions. 
--- .../arguments/GATKArgumentCollection.java | 4 +- .../sting/gatk/phonehome/GATKRunReport.java | 71 +------------------ .../org/broadinstitute/sting/WalkerTest.java | 2 +- 3 files changed, 5 insertions(+), 72 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index dc3d67283..0b1f341f0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -69,8 +69,8 @@ public class GATKArgumentCollection { // // -------------------------------------------------------------------------------------------------------------- - @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + UserException.PHONE_HOME_DOCS_URL + " for details.", required = false) - public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; + @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? AWS is the default, can be NO_ET so nothing is posted to the run repository. Please see " + UserException.PHONE_HOME_DOCS_URL + " for details.", required = false) + public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.AWS; @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. 
Please see " + UserException.PHONE_HOME_DOCS_URL + " for details.", required = false) public File gatkKeyFile = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index 9704454c9..67d72189c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -78,22 +78,6 @@ public class GATKRunReport { private static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss"); - /** - * The root file system directory where we keep common report data - */ - private final static File REPORT_DIR = new File("/humgen/gsa-hpprojects/GATK/reports"); - - /** - * The full path to the direct where submitted (and uncharacterized) report files are written - */ - private final static File REPORT_SUBMIT_DIR = new File(REPORT_DIR.getAbsolutePath() + "/submitted"); - - /** - * Full path to the sentinel file that controls whether reports are written out. If this file doesn't - * exist, no long will be written - */ - private final static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE"); - /** * our log */ @@ -181,8 +165,6 @@ public class GATKRunReport { public enum PhoneHomeOption { /** Disable phone home */ NO_ET, - /** Standard option. Writes to local repository if it can be found, or S3 otherwise */ - STANDARD, /** Forces the report to go to S3 */ AWS, /** Force output to STDOUT. 
For debugging only */ @@ -365,14 +347,9 @@ public class GATKRunReport { switch (type) { case NO_ET: // don't do anything return false; - case STANDARD: case AWS: - if ( type == PhoneHomeOption.STANDARD && repositoryIsOnline() ) { - return postReportToLocalDisk(getLocalReportFullPath()) != null; - } else { - wentToAWS = true; - return postReportToAWSS3() != null; - } + wentToAWS = true; + return postReportToAWSS3() != null; case STDOUT: return postReportToStream(System.out); default: @@ -404,50 +381,6 @@ public class GATKRunReport { } } - /** - * Get the full path as a file where we'll write this report to local disl - * @return a non-null File - */ - @Ensures("result != null") - protected File getLocalReportFullPath() { - return new File(REPORT_SUBMIT_DIR, getReportFileName()); - } - - /** - * Is the local GATKRunReport repository available for writing reports? - * - * @return true if and only if the common run report repository is available and online to receive reports - */ - private boolean repositoryIsOnline() { - return false; // REPORT_SENTINEL.exists(); - } - - - /** - * Main entry point to writing reports to disk. Posts the XML report to the common GATK run report repository. - * If this process fails for any reason, all exceptions are handled and this routine merely prints a warning. - * That is, postReport() is guarenteed not to fail for any reason. 
- * - * @return the path where the file was written, or null if any failure occurred - */ - @Requires("destination != null") - private File postReportToLocalDisk(final File destination) { - try { - final BufferedOutputStream out = new BufferedOutputStream( - new GZIPOutputStream( - new FileOutputStream(destination))); - postReportToStream(out); - out.close(); - logger.debug("Wrote report to " + destination); - return destination; - } catch ( Exception e ) { - // we catch everything, and no matter what eat the error - exceptDuringRunReport("Couldn't read report file", e); - destination.delete(); - return null; - } - } - // --------------------------------------------------------------------------- // // Code for sending reports to s3 diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index 40f1f7bcd..422ddbfb0 100644 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -220,7 +220,7 @@ public class WalkerTest extends BaseTest { String args = this.args; if ( includeImplicitArgs ) { args = args + (ENABLE_PHONE_HOME_FOR_TESTS ? 
- String.format(" -et %s ", GATKRunReport.PhoneHomeOption.STANDARD) : + String.format(" -et %s ", GATKRunReport.PhoneHomeOption.AWS) : String.format(" -et %s -K %s ", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile)); if ( includeShadowBCF && GENERATE_SHADOW_BCF ) args = args + " --generateShadowBCF "; From 74f311c973820e97c8da36a77095c63e3d098454 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 13 Jun 2013 15:46:16 -0400 Subject: [PATCH 093/116] Emit the GATK version number in the VCF header -- Looks like ##GATKVersion=2.5-159-g3f91d93 in the VCF header line -- delivers [#51595305] --- .../io/stubs/VariantContextWriterStub.java | 13 ++++++++++++ .../gatk/EngineFeaturesIntegrationTest.java | 21 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java index 5c80da214..8b7c4282b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.io.stubs; import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.gatk.CommandLineExecutable; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -53,6 +54,7 @@ import java.util.List; * @version 0.1 */ public class VariantContextWriterStub implements Stub, VariantContextWriter { + public final static String GATK_VERSION_KEY = "GATKVersion"; public final static boolean UPDATE_CONTIG_HEADERS = true; /** @@ -225,6 +227,9 @@ public class VariantContextWriterStub implements Stub, Var if ( header.isWriteEngineHeaders() ) { // skip writing the command line header if 
requested if ( ! skipWritingCommandLineHeader && header.isWriteCommandLine() ) { + // write the GATK version if we have command line information enabled + vcfHeader.addMetaDataLine(getGATKVersionHeaderLine()); + // Check for the command-line argument header line. If not present, add it in. final VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); final boolean foundCommandLineHeaderLine = vcfHeader.getMetaDataLine(commandLineArgHeaderLine.getKey()) != null; @@ -284,4 +289,12 @@ public class VariantContextWriterStub implements Stub, Var CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class); return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\""); } + + /** + * Gets the GATK version header line for the VCF file + * @return non-null VCFHeaderLine. + */ + private VCFHeaderLine getGATKVersionHeaderLine() { + return new VCFHeaderLine(GATK_VERSION_KEY, CommandLineGATK.getVersionNumber()); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index b5b82f869..226224199 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -25,10 +25,12 @@ package org.broadinstitute.sting.gatk; +import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; +import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import 
org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -36,9 +38,15 @@ import org.broadinstitute.sting.gatk.walkers.qc.ErrorThrowing; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.variant.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; +import java.io.FileInputStream; import java.io.PrintStream; import java.util.Arrays; @@ -191,4 +199,17 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { 1, UserException.class); executeTest("badCompress " + compress, spec); } + + @Test(enabled = true) + public void testGATKVersionInVCF() throws Exception { + WalkerTestSpec spec = new WalkerTestSpec("-T UnifiedGenotyper -R " + b37KGReference + " -I " + + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam" + + " -o %s -L 20:10,000,000", + 1, Arrays.asList("")); + final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); + final VCFHeader header = (VCFHeader)new VCFCodec().readHeader(new AsciiLineReader(new FileInputStream(vcf))); + final VCFHeaderLine versionLine = header.getMetaDataLine(VariantContextWriterStub.GATK_VERSION_KEY); + Assert.assertNotNull(versionLine); + Assert.assertEquals(versionLine.getValue(), CommandLineGATK.getVersionNumber()); + } } \ No newline at end of file From f9c986be7448943f11d5d18083b5b1a2fa77acd2 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 13 Jun 2013 15:30:10 -0400 Subject: [PATCH 094/116] Remove com.sun.javadoc.* dependencies from the GATK proper, and isolate them for doclet use only Problem: Classes in com.sun.javadoc.* are non-standard. 
Since we can't depend on their availability for all users, the GATK proper should not have any runtime dependencies on this package. Solution: -Isolate com.sun.javadoc.* dependencies in a DocletUtils class for use only by doclets. The only users who need to run our doclets are those who compile from source, and they should be competent enough to figure out how to resolve a missing com.sun.* dependency. -HelpUtils now contains no com.sun.javadoc.* dependencies and can be safely used by walkers/other tools. -Added comments with instructions on when it is safe to use DocletUtils vs. HelpUtils [delivers #51450385] [delivers #50387199] --- .../sting/utils/help/DocletUtils.java | 76 +++++++++++++++++++ .../sting/utils/help/GATKDoclet.java | 2 +- .../help/GenericDocumentationHandler.java | 6 +- .../sting/utils/help/HelpUtils.java | 48 ++---------- .../help/ResourceBundleExtractorDoclet.java | 4 +- .../sting/utils/runtime/ProcessSettings.java | 1 - 6 files changed, 87 insertions(+), 50 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/help/DocletUtils.java diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocletUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/DocletUtils.java new file mode 100644 index 000000000..1e9a37cb7 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocletUtils.java @@ -0,0 +1,76 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all 
copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.FieldDoc; +import com.sun.javadoc.PackageDoc; +import com.sun.javadoc.ProgramElementDoc; +import org.broadinstitute.sting.utils.classloader.JVMUtils; + +import java.lang.reflect.Field; + +/** + * Methods in the class must ONLY be used by doclets, since the com.sun.javadoc.* classes are not + * available on all systems, and we don't want the GATK proper to depend on them. + */ +public class DocletUtils { + + protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) { + try { + Class type = getClassForDoc(classDoc); + return lhsClass.isAssignableFrom(type) && (!requireConcrete || JVMUtils.isConcrete(type)); + } catch (Throwable t) { + // Ignore errors. + return false; + } + } + + protected static Class getClassForDoc(ProgramElementDoc doc) throws ClassNotFoundException { + return Class.forName(getClassName(doc)); + } + + protected static Field getFieldForFieldDoc(FieldDoc fieldDoc) { + try { + Class clazz = getClassForDoc(fieldDoc.containingClass()); + return JVMUtils.findField(clazz, fieldDoc.name()); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Reconstitute the class name from the given class JavaDoc object. + * + * @param doc the Javadoc model for the given class. + * @return The (string) class name of the given class. 
+ */ + protected static String getClassName(ProgramElementDoc doc) { + PackageDoc containingPackage = doc.containingPackage(); + return containingPackage.name().length() > 0 ? + String.format("%s.%s", containingPackage.name(), doc.name()) : + String.format("%s", doc.name()); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 677bbf2e5..63cb0900a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -352,7 +352,7 @@ public class GATKDoclet { private Class getClassForClassDoc(ClassDoc doc) { try { // todo -- what do I need the ? extends Object to pass the compiler? - return (Class) HelpUtils.getClassForDoc(doc); + return (Class) DocletUtils.getClassForDoc(doc); } catch (ClassNotFoundException e) { //logger.warn("Couldn't find class for ClassDoc " + doc); // we got a classdoc for a class we can't find. 
Maybe in a library or something diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 1711a3923..02c269495 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -68,7 +68,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { @Override public boolean includeInDocs(ClassDoc doc) { try { - Class type = HelpUtils.getClassForDoc(doc); + Class type = DocletUtils.getClassForDoc(doc); boolean hidden = !getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class); return !hidden && JVMUtils.isConcrete(type); } catch (ClassNotFoundException e) { @@ -157,7 +157,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { root.put("arguments", args); try { // loop over all of the arguments according to the parsing engine - for (final ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(toProcess.classDoc))) { + for (final ArgumentSource argumentSource : parsingEngine.extractArgumentSources(DocletUtils.getClassForDoc(toProcess.classDoc))) { // todo -- why can you have multiple ones? 
ArgumentDefinition argDef = argumentSource.createArgumentDefinitions().get(0); FieldDoc fieldDoc = getFieldDoc(toProcess.classDoc, argumentSource.field.getName()); @@ -663,7 +663,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { if (fieldDoc.name().equals(name)) return fieldDoc; - Field field = HelpUtils.getFieldForFieldDoc(fieldDoc); + Field field = DocletUtils.getFieldForFieldDoc(fieldDoc); if (field == null) throw new RuntimeException("Could not find the field corresponding to " + fieldDoc + ", presumably because the field is inaccessible"); if (field.isAnnotationPresent(ArgumentCollection.class)) { diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java index 9a23fd022..74516672d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java @@ -25,57 +25,20 @@ package org.broadinstitute.sting.utils.help; -import com.sun.javadoc.FieldDoc; -import com.sun.javadoc.PackageDoc; -import com.sun.javadoc.ProgramElementDoc; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; -import java.lang.reflect.Field; import java.util.List; +/** + * NON-javadoc/doclet help-related utility methods should go here. Anything with a com.sun.javadoc.* dependency + * should go into DocletUtils for use only by doclets. 
+ */ public class HelpUtils { - protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) { - try { - Class type = getClassForDoc(classDoc); - return lhsClass.isAssignableFrom(type) && (!requireConcrete || JVMUtils.isConcrete(type)); - } catch (Throwable t) { - // Ignore errors. - return false; - } - } - - protected static Class getClassForDoc(ProgramElementDoc doc) throws ClassNotFoundException { - return Class.forName(getClassName(doc)); - } - - protected static Field getFieldForFieldDoc(FieldDoc fieldDoc) { - try { - Class clazz = getClassForDoc(fieldDoc.containingClass()); - return JVMUtils.findField(clazz, fieldDoc.name()); - } catch (ClassNotFoundException e) { - throw new RuntimeException(e); - } - } - - /** - * Reconstitute the class name from the given class JavaDoc object. - * - * @param doc the Javadoc model for the given class. - * @return The (string) class name of the given class. - */ - protected static String getClassName(ProgramElementDoc doc) { - PackageDoc containingPackage = doc.containingPackage(); - return containingPackage.name().length() > 0 ? - String.format("%s.%s", containingPackage.name(), doc.name()) : - String.format("%s", doc.name()); - } - /** * Simple method to print a list of available annotations. 
*/ @@ -98,5 +61,4 @@ public class HelpUtils { System.out.println("\t" + c.getSimpleName()); System.out.println(); } - -} \ No newline at end of file +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java index 0f2383b4b..ac85d7aff 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java @@ -108,7 +108,7 @@ public class ResourceBundleExtractorDoclet { if(isRequiredJavadocMissing(currentClass) && isWalker(currentClass)) undocumentedWalkers.add(currentClass.name()); - renderHelpText(HelpUtils.getClassName(currentClass),currentClass); + renderHelpText(DocletUtils.getClassName(currentClass),currentClass); } for(PackageDoc currentPackage: packages) @@ -173,7 +173,7 @@ public class ResourceBundleExtractorDoclet { * @return True if the class of the given name is a walker. False otherwise. 
*/ protected static boolean isWalker(ClassDoc classDoc) { - return HelpUtils.assignableToClass(classDoc, Walker.class, true); + return DocletUtils.assignableToClass(classDoc, Walker.class, true); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java index 8aafd6034..659523641 100644 --- a/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.utils.runtime; -import com.sun.corba.se.spi.orbutil.fsm.Input; import java.io.File; import java.util.Map; From d1672926881874f007b3ade7d613b17470e5261c Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 14 Jun 2013 15:30:17 -0400 Subject: [PATCH 097/116] Reduce number of leftover temp files in GATK runs -WalkerTest now deletes *.idx files on exit -ArtificialBAMBuilder now deletes *.bai files on exit -VariantsToBinaryPed walker now deletes its temp files on exit --- .../sting/gatk/walkers/variantutils/VariantsToBinaryPed.java | 1 + .../broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java | 5 +++++ public/java/test/org/broadinstitute/sting/WalkerTest.java | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java index 8d16e6ca2..c414b443e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java @@ -176,6 +176,7 @@ public class VariantsToBinaryPed extends RodWalker { // Cut down on memory. 
try { File temp = File.createTempFile("VariantsToBPed_"+sample, ".tmp"); + temp.deleteOnExit(); printMap.put(sample,new PrintStream(temp)); tempFiles.put(sample,temp); } catch (IOException e) { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java index bf3045c71..8d496ab96 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java @@ -182,6 +182,11 @@ public class ArtificialBAMBuilder { try { final File file = File.createTempFile("tempBAM", ".bam"); file.deleteOnExit(); + + // Register the bam index file for deletion on exit as well: + new File(file.getAbsolutePath().replace(".bam", ".bai")).deleteOnExit(); + new File(file.getAbsolutePath() + ".bai").deleteOnExit(); + return makeBAMFile(file); } catch ( IOException e ) { throw new RuntimeException(e); diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index 422ddbfb0..78f67967b 100644 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -312,6 +312,10 @@ public class WalkerTest extends BaseTest { for (int i = 0; i < spec.nOutputFiles; i++) { String ext = spec.exts == null ? ".tmp" : "." 
+ spec.exts.get(i); File fl = createTempFile(String.format("walktest.tmp_param.%d", i), ext); + + // Mark corresponding *.idx for deletion on exit as well just in case an index is created for the temp file: + new File(fl.getAbsolutePath() + ".idx").deleteOnExit(); + tmpFiles.add(fl); } From 1677a0a458e83075e2b0c0e14d45f33d39690593 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 14 Jun 2013 15:56:13 -0400 Subject: [PATCH 098/116] Simpler FILTER and info field encoding for BeagleOutputToVCF -- Previous version created FILTERs for each possible alt allele when that site was set to monomorphic by BEAGLE. So if you had an A/C SNP in the original file and beagle thought it was AC=0, then you'd get a record with BGL_RM_WAS_A in the FILTER field. This obviously would cause problems for indels, and so the tool was blowing up in this case. Now beagle sets the filter field to BGL_SET_TO_MONOMORPHIC and sets the info field annotation OriginalAltAllele to A instead. This works in general with any type of allele. -- Here's an example output line from the previous and current versions: old: 20 64150 rs7274499 C . 3041.68 BGL_RM_WAS_A AN=566;DB;DP=1069;Dels=0.00;HRun=0;HaplotypeScore=238.33;LOD=3.5783;MQ=83.74;MQ0=0;NumGenotypesChanged=1;OQ=1949.35;QD=10.95;SB=-6918.88 new: 20 64062 . G .
100.39 BGL_SET_TO_MONOMORPHIC AN=566;DP=1108;Dels=0.00;HRun=2;HaplotypeScore=221.59;LOD=-0.5051;MQ=85.69;MQ0=0;NumGenotypesChanged=1;OQ=189.66;OriginalAltAllele=A;QD=15.81;SB=-6087.15 -- update MD5s to reflect these changes -- [delivers #50847721] --- .../gatk/walkers/beagle/BeagleIntegrationTest.java | 4 ++-- .../gatk/walkers/beagle/BeagleOutputToVCF.java | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java index 69a5fc65f..5601d66fb 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java @@ -62,7 +62,7 @@ public class BeagleIntegrationTest extends WalkerTest { "--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + "--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " + "--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + - "-o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING", 1, Arrays.asList("c5522304abf0633041c7772dd7dafcea")); + "-o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING", 1, Arrays.asList("989449fa3e262b88ba126867fa3ad9fb")); spec.disableShadowBCF(); executeTest("test BeagleOutputToVCF", spec); } @@ -96,7 +96,7 @@ public class BeagleIntegrationTest extends WalkerTest { "--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ "--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ "--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ - "-L 20:1-70000 -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",1,Arrays.asList("d8906b67c7f9fdb5b37b8e9e050982d3")); + "-L 20:1-70000 -o %s 
--no_cmdline_in_header -U LENIENT_VCF_PROCESSING",1,Arrays.asList("e036636fcd6a748ede4a70ea47941d47")); spec.disableShadowBCF(); executeTest("testBeagleChangesSitesToRef",spec); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java index 15bd79586..7d5ad9b8a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java @@ -129,6 +129,9 @@ public class BeagleOutputToVCF extends RodWalker { private final double MIN_PROB_ERROR = 0.000001; private final double MAX_GENOTYPE_QUALITY = -6.0; + private final static String BEAGLE_MONO_FILTER_STRING = "BGL_SET_TO_MONOMORPHIC"; + private final static String ORIGINAL_ALT_ALLELE_INFO_KEY = "OriginalAltAllele"; + public void initialize() { // setup the header fields @@ -138,10 +141,8 @@ public class BeagleOutputToVCF extends RodWalker { hInfo.add(new VCFFormatHeaderLine("OG",1, VCFHeaderLineType.String, "Original Genotype input to Beagle")); hInfo.add(new VCFInfoHeaderLine("R2", 1, VCFHeaderLineType.Float, "r2 Value reported by Beagle on each site")); hInfo.add(new VCFInfoHeaderLine("NumGenotypesChanged", 1, VCFHeaderLineType.Integer, "The number of genotypes changed by Beagle")); - hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_A", "This 'A' site was set to monomorphic by Beagle")); - hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_C", "This 'C' site was set to monomorphic by Beagle")); - hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle")); - hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle")); + hInfo.add(new VCFInfoHeaderLine(ORIGINAL_ALT_ALLELE_INFO_KEY, 1, VCFHeaderLineType.String, "The original alt allele for a site set to monomorphic by Beagle")); + hInfo.add(new 
VCFFilterHeaderLine(BEAGLE_MONO_FILTER_STRING, "This site was set to monomorphic by Beagle")); if ( comp.isBound() ) { hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); @@ -335,9 +336,8 @@ public class BeagleOutputToVCF extends RodWalker { final VariantContextBuilder builder = new VariantContextBuilder(vc_input).source("outputvcf").genotypes(genotypes); if ( ! ( beagleVarCounts > 0 || DONT_FILTER_MONOMORPHIC_SITES ) ) { - Set removedFilters = vc_input.filtersWereApplied() ? new HashSet(vc_input.getFilters()) : new HashSet(1); - removedFilters.add(String.format("BGL_RM_WAS_%s",vc_input.getAlternateAllele(0))); - builder.alleles(new HashSet(Arrays.asList(vc_input.getReference()))).filters(removedFilters); + builder.attribute(ORIGINAL_ALT_ALLELE_INFO_KEY, vc_input.getAlternateAllele(0)); + builder.alleles(Collections.singleton(vc_input.getReference())).filter(BEAGLE_MONO_FILTER_STRING); } // re-compute chromosome counts From f46f7d9b23d22ac249fddbfacc4e748b61940ac9 Mon Sep 17 00:00:00 2001 From: James Warren Date: Fri, 14 Jun 2013 14:25:16 -0700 Subject: [PATCH 099/116] deducing dictionary path should not use global find and replace Signed-off-by: David Roazen --- .../sting/gatk/datasources/reference/ReferenceDataSource.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index 01edd44ba..edd3d324c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -68,8 +68,8 @@ public class ReferenceDataSource { final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai"); // determine the name for the dict file - final String fastaExt = 
fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta"; - final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict")); + final String fastaExt = fastaFile.getAbsolutePath().endsWith("fa") ? "\\.fa$" : "\\.fasta$"; + final File dictFile = new File(fastaFile.getAbsolutePath().replaceAll(fastaExt, ".dict")); // It's an error if either the fai or dict file does not exist. The user is now responsible // for creating these files. From e48f7544785437d52d2149ac82d5750e9c6746ac Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 13 Jun 2013 19:29:08 -0400 Subject: [PATCH 101/116] Fixes to several of the annotations for reduced reads (and other issues). 1. Have the RMSMappingQuality annotation take into account the fact that reduced reads represent multiple reads. 2. The rank sum tests should not be using reduced reads (because they do not represent distinct observations). 3. Fixed a massive bug in the BaseQualityRankSumTest annotation! It was not using the base qualities but rather the read likelihoods?! Added a unit test for Rank Sum Tests to prove that the distributions are correctly getting assigned appropriate p-values. Also, and just as importantly, the test shows that using reduced reads in the rank sum tests skews the results and makes insignificant distributions look significant (so it can falsely cause the filtering of good sites). Also included in this commit is a massive refactor of the RankSumTest class as requested by the reviewer.
--- .../annotator/BaseQualityRankSumTest.java | 42 +---- .../annotator/ClippingRankSumTest.java | 31 +--- .../gatk/walkers/annotator/FisherStrand.java | 16 +- .../annotator/MappingQualityRankSumTest.java | 42 +---- .../walkers/annotator/RMSMappingQuality.java | 55 +++---- .../gatk/walkers/annotator/RankSumTest.java | 139 +++++++++++----- .../walkers/annotator/ReadPosRankSumTest.java | 95 ++++------- .../walkers/annotator/RankSumUnitTest.java | 151 ++++++++++++++++++ .../VariantAnnotatorIntegrationTest.java | 6 +- ...perGeneralPloidySuite1IntegrationTest.java | 2 +- ...perGeneralPloidySuite2IntegrationTest.java | 2 +- ...dGenotyperIndelCallingIntegrationTest.java | 16 +- ...GenotyperNormalCallingIntegrationTest.java | 8 +- ...dGenotyperReducedReadsIntegrationTest.java | 6 +- ...lexAndSymbolicVariantsIntegrationTest.java | 6 +- .../HaplotypeCallerIntegrationTest.java | 18 +-- 16 files changed, 373 insertions(+), 262 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index a3a9e50e9..534834d0e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -47,13 +47,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; -import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import 
org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; @@ -71,37 +69,11 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); } - protected void fillQualsFromPileup(final List allAlleles, final int refLoc, - final ReadBackedPileup pileup, - final PerReadAlleleLikelihoodMap alleleLikelihoodMap, - final List refQuals, final List altQuals){ - - if (alleleLikelihoodMap == null) { - // use fast SNP-based version if we don't have per-read allele likelihoods - for ( final PileupElement p : pileup ) { - if ( isUsableBase(p) ) { - if ( allAlleles.get(0).equals(Allele.create(p.getBase(),true)) ) { - refQuals.add((double)p.getQual()); - } else if ( allAlleles.contains(Allele.create(p.getBase()))) { - altQuals.add((double)p.getQual()); - } - } - } - return; - } - - for (Map el : alleleLikelihoodMap.getLikelihoodMapValues()) { - final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el); - if (! 
a.isInformative()) - continue; // read is non-informative - if (a.getMostLikelyAllele().isReference()) - refQuals.add(-10.0*(double)el.get(a.getMostLikelyAllele())); - else if (allAlleles.contains(a.getMostLikelyAllele())) - altQuals.add(-10.0*(double)el.get(a.getMostLikelyAllele())); - - - } + protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) { + return (double)read.getBaseQualities()[ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL)]; } - + protected Double getElementForPileupElement(final PileupElement p) { + return (double)p.getQual(); + } } \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java index 366512119..68e983bb8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java @@ -46,14 +46,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; -import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; @@ -74,26 +71,12 @@ public class ClippingRankSumTest extends RankSumTest { public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ClippingRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. 
Ref number of hard clipped bases")); } - - protected void fillQualsFromPileup(final List allAlleles, - final int refLoc, - final ReadBackedPileup pileup, - final PerReadAlleleLikelihoodMap likelihoodMap, final List refQuals, final List altQuals) { - // todo - only support non-pileup case for now, e.g. active-region based version - if (pileup != null || likelihoodMap == null) - return; - - for (Map.Entry> el : likelihoodMap.getLikelihoodReadMap().entrySet()) { - - final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); - if (! a.isInformative()) - continue; // read is non-informative - if (a.getMostLikelyAllele().isReference()) - refQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey())); - else if (allAlleles.contains(a.getMostLikelyAllele())) - altQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey())); - - } + protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) { + return (double)AlignmentUtils.getNumHardClippedBases(read); } + protected Double getElementForPileupElement(final PileupElement p) { + // TODO - we only support the non-pileup case for now, e.g. an active-region based version + return null; + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 957eb1aba..876dbf039 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -300,7 +300,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat for ( Map.Entry sample : stratifiedContexts.entrySet() ) { for (PileupElement p : sample.getValue().getBasePileup()) { - if ( ! RankSumTest.isUsableBase(p, false) ) // ignore deletions + if ( ! 
isUsableBase(p) ) // ignore deletions and bad MQ continue; if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider ) @@ -313,6 +313,20 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat return table; } + /** + * Can the base in this pileup element be used in comparative tests? + * + * @param p the pileup element to consider + * + * @return true if this base is part of a meaningful read for comparison, false otherwise + */ + private static boolean isUsableBase(final PileupElement p) { + return !( p.isDeletion() || + p.getMappingQual() == 0 || + p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || + ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); + } + private static void updateTable(final int[][] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt, final int representativeCount) { final boolean matchesRef = allele.equals(ref, true); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 3873138a2..0ebb09961 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -47,14 +47,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; -import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import 
org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; @@ -73,35 +69,11 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities")); } - protected void fillQualsFromPileup(final List allAlleles, - final int refLoc, - final ReadBackedPileup pileup, - final PerReadAlleleLikelihoodMap likelihoodMap, - final List refQuals, final List altQuals) { - - if (pileup != null && likelihoodMap == null) { - // old UG snp-only path through the annotations - for ( final PileupElement p : pileup ) { - if ( isUsableBase(p) ) { - if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) { - refQuals.add((double)p.getMappingQual()); - } else if ( allAlleles.contains(Allele.create(p.getBase()))) { - altQuals.add((double)p.getMappingQual()); - } - } - } - return; - } - for (Map.Entry> el : likelihoodMap.getLikelihoodReadMap().entrySet()) { - final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); - // BUGBUG: There needs to be a comparable isUsableBase check here - if (! 
a.isInformative()) - continue; // read is non-informative - if (a.getMostLikelyAllele().isReference()) - refQuals.add((double)el.getKey().getMappingQuality()); - else if (allAlleles.contains(a.getMostLikelyAllele())) - altQuals.add((double)el.getKey().getMappingQuality()); - } + protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) { + return (double)read.getMappingQuality(); } - } \ No newline at end of file + protected Double getElementForPileupElement(final PileupElement p) { + return (double)p.getRead().getMappingQuality(); + } +} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 18348162e..d9bc5966c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -56,6 +56,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnota import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; @@ -77,55 +78,41 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn final Map stratifiedContexts, final VariantContext vc, final Map perReadAlleleLikelihoodMap ) { - int totalSize = 0, index = 0; - int qualities[]; - if (stratifiedContexts != null) { + + final List qualities = new ArrayList<>(); + if ( stratifiedContexts != null ) { if ( stratifiedContexts.size() == 0 ) return null; - for ( AlignmentContext context : stratifiedContexts.values() ) 
- totalSize += context.size(); - - qualities = new int[totalSize]; - - for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - AlignmentContext context = sample.getValue(); - for (PileupElement p : context.getBasePileup() ) - index = fillMappingQualitiesFromPileupAndUpdateIndex(p.getRead(), index, qualities); + for ( final Map.Entry sample : stratifiedContexts.entrySet() ) { + final AlignmentContext context = sample.getValue(); + for ( final PileupElement p : context.getBasePileup() ) + fillMappingQualitiesFromPileup(p.getRead().getMappingQuality(), p.getRepresentativeCount(), qualities); } } else if (perReadAlleleLikelihoodMap != null) { if ( perReadAlleleLikelihoodMap.size() == 0 ) return null; - for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() ) - totalSize += perReadLikelihoods.size(); - - qualities = new int[totalSize]; - for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() ) { - for (GATKSAMRecord read : perReadLikelihoods.getStoredElements()) - index = fillMappingQualitiesFromPileupAndUpdateIndex(read, index, qualities); - - - } + for ( final PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() ) { + for ( final GATKSAMRecord read : perReadLikelihoods.getStoredElements() ) + fillMappingQualitiesFromPileup(read.getMappingQuality(), (read.isReducedRead() ? 
read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1), qualities); + } } else return null; - - - double rms = MathUtils.rms(qualities); - Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%.2f", rms)); - return map; + final double rms = MathUtils.rms(qualities); + return Collections.singletonMap(getKeyNames().get(0), (Object)String.format("%.2f", rms)); } - private static int fillMappingQualitiesFromPileupAndUpdateIndex(final GATKSAMRecord read, final int inputIdx, final int[] qualities) { - int outputIdx = inputIdx; - if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE ) - qualities[outputIdx++] = read.getMappingQuality(); - - return outputIdx; + private static void fillMappingQualitiesFromPileup(final int mq, final int representativeCount, final List qualities) { + if ( mq != QualityUtils.MAPPING_QUALITY_UNAVAILABLE ) { + if ( representativeCount == 1 ) + qualities.add(mq); + else + qualities.addAll(Collections.nCopies(representativeCount, mq)); + } } public List getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index ef456824e..37508fc06 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -53,9 +53,11 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import 
org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -87,31 +89,33 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR if (genotypes == null || genotypes.size() == 0) return null; - final ArrayList refQuals = new ArrayList(); - final ArrayList altQuals = new ArrayList(); + final ArrayList refQuals = new ArrayList<>(); + final ArrayList altQuals = new ArrayList<>(); for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { - PerReadAlleleLikelihoodMap indelLikelihoodMap = null; - ReadBackedPileup pileup = null; + boolean usePileup = true; - if (stratifiedContexts != null) { // the old UG SNP-only path through the annotations - final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); - if ( context != null ) - pileup = context.getBasePileup(); + if ( stratifiedPerReadAlleleLikelihoodMap != null ) { + final PerReadAlleleLikelihoodMap likelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()); + if ( likelihoodMap != null && !likelihoodMap.isEmpty() ) { + fillQualsFromLikelihoodMap(vc.getAlleles(), vc.getStart(), likelihoodMap, refQuals, altQuals); + usePileup = false; + } } - if (stratifiedPerReadAlleleLikelihoodMap != null ) - indelLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()); - if (indelLikelihoodMap != null && indelLikelihoodMap.isEmpty()) - indelLikelihoodMap = null; - // treat an empty likelihood map as a null reference - will simplify contract with fillQualsFromPileup - if (indelLikelihoodMap == null && pileup == null) - 
continue; - - fillQualsFromPileup(vc.getAlleles(), vc.getStart(), pileup, indelLikelihoodMap, refQuals, altQuals ); + // the old UG SNP-only path through the annotations + if ( usePileup && stratifiedContexts != null ) { + final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); + if ( context != null ) { + final ReadBackedPileup pileup = context.getBasePileup(); + if ( pileup != null ) + fillQualsFromPileup(vc.getAlleles(), pileup, refQuals, altQuals); + } + } } - if (refQuals.isEmpty() && altQuals.isEmpty()) + + if ( refQuals.isEmpty() && altQuals.isEmpty() ) return null; final MannWhitneyU mannWhitneyU = new MannWhitneyU(useDithering); @@ -136,18 +140,72 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases) final Pair testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1); - final Map map = new HashMap(); + final Map map = new HashMap<>(); if (!Double.isNaN(testResults.first)) map.put(getKeyNames().get(0), String.format("%.3f", testResults.first)); return map; } - protected abstract void fillQualsFromPileup(final List alleles, - final int refLoc, - final ReadBackedPileup readBackedPileup, - final PerReadAlleleLikelihoodMap alleleLikelihoodMap, - final List refQuals, - final List altQuals); + private void fillQualsFromPileup(final List alleles, + final ReadBackedPileup pileup, + final List refQuals, + final List altQuals) { + for ( final PileupElement p : pileup ) { + if ( isUsableBase(p) ) { + final Double value = getElementForPileupElement(p); + if ( value == null ) + continue; + + if ( alleles.get(0).equals(Allele.create(p.getBase(), true)) ) + refQuals.add(value); + else if ( alleles.contains(Allele.create(p.getBase())) ) + altQuals.add(value); + } + } + } + + private void fillQualsFromLikelihoodMap(final List alleles, + final int refLoc, + final PerReadAlleleLikelihoodMap likelihoodMap, + final 
List refQuals, + final List altQuals) { + for ( final Map.Entry> el : likelihoodMap.getLikelihoodReadMap().entrySet() ) { + final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); + if ( ! a.isInformative() ) + continue; // read is non-informative + + final GATKSAMRecord read = el.getKey(); + if ( isUsableRead(read, refLoc) ) { + final Double value = getElementForRead(read, refLoc); + if ( value == null ) + continue; + + if ( a.getMostLikelyAllele().isReference() ) + refQuals.add(value); + else if ( alleles.contains(a.getMostLikelyAllele()) ) + altQuals.add(value); + } + } + } + + /** + * Get the element for the given read at the given reference position + * + * @param read the read + * @param refLoc the reference position + * @return a Double representing the element to be used in the rank sum test, or null if it should not be used + */ + protected abstract Double getElementForRead(final GATKSAMRecord read, final int refLoc); + + // TODO -- until the ReadPosRankSumTest stops treating these differently, we need to have separate methods for GATKSAMRecords and PileupElements. Yuck. + + /** + * Get the element for the given read at the given reference position + * + * @param p the pileup element + * @return a Double representing the element to be used in the rank sum test, or null if it should not be used + */ + protected abstract Double getElementForPileupElement(final PileupElement p); /** * Can the base in this pileup element be used in comparative tests between ref / alt bases? 
@@ -157,30 +215,33 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR * @param p the pileup element to consider * @return true if this base is part of a meaningful read for comparison, false otherwise */ - public static boolean isUsableBase(final PileupElement p) { - return isUsableBase(p, false); + protected boolean isUsableBase(final PileupElement p) { + return !(p.isDeletion() || + p.getMappingQual() == 0 || + p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || + ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE || // need the unBAQed quality score here + p.getRead().isReducedRead() ); } /** - * Can the base in this pileup element be used in comparative tests between ref / alt bases? + * Can the read be used in comparative tests between ref / alt bases? * - * @param p the pileup element to consider - * @param allowDeletions if true, allow p to be a deletion base - * @return true if this base is part of a meaningful read for comparison, false otherwise + * @param read the read to consider + * @param refLoc the reference location + * @return true if this read is meaningful for comparison, false otherwise */ - public static boolean isUsableBase(final PileupElement p, final boolean allowDeletions) { - return !((! allowDeletions && p.isDeletion()) || - p.getMappingQual() == 0 || - p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || - ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); // need the unBAQed quality score here + protected boolean isUsableRead(final GATKSAMRecord read, final int refLoc) { + return !( read.getMappingQuality() == 0 || + read.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || + read.isReducedRead() ); } /** * Initialize the rank sum test annotation using walker and engine information. Right now this checks to see if * engine randomization is turned off, and if so does not dither. 
- * @param walker - * @param toolkit - * @param headerLines + * @param walker the walker + * @param toolkit the GATK engine + * @param headerLines the header lines */ public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set headerLines ) { useDithering = ! toolkit.getArguments().disableDithering; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 6ce4aab49..37faaed22 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -51,17 +51,13 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; -import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; @@ -83,55 +79,34 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias")); } - protected void fillQualsFromPileup(final List allAlleles, - final int refLoc, - final ReadBackedPileup pileup, - final PerReadAlleleLikelihoodMap alleleLikelihoodMap, - final List refQuals, final List altQuals) { + protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) { + final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true ); + if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) + return null; - if (alleleLikelihoodMap == null) { - // use old UG SNP-based version if we don't have per-read allele likelihoods - for ( final PileupElement p : pileup ) { - if ( isUsableBase(p) && p.getRead().getCigar() != null ) { - int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0); - - readPos = getFinalReadPosition(p.getRead(),readPos); - - if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) { - refQuals.add((double)readPos); - } else if ( allAlleles.contains(Allele.create(p.getBase()))) { - altQuals.add((double)readPos); - } - } - } - return; - } - - for (Map.Entry> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) { - final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); - if (! 
a.isInformative() ) - continue; // read is non-informative - - final GATKSAMRecord read = el.getKey(); - if ( read.getSoftStart() + read.getCigar().getReadLength() <= refLoc ) { // make sure the read actually covers the requested ref loc - continue; - } - final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true ); - if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED || read.getCigar() == null ) - continue; - int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 ); - final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); - if (readPos > numAlignedBases / 2) - readPos = numAlignedBases - (readPos + 1); - - if (a.getMostLikelyAllele().isReference()) - refQuals.add((double)readPos); - else if (allAlleles.contains(a.getMostLikelyAllele())) - altQuals.add((double)readPos); - } + int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 ); + final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); + if (readPos > numAlignedBases / 2) + readPos = numAlignedBases - (readPos + 1); + return (double)readPos; } - int getFinalReadPosition(GATKSAMRecord read, int initialReadPosition) { + protected Double getElementForPileupElement(final PileupElement p) { + final int offset = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0); + return (double)getFinalReadPosition(p.getRead(), offset); + } + + @Override + protected boolean isUsableBase(final PileupElement p) { + return super.isUsableBase(p) && p.getRead().getCigar() != null; + } + + @Override + protected boolean isUsableRead(final GATKSAMRecord read, final int refLoc) { + return super.isUsableRead(read, refLoc) && read.getSoftStart() + read.getCigar().getReadLength() > refLoc; + } + + private int getFinalReadPosition(final GATKSAMRecord read, final int 
initialReadPosition) { final int numAlignedBases = getNumAlignedBases(read); int readPos = initialReadPosition; @@ -141,7 +116,8 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio return readPos; } - int getNumClippedBasesAtStart(SAMRecord read) { + + private int getNumClippedBasesAtStart(final SAMRecord read) { // compute total number of clipped bases (soft or hard clipped) // check for hard clips (never consider these bases): final Cigar c = read.getCigar(); @@ -151,8 +127,8 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio if (first.getOperator() == CigarOperator.H) { numStartClippedBases = first.getLength(); } - byte[] unclippedReadBases = read.getReadBases(); - byte[] unclippedReadQuals = read.getBaseQualities(); + final byte[] unclippedReadBases = read.getReadBases(); + final byte[] unclippedReadQuals = read.getBaseQualities(); // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative, // and may leave a string of Q2 bases still hanging off the reads. 
@@ -167,11 +143,11 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio return numStartClippedBases; } - int getNumAlignedBases(SAMRecord read) { + private int getNumAlignedBases(final GATKSAMRecord read) { return read.getReadLength() - getNumClippedBasesAtStart(read) - getNumClippedBasesAtEnd(read); } - int getNumClippedBasesAtEnd(SAMRecord read) { + private int getNumClippedBasesAtEnd(final GATKSAMRecord read) { // compute total number of clipped bases (soft or hard clipped) // check for hard clips (never consider these bases): final Cigar c = read.getCigar(); @@ -181,8 +157,8 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio if (last.getOperator() == CigarOperator.H) { numEndClippedBases = last.getLength(); } - byte[] unclippedReadBases = read.getReadBases(); - byte[] unclippedReadQuals = read.getBaseQualities(); + final byte[] unclippedReadBases = read.getReadBases(); + final byte[] unclippedReadQuals = read.getBaseQualities(); // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative, // and may leave a string of Q2 bases still hanging off the reads. 
@@ -193,11 +169,6 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio break; } - return numEndClippedBases; } - - int getOffsetFromClippedReadStart(SAMRecord read, int offset) { - return offset - getNumClippedBasesAtStart(read); - } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java new file mode 100644 index 000000000..fec83e1a8 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java @@ -0,0 +1,151 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. 
LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.walkers.compression.reducereads.*; +import org.broadinstitute.sting.gatk.walkers.compression.reducereads.BaseCounts; +import org.broadinstitute.sting.utils.MannWhitneyU; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class RankSumUnitTest { + + List distribution20, distribution30, distribution20_40; + static final int observations = 100; + + @BeforeClass + public void init() { + distribution20 = new ArrayList<>(observations); + distribution30 = new ArrayList<>(observations); + distribution20_40 = new ArrayList<>(observations); + + final int skew = 3; + makeDistribution(distribution20, 20, skew, observations); + makeDistribution(distribution30, 30, skew, observations); + makeDistribution(distribution20_40, 20, skew, observations/2); + makeDistribution(distribution20_40, 40, skew, observations/2); + + // shuffle the observations + Collections.shuffle(distribution20); + Collections.shuffle(distribution30); + Collections.shuffle(distribution20_40); + } + + private static void makeDistribution(final List result, final int target, final int skew, final int numObservations) { + final int rangeStart = target - skew; + final int rangeEnd = target + skew; + + int current = rangeStart; + for ( int i = 0; i < numObservations; i++ ) { + result.add(current++); + if ( current > rangeEnd ) + current = rangeStart; + } + } + + @DataProvider(name = "DistributionData") + public Object[][] makeDistributionData() { + List tests = new ArrayList(); + + for ( final int numToReduce : Arrays.asList(0, 10, 50, 100) ) { + tests.add(new Object[]{distribution20, distribution20, numToReduce, true, "20-20"}); + tests.add(new Object[]{distribution30, distribution30, 
numToReduce, true, "30-30"}); + tests.add(new Object[]{distribution20_40, distribution20_40, numToReduce, true, "20/40-20/40"}); + + tests.add(new Object[]{distribution20, distribution30, numToReduce, false, "20-30"}); + tests.add(new Object[]{distribution30, distribution20, numToReduce, false, "30-20"}); + + tests.add(new Object[]{distribution20, distribution20_40, numToReduce, false, "20-20/40"}); + tests.add(new Object[]{distribution30, distribution20_40, numToReduce, true, "30-20/40"}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "DistributionData") + public void testDistribution(final List distribution1, final List distribution2, final int numToReduceIn2, final boolean distributionsShouldBeEqual, final String debugString) { + final MannWhitneyU mannWhitneyU = new MannWhitneyU(true); + + for ( final Integer num : distribution1 ) + mannWhitneyU.add(num, MannWhitneyU.USet.SET1); + + final List dist2 = new ArrayList<>(distribution2); + if ( numToReduceIn2 > 0 ) { + final org.broadinstitute.sting.gatk.walkers.compression.reducereads.BaseCounts counts = new BaseCounts(); + for ( int i = 0; i < numToReduceIn2; i++ ) { + final int value = dist2.remove(0); + counts.incr(BaseIndex.A, (byte)value, 0, false); + } + + final int qual = (int)counts.averageQualsOfBase(BaseIndex.A); + for ( int i = 0; i < numToReduceIn2; i++ ) + dist2.add(qual); + } + + for ( final Integer num : dist2 ) + mannWhitneyU.add(num, MannWhitneyU.USet.SET2); + + final Double result = mannWhitneyU.runTwoSidedTest().second; + Assert.assertFalse(Double.isNaN(result)); + + if ( distributionsShouldBeEqual ) { + // TODO -- THIS IS THE FAILURE POINT OF USING REDUCED READS WITH RANK SUM TESTS + if ( numToReduceIn2 >= observations / 2 ) + return; + Assert.assertTrue(result > 0.1, String.format("%f %d %d", result, numToReduceIn2, dist2.get(0))); + } else { + Assert.assertTrue(result < 0.01, String.format("%f %d %d", result, numToReduceIn2, dist2.get(0))); + } + 
} +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 961a28bcf..e7d7300ae 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -78,7 +78,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("42889072698af972f2004ccfe8eae15e")); + Arrays.asList("823868a4b5b5ec2cdf080c059d04d31a")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -112,7 +112,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("7e755bb09169699b76850e76b71a5f5a")); + Arrays.asList("6f873b3152db291e18e3a04fbce2e117")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testExcludeAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - 
Arrays.asList("e17596007d0db7673d138a9ae4890e82")); + Arrays.asList("552c2ad9dbfaa85d51d2def93c8229c6")); executeTest("test exclude annotations", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index c791d08ae..2d36a27d1 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "353c97bfb05a939b3838dc8eee50326b"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "dd28b14d732852bffbba4f22f7697227"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 1022b6e15..117e54ef8 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 
3","LSV_INDEL_DISC_NOREF_p3","INDEL","7e4e1397d5cff68aeba3595e671574fc"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","369ad0ff28bb9ce7974dc2c7343c8737"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 64a27c4c3..49d429c0d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("14ad6eeed46e9b6f4757370267b1a1cc")); + Arrays.asList("ef8151aa699da3272c1ae0986d16ca21")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -88,7 +88,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("d9572a227ccb13a6baa6dc4fb65bc1e5")); + Arrays.asList("7f88229ccefb74513efb199b61183cb8")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("cd184a2a5a1932dcf3e8f0424652176b")); + Arrays.asList("1928ad48bcd0ca180e046bc235cfb3f4")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + 
privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("e8d98996eb81ece8cfb52437920ae2e0")); + Arrays.asList("6663e434a7b549f23bfd52db90e53a1a")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("23a78c16f64bffe1dea3a5587fcabdad")); + Arrays.asList("581c552664e536df6d0f102fb0d10e5a")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("facac578891a4f2be63ddd5ba6b9096b")); + Arrays.asList("587bf6bad368ed81189747a84f913ab2")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("e90256acfc360fc4bf377094732a673a")); + Arrays.asList("862d82c8aa35f1da4f9e67b5b48dfe52")); executeTest("test minIndelFraction 0.0", spec); } @@ -184,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void 
testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("98abcfb0a008050eba8b9c285a25b2a0")); + Arrays.asList("8d9fc96be07db791737ac18135de4d63")); executeTest("test minIndelFraction 0.25", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index f7c5e6fd5..439039f9b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("474dfb943a307c86cabe2043970c58f3")); + Arrays.asList("a9466c1e3ce1fc4bac83086b25a6df54")); executeTest("test MultiSample Pilot1", spec); } @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("f576d86656cc37c0a869c7ac911f4c7c")); + Arrays.asList("70a21812d4dd6b72c44f60c74d508d5b")); executeTest("test Multiple SNP alleles", spec); } @@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends 
WalkerTest{ public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("94d7a907fdca7e8c9fd6bb8a87b2bab2")); + Arrays.asList("f3da1ff1e49a831af055ca52d6d07dd7")); executeTest("test reverse trim", spec); } @@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("94bfccbd06043e90ae1b1c66fc3afe07")); + Arrays.asList("20ff311f363c51b7385a76f6f296759c")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index b9830de8e..33810e255 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -63,18 +63,18 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("e6565060b44a7804935973efcd56e596")); + 
Arrays.asList("ffde0d5e23523e4bd9e7e18f62d37d0f")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "ab776d74c41ce2b859e2b2466a76204a"); + testReducedCalling("SNP", "e8de8c523751ad2fa2ee20185ba5dea7"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "22110b001e2d3dd45d7872334086b2b9"); + testReducedCalling("INDEL", "4b4902327fb132f9aaab3dd5ace934e1"); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index c1b8f8a70..0636d7c1b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "03944bbedb012e2ac2026a84baa0560c"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4a3479fc4ad387d381593b328f737a1b"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "321dc9f3d330790bac7981ffae00cb0c"); + "b7a01525c00d02b3373513a668a43c6a"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - 
"7e9f99d4cba8087dac66ea871b910d7e"); + "a2a42055b068334f415efb07d6bb9acd"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index da92f39fc..aca1172d4 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -78,12 +78,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "09d84bc1aef2dd9c185934752172b794"); + HCTest(CEUTRIO_BAM, "", "baa5a2eedc8f06ce9f8f98411ee09f8a"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "5c074930b27d1f5c942fe755c2a8be27"); + HCTest(NA12878_BAM, "", "f09e03d41238697b23f95716a12667cb"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -94,7 +94,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "005a6d1933913a5d96fc56d01303fa95"); + "130d36448faeb7b8d4bce4be12dacd3a"); } @Test @@ -110,7 +110,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "9b6f667ad87e19c38d16fefe63c37484"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "7c20aa62633f4ce8ebf12950fbf05ec0"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -147,7 +147,7 @@ public class HaplotypeCallerIntegrationTest extends 
WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "6e170d03047caefc2fba3f1c1f8de132"); + HCTestNearbySmallIntervals(NA12878_BAM, "", "0ddc56f0a0fbcfefda79aa20b2ecf603"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -186,7 +186,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("a47ef09a8701128cfb301a83b7bb0728")); + Arrays.asList("5fe9310addf881bed4fde2354e59ce34")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -194,7 +194,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("0cb99f6bb3e630add4b3486c496fa508")); + Arrays.asList("26a9917f6707536636451266de0116c3")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } @@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("92f947cc89e4f50cf2ef3121d2fe308d")); + Arrays.asList("cc6f2a76ee97ecc14a5f956ffbb21d88")); executeTest("HC calling with dbSNP ID annotation 
on WGS intervals", spec); } @@ -217,7 +217,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 + " -L " + hg19Intervals + " -isr INTERSECTION", 1, - Arrays.asList("91877c8ea3eb0e0316d9ad11fdcc1a87")); + Arrays.asList("51e91c8af61a6b47807165906baefb00")); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); } } From b69d210255324d80cfca3986849a716492c76d1e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 17 Jun 2013 10:50:07 -0400 Subject: [PATCH 102/116] Bugfix: allow gzip VCF output in multi-threaded GATK output -- VariantContextWriterStorage was gzipping the intermediate files that would be merged in, but the mergeInto function couldn't read those outputs, and we'd throw a very strange error. Now tmp. VCFs aren't compressed, even if the final VCF is. Added integrationtest to ensure this behavior works going forward. 
-- [delivers #47399279] --- .../storage/VariantContextWriterStorage.java | 24 +++++++++++++++---- .../gatk/EngineFeaturesIntegrationTest.java | 15 ++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index 84709d6d8..80841bae7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -67,12 +67,16 @@ public class VariantContextWriterStorage implements Storage 0); + } } \ No newline at end of file From f6025d25aeabd7c52ef89c6202438f0a40199ee1 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 16 May 2013 10:04:11 -0400 Subject: [PATCH 103/116] Feature requested by Reich lab and Paavo lab in Leipzig for ancient DNA processing: -- When doing cross-species comparisons and studying population history and ancient DNA data, having SOME measure of confidence is needed at every single site that doesn't depend on the reference base, even in a naive per-site SNP mode. Old versions of GATK provided GQ and some wrong PL values at reference sites but these were wrong. This commit addresses this need by adding a new UG command line argument, -allSitePLs, that, if enabled will: a) Emit all 3 ALT snp alleles in the ALT column. b) Emit all corresponding 10 PL values. It's up to the user to process these PL values downstream to make sense of these. Note that, in order to follow VCF spec, the QUAL field in a reference call when there are non-null ALT alleles present will be zero, so QUAL will be useless and filtering will need to be done based on other fields. -- Tweaks and fixes to processing pipelines for Reich lab. 
--- .../SNPGenotypeLikelihoodsCalculationModel.java | 12 ++++++++++-- .../genotyper/UnifiedArgumentCollection.java | 17 ++++++++++++++++- .../genotyper/UnifiedGenotyperEngine.java | 12 ++++++++++-- .../UnifiedGenotyperIntegrationTest.java | 8 ++++++++ .../picard/CollectGcBiasMetrics.scala | 3 +-- 5 files changed, 45 insertions(+), 7 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index ce5f94478..360f88e51 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -147,9 +147,17 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC // if we only want variants, then we don't need to calculate genotype likelihoods if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) return builder.make(); + // if user requires all PLs at all sites, add all possible alt alleles + else if (UAC.annotateAllSitesWithPLs) { + for ( final byte base : BaseUtils.BASES ) { + if ( base != refBase ) + alleles.add(Allele.create(base)); + } + } - // otherwise, choose any alternate allele (it doesn't really matter) - alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0))); + else + // otherwise, choose any alternate allele (it doesn't really matter) + alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 
1 : 0))); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e346b10b7..b96b5733f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -52,6 +52,9 @@ import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.VariantContext; +import java.util.Collections; +import java.util.List; + public class UnifiedArgumentCollection extends StandardCallerArgumentCollection { @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false) @@ -95,6 +98,18 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false) public Double MAX_DELETION_FRACTION = 0.05; + /** + * Advanced, experimental argument: if SNP likelihood model is specified, and if EMIT_ALL_SITES output mode is set, when we set this argument then we will also emit PLs at all sites. + * This will give a measure of reference confidence and a measure of which alt alleles are more plausible (if any). + * WARNINGS: + * - This feature will inflate VCF file size considerably. + * - All SNP ALT alleles will be emitted with corresponding 10 PL values. 
+ * - An error will be emitted if EMIT_ALL_SITES is not set, or if anything other than diploid SNP model is used + */ + @Advanced + @Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false) + public boolean annotateAllSitesWithPLs = false; + // indel-related arguments /** * A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site. @@ -247,7 +262,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection this.EXCLUDE_FILTERED_REFERENCE_SITES = uac.EXCLUDE_FILTERED_REFERENCE_SITES; this.IGNORE_LANE_INFO = uac.IGNORE_LANE_INFO; this.pairHMM = uac.pairHMM; - + this.annotateAllSitesWithPLs = uac.annotateAllSitesWithPLs; // todo- arguments to remove this.IGNORE_SNP_ALLELES = uac.IGNORE_SNP_ALLELES; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3d9f75d45..9f3368cf8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -168,6 +168,13 @@ public class UnifiedGenotyperEngine { filter.add(LOW_QUAL_FILTER_NAME); determineGLModelsToUse(); + + // do argument checking + if (UAC.annotateAllSitesWithPLs) { + if (!modelsToUse.contains(GenotypeLikelihoodsCalculationModel.Model.SNP)) + throw new IllegalArgumentException("Invalid genotype likelihood model specification: Only diploid SNP model can be used in conjunction with option allSitePLs"); + + } } /** @@ -439,7 +446,8 @@ public class UnifiedGenotyperEngine { bestGuessIsRef = false; } // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele - else if ( UAC.GenotypingMode == 
GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES || + UAC.annotateAllSitesWithPLs) { myAlleles.add(alternateAllele); alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele)); } @@ -449,7 +457,7 @@ public class UnifiedGenotyperEngine { // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice final double phredScaledConfidence = - Math.abs(! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES + Math.abs(! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES || UAC.annotateAllSitesWithPLs ? -10 * AFresult.getLog10PosteriorOfAFEq0() : -10 * AFresult.getLog10PosteriorOfAFGT0()); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 300d7f5da..3eb9b4e1c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -156,6 +156,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { } + @Test + public void emitPLsAtAllSites() { + WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1, + Arrays.asList("7cc55db8693759e059a05bc4398f6f69")); + executeTest("test all site PLs 1", spec1); + + } // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity diff --git 
a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala index 5d887016e..7c4c3f26a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala @@ -52,6 +52,5 @@ class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaC override def commandLine = super.commandLine + required("SUMMARY_OUTPUT=" + output) + required("CHART_OUTPUT=" + output+".pdf") + - required("REFERENCE_SEQUENCE=" + reference) + - required("ASSUME_SORTED=true") + required("REFERENCE_SEQUENCE=" + reference) } From 7b22467148e7a8851323402d93b9e357a5b11fc8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 17 Jun 2013 13:35:04 -0400 Subject: [PATCH 104/116] Bugfix: defaultBaseQualities actually works now -- It was being applied in the wrong order (after the first call to the underlying MalformedReadFilter) so if your first read was malformed you'd blow up there instead of being fixed properly. Added integration tests to ensure this continues to work. 
-- [delivers #49538319] --- .../arguments/GATKArgumentCollection.java | 12 ++++---- .../gatk/datasources/reads/SAMDataSource.java | 8 +++--- .../gatk/filters/MalformedReadFilter.java | 5 +++- .../gatk/EngineFeaturesIntegrationTest.java | 28 +++++++++++++++++++ 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 0b1f341f0..b5113fdea 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -187,6 +187,12 @@ public class GATKArgumentCollection { @Argument(fullName = "allow_potentially_misencoded_quality_scores", shortName="allowPotentiallyMisencodedQuals", doc="Do not fail when encountering base qualities that are too high and that seemingly indicate a problem with the base quality encoding of the BAM file", required = false) public boolean ALLOW_POTENTIALLY_MISENCODED_QUALS = false; + @Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false) + public Boolean useOriginalBaseQualities = false; + + @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) + public byte defaultBaseQualities = -1; + // -------------------------------------------------------------------------------------------------------------- // // performance log arguments @@ -201,9 +207,6 @@ public class GATKArgumentCollection { @Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false) public File performanceLog = null; - 
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false) - public Boolean useOriginalBaseQualities = false; - // -------------------------------------------------------------------------------------------------------------- // // BQSR arguments @@ -267,9 +270,6 @@ public class GATKArgumentCollection { // // -------------------------------------------------------------------------------------------------------------- - @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) - public byte defaultBaseQualities = -1; - @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index bf25582ab..2f934e8df 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -630,6 +630,10 @@ public class SAMDataSource { // * (otherwise we will process something that we may end up throwing away) * // // ************************************************************************************************ // + if (useOriginalBaseQualities || defaultBaseQualities >= 0) + // only wrap if we are replacing the original qualities or using a default base quality + wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); + // Filters: wrappedIterator = StingSAMIteratorAdapter.adapt(new 
CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters)); @@ -654,10 +658,6 @@ public class SAMDataSource { if (!noValidationOfReadOrder && enableVerification) wrappedIterator = new VerifyingSamIterator(wrappedIterator); - if (useOriginalBaseQualities || defaultBaseQualities >= 0) - // only wrap if we are replacing the original qualities or using a default base quality - wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); - // set up read transformers for ( final ReadTransformer readTransformer : readTransformers ) { if ( readTransformer.enabled() && readTransformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_INPUT ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index a15870a22..3167ba139 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -234,7 +234,10 @@ public class MalformedReadFilter extends ReadFilter { else if (filterMismatchingBaseAndQuals) result = false; else - throw new UserException.MalformedBAM(read, String.format("BAM file has a read with mismatching number of bases and base qualities. Offender: %s [%d bases] [%d quals]", read.getReadName(), read.getReadLength(), read.getBaseQualities().length)); + throw new UserException.MalformedBAM(read, + String.format("BAM file has a read with mismatching number of bases and base qualities. Offender: %s [%d bases] [%d quals].%s", + read.getReadName(), read.getReadLength(), read.getBaseQualities().length, + read.getBaseQualities().length == 0 ? " You can use --defaultBaseQualities to assign a default base quality for all reads, but this can be dangerous if you don't know what you are doing." 
: "")); return result; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 736989418..fe30b60fd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -227,4 +227,32 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { nLines++; Assert.assertTrue(nLines > 0); } + + // -------------------------------------------------------------------------------- + // + // Test that defaultBaseQualities actually works + // + // -------------------------------------------------------------------------------- + + public WalkerTestSpec testDefaultBaseQualities(final Integer value, final String md5) { + return new WalkerTestSpec("-T PrintReads -R " + b37KGReference + " -I " + privateTestDir + "/baseQualitiesToFix.bam -o %s" + + (value != null ? 
" --defaultBaseQualities " + value : ""), + 1, Arrays.asList(md5)); + } + + @Test() + public void testDefaultBaseQualities20() { + executeTest("testDefaultBaseQualities20", testDefaultBaseQualities(20, "7d254a9d0ec59c66ee3e137f56f4c78f")); + } + + @Test() + public void testDefaultBaseQualities30() { + executeTest("testDefaultBaseQualities30", testDefaultBaseQualities(30, "0f50def6cbbbd8ccd4739e2b3998e503")); + } + + @Test(expectedExceptions = Exception.class) + public void testDefaultBaseQualitiesNoneProvided() { + executeTest("testDefaultBaseQualitiesNoneProvided", testDefaultBaseQualities(null, "")); + } + } \ No newline at end of file From cb5b1c3c343bc3a02764746f66d91ff7bb2e9975 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 17 Jun 2013 16:03:45 -0300 Subject: [PATCH 105/116] Create README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..13b3c0c6e --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +gsa-unstable +============ +See http://www.broadinstitute.org/gatk/ From 8511c4385c3b47d0568d6fe404086ddcae5cd8ee Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 17 Jun 2013 14:02:54 -0400 Subject: [PATCH 106/116] Adding new pruning parameter to ReadThreadingAssembler -- numPruningSamples allows one to specify that the minPruning factor must be met by this many samples for a path to be considered good (e.g. seen twice in three samples). By default this is just one sample. 
-- adding unit test to test this new functionality --- .../haplotypecaller/HaplotypeCaller.java | 8 ++-- .../graphs/MultiSampleEdge.java | 39 ++++++++++-------- .../readthreading/ReadThreadingAssembler.java | 8 ++-- .../readthreading/ReadThreadingGraph.java | 33 ++++++++++----- .../graphs/MultiSampleEdgeUnitTest.java | 40 ++++++++++++------- 5 files changed, 80 insertions(+), 48 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index b94b74748..9b9c3924b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -80,8 +80,6 @@ import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.fragments.FragmentCollection; -import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.haplotype.*; import org.broadinstitute.sting.utils.haplotypeBAMWriter.HaplotypeBAMWriter; @@ -270,6 +268,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="dontIncreaseKmerSizesForCycles", shortName="dontIncreaseKmerSizesForCycles", doc="Should we disable the iterating over kmer sizes when graph cycles are detected?", required = false) protected boolean dontIncreaseKmerSizesForCycles = false; + @Advanced + @Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPruning factor in order for the path to be kept", required = 
false) + protected int numPruningSamples = 1; + /** * Assembly graph can be quite complex, and could imply a very large number of possible haplotypes. Each haplotype * considered requires N PairHMM evaluations if there are N reads across all samples. In order to control the @@ -539,7 +541,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final int maxAllowedPathsForReadThreadingAssembler = Math.max(maxPathsPerSample * nSamples, MIN_PATHS_PER_GRAPH); assemblyEngine = useDebruijnAssembler ? new DeBruijnAssembler(minKmerForDebruijnAssembler, onlyUseKmerSizeForDebruijnAssembler) - : new ReadThreadingAssembler(maxAllowedPathsForReadThreadingAssembler, kmerSizes, dontIncreaseKmerSizesForCycles); + : new ReadThreadingAssembler(maxAllowedPathsForReadThreadingAssembler, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples); assemblyEngine.setErrorCorrectKmers(errorCorrectKmers); assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java index c1937e5c8..978d83eb4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java @@ -46,6 +46,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; +import java.util.PriorityQueue; + /** * edge class for connecting nodes in the graph that tracks some per-sample information * @@ -63,32 +65,43 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; * e.getPruningMultiplicity() // = 3 */ public class MultiSampleEdge extends BaseEdge { - private int maxSingleSampleMultiplicity, currentSingleSampleMultiplicity; + private int currentSingleSampleMultiplicity; + private final int singleSampleCapacity; + private final 
PriorityQueue singleSampleMultiplicities; /** * Create a new MultiSampleEdge with weight multiplicity and, if isRef == true, indicates a path through the reference * * @param isRef indicates whether this edge is a path through the reference * @param multiplicity the number of observations of this edge in this sample + * @param singleSampleCapacity the max number of samples to track edge multiplicities */ - public MultiSampleEdge(final boolean isRef, final int multiplicity) { + public MultiSampleEdge(final boolean isRef, final int multiplicity, final int singleSampleCapacity) { super(isRef, multiplicity); - maxSingleSampleMultiplicity = multiplicity; + + if( singleSampleCapacity <= 0 ) { throw new IllegalArgumentException("singleSampleCapacity must be > 0 but found: " + singleSampleCapacity); } + singleSampleMultiplicities = new PriorityQueue<>(singleSampleCapacity); + singleSampleMultiplicities.add(multiplicity); currentSingleSampleMultiplicity = multiplicity; + this.singleSampleCapacity = singleSampleCapacity; } @Override public MultiSampleEdge copy() { - return new MultiSampleEdge(isRef(), getMultiplicity()); // TODO -- should I copy values for other features? + return new MultiSampleEdge(isRef(), getMultiplicity(), singleSampleCapacity); // TODO -- should I copy values for other features? } /** - * update the max single sample multiplicity based on the current single sample multiplicity, and + * update the single sample multiplicities by adding the current single sample multiplicity to the priority queue, and * reset the current single sample multiplicity to 0. 
*/ public void flushSingleSampleMultiplicity() { - if ( currentSingleSampleMultiplicity > maxSingleSampleMultiplicity ) - maxSingleSampleMultiplicity = currentSingleSampleMultiplicity; + singleSampleMultiplicities.add(currentSingleSampleMultiplicity); + if( singleSampleMultiplicities.size() == singleSampleCapacity + 1 ) { + singleSampleMultiplicities.poll(); // remove the lowest multiplicity from the list + } else if( singleSampleMultiplicities.size() > singleSampleCapacity + 1 ) { + throw new IllegalStateException("Somehow the per sample multiplicity list has grown too big: " + singleSampleMultiplicities); + } currentSingleSampleMultiplicity = 0; } @@ -100,20 +113,12 @@ public class MultiSampleEdge extends BaseEdge { @Override public int getPruningMultiplicity() { - return getMaxSingleSampleMultiplicity(); + return singleSampleMultiplicities.peek(); } @Override public String getDotLabel() { - return super.getDotLabel() + "/" + getMaxSingleSampleMultiplicity(); - } - - /** - * Get the maximum multiplicity for this edge seen in any single sample - * @return an integer >= 0 - */ - public int getMaxSingleSampleMultiplicity() { - return maxSingleSampleMultiplicity; + return super.getDotLabel() + "/" + getPruningMultiplicity(); } /** only provided for testing purposes */ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index fc0f781c5..672c61c0f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -71,6 +71,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { private final int maxAllowedPathsForReadThreadingAssembler; private final boolean 
dontIncreaseKmerSizesForCycles; + private final int numPruningSamples; private boolean requireReasonableNumberOfPaths = false; protected boolean removePathsNotConnectedToRef = true; private boolean justReturnRawGraph = false; @@ -80,15 +81,16 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { this(DEFAULT_NUM_PATHS_PER_GRAPH, Arrays.asList(25)); } - public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes, final boolean dontIncreaseKmerSizesForCycles) { + public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final int numPruningSamples) { super(maxAllowedPathsForReadThreadingAssembler); this.kmerSizes = kmerSizes; this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler; this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles; + this.numPruningSamples = numPruningSamples; } public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes) { - this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true); + this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, 1); } /** for testing purposes */ @@ -139,7 +141,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { final int kmerSize, final List activeAlleleHaplotypes, final boolean allowLowComplexityGraphs) { - final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly); + final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples); // add the reference sequence to the graph rtgraph.addSequence("ref", refHaplotype.getBases(), null, true); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index 0844f979b..7d7df2c06 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -67,13 +67,24 @@ import java.util.*; public class ReadThreadingGraph extends BaseGraph { /** - * Edge factory that creates non-reference multiplicity 1 edges + * Edge factory that encapsulates the numPruningSamples assembly parameter */ private static class MyEdgeFactory implements EdgeFactory { - @Override - public MultiSampleEdge createEdge(MultiDeBruijnVertex sourceVertex, MultiDeBruijnVertex targetVertex) { - return new MultiSampleEdge(false, 1); + final int numPruningSamples; + + public MyEdgeFactory(int numPruningSamples) { + this.numPruningSamples = numPruningSamples; } + + @Override + public MultiSampleEdge createEdge(final MultiDeBruijnVertex sourceVertex, final MultiDeBruijnVertex targetVertex) { + return new MultiSampleEdge(false, 1, numPruningSamples); + } + + public MultiSampleEdge createEdge(final boolean isRef, final int multiplicity) { + return new MultiSampleEdge(isRef, multiplicity, numPruningSamples); + } + } private final static Logger logger = Logger.getLogger(ReadThreadingGraph.class); @@ -88,7 +99,7 @@ public class ReadThreadingGraph extends BaseGraph> pending = new LinkedHashMap>(); + private final Map> pending = new LinkedHashMap<>(); /** * A set of non-unique kmers that cannot be used as merge points in the graph @@ -117,19 +128,19 @@ public class ReadThreadingGraph extends BaseGraph= 1 */ - protected ReadThreadingGraph(final int kmerSize, final boolean debugGraphTransformations, final byte minBaseQualityToUseInAssembly) { - super(kmerSize, new MyEdgeFactory()); + protected ReadThreadingGraph(final int kmerSize, final boolean debugGraphTransformations, final byte 
minBaseQualityToUseInAssembly, final int numPruningSamples) { + super(kmerSize, new MyEdgeFactory(numPruningSamples)); if ( kmerSize < 1 ) throw new IllegalArgumentException("bad minkKmerSize " + kmerSize); this.kmerSize = kmerSize; @@ -324,7 +335,7 @@ public class ReadThreadingGraph extends BaseGraph countsPerSample; + final int numSamplesPruning; + public MultiplicityTestProvider(final List countsPerSample, final int numSamplesPruning) { + this.countsPerSample = countsPerSample; + this.numSamplesPruning = numSamplesPruning; + } + } + @DataProvider(name = "MultiplicityData") public Object[][] makeMultiplicityData() { - List tests = new ArrayList(); + List tests = new ArrayList<>(); final List countsPerSample = Arrays.asList(0, 1, 2, 3, 4, 5); - for ( final int nSamples : Arrays.asList(1, 2, 3, 4, 5)) { - for ( final List perm : Utils.makePermutations(countsPerSample, nSamples, false) ) { - tests.add(new Object[]{perm}); + for ( final int numSamplesPruning : Arrays.asList(1, 2, 3) ) { + for ( final int nSamples : Arrays.asList(1, 2, 3, 4, 5)) { + for ( final List perm : Utils.makePermutations(countsPerSample, nSamples, false) ) { + tests.add(new Object[]{new MultiplicityTestProvider(perm, numSamplesPruning)}); + } } } @@ -77,15 +87,15 @@ public class MultiSampleEdgeUnitTest extends BaseTest { * Example testng test using MyDataProvider */ @Test(dataProvider = "MultiplicityData") - public void testMultiplicity(final List countsPerSample) { - final MultiSampleEdge edge = new MultiSampleEdge(false, 0); + public void testMultiplicity(final MultiplicityTestProvider cfg) { + final MultiSampleEdge edge = new MultiSampleEdge(false, 0, cfg.numSamplesPruning); Assert.assertEquals(edge.getMultiplicity(), 0); Assert.assertEquals(edge.getPruningMultiplicity(), 0); int total = 0; - for ( int i = 0; i < countsPerSample.size(); i++ ) { + for ( int i = 0; i < cfg.countsPerSample.size(); i++ ) { int countForSample = 0; - for ( int count = 0; count < countsPerSample.get(i); count++ ) 
{ + for ( int count = 0; count < cfg.countsPerSample.get(i); count++ ) { edge.incMultiplicity(1); total++; countForSample++; @@ -95,9 +105,11 @@ public class MultiSampleEdgeUnitTest extends BaseTest { edge.flushSingleSampleMultiplicity(); } - final int max = MathUtils.arrayMax(ArrayUtils.toPrimitive(countsPerSample.toArray(new Integer[countsPerSample.size()]))); + ArrayList counts = new ArrayList<>(cfg.countsPerSample); + counts.add(0); + Collections.sort(counts); + final int prune = counts.get(Math.max(counts.size() - cfg.numSamplesPruning, 0)); Assert.assertEquals(edge.getMultiplicity(), total); - Assert.assertEquals(edge.getPruningMultiplicity(), max); - Assert.assertEquals(edge.getMaxSingleSampleMultiplicity(), max); + Assert.assertEquals(edge.getPruningMultiplicity(), prune); } } From f176c854c684dd2412b59c0767d344e62918d0be Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 13 Jun 2013 13:27:06 -0400 Subject: [PATCH 107/116] Swapping in logless Pair HMM for default usage with UG: -- Changed default HMM model. -- Removed check. -- Changed md5's: PL's in the high 100s change by a point or two due to new implementation. -- Resulting performance improvement is about 30 to 50% less runtime when using -glm INDEL. 
--- .../IndelGenotypeLikelihoodsCalculationModel.java | 12 +++++++----- .../walkers/genotyper/UnifiedArgumentCollection.java | 2 +- .../gatk/walkers/indels/PairHMMIndelErrorModel.java | 10 +++++----- ...dGenotyperGeneralPloidySuite1IntegrationTest.java | 2 +- ...dGenotyperGeneralPloidySuite2IntegrationTest.java | 2 +- .../UnifiedGenotyperIndelCallingIntegrationTest.java | 2 +- ...UnifiedGenotyperNormalCallingIntegrationTest.java | 2 +- 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index c6e9ea379..0f3f7739d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -76,7 +76,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private List alleleList = new ArrayList(); - protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { + protected IndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, + final Logger logger) { super(UAC, logger); pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.pairHMM); @@ -85,10 +86,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES; } - protected static List computeConsensusAlleles(ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenomeLocParser locParser, UnifiedArgumentCollection UAC) { + protected static List computeConsensusAlleles(final ReferenceContext ref, + final Map contexts, + 
final AlignmentContextUtils.ReadOrientation contextType, + final GenomeLocParser locParser, + final UnifiedArgumentCollection UAC) { ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE); return counter.computeConsensusAlleles(ref, contexts, contextType); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index b96b5733f..f156468cc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -85,7 +85,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection * The PairHMM implementation to use for -glm INDEL genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime. */ @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for -glm INDEL genotype likelihood calculations", required = false) - public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.ORIGINAL; + public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING; /** * The minimum confidence needed in a given base for it to be used in variant calling. 
Note that the base quality of a base diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 7b444c4bd..c77557da6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -54,6 +54,7 @@ import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.pairhmm.Log10PairHMM; +import org.broadinstitute.sting.utils.pairhmm.LoglessPairHMM; import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -116,12 +117,11 @@ public class PairHMMIndelErrorModel { case ORIGINAL: pairHMM = new Log10PairHMM(false); break; - case LOGLESS_CACHING: //TODO: still not tested so please do not use yet - //pairHMM = new LoglessCachingPairHMM(); //TODO - add it back when the figure out how to use the protected LoglessCachingPairHMM class - throw new UserException.BadArgumentValue("pairHMM"," this option (LOGLESS_CACHING in UG) is still under development"); - //break; + case LOGLESS_CACHING: + pairHMM = new LoglessPairHMM(); + break; default: - throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING (the third option is still under development)."); + throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. 
Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING."); } // fill gap penalty table, affine naive model: diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 2d36a27d1..aaa3b1284 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "dd28b14d732852bffbba4f22f7697227"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "98f4d78aad745c6e853b81b2e4e207b4"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 117e54ef8..0eb89adc7 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","369ad0ff28bb9ce7974dc2c7343c8737"); + 
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","25902d7a6a0c00c60c2d5845dfaa1a4c"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 49d429c0d..65a569cdc 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("587bf6bad368ed81189747a84f913ab2")); + Arrays.asList("5596851d19582dd1af3901b7d703ae0a")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 439039f9b..1bfbbac17 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new 
WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("70a21812d4dd6b72c44f60c74d508d5b")); + Arrays.asList("06c85e8eab08b67244cf38fc785aca22")); executeTest("test Multiple SNP alleles", spec); } From 15171c07a85254ead28bfc75c5ffe9203378306f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Jun 2013 11:10:36 -0400 Subject: [PATCH 108/116] CatVariants accepts reference files ending in any standard extension -- [resolves #49339235] Make CatVariants accept reference files ending in .fa (not only .fasta) --- .../broadinstitute/sting/tools/CatVariants.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java index ad77b2548..b59786d15 100644 --- a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java +++ b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java @@ -144,15 +144,13 @@ public class CatVariants extends CommandLineProgram { BasicConfigurator.configure(); logger.setLevel(Level.INFO); - if ( ! refFile.getName().endsWith(".fasta")) { - throw new UserException("Reference file "+refFile+"name must end with .fasta"); + final ReferenceSequenceFile ref; + try { + ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile); + } catch ( Exception e ) { + throw new UserException("Couldn't load provided reference sequence file " + refFile, e); } - if ( ! 
refFile.exists() ) { - throw new UserException(String.format("Reference file %s does not exist", refFile.getAbsolutePath())); - } - - // Comparator>> comparator = new PositionComparator(); Comparator> positionComparator = new PositionComparator(); @@ -203,8 +201,6 @@ public class CatVariants extends CommandLineProgram { if (!(outputFile.getName().endsWith(".vcf") || outputFile.getName().endsWith(".VCF"))){ throw new UserException(String.format("Output file %s should be .vcf", outputFile)); } - ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile); - FileOutputStream outputStream = new FileOutputStream(outputFile); EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); From af275fdf100f03c78f12e1944b979cbab33c0e69 Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Wed, 12 Jun 2013 13:54:30 -0400 Subject: [PATCH 109/116] Extend the documentation of GenotypeConcordance to include notes about Monomorphic and Filtered VCF records. Address Geraldine's comments - information on moltenization and explanation of fields Fix paren --- .../variantutils/GenotypeConcordance.java | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 10397d718..da8b20c66 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -67,8 +67,58 @@ import java.util.*; * *

Output

* Genotype Concordance writes a GATK report to the specified file (via -o) , consisting of multiple tables of counts - * and proportions. These tables may be optionally moltenized via the -moltenize argument. + * and proportions. These tables may be optionally moltenized via the -moltenize argument. That is, the standard table * + * Sample NO_CALL_HOM_REF NO_CALL_HET NO_CALL_HOM_VAR (...) + * NA12878 0.003 0.001 0.000 (...) + * NA12891 0.005 0.000 0.000 (...) + * + * would instead be displayed + * + * NA12878 NO_CALL_HOM_REF 0.003 + * NA12878 NO_CALL_HET 0.001 + * NA12878 NO_CALL_HOM_VAR 0.000 + * NA12891 NO_CALL_HOM_REF 0.005 + * NA12891 NO_CALL_HET 0.000 + * NA12891 NO_CALL_HOM_VAR 0.000 + * (...) + * + * + * These tables are constructed on a per-sample basis, and include counts of eval vs comp genotype states, and the + * number of times the alternate alleles between the eval and comp sample did not match up. + * + * In addition, Genotype Concordance produces site-level allelic concordance. For strictly bi-allelic VCFs, + * only the ALLELES_MATCH, EVAL_ONLY, TRUTH_ONLY fields will be populated, but where multi-allelic sites are involved + * counts for EVAL_SUBSET_TRUTH and EVAL_SUPERSET_TRUTH will be generated. + * + * For example, in the following situation + * eval: ref - A alt - C + * comp: ref - A alt - C,T + * then the site is tabulated as EVAL_SUBSET_TRUTH. Were the situation reversed, it would be EVAL_SUPERSET_TRUTH. + * However, in the case where eval has both C and T alternate alleles, both must be observed in the genotypes + * (that is, there must be at least one of (0/1,1/1) and at least one of (0/2,1/2,2/2) in the genotype field). If + * one of the alleles has no observations in the genotype fields of the eval, the site-level concordance is + * tabulated as though that allele were not present in the record. + * + *

Monomorphic Records

+ * A site which has an alternate allele, but which is monomorphic in samples, is treated as not having been + * discovered, and will be recorded in the TRUTH_ONLY column (if a record exists in the comp VCF), or not at all + * (if no record exists in the comp VCF). + * + * That is, in the situation + * eval: ref - A alt - C genotypes - 0/0 0/0 0/0 ... 0/0 + * comp: ref - A alt - C ... 0/0 0/0 ... + * is equivalent to + * eval: ref - A alt - . genotypes - 0/0 0/0 0/0 ... 0/0 + * comp: ref - A alt - C ... 0/0 0/0 ... + * + * When a record is present in the comp VCF the *genotypes* for the monomorphic site will still be used to evaluate + * per-sample genotype concordance counts. + * + *

Filtered Records

+ * Filtered records are treated as though they were not present in the VCF, unless -ignoreSiteFilters is provided, + * in which case all records are used. There is currently no way to assess concordance metrics on filtered sites + * exclusively. SelectVariants can be used to extract filtered sites, and VariantFiltration used to un-filter them. */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) public class GenotypeConcordance extends RodWalker>,ConcordanceMetrics> { From 0be788f0f9ab0212d8a9eb91f995502ebe1d2b62 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 19 Jun 2013 13:15:24 -0400 Subject: [PATCH 110/116] Fix typo in snpEff documentation --- .../org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 288196d1b..8c068d3e4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -49,7 +49,7 @@ import java.util.regex.Pattern; * *

See http://snpeff.sourceforge.net/ for more information on the SnpEff tool

. * - *

For each variant, this tol chooses one of the effects of highest biological impact from the SnpEff + *

For each variant, this tool chooses one of the effects of highest biological impact from the SnpEff * output file (which must be provided on the command line via --snpEffFile filename.vcf), * and adds annotations on that effect.

* From 23ee192d5ef969675b3cf1d5396fb9bb3353f4f8 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 19 Jun 2013 13:22:44 -0400 Subject: [PATCH 111/116] PrintReads: remove -ds argument -This argument was completely redundant with the engine-level -dfrac argument. -Could produce unintended consequences if used in conjunction with engine-level downsampling arguments. --- .../sting/gatk/walkers/readutils/PrintReads.java | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/PrintReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/PrintReads.java index a28523369..c7ed0bffd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/PrintReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/PrintReads.java @@ -96,7 +96,7 @@ import java.util.*; * -T PrintReads \ * -o output.bam \ * -I input.bam \ - * -ds 0.25 + * -dfrac 0.25 * * */ @@ -124,12 +124,6 @@ public class PrintReads extends ReadWalker impleme @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; - /** - * Downsamples the bam file by the given ratio, printing only approximately the given percentage of reads. The downsampling is balanced (over the entire coverage) - */ - @Argument(fullName = "downsample_coverage", shortName = "ds", doc="Downsample BAM to desired coverage", required = false) - public double downsampleRatio = 1.0; - /** * Only reads from samples listed in the provided file(s) will be included in the output. */ @@ -237,8 +231,7 @@ public class PrintReads extends ReadWalker impleme nReadsToPrint--; // n > 0 means there are still reads to be printed. } - // if downsample option is turned off (= 1) then don't waste time getting the next random number. 
- return (downsampleRatio == 1 || random.nextDouble() < downsampleRatio); + return true; } /** From 51ec5404d4a50f6eb34915ad3f1f3016265f3ad0 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 18 Jun 2013 16:04:29 -0400 Subject: [PATCH 112/116] SAMDataSource: always consolidate cigar strings into canonical form -Collapses zero-length and repeated cigar elements, neither of which can necessarily be handled correctly by downstream code (like LIBS). -Consolidation is done before read filters, because not all read filters behave correctly with non-consoliated cigars. -Examined other uses of consolidateCigar() throughout the GATK, and found them to not be redundant with the new engine-level consolidation (they're all on artificially-created cigars in the HaplotypeCaller and SmithWaterman classes) -Improved comments in SAMDataSource.applyDecoratingIterators() -Updated MD5s; differences were examined and found to be innocuous -Two tests: -Unit test for ReadFormattingIterator -Integration test for correct handling of zero-length cigar elements by the GATK engine as a whole --- .../gatk/datasources/reads/SAMDataSource.java | 20 ++++---- .../iterators/ReadFormattingIterator.java | 5 ++ .../gatk/EngineFeaturesIntegrationTest.java | 22 ++++++++ .../ReadFormattingIteratorUnitTest.java | 50 +++++++++++++++++++ .../gatk/walkers/BAQIntegrationTest.java | 2 +- .../readutils/PrintReadsIntegrationTest.java | 6 +-- 6 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/iterators/ReadFormattingIteratorUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 2f934e8df..a36667ec4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -625,16 +625,15 @@ 
public class SAMDataSource { byte defaultBaseQualities, boolean isLocusBasedTraversal ) { - // ************************************************************************************************ // - // * NOTE: ALL FILTERING/DOWNSAMPLING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * // - // * (otherwise we will process something that we may end up throwing away) * // - // ************************************************************************************************ // + // Always apply the ReadFormattingIterator before both ReadFilters and ReadTransformers. At a minimum, + // this will consolidate the cigar strings into canonical form. This has to be done before the read + // filtering, because not all read filters will behave correctly with things like zero-length cigar + // elements. If useOriginalBaseQualities is true or defaultBaseQualities >= 0, this iterator will also + // modify the base qualities. + wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); - if (useOriginalBaseQualities || defaultBaseQualities >= 0) - // only wrap if we are replacing the original qualities or using a default base quality - wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); - - // Filters: + // Read Filters: these are applied BEFORE downsampling, so that we downsample within the set of reads + // that actually survive filtering. Otherwise we could get much less coverage than requested. 
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters)); // Downsampling: @@ -658,7 +657,8 @@ public class SAMDataSource { if (!noValidationOfReadOrder && enableVerification) wrappedIterator = new VerifyingSamIterator(wrappedIterator); - // set up read transformers + // Read transformers: these are applied last, so that we don't bother transforming reads that get discarded + // by the read filters or downsampler. for ( final ReadTransformer readTransformer : readTransformers ) { if ( readTransformer.enabled() && readTransformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_INPUT ) wrappedIterator = new ReadTransformingIterator(wrappedIterator, readTransformer); diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index c3b4aaa0a..f9d2f4802 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; /** * An iterator which does post-processing of a read, including potentially wrapping @@ -104,6 +105,10 @@ public class ReadFormattingIterator implements StingSAMIterator { public SAMRecord next() { SAMRecord rec = wrappedIterator.next(); + // Always consolidate the cigar string into canonical form, collapsing zero-length / repeated cigar elements. + // Downstream code (like LocusIteratorByState) cannot necessarily handle non-consolidated cigar strings. + rec.setCigar(AlignmentUtils.consolidateCigar(rec.getCigar())); + // if we are using default quals, check if we need them, and add if necessary. // 1. 
we need if reads are lacking or have incomplete quality scores // 2. we add if defaultBaseQualities has a positive value diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index fe30b60fd..c97ab7301 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; import net.sf.samtools.util.BlockCompressedInputStream; import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.WalkerTest; @@ -39,6 +41,7 @@ import org.broadinstitute.sting.gatk.walkers.qc.ErrorThrowing; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeaderLine; @@ -255,4 +258,23 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { executeTest("testDefaultBaseQualitiesNoneProvided", testDefaultBaseQualities(null, "")); } + @Test + public void testGATKEngineConsolidatesCigars() { + final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" + + " -R " + b37KGReference + + " -I " + privateTestDir + "zero_length_cigar_elements.bam" + + " -o %s", + 1, Arrays.asList("")); // No MD5s; we only want to check the cigar + + final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0); + final SAMFileReader reader = new SAMFileReader(outputBam); + reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); + 
reader.setSAMRecordFactory(new GATKSamRecordFactory()); + + final SAMRecord read = reader.iterator().next(); + reader.close(); + + // Original cigar was 0M3M0M8M. Check that it's been consolidated after running through the GATK engine: + Assert.assertEquals(read.getCigarString(), "11M", "Cigar 0M3M0M8M not consolidated correctly by the engine"); + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/ReadFormattingIteratorUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/ReadFormattingIteratorUnitTest.java new file mode 100644 index 000000000..5d037bc4b --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/ReadFormattingIteratorUnitTest.java @@ -0,0 +1,50 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.gatk.iterators; + +import net.sf.samtools.*; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.Arrays; + + +public class ReadFormattingIteratorUnitTest extends BaseTest { + + @Test + public void testIteratorConsolidatesCigars() { + final Cigar unconsolidatedCigar = TextCigarCodec.getSingleton().decode("3M0M5M0M"); + final SAMRecord unconsolidatedRead = ArtificialSAMUtils.createArtificialRead(unconsolidatedCigar); + + final StingSAMIterator readIterator = StingSAMIteratorAdapter.adapt(Arrays.asList(unconsolidatedRead).iterator()); + final ReadFormattingIterator formattingIterator = new ReadFormattingIterator(readIterator, false, (byte)-1); + final SAMRecord postIterationRead = formattingIterator.next(); + + Assert.assertEquals(postIterationRead.getCigarString(), "8M", "Cigar 3M0M5M0M not consolidated correctly by ReadFormattingIterator"); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/BAQIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/BAQIntegrationTest.java index 6b0422c6a..604c0e377 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/BAQIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/BAQIntegrationTest.java @@ -43,7 +43,7 @@ public class BAQIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testPrintReadsNoBAQ() { - WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq OFF", 1, Arrays.asList("11af64ba020262d06b490bae2c5e08f8")); + WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq OFF", 1, Arrays.asList("d1f74074e718c82810512bf40dbc7f72")); executeTest(String.format("testPrintReadsNoBAQ"), spec); } diff --git 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/readutils/PrintReadsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/readutils/PrintReadsIntegrationTest.java index 7482eae60..adc7ad765 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/readutils/PrintReadsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/readutils/PrintReadsIntegrationTest.java @@ -59,10 +59,10 @@ public class PrintReadsIntegrationTest extends WalkerTest { {new PRTest(hg18Reference, "HiSeq.1mb.bam", " -simplifyBAM", "1510dc4429f3ed49caf96da41e8ed396")}, {new PRTest(hg18Reference, "HiSeq.1mb.bam", " -n 10", "0e3d1748ad1cb523e3295cab9d09d8fc")}, // See: GATKBAMIndex.getStartOfLastLinearBin(), BAMScheduler.advance(), IntervalOverlapFilteringIterator.advance() - {new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", "", "e1cac555f3d720f611c47eec93e84bd9")}, - {new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1", "6e2558317d409195eab3006dc9e5524c")}, + {new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", "", "d7f23fd77d7dc7cb50d3397f644c6d8a")}, + {new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1", "c601db95b20248d012b0085347fcb6d1")}, {new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L unmapped", "2d32440e47e8d9d329902fe573ad94ce")}, - {new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1 -L unmapped", "6e2558317d409195eab3006dc9e5524c")}, + {new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1 -L unmapped", "c601db95b20248d012b0085347fcb6d1")}, {new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "349650b6aa9e574b48a2a62627f37c7d")}, {new PRTest(b37KGReference, "NA12878.1_10mb_2_10mb.bam", "", "0c1cbe67296637a85e80e7a182f828ab")} }; From 08f92bb6f9ab7d74e82455a95a73aea9a0d603a1 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Thu, 13 Jun 2013 18:38:11 -0400 
Subject: [PATCH 113/116] Added AnalyzeCovariates tool to generate BQSR assessment quality plots. Implemtation details: * Added tool class *.AnalyzeCovariates * Added convenient addAll method to Utils to be able to add elements of an array. * Added parameter comparison methods to RecalibrationArgumentCollection class in order to verify that multiple imput recalibration report are compatible and comparable. * Modified the BQSR.R script to handle up to 3 different recalibration tables (-BQSR, -before and -after) and removed some irrelevant arguments (or argument values) from the output. * Added an integration test class. --- .../gatk/walkers/bqsr/AnalyzeCovariates.java | 583 ++++++++++++++++++ .../sting/gatk/walkers/bqsr/BQSRGatherer.java | 4 +- .../gatk/walkers/bqsr/BaseRecalibrator.java | 8 +- .../bqsr/RecalibrationArgumentCollection.java | 146 ++++- .../sting/utils/recalibration/RecalUtils.java | 194 +++++- .../recalibration/RecalibrationReport.java | 5 + .../covariates/ContextCovariate.java | 2 + .../AnalyzeCovariatesIntegrationTest.java | 362 +++++++++++ .../sting/utils/recalibration/BQSR.R | 36 +- .../org/broadinstitute/sting/utils/Utils.java | 30 + 10 files changed, 1341 insertions(+), 29 deletions(-) create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariates.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariatesIntegrationTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariates.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariates.java new file mode 100644 index 000000000..b6f911753 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariates.java @@ -0,0 +1,583 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This 
Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import com.google.java.contract.Requires; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; +import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + + +/** + * Tool to analyze and evaluate base recalibration tables. + *

+ * For now it generates a plot report to assess the quality of a recalibration. + * + *

Input

+ * + * The tool can take up to three different sets of recalibration tables. + * The resulting plots will be overlaid on top of each other to make + * comparisons easy. + * + * + * + * + * + * + * + * + * + * + * + * + * + *
SetArgumentLabelColorDescription
Original-beforeBEFOREMaroon1First pass recalibration + * tables obtained from applying {@link BaseRecalibration} + * on the original alignment.
Recalibrated-afterAFTERBlueSecond pass recalibration tables + * results from the application of {@link BaseRecalibration} + * on the alignment recalibrated using the first pass tables
Input-BQSRBQSRBlackAny recalibration table without a specific role
+ *
+ * + * You need to specify one set at least. Multiple sets need to have the same values for the following parameters: + *

+ * covariate (order is not important), no_standard_covs, run_without_dbsnp, solid_recal_mode, + * solid_nocall_strategy, mismatches_context_size, mismatches_default_quality, deletions_default_quality, + * insertions_default_quality, maximum_cycle_value, low_quality_tail, default_platform, force_platform, + * quantizing_levels and binary_tag_name + *

Output

+ * + * Currently this tool generates two outputs: + * + *
+ *
-plots my-report.pdf
+ *
A pdf document that encloses plots to assess the quality of the recalibration.
+ *
-csv my-report.csv
+ *
A csv file that contains a table with all the data required to generate those plots.
+ *
+ * + * You need to specify at least one of them. + * + *

Other Arguments

+ * + *

-ignoreLMT, --ignoreLastModificationTimes

+ * + * when set, no warning message will be displayed if the -before recalibration table file is older than the -after one. + * + *

Examples

+ * + * + *

Plot a single recalibration table

+ *
+ * java -jar GenomeAnalysisTK.jar \
+ *      -T AnalyzeCovariates \
+ *      -R myreference.fasta \
+ *      -BQSR myrecal.table \
+ *      -plots BQSR.pdf
+ * 
+ * + *

Plot before (first pass) and after (second pass) recalibration table to compare them

+ * + *
+ * java -jar GenomeAnalysisTK.jar \
+ *      -T AnalyzeCovariates \
+ *      -R myreference.fasta \
+ *      -before recal2.table \
+ *      -after recal3.table \
+ *      -plots recalQC.pdf
+ * 
+ * + *

Plot up to three recalibration tables for comparison

+ * + *
+ *
+ * # You can ignore the before/after semantics completely if you like (if you do, add -ignoreLMT
+ * # to avoid a possible warning), but all tables should have been generated using the same parameters.
+ *
+ * java -jar GenomeAnalysisTK.jar \
+ *      -T AnalyzeCovariates \
+ *      -R myreference.fasta \
+ *      -ignoreLMT \
+ *      -BQSR recal1.table \   # you can discard any two
+ *      -before recal2.table \
+ *      -after recal3.table \
+ *      -plots myrecals.pdf
+ * 
+ * + *

Full BQSR quality assessment pipeline

+ * + *
+ * # Generate the first pass recalibration table file.
+ * java -jar GenomeAnalysisTK.jar \
+ *      -T BaseRecalibrator \
+ *      -R myreference.fasta \
+ *      -I myinput.bam \
+ *      -knownSites bundle/my-trusted-snps.vcf \ # optional but recommended
+ *      -knownSites bundle/my-trusted-indels.vcf \ # optional but recommended
+ *      ... other options \
+ *      -o firstpass.table
+ *
+ * # Generate the second pass recalibration table file.
+ * java -jar GenomeAnalysisTK.jar \
+ *      -T BaseRecalibrator \
+ *      -BQSR firstpass.table \
+ *      -R myreference.fasta \
+ *      -I myinput.bam \
+ *      -knownSites bundle/my-trusted-snps.vcf \
+ *      -knownSites bundle/my-trusted-indels.vcf \
+ *      ... other options \
+ *      -o secondpass.table
+ *
+ * # Finally generate the plots report and also keep a copy of the csv (optional).
+ * java -jar GenomeAnalysisTK.jar \
+ *      -T AnalyzeCovariates \
+ *      -R myreference.fasta \
+ *      -before firstpass.table \
+ *      -after secondpass.table \
+ *      -csv BQSR.csv \ # optional
+ *      -plots BQSR.pdf
+ * 
+ * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + * @version 6/16/2013 + * @since 2.6 + */ +public final class AnalyzeCovariates extends RodWalker { + + + // Constants on option short names that are used in some error/warning messages: + + static final String CSV_ARG_SHORT_NAME = "csv"; + static final String PDF_ARG_SHORT_NAME = "plots"; + static final String BEFORE_ARG_SHORT_NAME = "before"; + static final String AFTER_ARG_SHORT_NAME = "after"; + + /** + * File containing the recalibration tables from the first pass. + */ + @Input(shortName=BEFORE_ARG_SHORT_NAME,fullName="beforeReportFile", doc = "file containing the BQSR first-pass report file",required = false) + protected File beforeFile = null; + + /** + * File containing the recalibration tables from the second pass. + */ + @Input(shortName=AFTER_ARG_SHORT_NAME, fullName="afterReportFile", doc = "file containing the BQSR second-pass report file",required = false) + protected File afterFile = null; + + /** + * If true, it won't show a warning if the last-modification time of the before and after input files suggest that they have been reversed. + */ + @Argument(shortName="ignoreLMT", fullName="ignoreLastModificationTimes", doc= "do not emit warning messages related to suspicious last modification time order of inputs", required = false) + protected boolean ignoreLastModificationTime = false; + + /** + * Output report file name. + */ + @Output(shortName=PDF_ARG_SHORT_NAME, fullName="plotsReportFile" ,doc = "location of the output report", required = false) + protected File pdfFile = null; + + /** + * Output csv file name. + */ + @Output(shortName=CSV_ARG_SHORT_NAME,fullName="intermediateCsvFile" ,doc = "location of the csv intermediate file", required = false) + protected File csvFile = null; + + /** + * Convenience reference to the RECAL_BQSR_FILE argument value. + *

+ * This field value is resolved by {@link #initialize()}. + */ + protected File bqsrFile = null; + + /** + * Checks inputs and argument values. + *

+ * Notice that this routine will not validate the content of files. It may have some minor side effects as + * the output of warning messages back to the user. + * + * @throw IllegalStateException there is some required argument value that has not been loaded yet. + * @throw UserException if there is some error caused by or under the end user's control. + */ + private void checkArgumentsValues() { + checkInputReportFile("BQSR",bqsrFile); + checkInputReportFile("before",beforeFile); + checkInputReportFile("after",afterFile); + if (bqsrFile == null && beforeFile == null && afterFile == null) { + throw new UserException("you must provide at least one recalibration report file " + + "(arguments -BQSR, -" + BEFORE_ARG_SHORT_NAME + " or -" + AFTER_ARG_SHORT_NAME); + } + + checkOutputFile(PDF_ARG_SHORT_NAME,pdfFile); + checkOutputFile(CSV_ARG_SHORT_NAME, csvFile); + checkInputReportFileLMT(beforeFile,afterFile); + checkOutputRequested(); + } + + /** + * Checks whether the last-modification-time of the inputs is consistent with their relative roles. + * + * This routine does not thrown an exception but may output a warning message if inconsistencies are spotted. + * + * @param beforeFile the before report file. + * @param afterFile the after report file. + */ + private void checkInputReportFileLMT(final File beforeFile, final File afterFile) { + + if (ignoreLastModificationTime || beforeFile == null || afterFile == null) { + return; // nothing to do here + } else if (beforeFile.lastModified() > afterFile.lastModified()) { + Utils.warnUser("Last modification timestamp for 'Before' and 'After'" + + "recalibration reports are in the wrong order. Perhaps, have they been swapped?"); + } + } + + /** + * Checks that at least one output was requested. + * + * @throw UserException if no output was requested. 
+ */ + private void checkOutputRequested() { + if (pdfFile == null && csvFile == null) { + throw new UserException("you need to request at least one output:" + + " the intermediate csv file (-" + CSV_ARG_SHORT_NAME + " FILE)" + + " or the final plot file (-" + PDF_ARG_SHORT_NAME + " FILE)."); + } + } + + /** + * Checks the value provided to input file arguments. + * + * @throw UserException if there is any problem cause by or under the end user's control + * + * @param name command line argument short name. + * @param value the argument value. + */ + private void checkInputReportFile(final String name,final File value) { + if (value == null) { + return; + } else if (!value.exists()) { + throw new UserException.BadArgumentValue(name, "input report '" + + value + "' does not exist or is unreachable"); + } else if (!value.isFile()) { + throw new UserException.BadArgumentValue(name, "input report '" + + value + "' is not a regular file"); + } else if (!value.canRead()) { + throw new UserException.BadArgumentValue(name, "input report '" + + value + "' cannot be read"); + } + } + + /** + * Checks the value provided for output arguments. + * + * @throw UserException if there is any problem cause by or under the end user's control + * + * @param name command line argument short name. + * @param value the argument value. 
+ */ + private void checkOutputFile(final String name, final File value) { + if (value == null) { + return; + } + if (value.exists() && !value.isFile()) { + throw new UserException.BadArgumentValue(name, "the output file location '" + + value + "' exists as not a file"); + } + final File parent = value.getParentFile(); + if (parent == null) { + return; + } + if (!parent.exists()) { + throw new UserException.BadArgumentValue(name, "the output file parent directory '" + + parent + "' does not exists or is unreachable"); + } else if (!parent.isDirectory()) { + throw new UserException.BadArgumentValue(name, "the output file parent directory '" + + parent + "' is not a directory"); + } else if (!parent.canWrite()) { + throw new UserException.BadArgumentValue(name, "the output file parent directory '" + + parent + "' cannot be written"); + } + + } + + /** + * Generates the plots using the external R script. + * + *

+ * If plotsFile is null, it does not perform any plotting. + * + * @param csvFile the intermediary csv file. + * @param plotsFile the output plot location. + */ + private void generatePlots(final File csvFile, final Map reportFiles, final File plotsFile) { + + if (plotsFile == null) { + return; + } + logger.info("Generating plots file '" + plotsFile + "'"); + final File exampleReportFile = reportFiles.values().iterator().next(); + RecalUtils.generatePlots(csvFile,exampleReportFile,plotsFile); + } + + @Override + public void initialize() { + super.initialize(); + bqsrFile = getToolkit().getArguments().BQSR_RECAL_FILE; + checkArgumentsValues(); + final Map reportFiles = buildReportFileMap(); + final Map reports = buildReportMap(reportFiles); + checkReportConsistency(reports); + final File csvFile = resolveCsvFile(); + generateCsvFile(csvFile,reports); + final File plotFile = resolvePlotFile(); + generatePlots(csvFile, reportFiles, plotFile); + } + + /** + * Returns the plot output file + * @return might be null if the user has not indicated and output file. + */ + private File resolvePlotFile() { + return pdfFile; + } + + /** + * Generates the intermediary Csv file. + * + * @param csvFile where to write the file. + * @param reports the reports to be included. + */ + private void generateCsvFile(final File csvFile, final Map reports) { + try { + logger.info("Generating csv file '" + csvFile + "'"); + RecalUtils.generateCsv(csvFile, reports); + } catch (FileNotFoundException e) { + throw new UserException( + String.format("There is a problem creating the intermediary Csv file '%s': %s", + csvFile,e.getMessage()),e); + } + } + + /** + * Checks whether multiple input recalibration report files argument values are consistent (equal). + * + * @param reports map with report to verify. + * + * @throw UserException if there is any inconsistency. 
+ */ + private void checkReportConsistency(final Map reports) { + final Map.Entry[] reportEntries = + reports.entrySet().toArray((Map.Entry[]) new Map.Entry[reports.size()]); + + final Map.Entry exampleEntry = reportEntries[0]; + + for (int i = 1; i < reportEntries.length; i++) { + final Map diffs = exampleEntry.getValue().getRAC().compareReportArguments( + reportEntries[i].getValue().getRAC(),exampleEntry.getKey(),reportEntries[i].getKey()); + if (diffs.size() != 0) { + throw new UserException("There are differences in relevant arguments of" + + " two or more input recalibration reports. Please make sure" + + " they have been created using the same recalibration parameters." + + " " + Utils.join("// ", reportDifferencesStringArray(diffs))); + } + } + } + + + /** + * Creates a map with all input recalibration files indexed by their "role". + *

+ * The key is the role and the value the corresponding report file. + *

+ * Roles: "Before" (recalibration), "After" (recalibration), "BQSR" (the tool standard argument recalibration file) + * + * @return never null + */ + private Map buildReportFileMap() { + final Map reports = new LinkedHashMap<>(3); + if (bqsrFile != null) { + reports.put("BQSR",bqsrFile); + } + if (beforeFile != null) { + reports.put("Before",beforeFile); + } + if (afterFile != null) { + reports.put("After",afterFile); + } + return reports; + } + + /** + * Transforms a recalibration file map into a report object map. + * + * @param reportFileMap the file map to transforms. + * @return never null, a new map with the same size as + * reportFileMap and the same key set. + */ + @Requires("reportFileMap != null") + private Map buildReportMap(final Map reportFileMap) { + final Map reports = new LinkedHashMap<>(reportFileMap.size()); + for (final Map.Entry e : reportFileMap.entrySet()) { + reports.put(e.getKey(),new RecalibrationReport(e.getValue())); + } + return reports; + } + + /** + * Generates a flatter String array representation of recalibration argument differences. + * @param diffs the differences to represent. + * + * @return never null, an array of the same length as the size of the input diffs. + */ + @Requires("diffs != null") + private String[] reportDifferencesStringArray(final Map diffs) { + final String[] result = new String[diffs.size()]; + int i = 0; + for (final Map.Entry e : diffs.entrySet()) { + result[i++] = capitalize(e.getKey()) + ": " + e.getValue(); + } + return result; + } + + /** + * Returns the input string capitalizing the first letter. + * + * @param str the string to capitalize + * @return never null. + */ + @Requires("str != null") + private String capitalize(final String str) { + if (str.isEmpty()) { + return str; + } else { + return Character.toUpperCase(str.charAt(0)) + str.substring(1); + } + } + + /** + * Returns the csv file to use. + *

+ * This is the the one specified by the user if any or a temporary file + * that will be deleted as soon as the VM exists by default. + * + * @return never null. + */ + private File resolveCsvFile() { + if (csvFile != null) { + return csvFile; + } else { + try { + final File result = File.createTempFile("AnalyzeCovariates", ".csv"); + result.deleteOnExit(); + return result; + } catch (IOException e) { + throw new UserException("Could not create temporary Csv file",e); + } + } + } + + /** + * Always return true, forcing the immediate termination of the travesal. + * @return + */ + @Override + public boolean isDone() { + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public None reduceInit() { + return new None(); + } + + /** + * Is not supposed to ever be called, thus it always results in an exception. + * + * @throws IllegalStateException always. + */ + @Override + public None reduce(None value, None sum) { + throw new IllegalStateException("AnalyzeCovariates reduce method is not supposed to be invoked ever"); + } + + + /** + * Is not supposed to ever be called, thus it always results in an exception. + * + * @throws IllegalStateException always. + */ + @Override + public None map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + throw new IllegalStateException("AnalyzeCovariates map method is not supposed to be invoked ever"); + } + + /** + * Dummy map and reduce types for the {@link AnalyzeCovariates} tool that in fact does not do any traversal. 
+ */ + protected static class None { + private None() { + } + } +} + + diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java index ad97dc008..7727c2dac 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -97,10 +97,10 @@ public class BQSRGatherer extends Gatherer { RAC.RECAL_TABLE_FILE = output; if ( RAC.existingRecalibrationReport != null ) { final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport); - RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates()); + RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getRequestedCovariates()); } else { - RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates()); + RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getRequestedCovariates()); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index c60eceaa4..41d3f3991 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.baq.BAQ; 
import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; @@ -124,7 +125,7 @@ import java.util.List; * -R resources/Homo_sapiens_assembly18.fasta \ * -knownSites bundle/hg18/dbsnp_132.hg18.vcf \ * -knownSites another/optional/setOfSitesToMask.vcf \ - * -o recal_data.grp + * -o recal_data.table * */ @@ -179,6 +180,11 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche public void initialize() { baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty + if (RAC.RECAL_PDF_FILE != null) { + Utils.warnUser("This is not the recommended way to generate recalibration plots any longer and will be" + + " discontinued soon in future releases. Please use the 'AnalyzeCovariates' tool instead from now one"); + } + if (RAC.FORCE_PLATFORM != null) RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 5a2cdc7a6..c1ecb2320 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -46,15 +46,17 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; +import com.google.java.contract.Requires; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.recalibration.RecalUtils; import java.io.File; import java.io.PrintStream; -import java.util.Collections; -import java.util.List; +import java.util.*; /** * Created by IntelliJ IDEA. 
@@ -65,7 +67,7 @@ import java.util.List; * This set of arguments will also be passed to the constructor of every Covariate when it is instantiated. */ -public class RecalibrationArgumentCollection { +public class RecalibrationArgumentCollection implements Cloneable { /** * This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference, @@ -289,4 +291,142 @@ public class RecalibrationArgumentCollection { return argumentsTable; } + /** + * Returns a map with the arguments that differ between this an + * another {@link RecalibrationArgumentCollection} instance. + *

+ * The key is the name of that argument in the report file. The value is a message + * that explains the difference to the end user. + *

+ * Thus, a empty map indicates that there is no differences between both argument collection that + * is relevant to report comparison. + *

+ * This method should not throw any exception. + * + * @param other the argument-collection to compare against. + * @param thisRole the name used to refer to this RAC report that makes sense to the end user. + * @param otherRole the name used to refer to the other RAC report that makes sense to the end user. + * + * @return never null, but a zero-size collection if there are no differences. + */ + @Requires("other != null && thisRole != null && otherRole != null && !thisRole.equalsIgnoreCase(otherRole)") + Map compareReportArguments(final RecalibrationArgumentCollection other,final String thisRole, final String otherRole) { + final Map result = new LinkedHashMap<>(15); + compareRequestedCovariates(result, other, thisRole, otherRole); + compareSimpleReportArgument(result,"no_standard_covs", DO_NOT_USE_STANDARD_COVARIATES, other.DO_NOT_USE_STANDARD_COVARIATES, thisRole, otherRole); + compareSimpleReportArgument(result,"run_without_dbsnp",RUN_WITHOUT_DBSNP,other.RUN_WITHOUT_DBSNP,thisRole,otherRole); + compareSimpleReportArgument(result,"solid_recal_mode", SOLID_RECAL_MODE, other.SOLID_RECAL_MODE,thisRole,otherRole); + compareSimpleReportArgument(result,"solid_nocall_strategy", SOLID_NOCALL_STRATEGY, other.SOLID_NOCALL_STRATEGY,thisRole,otherRole); + compareSimpleReportArgument(result,"mismatches_context_size", MISMATCHES_CONTEXT_SIZE,other.MISMATCHES_CONTEXT_SIZE,thisRole,otherRole); + compareSimpleReportArgument(result,"mismatches_default_quality", MISMATCHES_DEFAULT_QUALITY, other.MISMATCHES_DEFAULT_QUALITY,thisRole,otherRole); + compareSimpleReportArgument(result,"deletions_default_quality", DELETIONS_DEFAULT_QUALITY, other.DELETIONS_DEFAULT_QUALITY,thisRole,otherRole); + compareSimpleReportArgument(result,"insertions_default_quality", INSERTIONS_DEFAULT_QUALITY, other.INSERTIONS_DEFAULT_QUALITY,thisRole,otherRole); + compareSimpleReportArgument(result,"maximum_cycle_value", MAXIMUM_CYCLE_VALUE, other.MAXIMUM_CYCLE_VALUE,thisRole,otherRole); + 
compareSimpleReportArgument(result,"low_quality_tail", LOW_QUAL_TAIL, other.LOW_QUAL_TAIL,thisRole,otherRole); + compareSimpleReportArgument(result,"default_platform", DEFAULT_PLATFORM, other.DEFAULT_PLATFORM,thisRole,otherRole); + compareSimpleReportArgument(result,"force_platform", FORCE_PLATFORM, other.FORCE_PLATFORM,thisRole,otherRole); + compareSimpleReportArgument(result,"quantizing_levels", QUANTIZING_LEVELS, other.QUANTIZING_LEVELS,thisRole,otherRole); + compareSimpleReportArgument(result,"binary_tag_name", BINARY_TAG_NAME, other.BINARY_TAG_NAME,thisRole,otherRole); + return result; + } + + + /** + * Compares the covariate report lists. + * + * @param diffs map where to annotate the difference. + * @param other the argument collection to compare against. + * @param thisRole the name for this argument collection that makes sense to the user. + * @param otherRole the name for the other argument collection that makes sense to the end user. + * + * @return true if a difference was found. + */ + @Requires("diffs != null && other != null && thisRole != null && otherRole != null") + private boolean compareRequestedCovariates(final Map diffs, + final RecalibrationArgumentCollection other, final String thisRole, final String otherRole) { + + final Set beforeNames = new HashSet<>(this.COVARIATES.length); + final Set afterNames = new HashSet<>(other.COVARIATES.length); + Utils.addAll(beforeNames, this.COVARIATES); + Utils.addAll(afterNames,other.COVARIATES); + final Set intersect = new HashSet<>(Math.min(beforeNames.size(),afterNames.size())); + intersect.addAll(beforeNames); + intersect.retainAll(afterNames); + + String diffMessage = null; + if (intersect.size() == 0) { // In practice this is not possible due to required covariates but... + diffMessage = String.format("There are no common covariates between '%s' and '%s'" + + " recalibrator reports. Covariates in '%s': {%s}. 
Covariates in '%s': {%s}.",thisRole,otherRole, + thisRole,Utils.join(", ",this.COVARIATES), + otherRole,Utils.join(",",other.COVARIATES)); + } else if (intersect.size() != beforeNames.size() || intersect.size() != afterNames.size()) { + beforeNames.removeAll(intersect); + afterNames.removeAll(intersect); + diffMessage = String.format("There are differences in the set of covariates requested in the" + + " '%s' and '%s' recalibrator reports. " + + " Exclusive to '%s': {%s}. Exclusive to '%s': {%s}.",thisRole,otherRole, + thisRole,Utils.join(", ",beforeNames), + otherRole,Utils.join(", ",afterNames)); + } + if (diffMessage != null) { + diffs.put("covariate",diffMessage); + return true; + } else { + return false; + } + } + + /** + * Annotates a map with any difference encountered in a simple value report argument that differs between this an + * another {@link RecalibrationArgumentCollection} instance. + *

+ * The key of the new entry would be the name of that argument in the report file. The value is a message + * that explains the difference to the end user. + *

+ * + *

+ * This method should not return any exception. + * + * @param diffs where to annotate the differences. + * @param name the name of the report argument to compare. + * @param thisValue this argument collection value for that argument. + * @param otherValue the other collection value for that argument. + * @param thisRole the name used to refer to this RAC report that makes sense to the end user. + * @param otherRole the name used to refer to the other RAC report that makes sense to the end user. + * + * @type T the argument Object value type. + * + * @return true if a difference has been spotted, thus diff has been modified. + */ + private boolean compareSimpleReportArgument(final Map diffs, + final String name, final T thisValue, final T otherValue, final String thisRole, final String otherRole) { + if (thisValue == null && otherValue == null) { + return false; + } else if (thisValue != null && thisValue.equals(otherValue)) { + return false; + } else { + diffs.put(name, + String.format("differences between '%s' {%s} and '%s' {%s}.", + thisRole,thisValue == null ? "" : thisValue, + otherRole,otherValue == null ? "" : otherValue)); + return true; + } + + } + + /** + * Create a shallow copy of this argument collection. + * + * @return never null. 
+ */ + @Override + public RecalibrationArgumentCollection clone() { + try { + return (RecalibrationArgumentCollection) super.clone(); + } catch (CloneNotSupportedException e) { + throw new StingException("Unreachable code clone not supported thrown when the class " + + this.getClass().getName() + " is cloneable ",e); + } + } + } diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index ae6b56e19..8908ce4a4 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -70,9 +70,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.io.File; -import java.io.IOException; -import java.io.PrintStream; +import java.io.*; import java.util.*; /** @@ -223,6 +221,150 @@ public class RecalUtils { } } + /** + * Component used to print out csv representation of the reports that can be use to perform analysis in + * external tools. E.g. generate plots using R scripts. + *

+ * A header is always printed into the output stream (or file) when the printer is created. Then you only need + * to call {@link #print(RecalibrationReport,String) print} for each report you want to include in the csv file. + * Once finished, you close the printer calling {@link #close() close} + * + */ + private static class CsvPrinter { + + private final PrintStream ps; + private final Covariate[] covariates; + + /** + * Constructs a printer redirected to an output file. + * @param out the output file. + * @param c covariates to print out. + * @throws FileNotFoundException if the file could not be created anew. + */ + protected CsvPrinter(final File out, final Covariate ... c) + throws FileNotFoundException { + this(new FileOutputStream(out), c); + } + + /** + * Constructs a printer redirected to an output stream + * @param os the output. + * @param c covariates to print out. + */ + protected CsvPrinter(final OutputStream os, final Covariate ... c) { + covariates = c == null ? new Covariate[0] : c.clone(); + ps = new PrintStream(os); + printHeader(); + } + + /** + * Prints the header out. + *

+ * Should only be invoked at creation. + */ + protected void printHeader() { + RecalUtils.printHeader(ps); + } + + /** + * Prints out a report into the csv file. + * + * + * @param report the report to print out. + * @param mode the report associated mode. (typically ORIGINAL, RECALIBRATED + */ + public void print(final RecalibrationReport report, final String mode) { + RecalUtils.writeCSV(ps,report.getRecalibrationTables(),mode,covariates,false); + } + + /** + * Close the csv printer. + * + * No further output will be allowed or take place after calling this method. + */ + public void close() { + ps.close(); + } + + } + + /** + * Returns a csv output printer. + * + * @param out the output file. It will be overridden + * @param c list of covariates to print out. + * + * @throws FileNotFoundException if out could not be created anew. + * + * @return never null + */ + protected static CsvPrinter csvPrinter(final File out, final Covariate ... c) + throws FileNotFoundException + { + if (c == null) { + throw new IllegalArgumentException("the input covariate array cannot be null"); + } + return new CsvPrinter(out,c); + } + + /** + * Prints out a collection of reports into a file in Csv format in a way + * that can be used by R scripts (such as the plot generator script). + *

+ * The set of covariates is take as the minimum common set from all reports. + * + * @param out the output file. It will be overridden. + * @param reports map where keys are the unique 'mode' (ORIGINAL, RECALIBRATED, ...) + * of each report and the corresponding value the report itself. + * @throws FileNotFoundException if out could not be created anew. + */ + public static void generateCsv(final File out, final Map reports) + throws FileNotFoundException { + if (reports.size() == 0) { + writeCsv(out, reports, new Covariate[0]); + } else { + final Iterator rit = reports.values().iterator(); + final RecalibrationReport first = rit.next(); + final Covariate[] firstCovariates = first.getRequestedCovariates(); + final Set covariates = new LinkedHashSet<>(); + Utils.addAll(covariates,firstCovariates); + while (rit.hasNext() && covariates.size() > 0) { + final Covariate[] nextCovariates = rit.next().getRequestedCovariates(); + final Set nextCovariateNames = new LinkedHashSet(nextCovariates.length); + for (final Covariate nc : nextCovariates) { + nextCovariateNames.add(nc.getClass().getSimpleName()); + } + final Iterator cit = covariates.iterator(); + while (cit.hasNext()) { + if (!nextCovariateNames.contains(cit.next().getClass().getSimpleName())) { + cit.remove(); + } + } + } + writeCsv(out, reports, covariates.toArray(new Covariate[covariates.size()])); + } + } + + /** + * Print out a collection of reports into a file in Csv format in a way + * that can be used by R scripts (such as the plot generator script). + * + * @param out + * @param reports map where keys are the unique 'mode' (ORIGINAL, RECALIBRATED, ...) + * of each report and the corresponding value the report itself. + * @param c the covariates to print out. + * @throws FileNotFoundException if out could not be created anew. 
+ */ + private static void writeCsv(final File out, + final Map<String,RecalibrationReport> reports, final Covariate[] c) + throws FileNotFoundException { + final CsvPrinter p = csvPrinter(out,c); + for (Map.Entry<String,RecalibrationReport> e : reports.entrySet()) { + p.print(e.getValue(),e.getKey()); + } + p.close(); + } + public enum SOLID_RECAL_MODE { /** * Treat reference inserted bases as reference matching bases. Very unsafe! @@ -390,6 +532,24 @@ public class RecalUtils { report.print(outputFile); } + /** + * Write recalibration plots into a file + * + * @param csvFile location of the intermediary file + * @param exampleReportFile where the report arguments are collected from. + * @param output result plot file name. + */ + public static void generatePlots(final File csvFile, final File exampleReportFile, final File output) { + final RScriptExecutor executor = new RScriptExecutor(); + executor.setExceptOnError(true); + executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class)); + executor.addArgs(csvFile.getAbsolutePath()); + executor.addArgs(exampleReportFile.getAbsolutePath()); + executor.addArgs(output.getAbsolutePath()); + Logger.getLogger(RecalUtils.class).debug("R command line: " + executor.getApproximateCommandLine()); + executor.exec(); + } + private static void outputRecalibrationPlot(final RecalibrationArgumentCollection RAC) { final RScriptExecutor executor = new RScriptExecutor(); @@ -452,18 +612,7 @@ public class RecalUtils { // output the csv file if (printHeader) { - final List<String> header = new LinkedList<String>(); - header.add("ReadGroup"); - header.add("CovariateValue"); - header.add("CovariateName"); - header.add("EventType"); - header.add("Observations"); - header.add("Errors"); - header.add("EmpiricalQuality"); - header.add("AverageReportedQuality"); - header.add("Accuracy"); - header.add("Recalibration"); - deltaTableFile.println(Utils.join(",", header)); + printHeader(deltaTableFile); } final Map<Covariate,String> covariateNameMap = new HashMap<Covariate,String>(requestedCovariates.length); @@ -480,6 +629,21 @@ public class
RecalUtils { } } + private static void printHeader(PrintStream out) { + final List<String> header = new LinkedList<String>(); + header.add("ReadGroup"); + header.add("CovariateValue"); + header.add("CovariateName"); + header.add("EventType"); + header.add("Observations"); + header.add("Errors"); + header.add("EmpiricalQuality"); + header.add("AverageReportedQuality"); + header.add("Accuracy"); + header.add("Recalibration"); + out.println(Utils.join(",", header)); + } + /* * Return an initialized nested integer array with appropriate dimensions for use with the delta tables * diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index ea45c2abf..ed9afa733 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -369,6 +369,11 @@ public class RecalibrationReport { return RAC; } + /** + * + * @deprecated use {@link #getRequestedCovariates()} instead. 
+ */ + @Deprecated public Covariate[] getCovariates() { return requestedCovariates; } diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 4fc9470f4..79ffa50a3 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -67,6 +67,8 @@ import java.util.ArrayList; public class ContextCovariate implements StandardCovariate { private final static Logger logger = Logger.getLogger(ContextCovariate.class); + + private int mismatchesContextSize; private int indelsContextSize; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariatesIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariatesIntegrationTest.java new file mode 100644 index 000000000..8c327efc0 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariatesIntegrationTest.java @@ -0,0 +1,362 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Method; +import java.util.*; + +import static org.testng.Assert.assertTrue; + +/** + * Tests Analyze Covariates. + *

+ * Notice that since the PDF reports generated by R are different every time this program + * is executed, their content won't be tested. It will only verify that the file has a healthy size. + * + */ +public class AnalyzeCovariatesIntegrationTest extends WalkerTest { + + private static final String TOOL_NAME = AnalyzeCovariates.class.getSimpleName(); + + /** + * Directory where the test data is located. + */ + private static final File TEST_DATA_DIR = new File(privateTestDir,"AnalyzeCovariates"); + + /** + * File containing the before report for normal testing. + */ + private static final File BEFORE_FILE = new File(TEST_DATA_DIR,"before.grp"); + + /** + * File containing the after report for normal testing. + */ + private static final File AFTER_FILE = new File(TEST_DATA_DIR,"after.grp"); + + + /** + * File containing the bqsr report for normal testing. + */ + private static final File BQSR_FILE = new File(TEST_DATA_DIR,"bqsr.grp"); + + /** + * Test the content of the generated csv file. + * + * @throws IOException should never happen. It would be an indicator of a + * problem with the testing environment. + */ + @Test(enabled = true) + public void testCsvGeneration() + throws IOException { + + final WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine("%s",null,true,true,true), + Collections.singletonList("106709d32e6f0a0a9dd6a6340ec246ab")); + executeTest("testCsvGeneration",spec); + } + + + /** + * Test the size of the generated pdf. + *

+ * Unfortunately we cannot test the content as it changes slightly + * every time the tool is run. + * + * @throws IOException should never happen. It would be an + * indicator of a problem with the testing environment. + */ + @Test(enabled = true) + public void testPdfGeneration() + throws IOException { + final File pdfFile = File.createTempFile("ACTest",".pdf"); + pdfFile.delete(); + pdfFile.deleteOnExit(); + + final List<String> md5 = Collections.emptyList(); + final WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine(null,pdfFile.toString(),true,true,true),md5); + executeTest("testPdfGeneration",spec); + assertTrue(pdfFile.exists(),"the pdf file was not created"); + assertTrue(pdfFile.length() > 260000,"the pdf file size does" + + " not reach the minimum of 260Kb"); + } + + /** + * Test the effect of changing some recalibration parameters. + * @param afterFileName name of the alternative after recalibration file. + * @param description describes what has been changed. + * @throws IOException should never happen. It would be an + * indicator of a problem with the testing environment. + */ + @Test(enabled = true, dataProvider="alternativeAfterFileProvider") + public void testParameterChangeException(final String afterFileName, + final String description) + throws IOException { + + final File pdfFile = File.createTempFile("ACTest",".pdf"); + pdfFile.deleteOnExit(); + final List<String> md5 = Collections.emptyList(); + final File afterFile = new File(TEST_DATA_DIR,afterFileName); + final WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine(null,"%s",true,true,afterFile), + 1,UserException.class); + executeTest("testParameterChangeException - " + description, spec); + } + + + /** + * Test combinations of input and output inclusion/exclusion of the command + * line that cause an exception to be thrown. + * + * @param useCsvFile whether to include the output csv file. + * @param usePdfFile whether to include the output pdf file. 
+ * @param useBQSRFile whether to include the -BQSR input file. + * @param useBeforeFile whether to include the -before input file. + * @param useAfterFile whether to include the -after input file. + * @throws IOException never thrown, unless there is a problem with the testing environment. + */ + @Test(enabled = true, dataProvider="alternativeInOutAbsenceCombinations") + public void testInOutAbsenceException(final boolean useCsvFile, final boolean usePdfFile, + final boolean useBQSRFile, final boolean useBeforeFile, final boolean useAfterFile) + throws IOException { + final WalkerTestSpec spec = new WalkerTestSpec(buildCommandLine(useCsvFile,usePdfFile, + useBQSRFile,useBeforeFile,useAfterFile),0,UserException.class); + executeTest("testInOutAbsencePresenceException", spec); + } + + /** + * Test combinations of input and output inclusion exclusion of the + * command line that won't cause an exception. + * + * @param useCsvFile whether to include the output csv file. + * @param usePdfFile whether to include the output pdf file. + * @param useBQSRFile whether to include the -BQSR input file. + * @param useBeforeFile whether to include the -before input file. + * @param useAfterFile whether to include the -after input file. + * @throws IOException never thrown, unless there is a problem with the testing environment. 
+ */ + @Test(enabled = true, dataProvider="alternativeInOutAbsenceCombinations") + public void testInOutAbsence(final boolean useCsvFile, final boolean usePdfFile, + final boolean useBQSRFile, final boolean useBeforeFile, final boolean useAfterFile) + throws IOException { + final List<String> md5 = Collections.emptyList(); + final WalkerTestSpec spec = new WalkerTestSpec(buildCommandLine(useCsvFile,usePdfFile, + useBQSRFile,useBeforeFile,useAfterFile),md5); + executeTest("testInOutAbsencePresence", spec); + } + + + + @DataProvider + public Iterator<Object[]> alternativeInOutAbsenceCombinations(Method m) { + List<Object[]> result = new LinkedList<Object[]>(); + if (m.getName().endsWith("Exception")) { + result.add(new Object[] { false, false, true, true, true }); + result.add(new Object[] { true, true, false, false ,false}); + } + else { + result.add(new Object[] { true, true, true, false, false }); + result.add(new Object[] { true, true, false, true, false }); + result.add(new Object[] { true, true, false, false, true }); + result.add(new Object[] { true, false,false, true, false }); + result.add(new Object[] { false, true, true, false, false }); + + } + return result.iterator(); + } + + /** + * Provide recalibration parameter change data to relevant tests. + * @param m target test method. + * @return never null. 
+ */ + @DataProvider + public Iterator<Object[]> alternativeAfterFileProvider (Method m) { + final boolean expectsException = m.getName().endsWith("Exception"); + final List<Object[]> result = new LinkedList<Object[]>(); + for (final Object[] data : DIFFERENT_PARAMETERS_AFTER_FILES) { + if (data[1].equals(expectsException)) { + result.add(new Object[] { data[0], data[2] }); + } + } + return result.iterator(); + } + + /** + * Triplets < after-grp-file, whether it should fail, what is different > + */ + private final Object[][] DIFFERENT_PARAMETERS_AFTER_FILES = { + {"after-cov.grp", true, "Adds additional covaraite: repeat-length"}, + {"after-dpSOLID.grp", true, "Change the default platform to SOLID"}, + {"after-noDp.grp",true, "Unset the default platform"}, + {"after-mcs4.grp", true, "Changed -mcs parameter from 2 to 4"} + }; + + /** + * Build the AC command line given what combinations of input and output files should be included. + * + * @param useCsvFile whether to include the output csv file. + * @param usePdfFile whether to include the output pdf file. + * @param useBQSRFile whether to include the -BQSR input file. + * @param useBeforeFile whether to include the -before input file. + * @param useAfterFile whether to include the -after input file. + * @return never null. + * @throws IOException never thrown, unless there is a problem with the testing environment. + */ + private String buildCommandLine(final boolean useCsvFile, final boolean usePdfFile, + final boolean useBQSRFile, final boolean useBeforeFile, final boolean useAfterFile) + throws IOException { + + final File csvFile = useCsvFile ? File.createTempFile("ACTest",".csv") : null; + final File pdfFile = usePdfFile ? File.createTempFile("ACTest",".pdf") : null; + + if (csvFile != null) { + csvFile.deleteOnExit(); + } + + if (pdfFile != null) { + pdfFile.deleteOnExit(); + } + + return buildCommandLine(csvFile == null ? null : csvFile.toString(), + pdfFile == null ? 
null : pdfFile.toString(), + useBQSRFile,useBeforeFile,useAfterFile); + } + + /** + * Build the AC command line given the output file names explicitly and what test input files to use. + *

+ * + * @param csvFileName the csv output file, null if none should be provided. + * @param pdfFileName the plots output file, null if none should be provided. + * @param useBQSRFile whether to include the -BQSR input file. + * @param useBeforeFile whether to include the -before input file. + * @param useAfterFile whether to include the -after input file. + * + * @return never null. + */ + private String buildCommandLine(final String csvFileName, final String pdfFileName, final boolean useBQSRFile, + final boolean useBeforeFile, final boolean useAfterFile) { + return buildCommandLine(csvFileName,pdfFileName,useBQSRFile ? BQSR_FILE : null, + useBeforeFile ? BEFORE_FILE : null, + useAfterFile ? AFTER_FILE : null); + } + + /** + * Build the AC command line given the output file names and the after file name explicitly and what other + * test input files to use. + *

+ * + * @param csvFileName the csv output file, null if none should be provided. + * @param pdfFileName the plots output file, null if none should be provided. + * @param useBQSRFile whether to include the -BQSR input file. + * @param useBeforeFile whether to include the -before input file. + * @param afterFile the after input report file, null if none should be provided. + * + * @return never null. + */ + private String buildCommandLine(final String csvFileName, final String pdfFileName, final boolean useBQSRFile, + final boolean useBeforeFile, final File afterFile) { + return buildCommandLine(csvFileName,pdfFileName,useBQSRFile ? BQSR_FILE : null, + useBeforeFile ? BEFORE_FILE : null, + afterFile); + } + + /** + * Build the AC command line given the output file names and the after file name explicitly and what other + * test input files to use. + *

+ * + * @param csvFileName the csv output file, null if none should be provided. + * @param pdfFileName the plots output file, null if none should be provided. + * @param bqsrFile the BQSR input report file, null if none should be provided. + * @param beforeFile the before input report file, null if none should be provided. + * @param afterFile the after input report file, null if none should be provided. + * + * @return never null. + */ + private String buildCommandLine(final String csvFileName, final String pdfFileName, final File bqsrFile, + final File beforeFile, final File afterFile) { + + final List<String> args = new LinkedList<String>(); + args.add("-T"); + args.add(TOOL_NAME); + args.add("-R"); + args.add(hg19Reference); + args.add("-ignoreLMT"); + + if (csvFileName != null) { + args.add("-" + AnalyzeCovariates.CSV_ARG_SHORT_NAME); + args.add("'" + csvFileName + "'"); + } + if (pdfFileName != null) { + args.add("-" + AnalyzeCovariates.PDF_ARG_SHORT_NAME); + args.add("'" + pdfFileName + "'"); + } + if (bqsrFile != null) { + args.add("-BQSR"); + args.add("'" + bqsrFile.getAbsoluteFile().toString() + "'"); + } + if (beforeFile != null) { + args.add("-" + AnalyzeCovariates.BEFORE_ARG_SHORT_NAME); + args.add("'" + beforeFile.getAbsolutePath().toString() + "'"); + } + if (afterFile != null) { + args.add("-" + AnalyzeCovariates.AFTER_ARG_SHORT_NAME); + args.add("'" + afterFile.getAbsolutePath().toString() + "'"); + } + return Utils.join(" ", args); + + } +} diff --git a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R index 8a9eecf48..bc53e29dc 100644 --- a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R +++ b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R @@ -12,7 +12,27 @@ if ( interactive() ) { args <- commandArgs(TRUE) } data <- read.csv(args[1]) + +data$Recalibration = as.factor(sapply(as.character(data$Recalibration),function(x) { + xu = 
toupper(x); + if (xu == "ORIGINAL") "BEFORE" else + if (xu == "RECALIBRATED") "AFTER" else + if (xu == "RECALIBRATION") "BQSR" else + xu })); + gsa.report <- gsa.read.gatkreport(args[2]) + +gsa.report$Arguments$Value = as.character(gsa.report$Arguments$Value); +gsa.report$Arguments = subset(gsa.report$Arguments,subset= Argument != "plot_pdf_file"); +if (length(levels(data$Recalibration)) > 1) { + gsa.report$Arguments = subset(gsa.report$Arguments,subset= Argument != "recalibration_report"); +} +gsa.report$Arguments$Value[gsa.report$Argument$Value == "null"] = "None"; + +gsa.report.covariate.argnum = gsa.report$Arguments$Argument == "covariate"; +gsa.report$Arguments$Value[gsa.report.covariate.argnum] = sapply(strsplit(gsa.report$Arguments$Value[gsa.report.covariate.argnum],","),function(x) { + y = sub("(^.+)Covariate","\\1",x); paste(y,collapse=",") } ); + data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType)))) numRG = length(unique(data$ReadGroup)) @@ -54,31 +74,31 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn d=rbind(dSub, dIns, dDel) if( cov != "QualityScore" ) { - p <- ggplot(d, aes(x=CovariateValue,y=Accuracy,alpha=log10(Observations))) + + p <- ggplot(d, aes(x=CovariateValue,y=Accuracy,alpha=log10(Observations))) + ylim(min(-10,d$Accuracy),max(10,d$Accuracy)) + geom_abline(intercept=0, slope=0, linetype=2) + xlab(paste(cov,"Covariate")) + ylab("Quality Score Accuracy") + blankTheme if(cov == "Cycle") { - b <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + b <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) + opts(axis.text.x=theme_text(angle=90, hjust=0)) p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) + xlab(paste(cov,"Covariate")) + - ylab("Mean Quality Score") + + ylab("Mean Quality 
Score") + ylim(0,max(42,d$AverageReportedQuality)); blankTheme - e <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + e <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) + opts(axis.text.x=theme_text(angle=90, hjust=0)) } else { - c <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + c <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) + opts(axis.text.x=theme_text(angle=90, hjust=0)) + xlab(paste(cov,"Covariate (3 base suffix)")) p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) + xlab(paste(cov,"Covariate (3 base suffix)")) + ylab("Mean Quality Score") + blankTheme - f <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + + f <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) + opts(axis.text.x=theme_text(angle=90, hjust=0)) } @@ -88,14 +108,14 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn xlab("Reported Quality Score") + ylab("Empirical Quality Score") + blankTheme - a <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + a <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) p <- ggplot(d, aes(x=CovariateValue)) + xlab(paste(cov,"Covariate")) + ylab("No. 
of Observations (area normalized)") + blankTheme d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity") - d <- d + scale_fill_manual(values=c("maroon1","blue")) + d <- d + scale_fill_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) d <- d + facet_grid(.~EventType) # d <- d + scale_y_continuous(formatter="comma") } diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 73a538ee5..75bd6a3d1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -683,6 +683,36 @@ public class Utils { return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); } + /** + * Adds the elements of an array into a collection. + * + * In the event of an exception being thrown due to some element, dest might have been modified by + * the successful addition of the elements before that one. + * + * @param dest the destination collection which cannot be null and should be able to accept + * the input elements. + * @param elements the elements to add to dest + * @param <T> collection type element. + * @throws UnsupportedOperationException if the add operation + * is not supported by dest. + * @throws ClassCastException if the class of any of the elements + * prevents it from being added to dest. + * @throws NullPointerException if any of the elements is null and dest + * does not permit null elements + * @throws IllegalArgumentException if some property of any of the elements + * prevents it from being added to this collection + * @throws IllegalStateException if any of the elements cannot be added at this + * time due to insertion restrictions. + * @return true if the collection was modified as a result. + */ + public static <T> boolean addAll(Collection<? super T> dest, T ...
elements) { + boolean result = false; + for (final T e : elements) { + result = dest.add(e) | result; + } + return result; + } + /** * Create a constant map that maps each value in values to itself */ From 1f8282633beed08027da4aa4525464041b309c83 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Wed, 19 Jun 2013 11:44:18 -0400 Subject: [PATCH 114/116] Removed plots generation from the BaseRecalibration software Improved AnalyzeCovariates (AC) integration test. Renamed AC test files ending with .grp to .table Implementation: * Removed RECAL_PDF/CSV_FILE from RecalibrationArgumentCollection (RAC). Updated rest of the code accordingly. * Fixed BQSRIntegrationTest to work with new changes --- .../gatk/walkers/bqsr/AnalyzeCovariates.java | 2 +- .../sting/gatk/walkers/bqsr/BQSRGatherer.java | 12 ----- .../gatk/walkers/bqsr/BaseRecalibrator.java | 20 -------- .../bqsr/RecalibrationArgumentCollection.java | 17 ------- .../sting/utils/recalibration/RecalUtils.java | 38 ++++++++++----- .../recalibration/RecalibrationReport.java | 3 -- .../AnalyzeCovariatesIntegrationTest.java | 16 +++---- .../walkers/bqsr/BQSRIntegrationTest.java | 48 +++++++------------ .../sting/utils/recalibration/BQSR.R | 2 +- .../sting/utils/exceptions/UserException.java | 6 +++ 10 files changed, 57 insertions(+), 107 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariates.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariates.java index b6f911753..7a7527dd1 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariates.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariates.java @@ -432,7 +432,7 @@ public final class AnalyzeCovariates extends RodWalker diffs = exampleEntry.getValue().getRAC().compareReportArguments( reportEntries[i].getValue().getRAC(),exampleEntry.getKey(),reportEntries[i].getKey()); if (diffs.size() != 0) { - throw new 
UserException("There are differences in relevant arguments of" + throw new UserException.IncompatibleRecalibrationTableParameters("There are differences in relevant arguments of" + " two or more input recalibration reports. Please make sure" + " they have been created using the same recalibration parameters." + " " + Utils.join("// ", reportDifferencesStringArray(diffs))); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java index 7727c2dac..d6f0e16e8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -92,18 +92,6 @@ public class BQSRGatherer extends Gatherer { generalReport.calculateQuantizedQualities(); - RecalibrationArgumentCollection RAC = generalReport.getRAC(); - if ( RAC.RECAL_PDF_FILE != null ) { - RAC.RECAL_TABLE_FILE = output; - if ( RAC.existingRecalibrationReport != null ) { - final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport); - RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getRequestedCovariates()); - } - else { - RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getRequestedCovariates()); - } - } - generalReport.output(outputFile); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 41d3f3991..3882b70fa 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -180,11 +180,6 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche 
public void initialize() { baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty - if (RAC.RECAL_PDF_FILE != null) { - Utils.warnUser("This is not the recommended way to generate recalibration plots any longer and will be" - + " discontinued soon in future releases. Please use the 'AnalyzeCovariates' tool instead from now one"); - } - if (RAC.FORCE_PLATFORM != null) RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; @@ -522,11 +517,6 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche generateReport(); logger.info("...done!"); - if ( RAC.RECAL_PDF_FILE != null ) { - logger.info("Generating recalibration plots..."); - generatePlots(); - } - logger.info("BaseRecalibrator was able to recalibrate " + result + " reads"); } @@ -534,16 +524,6 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche return recalibrationEngine.getFinalRecalibrationTables(); } - private void generatePlots() { - File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE; - if (recalFile != null) { - RecalibrationReport report = new RecalibrationReport(recalFile); - RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates); - } - else - RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates); - } - /** * go through the quality score table and use the # observations and the empirical quality score * to build a quality score histogram for quantization. 
Then use the QuantizeQual algorithm to diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index c1ecb2320..b9f16132c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -89,21 +89,6 @@ public class RecalibrationArgumentCollection implements Cloneable { public File RECAL_TABLE_FILE = null; public PrintStream RECAL_TABLE; - /** - * If not provided, then no plots will be generated (useful for queue scatter/gathering). - * However, we *highly* recommend that users generate these plots whenever possible for QC checking. - */ - @Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false, defaultToStdout = false) - public File RECAL_PDF_FILE = null; - - /** - * If not provided, then a temporary file is created and then deleted upon completion. - * For advanced users only. - */ - @Advanced - @Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false) - public File RECAL_CSV_FILE = null; - /** * Note that the --list argument requires a fully resolved and correct command-line to work. */ @@ -284,8 +269,6 @@ public class RecalibrationArgumentCollection implements Cloneable { argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); argumentsTable.addRowID("recalibration_report", true); argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? 
"null" : existingRecalibrationReport.getAbsolutePath()); - argumentsTable.addRowID("plot_pdf_file", true); - argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath()); argumentsTable.addRowID("binary_tag_name", true); argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); return argumentsTable; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index 8908ce4a4..56f7e8257 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -550,36 +550,48 @@ public class RecalUtils { executor.exec(); } - private static void outputRecalibrationPlot(final RecalibrationArgumentCollection RAC) { + private static void outputRecalibrationPlot(final File csvFile, final RecalibrationArgumentCollection RAC) { final RScriptExecutor executor = new RScriptExecutor(); executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class)); - executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath()); + executor.addArgs(csvFile.getAbsolutePath()); executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath()); - executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath()); executor.exec(); } + /** + * Please use {@link #generateCsv(java.io.File, java.util.Map)} and {@link #generatePlots(java.io.File, java.io.File, java.io.File)} instead. 
+ * + * @deprecated + */ + @Deprecated public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final Covariate[] requestedCovariates) { generateRecalibrationPlot(RAC, original, null, requestedCovariates); } + /** + * Please use {@link #generateCsv(java.io.File, java.util.Map)} and {@link #generatePlots(java.io.File, java.io.File, java.io.File)} instead. + * + * @deprecated + */ + @Deprecated public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) { - final PrintStream csvFile; + final PrintStream csvStream; + final File csvTempFile; /* assigned in the try block below so the plot and cleanup steps receive the real temporary file rather than null */ try { - if ( RAC.RECAL_CSV_FILE == null ) { - RAC.RECAL_CSV_FILE = File.createTempFile("BQSR", ".csv"); - RAC.RECAL_CSV_FILE.deleteOnExit(); - } - csvFile = new PrintStream(RAC.RECAL_CSV_FILE); + csvTempFile = File.createTempFile("BQSR",".csv"); + csvTempFile.deleteOnExit(); + csvStream = new PrintStream(csvTempFile); } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_CSV_FILE, e); + throw new UserException("Could not create temporary csv file", e); } if ( recalibrated != null ) - writeCSV(csvFile, recalibrated, "RECALIBRATED", requestedCovariates, true); - writeCSV(csvFile, original, "ORIGINAL", requestedCovariates, recalibrated == null); - outputRecalibrationPlot(RAC); + writeCSV(csvStream, recalibrated, "RECALIBRATED", requestedCovariates, true); + writeCSV(csvStream, original, "ORIGINAL", requestedCovariates, recalibrated == null); + csvStream.close(); + outputRecalibrationPlot(csvTempFile, RAC); + csvTempFile.delete(); } private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) { diff --git
a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index ed9afa733..091b5ecf0 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -340,9 +340,6 @@ public class RecalibrationReport { else if (argument.equals("recalibration_report")) RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value); - else if (argument.equals("plot_pdf_file")) - RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value); - else if (argument.equals("binary_tag_name")) RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariatesIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariatesIntegrationTest.java index 8c327efc0..95ce80848 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariatesIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/AnalyzeCovariatesIntegrationTest.java @@ -77,18 +77,18 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest { /** * File containing the before report for normal testing. */ - private static final File BEFORE_FILE = new File(TEST_DATA_DIR,"before.grp"); + private static final File BEFORE_FILE = new File(TEST_DATA_DIR,"before.table"); /** * File containing the after report for normal testing. */ - private static final File AFTER_FILE = new File(TEST_DATA_DIR,"after.grp"); + private static final File AFTER_FILE = new File(TEST_DATA_DIR,"after.table"); /** * File containing the bqsr report for normal testing. 
*/ - private static final File BQSR_FILE = new File(TEST_DATA_DIR,"bqsr.grp"); + private static final File BQSR_FILE = new File(TEST_DATA_DIR,"bqsr.table"); /** * Test the content of the generated csv file. @@ -150,7 +150,7 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest { final File afterFile = new File(TEST_DATA_DIR,afterFileName); final WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine(null,"%s",true,true,afterFile), - 1,UserException.class); + 1,UserException.IncompatibleRecalibrationTableParameters.class); executeTest("testParameterChangeException - " + description, spec); } @@ -237,10 +237,10 @@ public class AnalyzeCovariatesIntegrationTest extends WalkerTest { * Triplets < alfter-grp-file, whether it should fail, what is different > */ private final Object[][] DIFFERENT_PARAMETERS_AFTER_FILES = { - {"after-cov.grp", true, "Adds additional covaraite: repeat-length"}, - {"after-dpSOLID.grp", true, "Change the default platform to SOLID"}, - {"after-noDp.grp",true, "Unset the default platform"}, - {"after-mcs4grp", true, "Changed -mcs parameter from 2 to 4"} + {"after-cov.table", true, "Adds additional covariate: repeat-length" }, + {"after-dpSOLID.table", true, "Change the default platform to SOLID" }, + {"after-noDp.table",true, "Unset the default platform" }, + {"after-mcs4.table", true, "Changed -mcs parameter from 2 to 4" } }; /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 71c29fe0b..05183a521 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -100,23 +100,23 @@ public class BQSRIntegrationTest extends WalkerTest { @DataProvider(name = "BQSRTest") public Object[][] createBQSRTestData() { return new Object[][]{ - {new 
BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "61fd466b5e94d2d67e116f6f67c9f939")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "e08b5bcdb64f4beea03730e5631a14ca")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "448a45dc154c95d1387cb5cdddb67071")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "c1e7999e445d51bbe2e775dac5325643")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a57c16918cdfe12d55a89c21bf195279")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "836dccacf48ccda6b2843d07e8f1ef4d")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "0fb2aedc2f8d66b5821cb570f15a8c4d")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c9953f020a65c1603a6d71aeeb1b95f3")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "85a120b7d86b61597b86b9e93decbdfc")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "5248dc49aec0323c74b496bb4928c73c")}, - {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "cb52f267e0010f849f50b0bf1de474a1")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "fb372d0a8fc41b01ced1adab31546850")}, - {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "c1c3cda8caceed619d3d439c3990cd26")}, - {new BQSRTest(b36KGReference, validationDataLocation + 
"NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c9953f020a65c1603a6d71aeeb1b95f3")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5bfff0c699345cca12a9b33acf95588f")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "f805a0020eea987b79f314fa99913806")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "86075d3856eb06816a0dd81af55e421f")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "155802237e1fc7a001398b8f4bcf4b72")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "38c7916cc019fe8d134df67639422b42")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "b74e75f3c5aa90bd21af1e20f2ac8c40")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "e564505aea11464de8ed72890d9ea89a")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "380d8be121ffaddd3461ee0ac3d1a76f")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "0b5a8e259e997e4c7b5836d4c28e6f4d")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "281682124584ab384f23359934df0c3b")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "0a92fdff5fd26227c29d34eda5a32f49")}, + {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "90d8c24077e8ae9a0037a9aad5f09e31")}, + {new BQSRTest(b36KGReference, 
validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "c41ef02c640ef1fed4bfc03b9b33b616")}, + {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "b577cd1d529425f66db49620db09fdca")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "0b5a8e259e997e4c7b5836d4c28e6f4d")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "9ad49269c0156f8ab1173261bf23e600")}, // make sure we work with ION torrent bam - {new BQSRTest(b37KGReference, privateTestDir + "iontorrent.bam", "20:10,000,000-10,200,000", "", "7375c7b692e76b651c278a9fb478fa1c")}, + {new BQSRTest(b37KGReference, privateTestDir + "iontorrent.bam", "20:10,000,000-10,200,000", "", "04bfa4760767022e7f5252e6e4432cc1")}, }; } @@ -141,22 +141,6 @@ public class BQSRIntegrationTest extends WalkerTest { executeTest("testBQSRFailWithoutDBSNP", spec); } - @Test - public void testBQSRCSV() { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - " -T BaseRecalibrator" + - " -R " + b36KGReference + - " -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" + - " -knownSites " + b36dbSNP129 + - " -L 1:10,000,000-10,200,000" + - " -o /dev/null" + - " -sortAllCols" + - " --plot_pdf_file /dev/null" + - " --intermediate_csv_file %s", - Arrays.asList("90ad19143024684e3c4410dc8fd2bd9d")); - executeTest("testBQSR-CSVfile", spec); - } - @Test public void testBQSRFailWithSolidNoCall() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( diff --git 
a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R index bc53e29dc..b0055dd10 100644 --- a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R +++ b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R @@ -85,7 +85,7 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn p <- ggplot(d, aes(x=CovariateValue,y=AverageReportedQuality,alpha=log10(Observations))) + xlab(paste(cov,"Covariate")) + - ylab("Mean Quality Score") + ylim(0,max(42,d$AverageReportedQuality)); + ylab("Mean Quality Score") + ylim(0,max(42,d$AverageReportedQuality)) + blankTheme e <- p + geom_point(aes(color=Recalibration)) + scale_color_manual(values=c("BEFORE"="maroon1","AFTER"="blue","BQSR"="black")) + facet_grid(.~EventType) + opts(axis.text.x=theme_text(angle=90, hjust=0)) diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 0e95fd158..6126116c2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -471,4 +471,10 @@ public class UserException extends ReviewedStingException { super(message,innerException); } } + + public static class IncompatibleRecalibrationTableParameters extends UserException { + public IncompatibleRecalibrationTableParameters(String s) { + super(s); + } + } } From 0672ac50322ccc57fee96ebda3e50480404a665d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Jun 2013 19:42:09 -0400 Subject: [PATCH 115/116] Fix public / protected dependency --- ...rotectedEngineFeaturesIntegrationTest.java | 93 +++++++++++++++++++ .../gatk/EngineFeaturesIntegrationTest.java | 34 ------- 2 files changed, 93 insertions(+), 34 deletions(-) create mode 100644 
protected/java/test/org/broadinstitute/sting/gatk/ProtectedEngineFeaturesIntegrationTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/ProtectedEngineFeaturesIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/ProtectedEngineFeaturesIntegrationTest.java new file mode 100644 index 000000000..680706802 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/ProtectedEngineFeaturesIntegrationTest.java @@ -0,0 +1,93 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. 
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk; + +import net.sf.samtools.util.BlockCompressedInputStream; +import org.broad.tribble.readers.AsciiLineReader; +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; +import org.broadinstitute.variant.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileInputStream; +import java.util.Arrays; + +/** + * + */ +public class ProtectedEngineFeaturesIntegrationTest extends WalkerTest { + @Test(enabled = true) + public void testGATKVersionInVCF() throws Exception { + WalkerTestSpec spec = new WalkerTestSpec("-T UnifiedGenotyper -R " + b37KGReference + " -I " + + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam" + + " -o %s -L 20:10,000,000", + 1, Arrays.asList("")); + final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); + final VCFHeader header = (VCFHeader)new VCFCodec().readHeader(new AsciiLineReader(new FileInputStream(vcf))); + final VCFHeaderLine versionLine = header.getMetaDataLine(VariantContextWriterStub.GATK_VERSION_KEY); + Assert.assertNotNull(versionLine); + Assert.assertEquals(versionLine.getValue(), CommandLineGATK.getVersionNumber()); + } + + @Test(enabled = true) + public void testCompressedVCFOutputWithNT() throws Exception { + WalkerTestSpec spec = new WalkerTestSpec("-T UnifiedGenotyper -R " + b37KGReference + " -I " + + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam" + + " -o %s -L 20:10,000,000-10,100,000 -nt 4", + 1, Arrays.asList("vcf.gz"), Arrays.asList("")); + final File vcf = executeTest("testCompressedVCFOutputWithNT", spec).first.get(0); + final AsciiLineReader reader = new AsciiLineReader(new BlockCompressedInputStream(vcf)); + int nLines = 0; + while ( reader.readLine() != null ) + nLines++; + Assert.assertTrue(nLines > 0); + } +} \ No 
newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index c97ab7301..541fb78c0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -27,13 +27,10 @@ package org.broadinstitute.sting.gatk; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.BlockCompressedInputStream; -import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; -import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -42,15 +39,11 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; -import org.broadinstitute.variant.vcf.VCFCodec; -import org.broadinstitute.variant.vcf.VCFHeader; -import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; -import java.io.FileInputStream; import java.io.PrintStream; import java.util.Arrays; @@ -204,33 +197,6 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { executeTest("badCompress " + compress, spec); } - @Test(enabled = true) - public void testGATKVersionInVCF() throws Exception { 
- WalkerTestSpec spec = new WalkerTestSpec("-T UnifiedGenotyper -R " + b37KGReference + " -I " - + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam" - + " -o %s -L 20:10,000,000", - 1, Arrays.asList("")); - final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); - final VCFHeader header = (VCFHeader)new VCFCodec().readHeader(new AsciiLineReader(new FileInputStream(vcf))); - final VCFHeaderLine versionLine = header.getMetaDataLine(VariantContextWriterStub.GATK_VERSION_KEY); - Assert.assertNotNull(versionLine); - Assert.assertEquals(versionLine.getValue(), CommandLineGATK.getVersionNumber()); - } - - @Test(enabled = true) - public void testCompressedVCFOutputWithNT() throws Exception { - WalkerTestSpec spec = new WalkerTestSpec("-T UnifiedGenotyper -R " + b37KGReference + " -I " - + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam" - + " -o %s -L 20:10,000,000-10,100,000 -nt 4", - 1, Arrays.asList("vcf.gz"), Arrays.asList("")); - final File vcf = executeTest("testCompressedVCFOutputWithNT", spec).first.get(0); - final AsciiLineReader reader = new AsciiLineReader(new BlockCompressedInputStream(vcf)); - int nLines = 0; - while ( reader.readLine() != null ) - nLines++; - Assert.assertTrue(nLines > 0); - } - // -------------------------------------------------------------------------------- // // Test that defaultBaseQualities actually works From fdfe4e41d5d8c92fad74f56e654992f3a97ab602 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 20 Jun 2013 11:19:13 -0400 Subject: [PATCH 116/116] Better GATK version and command line output -- Previous version emitted command lines that look like: ##HaplotypeCaller="analysis_type=HaplotypeCaller input_file=[private/testdata/reduced.readNotFullySpanningDeletion.bam] ..." 
the new version provides additional information on when the GATK was run and the GATK version in a nicer format: ##GATKCommandLine= -- Additionally, the command line options are emitted sequentially in the file, so you can see a running record of how a VCF was produced, such as this example from the integration test: ##GATKCommandLine= ##GATKCommandLine= -- Removed the ProtectedEngineFeaturesIntegrationTest -- Actual unit tests for these features! --- ...rotectedEngineFeaturesIntegrationTest.java | 93 ------------------- .../UnifiedGenotyperIntegrationTest.java | 18 ++++ .../sting/gatk/GenomeAnalysisEngine.java | 8 ++ .../io/stubs/VariantContextWriterStub.java | 36 +------ .../sting/utils/variant/GATKVCFUtils.java | 27 +++++- .../gatk/EngineFeaturesIntegrationTest.java | 54 +++++++++++ .../utils/variant/GATKVCFUtilsUnitTest.java | 86 +++++++++++++++++ 7 files changed, 197 insertions(+), 125 deletions(-) delete mode 100644 protected/java/test/org/broadinstitute/sting/gatk/ProtectedEngineFeaturesIntegrationTest.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/variant/GATKVCFUtilsUnitTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/ProtectedEngineFeaturesIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/ProtectedEngineFeaturesIntegrationTest.java deleted file mode 100644 index 680706802..000000000 --- a/protected/java/test/org/broadinstitute/sting/gatk/ProtectedEngineFeaturesIntegrationTest.java +++ /dev/null @@ -1,93 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012 Broad Institute, Inc. -* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 4. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 5. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 6. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 7. MISCELLANEOUS -* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.sting.gatk; - -import net.sf.samtools.util.BlockCompressedInputStream; -import org.broad.tribble.readers.AsciiLineReader; -import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; -import org.broadinstitute.variant.vcf.VCFCodec; -import org.broadinstitute.variant.vcf.VCFHeader; -import org.broadinstitute.variant.vcf.VCFHeaderLine; -import org.testng.Assert; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.FileInputStream; -import java.util.Arrays; - -/** - * - */ -public class ProtectedEngineFeaturesIntegrationTest extends WalkerTest { - @Test(enabled = true) - public void testGATKVersionInVCF() throws Exception { - WalkerTestSpec spec = new WalkerTestSpec("-T UnifiedGenotyper -R " + b37KGReference + " -I " - + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam" - + " -o %s -L 20:10,000,000", - 1, Arrays.asList("")); - final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); - final VCFHeader header = (VCFHeader)new VCFCodec().readHeader(new AsciiLineReader(new FileInputStream(vcf))); - final VCFHeaderLine versionLine = header.getMetaDataLine(VariantContextWriterStub.GATK_VERSION_KEY); - Assert.assertNotNull(versionLine); - 
Assert.assertEquals(versionLine.getValue(), CommandLineGATK.getVersionNumber()); - } - - @Test(enabled = true) - public void testCompressedVCFOutputWithNT() throws Exception { - WalkerTestSpec spec = new WalkerTestSpec("-T UnifiedGenotyper -R " + b37KGReference + " -I " - + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam" - + " -o %s -L 20:10,000,000-10,100,000 -nt 4", - 1, Arrays.asList("vcf.gz"), Arrays.asList("")); - final File vcf = executeTest("testCompressedVCFOutputWithNT", spec).first.get(0); - final AsciiLineReader reader = new AsciiLineReader(new BlockCompressedInputStream(vcf)); - int nLines = 0; - while ( reader.readLine() != null ) - nLines++; - Assert.assertTrue(nLines > 0); - } -} \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 3eb9b4e1c..532982853 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -46,11 +46,15 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import net.sf.samtools.util.BlockCompressedInputStream; +import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.Assert; import org.testng.annotations.Test; +import java.io.File; import java.util.Arrays; import java.util.Collections; @@ -302,4 +306,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test calling on reads with Ns in CIGAR", spec); } + + @Test(enabled = true) + public void testCompressedVCFOutputWithNT() throws Exception { + WalkerTestSpec spec = new WalkerTestSpec("-T 
UnifiedGenotyper -R " + b37KGReference + " -I " + + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam" + + " -o %s -L 20:10,000,000-10,100,000 -nt 4", + 1, Arrays.asList("vcf.gz"), Arrays.asList("")); + final File vcf = executeTest("testCompressedVCFOutputWithNT", spec).first.get(0); + final AsciiLineReader reader = new AsciiLineReader(new BlockCompressedInputStream(vcf)); + int nLines = 0; + while ( reader.readLine() != null ) + nLines++; + Assert.assertTrue(nLines > 0); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 6fa1b741c..c4f1a286d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -170,6 +170,14 @@ public class GenomeAnalysisEngine { this.walker = walker; } + /** + * The short name of the current GATK walker as a string + * @return a non-null String + */ + public String getWalkerName() { + return getWalkerName(walker.getClass()); + } + /** * A processed collection of SAM reader identifiers. 
*/ diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java index 8b7c4282b..3e3d6de41 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java @@ -26,18 +26,15 @@ package org.broadinstitute.sting.gatk.io.stubs; import net.sf.samtools.SAMSequenceDictionary; -import org.broadinstitute.sting.gatk.CommandLineExecutable; -import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; -import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; -import org.broadinstitute.variant.vcf.VCFHeader; -import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.writer.Options; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; import java.io.File; import java.io.OutputStream; @@ -54,7 +51,6 @@ import java.util.List; * @version 0.1 */ public class VariantContextWriterStub implements Stub, VariantContextWriter { - public final static String GATK_VERSION_KEY = "GATKVersion"; public final static boolean UPDATE_CONTIG_HEADERS = true; /** @@ -227,14 +223,9 @@ public class VariantContextWriterStub implements Stub, Var if ( header.isWriteEngineHeaders() ) { // skip writing the command line header if requested if ( ! 
skipWritingCommandLineHeader && header.isWriteCommandLine() ) { - // write the GATK version if we have command line information enabled - vcfHeader.addMetaDataLine(getGATKVersionHeaderLine()); - - // Check for the command-line argument header line. If not present, add it in. - final VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); - final boolean foundCommandLineHeaderLine = vcfHeader.getMetaDataLine(commandLineArgHeaderLine.getKey()) != null; - if ( ! foundCommandLineHeaderLine ) - vcfHeader.addMetaDataLine(commandLineArgHeaderLine); + // Always add the header line, as the current format allows multiple entries + final VCFHeaderLine commandLineArgHeaderLine = GATKVCFUtils.getCommandLineArgumentHeaderLine(engine, argumentSources); + vcfHeader.addMetaDataLine(commandLineArgHeaderLine); } if ( UPDATE_CONTIG_HEADERS ) @@ -280,21 +271,4 @@ public class VariantContextWriterStub implements Stub, Var getOutputFile() != null && // that are going to disk engine.getArguments().generateShadowBCF; // and we actually want to do it } - - /** - * Gets the appropriately formatted header for a VCF file - * @return VCF file header. - */ - private VCFHeaderLine getCommandLineArgumentHeaderLine() { - CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class); - return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\""); - } - - /** - * Gets the GATK version header line for the VCF file - * @return non-null VCFHeaderLine. 
- */ - private VCFHeaderLine getGATKVersionHeaderLine() { - return new VCFHeaderLine(GATK_VERSION_KEY, CommandLineGATK.getVersionNumber()); - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java index aa2e92559..09db585a6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java @@ -30,10 +30,10 @@ import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodecHeader; import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.variant.bcf2.BCF2Codec; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.vcf.*; @@ -52,6 +52,31 @@ public class GATKVCFUtils { */ private GATKVCFUtils() { } + public final static String GATK_COMMAND_LINE_KEY = "GATKCommandLine"; + + /** + * Gets the appropriately formatted header for a VCF file describing this GATK run + * + * @param engine the GATK engine that holds the walker name, GATK version, and other information + * @param argumentSources contains information on the argument values provided to the GATK for converting to a + * command line string. Should be provided from the data in the parsing engine. Can be + * empty in which case the command line will be the empty string. + * @return VCF header line describing this run of the GATK. 
+ */ + public static VCFHeaderLine getCommandLineArgumentHeaderLine(final GenomeAnalysisEngine engine, final Collection argumentSources) { + if ( engine == null ) throw new IllegalArgumentException("engine cannot be null"); + if ( argumentSources == null ) throw new IllegalArgumentException("argumentSources cannot be null"); + + final Map attributes = new LinkedHashMap<>(); + attributes.put("ID", engine.getWalkerName()); + attributes.put("Version", CommandLineGATK.getVersionNumber()); + final Date date = new Date(); + attributes.put("Date", date.toString()); + attributes.put("Epoch", Long.toString(date.getTime())); + attributes.put("CommandLineOptions", engine.createApproximateCommandLineArgumentString(argumentSources.toArray())); + return new VCFSimpleHeaderLine(GATK_COMMAND_LINE_KEY, attributes, Collections.emptyList()); + } + public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, List> rodBindings) { // Collect the eval rod names final Set names = new TreeSet(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 541fb78c0..aca6cf984 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMRecord; +import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -39,11 +40,16 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import 
org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; +import java.io.FileInputStream; import java.io.PrintStream; import java.util.Arrays; @@ -197,6 +203,54 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { executeTest("badCompress " + compress, spec); } + // -------------------------------------------------------------------------------- + // + // Test that the VCF version key is what we expect + // + // -------------------------------------------------------------------------------- + @Test(enabled = true) + public void testGATKVersionInVCF() throws Exception { + WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference + + " -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf" + + " -o %s -L 20:61098", + 1, Arrays.asList("")); + spec.disableShadowBCF(); + final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); + final VCFHeader header = (VCFHeader)new VCFCodec().readHeader(new AsciiLineReader(new FileInputStream(vcf))); + final VCFHeaderLine versionLine = header.getMetaDataLine(GATKVCFUtils.GATK_COMMAND_LINE_KEY); + Assert.assertNotNull(versionLine); + Assert.assertTrue(versionLine.toString().contains("SelectVariants")); + } + + @Test(enabled = true) + public void testMultipleGATKVersionsInVCF() throws Exception { + WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -R " + b37KGReference + + " -V " + privateTestDir + "gatkCommandLineInHeader.vcf" + + " -o %s", + 1, Arrays.asList("")); + spec.disableShadowBCF(); + final File vcf = executeTest("testMultipleGATKVersionsInVCF", spec).first.get(0); + final VCFHeader header = (VCFHeader)new 
VCFCodec().readHeader(new AsciiLineReader(new FileInputStream(vcf))); + + boolean foundHC = false; + boolean foundSV = false; + for ( final VCFHeaderLine line : header.getMetaDataInInputOrder() ) { + if ( line.getKey().equals(GATKVCFUtils.GATK_COMMAND_LINE_KEY) ) { + if ( line.toString().contains("HaplotypeCaller") ) { + Assert.assertFalse(foundHC); + foundHC = true; + } + if ( line.toString().contains("SelectVariants") ) { + Assert.assertFalse(foundSV); + foundSV = true; + } + } + } + + Assert.assertTrue(foundHC, "Didn't find HaplotypeCaller command line header field"); + Assert.assertTrue(foundSV, "Didn't find SelectVariants command line header field"); + } + // -------------------------------------------------------------------------------- // // Test that defaultBaseQualities actually works diff --git a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVCFUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVCFUtilsUnitTest.java new file mode 100644 index 000000000..051d0bcec --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVCFUtilsUnitTest.java @@ -0,0 +1,86 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils.variant; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; + +public class GATKVCFUtilsUnitTest extends BaseTest { + public static class VCFHeaderTestWalker extends RodWalker { + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return null; } + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { return value + sum; } + } + + public static class VCFHeaderTest2Walker extends VCFHeaderTestWalker {} + + @Test + public void testAddingVCFHeaderInfo() { + final VCFHeader header = new VCFHeader(); + + final Walker walker1 = new VCFHeaderTestWalker(); + final Walker walker2 = new VCFHeaderTest2Walker(); + + final GenomeAnalysisEngine testEngine1 = new GenomeAnalysisEngine(); + testEngine1.setWalker(walker1); + + final GenomeAnalysisEngine testEngine2 = new 
GenomeAnalysisEngine(); + testEngine2.setWalker(walker2); + + final VCFHeaderLine line1 = GATKVCFUtils.getCommandLineArgumentHeaderLine(testEngine1, Collections.EMPTY_LIST); + logger.warn(line1); + Assert.assertNotNull(line1); + Assert.assertEquals(line1.getKey(), GATKVCFUtils.GATK_COMMAND_LINE_KEY); + for ( final String field : Arrays.asList("Version", "ID", "Date", "CommandLineOptions")) + Assert.assertTrue(line1.toString().contains(field), "Couldn't find field " + field + " in " + line1.getValue()); + Assert.assertTrue(line1.toString().contains("ID=" + testEngine1.getWalkerName())); + + final VCFHeaderLine line2 = GATKVCFUtils.getCommandLineArgumentHeaderLine(testEngine2, Collections.EMPTY_LIST); + logger.warn(line2); + + header.addMetaDataLine(line1); + final Set lines1 = header.getMetaDataInInputOrder(); + Assert.assertTrue(lines1.contains(line1)); + + header.addMetaDataLine(line2); + final Set lines2 = header.getMetaDataInInputOrder(); + Assert.assertTrue(lines2.contains(line1)); + Assert.assertTrue(lines2.contains(line2)); + } +} \ No newline at end of file