From 4767a83d8a2657875747ba6d596e948a5c5f48c1 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Wed, 25 Nov 2015 01:52:04 -0500 Subject: [PATCH 01/82] Update pom versions to mark the start of GATK 3.6 development --- pom.xml | 2 +- protected/gatk-package-distribution/pom.xml | 2 +- protected/gatk-queue-extensions-distribution/pom.xml | 2 +- protected/gatk-queue-package-distribution/pom.xml | 2 +- protected/gatk-tools-protected/pom.xml | 2 +- protected/pom.xml | 2 +- public/VectorPairHMM/pom.xml | 2 +- public/external-example/pom.xml | 2 +- public/gatk-engine/pom.xml | 2 +- public/gatk-queue-extensions-generator/pom.xml | 2 +- public/gatk-queue-extensions-public/pom.xml | 2 +- public/gatk-queue/pom.xml | 2 +- public/gatk-root/pom.xml | 2 +- public/gatk-tools-public/pom.xml | 2 +- public/gatk-utils/pom.xml | 2 +- public/gsalib/pom.xml | 2 +- public/package-tests/pom.xml | 2 +- public/pom.xml | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pom.xml b/pom.xml index 6490280d1..728c772db 100644 --- a/pom.xml +++ b/pom.xml @@ -13,7 +13,7 @@ org.broadinstitute.gatk gatk-root - 3.5-SNAPSHOT + 3.6-SNAPSHOT public/gatk-root diff --git a/protected/gatk-package-distribution/pom.xml b/protected/gatk-package-distribution/pom.xml index dfc00c219..205671988 100644 --- a/protected/gatk-package-distribution/pom.xml +++ b/protected/gatk-package-distribution/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.gatk gatk-aggregator - 3.5-SNAPSHOT + 3.6-SNAPSHOT ../.. diff --git a/protected/gatk-queue-extensions-distribution/pom.xml b/protected/gatk-queue-extensions-distribution/pom.xml index 6d484c8f7..a296ef8b7 100644 --- a/protected/gatk-queue-extensions-distribution/pom.xml +++ b/protected/gatk-queue-extensions-distribution/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.gatk gatk-aggregator - 3.5-SNAPSHOT + 3.6-SNAPSHOT ../.. 
diff --git a/protected/gatk-queue-package-distribution/pom.xml b/protected/gatk-queue-package-distribution/pom.xml index 510aa339b..1a60db514 100644 --- a/protected/gatk-queue-package-distribution/pom.xml +++ b/protected/gatk-queue-package-distribution/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.gatk gatk-aggregator - 3.5-SNAPSHOT + 3.6-SNAPSHOT ../.. diff --git a/protected/gatk-tools-protected/pom.xml b/protected/gatk-tools-protected/pom.xml index 6142872c8..f910067f2 100644 --- a/protected/gatk-tools-protected/pom.xml +++ b/protected/gatk-tools-protected/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.gatk gatk-aggregator - 3.5-SNAPSHOT + 3.6-SNAPSHOT ../.. diff --git a/protected/pom.xml b/protected/pom.xml index edfd9e7b3..2bef5b158 100644 --- a/protected/pom.xml +++ b/protected/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.gatk gatk-root - 3.5-SNAPSHOT + 3.6-SNAPSHOT ../public/gatk-root diff --git a/public/VectorPairHMM/pom.xml b/public/VectorPairHMM/pom.xml index ccbc99eef..b0e75eb1d 100644 --- a/public/VectorPairHMM/pom.xml +++ b/public/VectorPairHMM/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.gatk gatk-root - 3.5-SNAPSHOT + 3.6-SNAPSHOT ../../public/gatk-root diff --git a/public/external-example/pom.xml b/public/external-example/pom.xml index 02eaa9800..17919f365 100644 --- a/public/external-example/pom.xml +++ b/public/external-example/pom.xml @@ -9,7 +9,7 @@ External Example - 3.5-SNAPSHOT + 3.6-SNAPSHOT - 1.141 - 1.141 + 2.0.0 + 2.0.0 diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java index 1513fc5bf..40a571b8c 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/CatVariantsIntegrationTest.java @@ -92,14 +92,14 @@ public class CatVariantsIntegrationTest { final File 
catVariantsTempList2 = BaseTest.createTempListFile("CatVariantsTest2", CatVariantsVcf2.getAbsolutePath()); new CatVariantsTestProvider(CatVariantsVcf1, CatVariantsVcf2, BaseTest.createTempFile("CatVariantsTest", ".vcf"), "c055705e0606f4fe89d339d416c182e1"); - new CatVariantsTestProvider(CatVariantsBcf1, CatVariantsBcf2, BaseTest.createTempFile("CatVariantsTest", ".bcf"), "2a82e959b3b07b461d64bd5ed7298aa3"); + new CatVariantsTestProvider(CatVariantsBcf1, CatVariantsBcf2, BaseTest.createTempFile("CatVariantsTest", ".bcf"), "b7085b466ee49ba0857c6ff6ffe32bf3"); for (String extension1 : AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS) { for (String extension2 : AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS) { final File file1 = new File(CatVariantsDir, "CatVariantsTest1.vcf" + extension1); final File file2 = new File(CatVariantsDir, "CatVariantsTest2.vcf" + extension2); new CatVariantsTestProvider(file1, file2, BaseTest.createTempFile("CatVariantsTest.", ".vcf"), "c055705e0606f4fe89d339d416c182e1"); - new CatVariantsTestProvider(file1, file2, BaseTest.createTempFile("CatVariantsTest.", ".bcf"), "2a82e959b3b07b461d64bd5ed7298aa3"); + new CatVariantsTestProvider(file1, file2, BaseTest.createTempFile("CatVariantsTest.", ".bcf"), "820124f0ef22324e070ee21c91a633a4"); new CatVariantsTestProvider(file1, file2, BaseTest.createTempFile("CatVariantsTest.", ".vcf" + extension1), "3beb2c58fb795fcdc485de9868eda576"); } new CatVariantsTestProvider(CatVariantsVcf1, CatVariantsVcf2, BaseTest.createTempFile("CatVariantsTest.", ".vcf" + extension1), "3beb2c58fb795fcdc485de9868eda576"); diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverageIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverageIntegrationTest.java index 9b201b411..d064328c0 100644 --- 
a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverageIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverageIntegrationTest.java @@ -86,23 +86,23 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { // now add the expected files that get generated spec.addAuxFile("0f9603eb1ca4a26828e82d8c8f4991f6", baseOutputFile); spec.addAuxFile("51e6c09a307654f43811af35238fb179", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts")); - spec.addAuxFile("520720a88ae7608257af51bc41c06b87", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions")); + spec.addAuxFile("3bf1d7247ea68d1afb35c2032c68dbdf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions")); spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics")); - spec.addAuxFile("6958004a8156f3f267caa6b04cf90f5f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); - spec.addAuxFile("ebbfc9b9f4e12ac989c127061948c565", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); - spec.addAuxFile("e003bef6762833a5cebca25d94194616", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); + spec.addAuxFile("681dcbedcf7ca14bb44134abd1d8da3f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); + spec.addAuxFile("0f58e7f0909b84897fea5daebd9d2948", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); + spec.addAuxFile("2832e48c12b1d8811ccd319a8ffb8dc1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); spec.addAuxFile("a836b92ac17b8ff9788e2aaa9116b5d4", 
createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts")); - spec.addAuxFile("0732b6d2db9c94b0fcf18ca1f19772a8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); + spec.addAuxFile("22fee04ffca7fa489ae2e66309de12e4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics")); - spec.addAuxFile("3522f7380554b926c71a7258250c1d63", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); - spec.addAuxFile("2cd9d8c5e37584edd62ca6938659cf59", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); - spec.addAuxFile("78fdd35a63a7a4c6b3a043b946b04730", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); + spec.addAuxFile("2de22ad840bf40621e51316728a32fe9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); + spec.addAuxFile("6c84a067016c63e8c2face2de800acc7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); + spec.addAuxFile("2629883b99428f51e2d90d820b45504a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); spec.addAuxFile("6909d50a7da337cd294828b32b945eb8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); - spec.addAuxFile("aa00e3652dd518ccbae2caa00171835b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); + spec.addAuxFile("f6e52c5659f53afdcfc69c25272b9e99", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); - 
spec.addAuxFile("0ce5ebfa46b081820d013bdbbfe42d34", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); - spec.addAuxFile("c7c5bad6c6818995c634f350aa66fde9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); - spec.addAuxFile("949c9ce745753cd98f337600d3931d09", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); + spec.addAuxFile("c2fb3a31d02df03c35afbe7f2284ad66", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); + spec.addAuxFile("d1b3d08c6e9565a1dab727d089085761", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); + spec.addAuxFile("c0fadfcfd88cec81ba0d57b33bdec277", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); execute("testBaseOutputNoFiltering",spec); } @@ -119,7 +119,7 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { spec.setOutputFileLocation(baseOutputFile); spec.addAuxFile("6ccd7d8970ba98cb95fe41636a070c1c",baseOutputFile); - spec.addAuxFile("4429d33ce8836c09ba2b5ddfae2f998e",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); + spec.addAuxFile("fcaac186b7ccd10f3a6cb89ea49dcc1e",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); execute("testNoCoverageDueToFiltering",spec); } diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/qc/PileupWalkerIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/qc/PileupWalkerIntegrationTest.java index 0d9c7864a..a52059bb5 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/qc/PileupWalkerIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/qc/PileupWalkerIntegrationTest.java @@ -40,7 +40,7 @@ public class PileupWalkerIntegrationTest extends WalkerTest { String gatk_args = "-T 
Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam " + "-R " + hg18Reference + " -L chr15:46,347,148 -o %s"; - String expected_md5 = "526c93b0fa660d6b953b57103e59fe98"; + String expected_md5 = "22211a5da34a45b3b6045dbcbce8fede"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5)); executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases", spec); } @@ -115,7 +115,7 @@ public class PileupWalkerIntegrationTest extends WalkerTest { + "-R " + hg18Reference + " -outputInsertLength" + " -L chr15:46,347,148 -o %s"; - String expected_md5 = "53ced173768f3d4d90b8a8206e72eae5"; + String expected_md5 = "cae23b79598b23feec665704f5578897"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5)); executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases (with insert length)", spec); } diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/readutils/ClipReadsWalkersIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/readutils/ClipReadsWalkersIntegrationTest.java index 44c0bc2fe..17cc7c367 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/readutils/ClipReadsWalkersIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/readutils/ClipReadsWalkersIntegrationTest.java @@ -56,9 +56,9 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest { @Test public void testClipRange2() { testClipper("clipRange2", "-CT 1-5,11-15", "be4fcad5b666a5540028b774169cbad7", "ec4cf54ed50a6baf69dbf98782c19aeb"); } @Test public void testClipSeq() { testClipper("clipSeqX", "-X CCCCC", "db199bd06561c9f2122f6ffb07941fbc", "a9cf540e4ed2514061248a878e09a09c"); } - @Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + privateTestDir + 
"seqsToClip.fasta", "d011a3152b31822475afbe0281491f8d", "906871df304dd966682e5798d59fc86b"); } + @Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + privateTestDir + "seqsToClip.fasta", "22cd33dbd06cc425ce3626360367bfca", "906871df304dd966682e5798d59fc86b"); } - @Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + privateTestDir + "seqsToClip.fasta -X CCCCC", "a23187bd9bfb06557f799706d98441de", "b41995fea04034ca0427c4a71504ef83"); } + @Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + privateTestDir + "seqsToClip.fasta -X CCCCC", "e4a88276a6f76ccc2b05a06c76305a1c", "b41995fea04034ca0427c4a71504ef83"); } @Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", Q10ClipOutput, "27847d330b962e60650df23b6efc8c3c"); } @Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", Q10ClipOutput, "195b8bdfc0186fdca742764aa9b06363"); } diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java index 85d8c56ff..33684cda1 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -179,7 +179,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { new MergeAllelesTest(Arrays.asList(ATref, ATC, Anoref, G), Arrays.asList(Aref, ATCATC, G), - Arrays.asList(ATref, ATC, Anoref, G, ATCATCT, GT)); + Arrays.asList(ATref, ATC, Anoref, G, GT, ATCATCT)); return MergeAllelesTest.getTests(MergeAllelesTest.class); } From e08940a5a892dd2a2dd53234e6c8cd704eb7fb1f Mon Sep 17 00:00:00 2001 From: Samuel Lee Date: Fri, 18 Mar 2016 10:12:08 -0400 Subject: [PATCH 54/82] Added maxNumPLValues argument to allow 
users to set maximum number of PL values in output. --- ...GenotypeCalculationArgumentCollection.java | 12 +- .../walkers/genotyper/GenotypingEngine.java | 31 +++-- .../genotyper/UnifiedGenotypingEngine.java | 14 +- .../genotyper/afcalc/AFCalculator.java | 69 +++++++++- .../genotyper/afcalc/ExactAFCalculator.java | 21 ++- .../afcalc/FixedAFCalculatorProvider.java | 11 +- .../GeneralPloidyExactAFCalculator.java | 24 ++-- .../IndependentAllelesExactAFCalculator.java | 121 +++++++++--------- .../haplotypecaller/HaplotypeCaller.java | 37 +++--- .../HaplotypeCallerGVCFIntegrationTest.java | 61 +++++++++ 10 files changed, 281 insertions(+), 120 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java index 238ad814d..3c9da90d2 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java @@ -51,6 +51,7 @@ package org.broadinstitute.gatk.engine.arguments; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator; import org.broadinstitute.gatk.utils.commandline.Advanced; import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; @@ -113,7 +114,7 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{ public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0; /** - * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), + * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or 
GENOTYPE_GIVEN_ALLELES), * then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend * that you not play around with this parameter. @@ -124,6 +125,15 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{ @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 6; + /** + * Determines the maximum number of PL values that will be logged in the output. If the number of genotypes + * (which is determined by the ploidy and the number of alleles) exceeds the value provided by this argument, + * then output of all of the PL values will be suppressed. + */ + @Advanced + @Argument(fullName = "max_num_PL_values", shortName = "maxNumPLValues", doc = "Maximum number of PL values to output", required = false) + public int MAX_NUM_PL_VALUES = AFCalculator.MAX_NUM_PL_VALUES_DEFAULT; + /** * By default, the prior specified with the argument --heterozygosity/-hets is used for variant discovery at a particular locus, using an infinite sites model, * see e.g. Waterson (1975) or Tajima (1996). 
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java index 91c27cdd0..6848e951e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java @@ -56,22 +56,22 @@ import com.google.java.contract.Requires; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFInfoHeaderLine; import org.apache.log4j.Logger; -import org.broadinstitute.gatk.utils.contexts.AlignmentContext; -import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils; -import org.broadinstitute.gatk.utils.contexts.ReferenceContext; -import org.broadinstitute.gatk.utils.genotyper.SampleList; -import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.QualityUtils; +import org.broadinstitute.gatk.utils.contexts.AlignmentContext; +import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils; +import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.broadinstitute.gatk.utils.genotyper.SampleList; import 
org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; +import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; @@ -85,7 +85,7 @@ import java.util.*; */ public abstract class GenotypingEngine { - protected final AFCalculatorProvider afCalculatorProvider ; + protected final AFCalculatorProvider afCalculatorProvider; protected Logger logger; @@ -104,6 +104,9 @@ public abstract class GenotypingEngine getFilteredAndStratifiedContexts(final ReferenceContext refContext, diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java index e82c4666e..fa91a2f2b 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java @@ -53,11 +53,12 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.apache.log4j.Logger; -import org.broadinstitute.gatk.utils.SimpleTimer; import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.variantcontext.GenotypesContext; import htsjdk.variant.variantcontext.VariantContext; +import org.apache.log4j.Logger; +import org.broadinstitute.gatk.utils.SimpleTimer; import java.io.File; import java.util.List; @@ -67,10 +68,13 @@ import java.util.List; * Generic interface for calculating the probability of alleles segregating given priors 
and genotype likelihoods */ public abstract class AFCalculator implements Cloneable { - private final static Logger defaultLogger = Logger.getLogger(AFCalculator.class); - + private static final Logger defaultLogger = Logger.getLogger(AFCalculator.class); + public static final int MAX_NUM_PL_VALUES_DEFAULT = 100; protected Logger logger = defaultLogger; + protected int maxNumPLValues = MAX_NUM_PL_VALUES_DEFAULT; // if PL vectors longer than this # of elements, don't log them + protected static int maxNumPLValuesObserved = 0; + protected static long numTimesMaxNumPLValuesExceeded = 0; private SimpleTimer callTimer = new SimpleTimer(); private StateTracker stateTracker; @@ -102,10 +106,19 @@ public abstract class AFCalculator implements Cloneable { * * @param logger */ - public void setLogger(Logger logger) { + public void setLogger(final Logger logger) { this.logger = logger; } + /** + * Set the maximum number of PL values to log. If the number of PL values exceeds this, no PL values will be logged. + * @param maxNumPLValues maximum number of PL values to log + */ + public AFCalculator setMaxNumPLValues(final int maxNumPLValues) { + this.maxNumPLValues = maxNumPLValues; + return this; + } + /** * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc * @@ -242,4 +255,50 @@ public abstract class AFCalculator implements Cloneable { return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele]; } + /** + * Strips PLs from the specified GenotypeBuilder if their number exceeds the maximum allowed. Corresponding counters are updated. 
+ * @param gb the GenotypeBuilder to modify + * @param vc the VariantContext + * @param sampleName the sample name + * @param newLikelihoods the PL array + */ + protected void removePLsIfMaxNumPLValuesExceeded(final GenotypeBuilder gb, final VariantContext vc, final String sampleName, final double[] newLikelihoods) { + final int numPLValuesFound = newLikelihoods.length; + if (numPLValuesFound > maxNumPLValues) { + logMaxNumPLValuesWarning(vc, sampleName, numPLValuesFound); + numTimesMaxNumPLValuesExceeded++; + gb.noPL(); + if (numPLValuesFound > maxNumPLValuesObserved) { + maxNumPLValuesObserved = numPLValuesFound; + } + } + } + + private void logMaxNumPLValuesWarning(final VariantContext vc, final String sampleName, final int numPLValuesFound) { + final String message = String.format("Maximum allowed number of PLs (%d) exceeded for sample %s at %s:%d-%d with %d possible genotypes. " + + "No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) " + + "unless the --max_num_PL_values argument is increased accordingly", + maxNumPLValues, sampleName, vc.getContig(), vc.getStart(), vc.getEnd(), numPLValuesFound); + + if ( numTimesMaxNumPLValuesExceeded == 0 ) { + logger.warn(message + ". Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed."); + } else { + logger.debug(message); + } + } + + /** + * Logs the number of times the maximum allowed number of PLs was exceeded and the largest number of PLs observed. The corresponding counters are reset. + */ + public void printFinalMaxNumPLValuesWarning() { + if ( numTimesMaxNumPLValuesExceeded > 0 ) { + final String message = String.format("Maximum allowed number of PLs (%d) was exceeded %d time(s); the largest number of PLs found was %d. 
" + + "No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) " + + "unless the --max_num_PL_values argument is increased accordingly", + maxNumPLValues, numTimesMaxNumPLValuesExceeded, maxNumPLValuesObserved); + logger.warn(message); + } + maxNumPLValuesObserved = 0; + numTimesMaxNumPLValuesExceeded = 0; + } } \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java index 7089cbb77..4af353361 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java @@ -51,10 +51,10 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; +import htsjdk.variant.variantcontext.*; import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; -import htsjdk.variant.variantcontext.*; import java.util.*; @@ -65,8 +65,8 @@ abstract class ExactAFCalculator extends AFCalculator { protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first - // useful so that we don't keep printing out the same warning message - protected static boolean printedWarning = false; + // useful so that we don't keep printing out the same warning messages + protected static boolean printedMaxAltAllelesWarning = false; /** * Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first. 
@@ -157,15 +157,14 @@ abstract class ExactAFCalculator extends AFCalculator { if (altAlleleReduction == 0) return vc; - String message = "this tool is currently set to genotype at most " + maximumAlternativeAlleles - + " alternate alleles in a given context, but the context at " + vc.getContig() + ":" + vc.getStart() - + " has " + (vc.getAlternateAlleles().size()) - + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument"; + final String message = String.format("This tool is currently set to genotype at most %d " + + "alternate alleles in a given context, but the context at %s: %d has %d " + + "alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument", + maximumAlternativeAlleles, vc.getContig(), vc.getStart(), vc.getAlternateAlleles().size()); - if ( !printedWarning ) { - printedWarning = true; - message += ". This warning message is output just once per run and further warnings will be suppressed unless the DEBUG logging level is used."; - logger.warn(message); + if ( !printedMaxAltAllelesWarning ) { + printedMaxAltAllelesWarning = true; + logger.warn(message + ". 
Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed."); } else { logger.debug(message); } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java index 08f378f48..1c58191df 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java @@ -71,6 +71,8 @@ public class FixedAFCalculatorProvider extends AFCalculatorProvider { private final int maximumAltAlleleCount; + private final int maximumNumPLValues; + private final int ploidy; /** @@ -125,17 +127,18 @@ public class FixedAFCalculatorProvider extends AFCalculatorProvider { final Logger logger, final boolean verifyRequests) { if (configuration == null) - throw new IllegalArgumentException("null configuration"); - if (configuration == null) - throw new IllegalArgumentException("null configuration genotype arguments"); + throw new IllegalArgumentException("null genotype-arguments configuration"); if (configuration.samplePloidy < 1) throw new IllegalArgumentException("invalid sample ploidy " + configuration.samplePloidy); if (configuration.MAX_ALTERNATE_ALLELES < 0) throw new IllegalArgumentException("invalid maximum number of alleles " + (configuration.MAX_ALTERNATE_ALLELES + 1)); + if (configuration.MAX_NUM_PL_VALUES < 0) + throw new IllegalArgumentException("invalid maximum number of PL values " + configuration.MAX_NUM_PL_VALUES); ploidy = configuration.samplePloidy; maximumAltAlleleCount = configuration.MAX_ALTERNATE_ALLELES; - singleton = 
AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance(); + maximumNumPLValues = configuration.MAX_NUM_PL_VALUES; + singleton = AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance().setMaxNumPLValues(maximumNumPLValues); singleton.setLogger(logger); this.verifyRequests = verifyRequests; } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java index 886e56958..88537bc44 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java @@ -58,15 +58,16 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalcula import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators; import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; public class GeneralPloidyExactAFCalculator extends ExactAFCalculator { - static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them - protected GeneralPloidyExactAFCalculator() { } @@ -491,6 +492,7 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator { final boolean assignGenotypes, final double[] 
newLikelihoods) { final GenotypeBuilder gb = new GenotypeBuilder(g); + final String sampleName = g.getSampleName(); // add likelihoods gb.PL(newLikelihoods); @@ -500,7 +502,7 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator { if (newSACs != null) gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs); if (assignGenotypes) - assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); + assignGenotype(gb, vc, sampleName, newLikelihoods, allelesToUse, ploidy); else gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy)); @@ -528,13 +530,18 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator { } /** - * Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs + * Assign genotypes (GTs) to the samples in the VariantContext greedily based on the PLs * + * @param gb the GenotypeBuilder to modify + * @param vc the VariantContext + * @param sampleName the sample name * @param newLikelihoods the PL array * @param allelesToUse the list of alleles to choose from (corresponding to the PLs) * @param numChromosomes Number of chromosomes per pool */ private void assignGenotype(final GenotypeBuilder gb, + final VariantContext vc, + final String sampleName, final double[] newLikelihoods, final List allelesToUse, final int numChromosomes) { @@ -547,13 +554,10 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator { gb.alleles(alleleCounts.asAlleleList(allelesToUse)); - // remove PLs if necessary - if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING) - gb.noPL(); + removePLsIfMaxNumPLValuesExceeded(gb, vc, sampleName, newLikelihoods); // TODO - deprecated so what is the appropriate method to call? 
if ( numNewAltAlleles > 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); } - } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesExactAFCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesExactAFCalculator.java index 46118d108..5eae11025 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesExactAFCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesExactAFCalculator.java @@ -75,8 +75,6 @@ import java.util.*; */ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator { - private static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them - /** * Array that caches the allele list that corresponds to the ith ploidy. 
* @@ -460,78 +458,83 @@ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator { @Override @Requires("vc != null && allelesToUse != null") public GenotypesContext subsetAlleles(VariantContext vc, int defaultPloidy, List allelesToUse, boolean assignGenotypes) { - // the genotypes with PLs - final GenotypesContext oldGTs = vc.getGenotypes(); + // the genotypes with PLs + final GenotypesContext oldGTs = vc.getGenotypes(); - // samples - final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); + // samples + final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); - // the new genotypes to create - final GenotypesContext newGTs = GenotypesContext.create(); + // the new genotypes to create + final GenotypesContext newGTs = GenotypesContext.create(); - // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward - final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); - final int numNewAltAlleles = allelesToUse.size() - 1; + // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward + final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); + final int numNewAltAlleles = allelesToUse.size() - 1; - // create the new genotypes - for ( int k = 0; k < oldGTs.size(); k++ ) { - final Genotype g = oldGTs.get(sampleIndices.get(k)); - final int declaredPloidy = g.getPloidy(); - final int ploidy = declaredPloidy <= 0 ? 
defaultPloidy : declaredPloidy; - if ( !g.hasLikelihoods() ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(),GATKVariantContextUtils.noCallAlleles(ploidy))); - continue; - } - - // create the new likelihoods array from the alleles we are allowed to use - final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); - double[] newLikelihoods; - - // Optimization: if # of new alt alleles = 0 (pure ref call), keep original likelihoods so we skip normalization - // and subsetting - if ( numOriginalAltAlleles == numNewAltAlleles || numNewAltAlleles == 0) { - newLikelihoods = originalLikelihoods; - } else { - newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse); - - // might need to re-normalize - newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); - } - - // if there is no mass on the (new) likelihoods, then just no-call the sample - if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy))); - } - else { - final GenotypeBuilder gb = new GenotypeBuilder(g); - - if ( numNewAltAlleles == 0 ) - gb.noPL(); - else - gb.PL(newLikelihoods); - - // if we weren't asked to assign a genotype, then just no-call the sample - if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) - gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy)); - else - assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); - newGTs.add(gb.make()); - } + // create the new genotypes + for ( int k = 0; k < oldGTs.size(); k++ ) { + final Genotype g = oldGTs.get(sampleIndices.get(k)); + final int declaredPloidy = g.getPloidy(); + final int ploidy = declaredPloidy <= 0 ? 
defaultPloidy : declaredPloidy; + if ( !g.hasLikelihoods() ) { + newGTs.add(GenotypeBuilder.create(g.getSampleName(),GATKVariantContextUtils.noCallAlleles(ploidy))); + continue; } - return GATKVariantContextUtils.fixADFromSubsettedAlleles(newGTs, vc, allelesToUse); + // create the new likelihoods array from the alleles we are allowed to use + final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); + double[] newLikelihoods; + + // Optimization: if # of new alt alleles = 0 (pure ref call), keep original likelihoods so we skip normalization + // and subsetting + if ( numOriginalAltAlleles == numNewAltAlleles || numNewAltAlleles == 0) { + newLikelihoods = originalLikelihoods; + } else { + newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse); + + // might need to re-normalize + newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); + } + + // if there is no mass on the (new) likelihoods, then just no-call the sample + if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) { + newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy))); + } else { + final GenotypeBuilder gb = new GenotypeBuilder(g); + final String sampleName = g.getSampleName(); + + if ( numNewAltAlleles == 0 ) + gb.noPL(); + else + gb.PL(newLikelihoods); + + // if we weren't asked to assign a genotype, then just no-call the sample + if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) + gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy)); + else + assignGenotype(gb, vc, sampleName, newLikelihoods, allelesToUse, ploidy); + newGTs.add(gb.make()); + } + } + + return GATKVariantContextUtils.fixADFromSubsettedAlleles(newGTs, vc, allelesToUse); } /** - * Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs + * Assign genotypes (GTs) to the 
samples in the VariantContext greedily based on the PLs * + * @param gb the GenotypeBuilder to modify + * @param vc the VariantContext + * @param sampleName the sample name * @param newLikelihoods the PL array * @param allelesToUse the list of alleles to choose from (corresponding to the PLs) * @param numChromosomes Number of chromosomes per pool */ private void assignGenotype(final GenotypeBuilder gb, + final VariantContext vc, + final String sampleName, final double[] newLikelihoods, final List allelesToUse, final int numChromosomes) { @@ -544,9 +547,7 @@ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator { gb.alleles(alleleCounts.asAlleleList(allelesToUse)); - // remove PLs if necessary - if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING) - gb.noPL(); + removePLsIfMaxNumPLValuesExceeded(gb, vc, sampleName, newLikelihoods); if ( numNewAltAlleles > 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index f85a254a8..c7cc4cecc 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -55,30 +55,24 @@ import com.google.java.contract.Ensures; import htsjdk.samtools.SAMFileWriter; import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.writer.VariantContextWriter; -import htsjdk.variant.vcf.*; +import htsjdk.variant.vcf.VCFConstants; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; +import htsjdk.variant.vcf.VCFStandardHeaderLines; import org.broadinstitute.gatk.engine.CommandLineGATK; import 
org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; +import org.broadinstitute.gatk.engine.filters.BadMateFilter; import org.broadinstitute.gatk.engine.io.DirectOutputTracker; import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub; -import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardHCAnnotation; -import org.broadinstitute.gatk.utils.contexts.AlignmentContext; -import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils; -import org.broadinstitute.gatk.utils.contexts.ReferenceContext; -import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils; -import org.broadinstitute.gatk.utils.downsampling.DownsampleType; -import org.broadinstitute.gatk.utils.downsampling.DownsamplingUtils; -import org.broadinstitute.gatk.engine.filters.BadMateFilter; -import org.broadinstitute.gatk.utils.genotyper.*; -import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub; -import org.broadinstitute.gatk.utils.haplotypeBAMWriter.DroppedReadsTracker; -import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; +import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.walkers.*; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; +import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardHCAnnotation; import org.broadinstitute.gatk.tools.walkers.genotyper.*; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import 
org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; @@ -91,21 +85,33 @@ import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState; import org.broadinstitute.gatk.utils.clipping.ReadClipper; import org.broadinstitute.gatk.utils.commandline.*; +import org.broadinstitute.gatk.utils.contexts.AlignmentContext; +import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils; +import org.broadinstitute.gatk.utils.contexts.ReferenceContext; +import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils; +import org.broadinstitute.gatk.utils.downsampling.DownsampleType; +import org.broadinstitute.gatk.utils.downsampling.DownsamplingUtils; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.gatk.utils.fragments.FragmentCollection; import org.broadinstitute.gatk.utils.fragments.FragmentUtils; +import org.broadinstitute.gatk.utils.genotyper.*; import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils; import org.broadinstitute.gatk.utils.gvcf.GVCFWriter; import org.broadinstitute.gatk.utils.haplotype.Haplotype; +import org.broadinstitute.gatk.utils.haplotypeBAMWriter.DroppedReadsTracker; import org.broadinstitute.gatk.utils.haplotypeBAMWriter.HaplotypeBAMWriter; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.help.HelpConstants; import org.broadinstitute.gatk.utils.pairhmm.PairHMM; +import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.utils.sam.AlignmentUtils; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.ReadUtils; -import org.broadinstitute.gatk.utils.variant.*; +import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; +import 
org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import java.io.File; import java.io.FileNotFoundException; @@ -1154,6 +1160,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Override public void onTraversalDone(Integer result) { + genotypingEngine.printFinalMaxNumPLValuesWarning(); if ( HCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) ((GVCFWriter)vcfWriter).close(false); // GROSS -- engine forces us to close our own VCF writer since we wrapped it referenceConfidenceModel.close(); //TODO remove the need to call close here for debugging, the likelihood output stream should be managed diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index d828a1c20..02699eb6a 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -51,13 +51,18 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller; +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Level; import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; +import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import 
java.util.List; @@ -352,4 +357,60 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec); } + @Test + public void testHaplotypeCallerMaxNumPLValues() { + final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 70", + b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7ea93210277fa4b590790a81c4b3994b")); + spec.disableShadowBCF(); + executeTest("testHaplotypeCallerMaxNumPLValues", spec); + } + + @Test + public void testHaplotypeCallerMaxNumPLValuesExceededWithWarnLogLevel() throws IOException { + // Need to see log WARN messages + final Level level = logger.getLevel(); + logger.setLevel(Level.WARN); + + final File logFile = createTempFile("testMaxNumPLValuesExceededWithWarnLogLevel.log", ".tmp"); + final String logFileName = logFile.getAbsolutePath(); + + final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 30 -log %s", + b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals", + GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER, logFileName); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9d69cb9dc67e0d0ee9863767428e6841")); + spec.disableShadowBCF(); + executeTest("testHaplotypeCallerMaxNumPLValuesExceededWithWarnLogLevel", spec); + // Make sure the "Maximum allowed number of PLs exceeded" messages are in 
the log + Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10097101-10097101 with 35 possible genotypes. " + + "No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) unless the --max_num_PL_values argument is increased accordingly. " + + "Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed.")); + Assert.assertFalse(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10316239-10316241 with 70 possible genotypes.")); + Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) was exceeded 2 time(s); the largest number of PLs found was 70.")); + // Set the log level back + logger.setLevel(level); + } + + @Test + public void testHaplotypeCallerMaxNumPLValuesExceededWithDebugLogLevel() throws IOException { + // Need to see log DEBUG messages + final Level level = logger.getLevel(); + logger.setLevel(Level.DEBUG); + + final File logFile = createTempFile("testMaxNumPLValuesExceededWithDebugLogLevel.log", ".tmp"); + final String logFileName = logFile.getAbsolutePath(); + + final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 30 -log %s", + b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals", + GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER, logFileName); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9d69cb9dc67e0d0ee9863767428e6841")); + spec.disableShadowBCF(); + executeTest("testHaplotypeCallerMaxNumPLValuesExceededWithDebugLogLevel", spec); + // Make sure the "Maximum 
allowed number of PLs exceeded" messages are in the log + Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10097101-10097101 with 35 possible genotypes.")); + Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10316239-10316241 with 70 possible genotypes.")); + Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) was exceeded 2 time(s); the largest number of PLs found was 70.")); + // Set the log level back + logger.setLevel(level); + } } From 14fe8b1e0e7938d7495647353cc36bf3664591f0 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Tue, 26 Apr 2016 17:35:55 -0400 Subject: [PATCH 55/82] Moved BQSRGatherer and dependencies to the public module --- .../engine/recalibration/BQSRGatherer.java | 138 ------------ .../recalibration/BQSRReadTransformer.java | 104 --------- .../recalibration/QuantizationInfo.java | 151 ------------- .../engine/recalibration/ReadCovariates.java | 176 --------------- .../recalibration/RecalibrationTables.java | 169 --------------- .../recalibration/covariates/Covariate.java | 144 ------------- .../covariates/ExperimentalCovariate.java | 81 ------- .../covariates/QualityScoreCovariate.java | 129 ----------- .../covariates/ReadGroupCovariate.java | 190 ---------------- .../covariates/RepeatLengthCovariate.java | 74 ------- .../RepeatUnitAndLengthCovariate.java | 75 ------- .../covariates/RepeatUnitCovariate.java | 78 ------- .../covariates/RequiredCovariate.java | 81 ------- .../covariates/StandardCovariate.java | 81 ------- .../BaseRecalibrationUnitTest.java | 116 ---------- .../ContextCovariateUnitTest.java | 121 ----------- .../recalibration/CycleCovariateUnitTest.java | 140 ------------ .../recalibration/QualQuantizerUnitTest.java | 195 ----------------- .../recalibration/ReadCovariatesUnitTest.java | 148 ------------- 
.../ReadGroupCovariateUnitTest.java | 125 ----------- .../recalibration/RecalUtilsUnitTest.java | 178 --------------- .../RecalibrationReportUnitTest.java | 176 --------------- .../RecalibrationTablesUnitTest.java | 203 ------------------ .../recalibration/RecalibrationTestUtils.java | 74 ------- .../engine/recalibration/BQSRGatherer.java | 112 ++++++++++ .../recalibration/BQSRReadTransformer.java | 78 +++++++ .../recalibration/BaseRecalibration.java | 62 ++---- .../engine/recalibration/QualQuantizer.java | 62 ++---- .../recalibration/QuantizationInfo.java | 125 +++++++++++ .../engine/recalibration/ReadCovariates.java | 150 +++++++++++++ .../gatk/engine/recalibration/RecalDatum.java | 62 ++---- .../engine/recalibration/RecalDatumNode.java | 62 ++---- .../gatk/engine/recalibration/RecalUtils.java | 62 ++---- .../RecalibrationArgumentCollection.java | 62 ++---- .../recalibration/RecalibrationReport.java | 62 ++---- .../recalibration/RecalibrationTables.java | 143 ++++++++++++ .../covariates/ContextCovariate.java | 62 ++---- .../recalibration/covariates/Covariate.java | 118 ++++++++++ .../covariates/CycleCovariate.java | 62 ++---- .../covariates/ExperimentalCovariate.java | 55 +++++ .../covariates/QualityScoreCovariate.java | 103 +++++++++ .../covariates/ReadGroupCovariate.java | 164 ++++++++++++++ .../covariates/RepeatCovariate.java | 62 ++---- .../covariates/RepeatLengthCovariate.java | 48 +++++ .../RepeatUnitAndLengthCovariate.java | 49 +++++ .../covariates/RepeatUnitCovariate.java | 52 +++++ .../covariates/RequiredCovariate.java | 55 +++++ .../covariates/StandardCovariate.java | 55 +++++ .../BaseRecalibrationUnitTest.java | 90 ++++++++ .../ContextCovariateUnitTest.java | 95 ++++++++ .../recalibration/CycleCovariateUnitTest.java | 114 ++++++++++ .../recalibration/QualQuantizerUnitTest.java | 169 +++++++++++++++ .../recalibration/ReadCovariatesUnitTest.java | 122 +++++++++++ .../ReadGroupCovariateUnitTest.java | 99 +++++++++ .../recalibration/RecalDatumUnitTest.java 
| 62 ++---- .../recalibration/RecalUtilsUnitTest.java | 152 +++++++++++++ .../RecalibrationReportUnitTest.java | 150 +++++++++++++ .../RecalibrationTablesUnitTest.java | 177 +++++++++++++++ .../recalibration/RecalibrationTestUtils.java | 48 +++++ .../RepeatCovariatesUnitTest.java | 62 ++---- 60 files changed, 2739 insertions(+), 3675 deletions(-) delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRGatherer.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRReadTransformer.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/QuantizationInfo.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariates.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTables.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/Covariate.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ExperimentalCovariate.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/QualityScoreCovariate.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ReadGroupCovariate.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatLengthCovariate.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitAndLengthCovariate.java delete mode 100644 
protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitCovariate.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RequiredCovariate.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/StandardCovariate.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibrationUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ContextCovariateUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/CycleCovariateUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizerUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariatesUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadGroupCovariateUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalUtilsUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReportUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTablesUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTestUtils.java create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRGatherer.java create mode 100644 
public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRReadTransformer.java rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibration.java (57%) rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizer.java (66%) create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/QuantizationInfo.java create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariates.java rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatum.java (63%) rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumNode.java (67%) rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalUtils.java (84%) rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationArgumentCollection.java (70%) rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReport.java (66%) create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTables.java rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ContextCovariate.java (52%) create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/Covariate.java rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java (53%) create mode 100644 
public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ExperimentalCovariate.java create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/QualityScoreCovariate.java create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ReadGroupCovariate.java rename {protected/gatk-tools-protected => public/gatk-engine}/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatCovariate.java (51%) create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatLengthCovariate.java create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitAndLengthCovariate.java create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitCovariate.java create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RequiredCovariate.java create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/StandardCovariate.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibrationUnitTest.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ContextCovariateUnitTest.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/CycleCovariateUnitTest.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizerUnitTest.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariatesUnitTest.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadGroupCovariateUnitTest.java rename 
{protected/gatk-tools-protected => public/gatk-engine}/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumUnitTest.java (53%) create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalUtilsUnitTest.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReportUnitTest.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTablesUnitTest.java create mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTestUtils.java rename {protected/gatk-tools-protected => public/gatk-engine}/src/test/java/org/broadinstitute/gatk/engine/recalibration/RepeatCovariatesUnitTest.java (50%) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRGatherer.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRGatherer.java deleted file mode 100644 index 4685047c4..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRGatherer.java +++ /dev/null @@ -1,138 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.apache.commons.collections.CollectionUtils; -import org.apache.log4j.Logger; -import org.broadinstitute.gatk.utils.commandline.Gatherer; -import org.broadinstitute.gatk.utils.report.GATKReport; -import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import org.broadinstitute.gatk.utils.exceptions.UserException; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.PrintStream; -import java.util.*; - -/** - * User: carneiro - * Date: 3/29/11 - */ - - -public class BQSRGatherer extends Gatherer { - - private static final Logger logger = Logger.getLogger(BQSRGatherer.class); - private static final String EMPTY_INPUT_LIST = "list of inputs files is empty or there is no usable data in any input file"; - private static final String MISSING_OUTPUT_FILE = "missing output file name"; - private static final String MISSING_READ_GROUPS = "Missing read group(s)"; - - @Override - public void gather(final List inputs, final File output) { - final PrintStream outputFile; - try { - outputFile = new PrintStream(output); - } catch(FileNotFoundException e) { - throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE); - } - final GATKReport report = gatherReport(inputs); - report.print(outputFile); - } - - /** - * Gathers the input recalibration reports into a single report. 
- * - * @param inputs Input recalibration GATK reports - * @return gathered recalibration GATK report - */ - public static GATKReport gatherReport(final List inputs) { - final SortedSet allReadGroups = new TreeSet(); - final LinkedHashMap> inputReadGroups = new LinkedHashMap>(); - - // Get the read groups from each input report - for (final File input : inputs) { - final Set readGroups = RecalibrationReport.getReadGroups(input); - inputReadGroups.put(input, readGroups); - allReadGroups.addAll(readGroups); - } - - // Log the read groups that are missing from specific inputs - for (Map.Entry> entry: inputReadGroups.entrySet()) { - final File input = entry.getKey(); - final Set readGroups = entry.getValue(); - if (allReadGroups.size() != readGroups.size()) { - // Since this is not completely unexpected, more than debug, but less than a proper warning. - logger.info(MISSING_READ_GROUPS + ": " + input.getAbsolutePath()); - for (final Object readGroup: CollectionUtils.subtract(allReadGroups, readGroups)) { - logger.info(" " + readGroup); - } - } - } - - RecalibrationReport generalReport = null; - for (File input : inputs) { - final RecalibrationReport inputReport = new RecalibrationReport(input, allReadGroups); - if( inputReport.isEmpty() ) { continue; } - - if (generalReport == null) - generalReport = inputReport; - else - generalReport.combine(inputReport); - } - if (generalReport == null) - throw new ReviewedGATKException(EMPTY_INPUT_LIST); - - generalReport.calculateQuantizedQualities(); - - return generalReport.createGATKReport(); - } -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRReadTransformer.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRReadTransformer.java deleted file mode 100644 index 712b829e2..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRReadTransformer.java +++ /dev/null @@ 
-1,104 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. 
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. 
Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; -import org.broadinstitute.gatk.engine.WalkerManager; -import org.broadinstitute.gatk.engine.iterators.ReadTransformer; -import org.broadinstitute.gatk.engine.walkers.Walker; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; - -/** - * A ReadTransformer that applies BQSR on the fly to reads - * - * User: rpoplin - * Date: 2/13/12 - */ -public class BQSRReadTransformer extends ReadTransformer { - private boolean enabled; - private BaseRecalibration bqsr = null; - - @Override - public OrderingConstraint getOrderingConstraint() { return OrderingConstraint.MUST_BE_FIRST; } - - @Override - public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { - this.enabled = engine.hasBQSRArgumentSet(); - if ( enabled ) { - // TODO -- See important note below about applying BQSR to a reduced BAM file: - // If it is important to make sure that BQSR is not applied (as opposed to having the covariates computed) against a reduced bam file, - // we need to figure out how to make this work. 
The problem is that the ReadTransformers are initialized before the ReadDataSource - // inside the GenomeAnalysisEngine, so we generate a NPE when trying to retrieve the SAMFileHeaders. Ultimately, I don't think this is - // a necessary check anyways since we disallow running BaseRecalibrator on reduced bams (so we can't generate the recal tables to use here). - // Although we could add this check to the apply() method below, it's kind of ugly and inefficient. - // The call here would be: RecalUtils.checkForInvalidRecalBams(engine.getSAMFileHeaders(), engine.getArguments().ALLOW_BQSR_ON_REDUCED_BAMS); - final BQSRArgumentSet args = engine.getBQSRArgumentSet(); - this.bqsr = new BaseRecalibration(args.getRecalFile(), args.getQuantizationLevels(), args.shouldDisableIndelQuals(), args.getPreserveQscoresLessThan(), args.shouldEmitOriginalQuals(), args.getGlobalQScorePrior(), args.getStaticQuantizedQuals(), args.getRoundDown()); - } - final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class); - return mode.ApplicationTime(); - } - - @Override - public boolean enabled() { - return enabled; - } - - /** - * initialize a new BQSRReadTransformer that applies BQSR on the fly to incoming reads. 
- */ - @Override - public GATKSAMRecord apply(GATKSAMRecord read) { - bqsr.recalibrateRead(read); - return read; - } -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/QuantizationInfo.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/QuantizationInfo.java deleted file mode 100644 index 626f256b2..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/QuantizationInfo.java +++ /dev/null @@ -1,151 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.utils.report.GATKReportTable; -import org.broadinstitute.gatk.utils.MathUtils; -import org.broadinstitute.gatk.utils.QualityUtils; -import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; - -import java.util.Arrays; -import java.util.List; - -/** - * Class that encapsulates the information necessary for quality score quantization for BQSR - * - * @author carneiro - * @since 3/26/12 - */ -public class QuantizationInfo { - private List quantizedQuals; - private List empiricalQualCounts; - private int quantizationLevels; - - private QuantizationInfo(List quantizedQuals, List empiricalQualCounts, int quantizationLevels) { - this.quantizedQuals = quantizedQuals; - this.empiricalQualCounts = empiricalQualCounts; - this.quantizationLevels = quantizationLevels; - } - - public QuantizationInfo(List quantizedQuals, List empiricalQualCounts) { - this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals)); - } - - public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) { - final Long [] qualHistogram = new Long[QualityUtils.MAX_SAM_QUAL_SCORE +1]; // create a histogram with the empirical quality distribution - for 
(int i = 0; i < qualHistogram.length; i++) - qualHistogram[i] = 0L; - - final NestedIntegerArray qualTable = recalibrationTables.getQualityScoreTable(); // get the quality score table - - for (final RecalDatum value : qualTable.getAllValues()) { - final RecalDatum datum = value; - final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) - qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key - } - empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities - quantizeQualityScores(quantizationLevels); - - this.quantizationLevels = quantizationLevels; - } - - - public void quantizeQualityScores(int nLevels) { - QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels - quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC) - } - - public void noQuantization() { - this.quantizationLevels = QualityUtils.MAX_SAM_QUAL_SCORE; - for (int i = 0; i < this.quantizationLevels; i++) - quantizedQuals.set(i, (byte) i); - } - - public List getQuantizedQuals() { - return quantizedQuals; - } - - public int getQuantizationLevels() { - return quantizationLevels; - } - - public GATKReportTable generateReportTable(boolean sortByCols) { - GATKReportTable quantizedTable; - if(sortByCols) { - quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN); - } else { - quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); - } - quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME); - 
quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); - quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); - - for (int qual = 0; qual <= QualityUtils.MAX_SAM_QUAL_SCORE; qual++) { - quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual); - quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); - quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); - } - return quantizedTable; - } - - private static int calculateQuantizationLevels(List quantizedQuals) { - byte lastByte = -1; - int quantizationLevels = 0; - for (byte q : quantizedQuals) { - if (q != lastByte) { - quantizationLevels++; - lastByte = q; - } - } - return quantizationLevels; - } -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariates.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariates.java deleted file mode 100644 index 0cc1e5897..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariates.java +++ /dev/null @@ -1,176 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.apache.log4j.Logger; -import org.broadinstitute.gatk.utils.LRUCache; -import org.broadinstitute.gatk.utils.recalibration.EventType; - -/** - * The object temporarily held by a read that describes all of it's covariates. 
- * - * In essence, this is an array of CovariateValues, but it also has some functionality to deal with the optimizations of the NestedHashMap - * - * @author Mauricio Carneiro - * @since 2/8/12 - */ -public class ReadCovariates { - private final static Logger logger = Logger.getLogger(ReadCovariates.class); - - /** - * How big should we let the LRU cache grow - */ - private static final int LRU_CACHE_SIZE = 500; - - /** - * Use an LRU cache to keep cache of keys (int[][][]) arrays for each read length we've seen. - * The cache allows us to avoid the expense of recreating these arrays for every read. The LRU - * keeps the total number of cached arrays to less than LRU_CACHE_SIZE. - * - * This is a thread local variable, so the total memory required may grow to N_THREADS x LRU_CACHE_SIZE - */ - private final static ThreadLocal> keysCache = new ThreadLocal>() { - @Override protected LRUCache initialValue() { - return new LRUCache(LRU_CACHE_SIZE); - } - }; - - /** - * The keys cache is only valid for a single covariate count. Normally this will remain constant for the analysis. - * If running multiple analyses (or the unit test suite), it's necessary to clear the cache. 
- */ - public static void clearKeysCache() { - keysCache.remove(); - } - - /** - * Our keys, indexed by event type x read length x covariate - */ - private final int[][][] keys; - - /** - * The index of the current covariate, used by addCovariate - */ - private int currentCovariateIndex = 0; - - public ReadCovariates(final int readLength, final int numberOfCovariates) { - final LRUCache cache = keysCache.get(); - final int[][][] cachedKeys = cache.get(readLength); - if ( cachedKeys == null ) { - // There's no cached value for read length so we need to create a new int[][][] array - if ( logger.isDebugEnabled() ) logger.debug("Keys cache miss for length " + readLength + " cache size " + cache.size()); - keys = new int[EventType.values().length][readLength][numberOfCovariates]; - cache.put(readLength, keys); - } else { - keys = cachedKeys; - } - } - - public void setCovariateIndex(final int index) { - currentCovariateIndex = index; - } - - /** - * Update the keys for mismatch, insertion, and deletion for the current covariate at read offset - * - * NOTE: no checks are performed on the number of covariates, for performance reasons. If the count increases - * after the keysCache has been accessed, this method will throw an ArrayIndexOutOfBoundsException. This currently - * only occurs in the testing harness, and we don't anticipate that it will become a part of normal runs. 
- * - * @param mismatch the mismatch key value - * @param insertion the insertion key value - * @param deletion the deletion key value - * @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates - */ - public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) { - keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch; - keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion; - keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion; - } - - /** - * Get the keys for all covariates at read position for error model - * - * @param readPosition - * @param errorModel - * @return - */ - public int[] getKeySet(final int readPosition, final EventType errorModel) { - return keys[errorModel.ordinal()][readPosition]; - } - - public int[][] getKeySet(final EventType errorModel) { - return keys[errorModel.ordinal()]; - } - - // ---------------------------------------------------------------------- - // - // routines for testing - // - // ---------------------------------------------------------------------- - - protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); } - protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); } - protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); } - - protected int[] getMismatchesKeySet(final int readPosition) { - return getKeySet(readPosition, EventType.BASE_SUBSTITUTION); - } - - protected int[] getInsertionsKeySet(final int readPosition) { - return getKeySet(readPosition, EventType.BASE_INSERTION); - } - - protected int[] getDeletionsKeySet(final int readPosition) { - return getKeySet(readPosition, EventType.BASE_DELETION); - } -} diff --git 
a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTables.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTables.java deleted file mode 100644 index 0bfcc5b85..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTables.java +++ /dev/null @@ -1,169 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. 
Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import com.google.java.contract.Ensures; -import org.broadinstitute.gatk.utils.collections.LoggingNestedIntegerArray; -import org.broadinstitute.gatk.utils.recalibration.EventType; -import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate; -import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; - -import java.io.PrintStream; -import java.util.ArrayList; - -/** - * Utility class to facilitate on-the-fly base quality score recalibration. 
- * - * User: ebanks - * Date: 6/20/12 - */ - -public final class RecalibrationTables { - public enum TableType { - READ_GROUP_TABLE, - QUALITY_SCORE_TABLE, - OPTIONAL_COVARIATE_TABLES_START; - } - - private final ArrayList> tables; - private final int qualDimension; - private final int eventDimension = EventType.values().length; - private final int numReadGroups; - private final PrintStream log; - - public RecalibrationTables(final Covariate[] covariates) { - this(covariates, covariates[TableType.READ_GROUP_TABLE.ordinal()].maximumKeyValue() + 1, null); - } - - public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) { - this(covariates, numReadGroups, null); - } - - public RecalibrationTables(final Covariate[] covariates, final int numReadGroups, final PrintStream log) { - tables = new ArrayList>(covariates.length); - for ( int i = 0; i < covariates.length; i++ ) - tables.add(i, null); // initialize so we can set below - - qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.ordinal()].maximumKeyValue() + 1; - this.numReadGroups = numReadGroups; - this.log = log; - - tables.set(TableType.READ_GROUP_TABLE.ordinal(), - log == null ? new NestedIntegerArray(numReadGroups, eventDimension) : - new LoggingNestedIntegerArray(log, "READ_GROUP_TABLE", numReadGroups, eventDimension)); - - tables.set(TableType.QUALITY_SCORE_TABLE.ordinal(), makeQualityScoreTable()); - - for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < covariates.length; i++) - tables.set(i, - log == null ? 
new NestedIntegerArray(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) : - new LoggingNestedIntegerArray(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + 1), - numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension)); - } - - @Ensures("result != null") - public NestedIntegerArray getReadGroupTable() { - return getTable(TableType.READ_GROUP_TABLE.ordinal()); - } - - @Ensures("result != null") - public NestedIntegerArray getQualityScoreTable() { - return getTable(TableType.QUALITY_SCORE_TABLE.ordinal()); - } - - @Ensures("result != null") - public NestedIntegerArray getTable(final int index) { - return tables.get(index); - } - - @Ensures("result >= 0") - public int numTables() { - return tables.size(); - } - - /** - * @return true if all the tables contain no RecalDatums - */ - public boolean isEmpty() { - for( final NestedIntegerArray table : tables ) { - if( !table.getAllValues().isEmpty() ) { return false; } - } - return true; - } - - /** - * Allocate a new quality score table, based on requested parameters - * in this set of tables, without any data in it. The return result - * of this table is suitable for acting as a thread-local cache - * for quality score values - * @return a newly allocated, empty read group x quality score table - */ - public NestedIntegerArray makeQualityScoreTable() { - return log == null - ? 
new NestedIntegerArray(numReadGroups, qualDimension, eventDimension) - : new LoggingNestedIntegerArray(log, "QUALITY_SCORE_TABLE", numReadGroups, qualDimension, eventDimension); - } - - /** - * Merge all of the tables from toMerge into into this set of tables - */ - public void combine(final RecalibrationTables toMerge) { - if ( numTables() != toMerge.numTables() ) - throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes"); - - for ( int i = 0; i < numTables(); i++ ) { - final NestedIntegerArray myTable = this.getTable(i); - final NestedIntegerArray otherTable = toMerge.getTable(i); - RecalUtils.combineTables(myTable, otherTable); - } - } -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/Covariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/Covariate.java deleted file mode 100644 index 8b9e6e716..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/Covariate.java +++ /dev/null @@ -1,144 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. 
-* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. 
-* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -import org.broadinstitute.gatk.engine.recalibration.ReadCovariates; -import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Oct 30, 2009 - * - * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read. - * In general most error checking and adjustments to the data are done before the call to the covariates getValue methods in order to speed up the code. - * This unfortunately muddies the code, but most of these corrections can be done per read while the covariates get called per base, resulting in a big speed up. 
- */ - -public interface Covariate { - - /** - * Initialize any member variables using the command-line arguments passed to the walker - * - * @param RAC the recalibration argument collection - */ - public void initialize(final RecalibrationArgumentCollection RAC); - - /** - * Calculates covariate values for all positions in the read. - * - * @param read the read to calculate the covariates on. - * @param values the object to record the covariate values for every base in the read. - */ - public void recordValues(final GATKSAMRecord read, final ReadCovariates values); - - /** - * Used to get the covariate's value from input (Recalibration Report) file during on-the-fly recalibration - * - * @param str the key in string type (read from the csv) - * @return the key in it's correct type. - */ - public Object getValue(final String str); - - /** - * Converts the internal representation of the key to String format for file output. - * - * @param key the long representation of the key - * @return a string representation of the key - */ - public String formatKey(final int key); - - /** - * Converts an Object key into a long key using only the lowest numberOfBits() bits - * - * Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates - * the getValues method already returns all values in long format. 
- * - * @param value the object corresponding to the covariate - * @return a long representation of the object - */ - public int keyFromValue(final Object value); - - /** - * Returns the maximum value possible for any key representing this covariate - * - * @return the maximum value possible for any key representing this covariate - */ - public int maximumKeyValue(); -} - diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ExperimentalCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ExperimentalCovariate.java deleted file mode 100644 index 100fb4705..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ExperimentalCovariate.java +++ /dev/null @@ -1,81 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. 
ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -/** - * [Short one sentence description of this walker] - *

- *

- * [Functionality of this walker] - *

- *

- *

Input

- *

- * [Input description] - *

- *

- *

Output

- *

- * [Output description] - *

- *

- *

Examples

- *
- *    java
- *      -jar GenomeAnalysisTK.jar
- *      -T $WalkerName
- *  
- * - * @author Your Name - * @since Date created - */ -public interface ExperimentalCovariate extends Covariate {} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/QualityScoreCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/QualityScoreCovariate.java deleted file mode 100644 index 29daed4bc..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/QualityScoreCovariate.java +++ /dev/null @@ -1,129 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -import org.broadinstitute.gatk.engine.recalibration.ReadCovariates; -import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection; -import org.broadinstitute.gatk.utils.QualityUtils; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Nov 3, 2009 - * - * The Reported Quality Score covariate. - */ - -public class QualityScoreCovariate implements RequiredCovariate { - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) {} - - @Override - public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { - final byte[] baseQualities = read.getBaseQualities(); - final byte[] baseInsertionQualities = read.getBaseInsertionQualities(); - final byte[] baseDeletionQualities = read.getBaseDeletionQualities(); - - for (int i = 0; i < baseQualities.length; i++) { - values.addCovariate((int)baseQualities[i], (int)baseInsertionQualities[i], (int)baseDeletionQualities[i], i); - } - } - - // Used to get the covariate's value from input csv file during on-the-fly recalibration - @Override - public final Object getValue(final String str) { - return Byte.parseByte(str); - } - - @Override - public String formatKey(final int key) { - return String.format("%d", key); - } - - @Override - public int keyFromValue(final Object value) { - return (value instanceof String) ? 
(int)Byte.parseByte((String) value) : (int)(Byte) value; - } - - @Override - public int maximumKeyValue() { - return QualityUtils.MAX_SAM_QUAL_SCORE; - } -} \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ReadGroupCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ReadGroupCovariate.java deleted file mode 100644 index 0edb73f2e..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ReadGroupCovariate.java +++ /dev/null @@ -1,190 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection; -import org.broadinstitute.gatk.engine.recalibration.ReadCovariates; -import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Oct 30, 2009 - * - * The Read Group covariate. - */ - -public class ReadGroupCovariate implements RequiredCovariate { - - private final HashMap readGroupLookupTable = new HashMap(); - private final HashMap readGroupReverseLookupTable = new HashMap(); - private int nextId = 0; - private String forceReadGroup; - - // Initialize any member variables using the command-line arguments passed to the walkers - @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - forceReadGroup = RAC.FORCE_READGROUP; - } - - @Override - public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { - final String readGroupId = readGroupValueFromRG(read.getReadGroup()); - final int key = keyForReadGroup(readGroupId); - - final int l = read.getReadLength(); - for (int i = 0; i < l; i++) - values.addCovariate(key, key, key, i); - } - - @Override - public final Object getValue(final String str) { - return str; - } - - @Override - public synchronized String formatKey(final int key) { - // This method is synchronized so that we don't attempt to do a get() - // from the reverse lookup table while that table is being updated - return readGroupReverseLookupTable.get(key); - } - - @Override - public int keyFromValue(final Object value) { - return keyForReadGroup((String) value); - } - - /** - * Get the mapping from read group names to integer key values for all read groups in this covariate - * 
@return a set of mappings from read group names -> integer key values - */ - public Set> getKeyMap() { - return readGroupLookupTable.entrySet(); - } - - private int keyForReadGroup(final String readGroupId) { - // Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR), - // synchronize only the table updates. - - // Before entering the synchronized block, check to see if this read group is not in our tables. - // If it's not, either we will have to insert it, OR another thread will insert it first. - // This preliminary check avoids doing any synchronization most of the time. - if ( ! readGroupLookupTable.containsKey(readGroupId) ) { - - synchronized ( this ) { - - // Now we need to make sure the key is STILL not there, since another thread may have come along - // and inserted it while we were waiting to enter this synchronized block! - if ( ! readGroupLookupTable.containsKey(readGroupId) ) { - readGroupLookupTable.put(readGroupId, nextId); - readGroupReverseLookupTable.put(nextId, readGroupId); - nextId++; - } - } - } - - return readGroupLookupTable.get(readGroupId); - } - - @Override - public synchronized int maximumKeyValue() { - // Synchronized so that we don't query table size while the tables are being updated - return readGroupLookupTable.size() - 1; - } - - /** - * If the sample has a PU tag annotation, return that. If not, return the read group id. - * - * @param rg the read group record - * @return platform unit or readgroup id - */ - private String readGroupValueFromRG(final GATKSAMReadGroupRecord rg) { - if ( forceReadGroup != null ) - return forceReadGroup; - - final String platformUnit = rg.getPlatformUnit(); - return platformUnit == null ? 
rg.getId() : platformUnit; - } - -} - - diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatLengthCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatLengthCovariate.java deleted file mode 100644 index b643c052d..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatLengthCovariate.java +++ /dev/null @@ -1,74 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; - -public class RepeatLengthCovariate extends RepeatCovariate { - - @Requires({"repeatLength>=0", "repeatFromUnitAndLength != null"}) - @Ensures("result != null") - protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) { - return String.format("%d",repeatLength); - } - - @Override - public synchronized int maximumKeyValue() { - // Synchronized so that we don't query table size while the tables are being updated - //return repeatLookupTable.size() - 1; - // max possible values of covariate: for repeat unit, length is up to MAX_STR_UNIT_LENGTH, - // so we have 4^MAX_STR_UNIT_LENGTH * MAX_REPEAT_LENGTH possible values - return (1+MAX_REPEAT_LENGTH); - } - -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitAndLengthCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitAndLengthCovariate.java deleted file mode 100644 index 1399f19a7..000000000 --- 
a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitAndLengthCovariate.java +++ /dev/null @@ -1,75 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. 
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. 
Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; - - -public class RepeatUnitAndLengthCovariate extends RepeatCovariate { - - @Requires({"repeatLength>=0", "repeatFromUnitAndLength != null"}) - @Ensures("result != null") - protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) { - return new String(repeatFromUnitAndLength) + String.format("%d",repeatLength); - } - - @Override - public synchronized int maximumKeyValue() { - // Synchronized so that we don't query table size while the tables are being updated - //return repeatLookupTable.size() - 1; - // max possible values of covariate: for repeat unit, length is up to MAX_STR_UNIT_LENGTH, - // so we have 4^MAX_STR_UNIT_LENGTH * MAX_REPEAT_LENGTH possible values - return (1<<(2*MAX_STR_UNIT_LENGTH)) * MAX_REPEAT_LENGTH +1; - } - -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitCovariate.java deleted file mode 100644 index 84b8e9c9a..000000000 --- 
a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitCovariate.java +++ /dev/null @@ -1,78 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. 
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. 
Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -/** - * Created with IntelliJ IDEA. - * User: rpoplin - * Date: 11/3/12 - */ - -public class RepeatUnitCovariate extends RepeatCovariate { - - protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) { - return new String(repeatFromUnitAndLength); - - } - - - @Override - public synchronized int maximumKeyValue() { - // Synchronized so that we don't query table size while the tables are being updated - //return repeatLookupTable.size() - 1; - // max possible values of covariate: for repeat unit, length is up to MAX_STR_UNIT_LENGTH, - // so we have 4^MAX_STR_UNIT_LENGTH * MAX_REPEAT_LENGTH possible values - return (1<<(2*MAX_STR_UNIT_LENGTH)) +1; - } - - -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RequiredCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RequiredCovariate.java deleted file mode 100644 index 45c9a179c..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RequiredCovariate.java +++ /dev/null @@ -1,81 +0,0 @@ -/* -* By downloading the 
PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. 
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. 
Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -/** - * [Short one sentence description of this walker] - *

- *

- * [Functionality of this walker] - *

- *

- *

Input

- *

- * [Input description] - *

- *

- *

Output

- *

- * [Output description] - *

- *

- *

Examples

- *
- *    java
- *      -jar GenomeAnalysisTK.jar
- *      -T $WalkerName
- *  
- * - * @author Your Name - * @since Date created - */ -public interface RequiredCovariate extends Covariate {} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/StandardCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/StandardCovariate.java deleted file mode 100644 index d24e13764..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/StandardCovariate.java +++ /dev/null @@ -1,81 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration.covariates; - -/** - * [Short one sentence description of this walker] - *

- *

- * [Functionality of this walker] - *

- *

- *

Input

- *

- * [Input description] - *

- *

- *

Output

- *

- * [Output description] - *

- *

- *

Examples

- *
- *    java
- *      -jar GenomeAnalysisTK.jar
- *      -T $WalkerName
- *  
- * - * @author Your Name - * @since Date created - */ -public interface StandardCovariate extends Covariate {} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibrationUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibrationUnitTest.java deleted file mode 100644 index a9f23faf1..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibrationUnitTest.java +++ /dev/null @@ -1,116 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.utils.QualityUtils; -import org.broadinstitute.gatk.utils.exceptions.UserException; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -public class BaseRecalibrationUnitTest { - - @Test - public void repeatedAndUnorderedFixedQualities() { - // Test both repeated quals, and quals that aren't input in order - List quantizedQualsOrdered = Arrays.asList(11, 19); - List quantizedQualsUnordered = Arrays.asList(19, 11, 19, 19); - - // Unordered and Ordered qmapping should be identical - byte[] qmappingUnordered = BaseRecalibration.constructStaticQuantizedMapping(quantizedQualsUnordered, true); - byte[] qmappingOrdered = BaseRecalibration.constructStaticQuantizedMapping(quantizedQualsOrdered, true); - Assert.assertEquals(qmappingOrdered.length, qmappingUnordered.length); - for(int i = 0 ; i < qmappingUnordered.length ; i++) { - Assert.assertEquals(qmappingOrdered[i], qmappingUnordered[i]); - } - } - - @Test - public void nearestVsRoundDown() { - List fixedQuantizedQuals = Arrays.asList(10, 20, 30); - - byte[] 
qmappingRoundDown = BaseRecalibration.constructStaticQuantizedMapping(fixedQuantizedQuals, true); - byte[] qmappingRoundNearest = BaseRecalibration.constructStaticQuantizedMapping(fixedQuantizedQuals, false); - - // Depending on rounding strategy, bin 19 should round to 10 or 20 - Assert.assertEquals(qmappingRoundDown[19], 10); - Assert.assertEquals(qmappingRoundNearest[19], 20); - - // Regarless of rounding strategy, bin 21 should always round down to 20 - Assert.assertEquals(qmappingRoundDown[21], 20); - Assert.assertEquals(qmappingRoundNearest[21], 20); - } - - @Test - public void onlyOneFixedQualUsed() { - // Set all qualities to singleQual value (except for those below MIN_USABLE_Q_SCORE) - int singleQual = 10; - List fixedQuantizedQuals = Arrays.asList(singleQual); - - byte[] qmapping = BaseRecalibration.constructStaticQuantizedMapping(fixedQuantizedQuals, true); - - for(int i = 0 ; i < qmapping.length ; i++) { - if(i >= QualityUtils.MIN_USABLE_Q_SCORE) { - Assert.assertEquals(qmapping[i], singleQual); - } - else { - // Make sure that all values less than MIN_USABLE_Q_SCORE are preserved - Assert.assertEquals(qmapping[i], i); - } - } - } -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ContextCovariateUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ContextCovariateUnitTest.java deleted file mode 100644 index 43b69be22..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ContextCovariateUnitTest.java +++ /dev/null @@ -1,121 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. 
with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. 
LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. 
OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. 
BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.engine.recalibration.covariates.ContextCovariate; -import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate; -import org.broadinstitute.gatk.utils.clipping.ClippingRepresentation; -import org.broadinstitute.gatk.utils.clipping.ReadClipper; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.broadinstitute.gatk.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -/** - * @author Mauricio Carneiro - * @since 3/1/12 - */ -public class ContextCovariateUnitTest { - ContextCovariate covariate; - RecalibrationArgumentCollection RAC; - - @BeforeClass - public void init() { - RAC = new RecalibrationArgumentCollection(); - covariate = new ContextCovariate(); - covariate.initialize(RAC); - } - - @BeforeMethod - public void initCache() { - ReadCovariates.clearKeysCache(); - } - - @Test(enabled = true) - public void testSimpleContexts() { - GATKSAMRecord read = ReadUtils.createRandomRead(1000); - GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); - ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); - covariate.recordValues(read, readCovariates); - - verifyCovariateArray(readCovariates.getMismatchesKeySet(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate); - verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate); - verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate); - } - - public static void verifyCovariateArray(int[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { - for (int i = 0; i < values.length; i++) - Assert.assertEquals(contextCovariate.formatKey(values[i][0]), expectedContext(read, i, 
contextSize)); - - } - - public static String expectedContext (GATKSAMRecord read, int offset, int contextSize) { - final String bases = stringFrom(read.getReadBases()); - String expectedContext = null; - if (offset - contextSize + 1 >= 0) { - String context = bases.substring(offset - contextSize + 1, offset + 1); - if (!context.contains("N")) - expectedContext = context; - } - return expectedContext; - } - - private static String stringFrom(byte[] array) { - String s = ""; - for (byte value : array) - s += (char) value; - return s; - } - -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/CycleCovariateUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/CycleCovariateUnitTest.java deleted file mode 100644 index 698bb780c..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/CycleCovariateUnitTest.java +++ /dev/null @@ -1,140 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. 
ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.utils.exceptions.UserException; -import org.broadinstitute.gatk.engine.recalibration.covariates.CycleCovariate; -import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.broadinstitute.gatk.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -/** - * @author Mauricio Carneiro - * @since 3/1/12 - */ -public class CycleCovariateUnitTest { - CycleCovariate covariate; - RecalibrationArgumentCollection RAC; - - @BeforeClass - public void init() { - RAC = new RecalibrationArgumentCollection(); - covariate = new CycleCovariate(); - covariate.initialize(RAC); - } - - @BeforeMethod - public void initCache() { - ReadCovariates.clearKeysCache(); - } - - @Test(enabled = true) - public void testSimpleCycles() { - short readLength = 10; - GATKSAMRecord read = 
ReadUtils.createRandomRead(readLength); - read.setReadPairedFlag(true); - read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); - read.getReadGroup().setPlatform("illumina"); - - ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); - covariate.recordValues(read, readCovariates); - verifyCovariateArray(readCovariates.getMismatchesKeySet(), 1, (short) 1); - - read.setReadNegativeStrandFlag(true); - covariate.recordValues(read, readCovariates); - verifyCovariateArray(readCovariates.getMismatchesKeySet(), readLength, -1); - - read.setSecondOfPairFlag(true); - covariate.recordValues(read, readCovariates); - verifyCovariateArray(readCovariates.getMismatchesKeySet(), -readLength, 1); - - read.setReadNegativeStrandFlag(false); - covariate.recordValues(read, readCovariates); - verifyCovariateArray(readCovariates.getMismatchesKeySet(), -1, -1); - } - - private void verifyCovariateArray(int[][] values, int init, int increment) { - for (short i = 0; i < values.length; i++) { - short actual = Short.decode(covariate.formatKey(values[i][0])); - int expected = init + (increment * i); - Assert.assertEquals(actual, expected); - } - } - - @Test(enabled = true, expectedExceptions={UserException.class}) - public void testMoreThanMaxCycleFails() { - int readLength = RAC.MAXIMUM_CYCLE_VALUE + 1; - GATKSAMRecord read = ReadUtils.createRandomRead(readLength); - read.setReadPairedFlag(true); - read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); - read.getReadGroup().setPlatform("illumina"); - - ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); - covariate.recordValues(read, readCovariates); - } - - @Test(enabled = true) - public void testMaxCyclePasses() { - int readLength = RAC.MAXIMUM_CYCLE_VALUE; - GATKSAMRecord read = ReadUtils.createRandomRead(readLength); - read.setReadPairedFlag(true); - read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); - read.getReadGroup().setPlatform("illumina"); - - ReadCovariates readCovariates = new 
ReadCovariates(read.getReadLength(), 1); - covariate.recordValues(read, readCovariates); - } -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizerUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizerUnitTest.java deleted file mode 100644 index fe5359ab0..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizerUnitTest.java +++ /dev/null @@ -1,195 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - - -// the imports for unit testing. - - -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.utils.QualityUtils; -import org.broadinstitute.gatk.utils.Utils; -import org.testng.Assert; -import org.testng.annotations.BeforeSuite; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - - -public class QualQuantizerUnitTest extends BaseTest { - @BeforeSuite - public void before() { - - } - - // -------------------------------------------------------------------------------- - // - // merge case Provider - // - // -------------------------------------------------------------------------------- - - private class QualIntervalTestProvider extends TestDataProvider { - final QualQuantizer.QualInterval left, right; - int exError, exTotal, exQual; - double exErrorRate; - - private QualIntervalTestProvider(int leftE, int leftN, int rightE, int rightN, int exError, int exTotal) { - super(QualIntervalTestProvider.class); - - QualQuantizer qq = new QualQuantizer(0); - left = qq.new QualInterval(10, 10, leftN, leftE, 0); - right = qq.new QualInterval(11, 11, rightN, rightE, 0); - - this.exError = 
exError; - this.exTotal = exTotal; - this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1)); - this.exQual = QualityUtils.errorProbToQual(this.exErrorRate); - } - } - - @DataProvider(name = "QualIntervalTestProvider") - public Object[][] makeQualIntervalTestProvider() { - new QualIntervalTestProvider(10, 100, 10, 1000, 20, 1100); - new QualIntervalTestProvider(0, 100, 10, 900, 10, 1000); - new QualIntervalTestProvider(10, 900, 0, 100, 10, 1000); - new QualIntervalTestProvider(0, 0, 10, 100, 10, 100); - new QualIntervalTestProvider(1, 10, 9, 90, 10, 100); - new QualIntervalTestProvider(1, 10, 9, 100000, 10, 100010); - new QualIntervalTestProvider(1, 10, 9, 1000000, 10,1000010); - - return QualIntervalTestProvider.getTests(QualIntervalTestProvider.class); - } - - @Test(dataProvider = "QualIntervalTestProvider") - public void testQualInterval(QualIntervalTestProvider cfg) { - QualQuantizer.QualInterval merged = cfg.left.merge(cfg.right); - Assert.assertEquals(merged.nErrors, cfg.exError); - Assert.assertEquals(merged.nObservations, cfg.exTotal); - Assert.assertEquals(merged.getErrorRate(), cfg.exErrorRate); - Assert.assertEquals(merged.getQual(), cfg.exQual); - } - - @Test - public void testMinInterestingQual() { - for ( int q = 0; q < 15; q++ ) { - for ( int minQual = 0; minQual <= 10; minQual ++ ) { - QualQuantizer qq = new QualQuantizer(minQual); - QualQuantizer.QualInterval left = qq.new QualInterval(q, q, 100, 10, 0); - QualQuantizer.QualInterval right = qq.new QualInterval(q+1, q+1, 1000, 100, 0); - - QualQuantizer.QualInterval merged = left.merge(right); - boolean shouldBeFree = q+1 <= minQual; - if ( shouldBeFree ) - Assert.assertEquals(merged.getPenalty(), 0.0); - else - Assert.assertTrue(merged.getPenalty() > 0.0); - } - } - } - - - // -------------------------------------------------------------------------------- - // - // High-level case Provider - // - // -------------------------------------------------------------------------------- - - 
private class QuantizerTestProvider extends TestDataProvider { - final List nObservationsPerQual = new ArrayList(); - final int nLevels; - final List expectedMap; - - private QuantizerTestProvider(final List nObservationsPerQual, final int nLevels, final List expectedMap) { - super(QuantizerTestProvider.class); - - for ( int x : nObservationsPerQual ) - this.nObservationsPerQual.add((long)x); - this.nLevels = nLevels; - this.expectedMap = expectedMap; - } - - @Override - public String toString() { - return String.format("QQTest nLevels=%d nObs=[%s] map=[%s]", - nLevels, Utils.join(",", nObservationsPerQual), Utils.join(",", expectedMap)); - } - } - - @DataProvider(name = "QuantizerTestProvider") - public Object[][] makeQuantizerTestProvider() { - List allQ2 = Arrays.asList(0, 0, 1000, 0, 0); - - new QuantizerTestProvider(allQ2, 5, Arrays.asList(0, 1, 2, 3, 4)); - new QuantizerTestProvider(allQ2, 1, Arrays.asList(2, 2, 2, 2, 2)); - - new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 0, 1000), 2, Arrays.asList(2, 2, 2, 2, 4)); - new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 1, 1000), 2, Arrays.asList(2, 2, 2, 4, 4)); - new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 10, 1000), 2, Arrays.asList(2, 2, 2, 2, 4)); - - return QuantizerTestProvider.getTests(QuantizerTestProvider.class); - } - - @Test(dataProvider = "QuantizerTestProvider", enabled = true) - public void testQuantizer(QuantizerTestProvider cfg) { - QualQuantizer qq = new QualQuantizer(cfg.nObservationsPerQual, cfg.nLevels, 0); - logger.warn("cfg: " + cfg); - for ( int i = 0; i < cfg.expectedMap.size(); i++) { - int expected = cfg.expectedMap.get(i); - int observed = qq.originalToQuantizedMap.get(i); - //logger.warn(String.format(" qq map: %s : %d => %d", i, expected, observed)); - Assert.assertEquals(observed, expected); - } - } -} \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariatesUnitTest.java 
b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariatesUnitTest.java deleted file mode 100644 index 062abf767..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariatesUnitTest.java +++ /dev/null @@ -1,148 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. 
Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.engine.recalibration.covariates.*; -import org.broadinstitute.gatk.utils.Utils; -import org.broadinstitute.gatk.utils.recalibration.EventType; -import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.broadinstitute.gatk.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import java.util.Random; - -/** - * @author carneiro - * @since 4/21/12 - */ -public class ReadCovariatesUnitTest { - - @BeforeMethod - public void init() { - ReadCovariates.clearKeysCache(); - } - - @Test(enabled = false) - public void testCovariateGeneration() { - final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - final String RGID = "id"; - - ReadGroupCovariate rgCov = new ReadGroupCovariate(); - QualityScoreCovariate qsCov = new QualityScoreCovariate(); - ContextCovariate coCov = new ContextCovariate(); - CycleCovariate cyCov = new CycleCovariate(); - - rgCov.initialize(RAC); - qsCov.initialize(RAC); - coCov.initialize(RAC); - cyCov.initialize(RAC); - - Covariate[] requestedCovariates = new Covariate[4]; - requestedCovariates[0] 
= rgCov; - requestedCovariates[1] = qsCov; - requestedCovariates[2] = coCov; - requestedCovariates[3] = cyCov; - - final int NUM_READS = 100; - final Random rnd = Utils.getRandomGenerator(); - - final String[] readGroups = {"RG1", "RG2", "RGbla"}; - for (int idx = 0; idx < NUM_READS; idx++) { - for (final String rgs : readGroups) { - final int length = 10 + rnd.nextInt(100); // random read length, at least 10 bp long - final GATKSAMRecord read = ReadUtils.createRandomRead(length, false); - final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(rgs); - rg.setPlatform("illumina"); - read.setReadGroup(rg); - read.setReadNegativeStrandFlag(rnd.nextBoolean()); - final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION); - final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION); - final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION); - ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); - - // check that the length is correct - Assert.assertEquals(rc.getMismatchesKeySet().length, length); - Assert.assertEquals(rc.getInsertionsKeySet().length, length); - Assert.assertEquals(rc.getDeletionsKeySet().length, length); - - for (int i = 0; i < length; i++) { - // check that read group is always the same - Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), rgs); - Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), rgs); - Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), rgs); - - // check quality score - Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]); - Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]); - Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]); - - // check context - Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE)); - 
Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); - Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); - - // check cycle - Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1)); - Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1)); - Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1)); - } - - } - - } - - } - -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadGroupCovariateUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadGroupCovariateUnitTest.java deleted file mode 100644 index a4029e297..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadGroupCovariateUnitTest.java +++ /dev/null @@ -1,125 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. 
-* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. 
-* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.engine.recalibration.covariates.ReadGroupCovariate; -import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.broadinstitute.gatk.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -/** - * @author Mauricio Carneiro - * @since 3/1/12 - */ -public class ReadGroupCovariateUnitTest { - ReadGroupCovariate covariate; - RecalibrationArgumentCollection RAC; - - @BeforeClass - public void init() { - RAC = new RecalibrationArgumentCollection(); - covariate = new ReadGroupCovariate(); - covariate.initialize(RAC); - } - - @BeforeMethod - public void initCache() { - ReadCovariates.clearKeysCache(); - } - - @Test(enabled = true) - public void testSingleRecord() { - final String expected = "SAMPLE.1"; - GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID"); - rg.setPlatformUnit(expected); - runTest(rg, expected, covariate); - } - - @Test(enabled = true) - public void testMissingPlatformUnit() { - final String expected = "MY.7"; - GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(expected); - runTest(rg, expected, covariate); - } - - @Test(enabled = true) - public void testForceReadgroup() { - final RecalibrationArgumentCollection forcedRAC = new RecalibrationArgumentCollection(); - forcedRAC.FORCE_READGROUP = "FOO"; - final ReadGroupCovariate forcedCovariate = new ReadGroupCovariate(); - forcedCovariate.initialize(forcedRAC); - - final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("NOT_FOO"); - runTest(rg, "FOO", forcedCovariate); - } - - private static void runTest(final GATKSAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) { - GATKSAMRecord read = ReadUtils.createRandomRead(10); - read.setReadGroup(rg); - ReadCovariates 
readCovariates = new ReadCovariates(read.getReadLength(), 1); - covariate.recordValues(read, readCovariates); - verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected, covariate); - - } - - private static void verifyCovariateArray(final int[][] values, final String expected, final ReadGroupCovariate covariate) { - for (int[] value : values) { - String actual = covariate.formatKey(value[0]); - Assert.assertEquals(actual, expected); - } - } - -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalUtilsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalUtilsUnitTest.java deleted file mode 100644 index 48270cd8f..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalUtilsUnitTest.java +++ /dev/null @@ -1,178 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. 
ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.utils.Utils; -import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; - -public final class RecalUtilsUnitTest extends BaseTest { - private class Row { - int rg, qual, ne, no; - - private Row(final Row copy) { - this(copy.rg, copy.qual, copy.ne, copy.no); - } - - private Row(int rg, int qual, int ne, int no) { - this.rg = rg; - this.qual = qual; - this.ne = ne; - this.no = no; - } - - @Override - public String toString() { - return "Row{" + - "" + rg + - ", " + qual + - ", " + ne + - ", " + no + - '}'; - } - } - - @DataProvider(name = "CombineTablesProvider") - public Object[][] createCombineTablesProvider() { - List tests = new ArrayList(); - - final List rows = new ArrayList(); - for ( 
final int rg : Arrays.asList(0, 1) ) { - for ( final int qual : Arrays.asList(0, 1) ) { - rows.add(new Row(rg, qual, 1, 10)); - } - } - - logger.warn("Number of rows " + rows.size()); - - List> permutations = new LinkedList>(); - permutations.addAll(Utils.makePermutations(rows, 1, false)); - permutations.addAll(Utils.makePermutations(rows, 2, false)); - permutations.addAll(Utils.makePermutations(rows, 3, false)); - - // adding 1 row to 2 - for ( final List table1 : permutations ) { - for ( final Row table2 : rows ) { - tests.add(new Object[]{table1, Arrays.asList(table2)}); - } - } - - // adding 2 rows to 1 - for ( final List table1 : permutations ) { - for ( final Row table2 : rows ) { - tests.add(new Object[]{Arrays.asList(table2), table1}); - } - } - - for ( final List table1 : permutations ) { - for ( final List table2 : permutations ) { - tests.add(new Object[]{table1, table2}); - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(dataProvider = "CombineTablesProvider") - public void testCombineTables(final List table1, final List table2) { - final NestedIntegerArray nia1 = makeTable(table1); - final NestedIntegerArray nia2 = makeTable(table2); - final List expectedRows = makeExpected(table1, table2); - final NestedIntegerArray expected = makeTable(expectedRows); - RecalUtils.combineTables(nia1, nia2); - - Assert.assertEquals(nia1.getDimensions(), expected.getDimensions()); - Assert.assertEquals(nia1.getAllValues().size(), expected.getAllValues().size()); - - for ( final NestedIntegerArray.Leaf leaf : expected.getAllLeaves() ) { - final RecalDatum actual = nia1.get(leaf.keys); - Assert.assertEquals(actual.getNumMismatches(), leaf.value.getNumMismatches()); - Assert.assertEquals(actual.getNumObservations(), leaf.value.getNumObservations()); - } - } - - public List makeExpected(final List table1, final List table2) { - final List combined = new LinkedList(); - for ( final Row t1 : table1 ) combined.add(new Row(t1)); - for ( final Row t2 : table2 ) { 
- combine(combined, t2); - } - return combined; - } - - private void combine(final List combined, final Row row) { - for ( final Row c : combined ) { - if ( c.rg == row.rg && c.qual == row.qual ) { - c.ne += row.ne; - c.no += row.no; - return; - } - } - - combined.add(new Row(row)); - } - - public NestedIntegerArray makeTable(final List rows) { - final NestedIntegerArray x = new NestedIntegerArray(3, 3); - for ( final Row r : rows ) - x.put(new RecalDatum((long)r.no, (double)r.ne, (byte)10), r.rg, r.qual); - return x; - } -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReportUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReportUnitTest.java deleted file mode 100644 index 334065caa..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReportUnitTest.java +++ /dev/null @@ -1,176 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. 
ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.engine.recalibration.covariates.*; -import org.broadinstitute.gatk.utils.recalibration.EventType; -import org.broadinstitute.gatk.utils.QualityUtils; -import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; -import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.broadinstitute.gatk.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import java.util.*; - -/** - * @author carneiro - * @since 4/21/12 - */ -public class RecalibrationReportUnitTest { - @BeforeMethod - public void init() { - ReadCovariates.clearKeysCache(); - } - - private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { - final Random random = new Random(); - final int nObservations = random.nextInt(maxObservations); - final int nErrors = Math.min(random.nextInt(maxErrors), nObservations); - final int qual 
= random.nextInt(QualityUtils.MAX_SAM_QUAL_SCORE); - return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual); - } - - @Test - public void testOutput() { - final int length = 100; - - List quals = new ArrayList(QualityUtils.MAX_SAM_QUAL_SCORE + 1); - List counts = new ArrayList(QualityUtils.MAX_SAM_QUAL_SCORE + 1); - - for (int i = 0; i<= QualityUtils.MAX_SAM_QUAL_SCORE; i++) { - quals.add((byte) i); - counts.add(1L); - } - - final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts); - final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - - quantizationInfo.noQuantization(); - final List requiredCovariates = new LinkedList(); - final List optionalCovariates = new LinkedList(); - - final ReadGroupCovariate rgCovariate = new ReadGroupCovariate(); - rgCovariate.initialize(RAC); - requiredCovariates.add(rgCovariate); - - final QualityScoreCovariate qsCovariate = new QualityScoreCovariate(); - qsCovariate.initialize(RAC); - requiredCovariates.add(qsCovariate); - - final ContextCovariate cxCovariate = new ContextCovariate(); - cxCovariate.initialize(RAC); - optionalCovariates.add(cxCovariate); - final CycleCovariate cyCovariate = new CycleCovariate(); - cyCovariate.initialize(RAC); - optionalCovariates.add(cyCovariate); - - final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; - int covariateIndex = 0; - for (final Covariate cov : requiredCovariates) - requestedCovariates[covariateIndex++] = cov; - for (final Covariate cov : optionalCovariates) - requestedCovariates[covariateIndex++] = cov; - - final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("id"); - rg.setPlatform("illumina"); - final GATKSAMRecord read = ReadUtils.createRandomRead(length, false); - read.setReadGroup(rg); - final byte [] readQuals = new byte[length]; - for (int i = 0; i < length; i++) - readQuals[i] = 20; - read.setBaseQualities(readQuals); - - final int expectedKeys = 
expectedNumberOfKeys(length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE); - int nKeys = 0; // keep track of how many keys were produced - final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); - - final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates); - final NestedIntegerArray rgTable = recalibrationTables.getReadGroupTable(); - final NestedIntegerArray qualTable = recalibrationTables.getQualityScoreTable(); - - for (int offset = 0; offset < length; offset++) { - - for (EventType errorMode : EventType.values()) { - - final int[] covariates = rc.getKeySet(offset, errorMode); - final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000; - - rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal()); - qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal()); - nKeys += 2; - for (int j = 0; j < optionalCovariates.size(); j++) { - final NestedIntegerArray covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j); - final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j]; - if ( covValue >= 0 ) { - covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal()); - nKeys++; - } - } - } - } - Assert.assertEquals(nKeys, expectedKeys); - } - - private static int expectedNumberOfKeys (int readLength, int indelContextSize, int mismatchesContextSize) { - final int numCovariates = 4; - final int numTables = 3; - final int mismatchContextPadding = mismatchesContextSize - 1; - final int indelContextPadding = 2 * (indelContextSize - 1); - final int indelCyclePadding = 2 * (2 * CycleCovariate.CUSHION_FOR_INDELS); - - return (numCovariates * numTables * readLength) - mismatchContextPadding - indelContextPadding - indelCyclePadding; - } - -} diff --git 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTablesUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTablesUnitTest.java deleted file mode 100644 index 32055c5cc..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTablesUnitTest.java +++ /dev/null @@ -1,203 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. 
Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate; -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; -import org.broadinstitute.gatk.utils.recalibration.EventType; -import org.testng.Assert; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.List; - -public final class RecalibrationTablesUnitTest extends BaseTest { - private RecalibrationTables tables; - private Covariate[] covariates; - private int numReadGroups = 6; - final byte qualByte = 1; - final List combineStates = Arrays.asList(0, 1, 2); - - @BeforeMethod - private void makeTables() { - covariates = RecalibrationTestUtils.makeInitializedStandardCovariates(); - tables = new RecalibrationTables(covariates, numReadGroups); - fillTable(tables); - } - - private void fillTable(final RecalibrationTables tables) { - for ( int iterations = 0; iterations < 10; iterations++ ) { - for ( final EventType et : EventType.values() ) { - for ( final int rg : combineStates) { - final double error = rg % 2 == 0 ? 
1 : 0; - RecalUtils.incrementDatumOrPutIfNecessary(tables.getReadGroupTable(), qualByte, error, rg, et.ordinal()); - for ( final int qual : combineStates) { - RecalUtils.incrementDatumOrPutIfNecessary(tables.getQualityScoreTable(), qualByte, error, rg, qual, et.ordinal()); - for ( final int cycle : combineStates) - RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(2), qualByte, error, rg, qual, cycle, et.ordinal()); - for ( final int context : combineStates) - RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(3), qualByte, error, rg, qual, context, et.ordinal()); - } - } - } - } - } - - @Test - public void basicTest() { - final Covariate qualCov = covariates[1]; - final Covariate cycleCov = covariates[2]; - final Covariate contextCov = covariates[3]; - - Assert.assertEquals(tables.numTables(), covariates.length); - - Assert.assertNotNull(tables.getReadGroupTable()); - Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal())); - testDimensions(tables.getReadGroupTable(), numReadGroups); - - Assert.assertNotNull(tables.getQualityScoreTable()); - Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal())); - testDimensions(tables.getQualityScoreTable(), numReadGroups, qualCov.maximumKeyValue() + 1); - - Assert.assertNotNull(tables.getTable(2)); - testDimensions(tables.getTable(2), numReadGroups, qualCov.maximumKeyValue() + 1, cycleCov.maximumKeyValue() + 1); - - Assert.assertNotNull(tables.getTable(3)); - testDimensions(tables.getTable(3), numReadGroups, qualCov.maximumKeyValue() + 1, contextCov.maximumKeyValue() + 1); - } - - private void testDimensions(final NestedIntegerArray table, final int ... 
dimensions) { - final int[] dim = new int[dimensions.length+1]; - System.arraycopy(dimensions, 0, dim, 0, dimensions.length); - dim[dimensions.length] = EventType.values().length; - Assert.assertEquals(table.getDimensions().length, dim.length); - - for ( int i = 0; i < dim.length; i++ ) { - Assert.assertEquals(table.getDimensions()[i], dim[i], "Table dimensions not expected at dim " + i); - } - } - - @Test - public void basicMakeQualityScoreTable() { - final Covariate qualCov = covariates[1]; - final NestedIntegerArray copy = tables.makeQualityScoreTable(); - testDimensions(copy, numReadGroups, qualCov.maximumKeyValue()+1); - Assert.assertEquals(copy.getAllValues().size(), 0); - } - - @Test - public void testCombine1() { - final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups); - fillTable(merged); - - merged.combine(tables); - - for ( int i = 0; i < tables.numTables(); i++ ) { - NestedIntegerArray table = tables.getTable(i); - NestedIntegerArray mergedTable = merged.getTable(i); - - Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size()); - for ( final NestedIntegerArray.Leaf leaf : table.getAllLeaves() ) { - final RecalDatum mergedValue = mergedTable.get(leaf.keys); - Assert.assertNotNull(mergedValue); - Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() * 2); - Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() * 2); - } - } - } - - @Test - public void testCombineEmptyOther() { - final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups); - - merged.combine(tables); - - for ( int i = 0; i < tables.numTables(); i++ ) { - NestedIntegerArray table = tables.getTable(i); - NestedIntegerArray mergedTable = merged.getTable(i); - - Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size()); - for ( final NestedIntegerArray.Leaf leaf : table.getAllLeaves() ) { - final RecalDatum mergedValue = 
mergedTable.get(leaf.keys); - Assert.assertNotNull(mergedValue); - Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations()); - Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches()); - } - } - } - - @Test - public void testCombinePartial() { - final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups); - for ( final int rg : combineStates) { - RecalUtils.incrementDatumOrPutIfNecessary(merged.getTable(3), qualByte, 1, rg, 0, 0, 0); - } - - merged.combine(tables); - for ( int i = 0; i < tables.numTables(); i++ ) { - NestedIntegerArray table = tables.getTable(i); - NestedIntegerArray mergedTable = merged.getTable(i); - - Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size()); - for ( final NestedIntegerArray.Leaf leaf : table.getAllLeaves() ) { - final RecalDatum mergedValue = mergedTable.get(leaf.keys); - Assert.assertNotNull(mergedValue); - - final int delta = i == 3 && leaf.keys[1] == 0 && leaf.keys[2] == 0 && leaf.keys[3] == 0 ? 1 : 0; - Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() + delta); - Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() + delta); - } - } - } -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTestUtils.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTestUtils.java deleted file mode 100644 index a47c11c4e..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTestUtils.java +++ /dev/null @@ -1,74 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. 
with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. 
LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. 
OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. 
BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.engine.recalibration; - -import org.broadinstitute.gatk.engine.recalibration.covariates.*; - -/** - * Created with IntelliJ IDEA. - * User: depristo - * Date: 12/23/12 - * Time: 1:06 PM - * To change this template use File | Settings | File Templates. 
- */ -public class RecalibrationTestUtils { - public static Covariate[] makeInitializedStandardCovariates() { - final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - final Covariate[] covariates = new Covariate[4]; - covariates[0] = new ReadGroupCovariate(); - covariates[1] = new QualityScoreCovariate(); - covariates[2] = new ContextCovariate(); - covariates[3] = new CycleCovariate(); - for ( Covariate cov : covariates ) cov.initialize(RAC); - return covariates; - } -} diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRGatherer.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRGatherer.java new file mode 100644 index 000000000..134106db7 --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRGatherer.java @@ -0,0 +1,112 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.apache.commons.collections.CollectionUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.gatk.utils.commandline.Gatherer; +import org.broadinstitute.gatk.utils.report.GATKReport; +import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; +import org.broadinstitute.gatk.utils.exceptions.UserException; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.*; + +/** + * User: carneiro + * Date: 3/29/11 + */ + + +public class BQSRGatherer extends Gatherer { + + private static final Logger logger = Logger.getLogger(BQSRGatherer.class); + private static final String EMPTY_INPUT_LIST = "list of inputs files is empty or there is no usable data in any input file"; + private static final String MISSING_OUTPUT_FILE = "missing output file name"; + private static final String MISSING_READ_GROUPS = "Missing read group(s)"; + + @Override + public void gather(final List inputs, final File output) { + final PrintStream outputFile; + try { + outputFile = new PrintStream(output); + } catch(FileNotFoundException e) { + throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE); + } + final GATKReport report = gatherReport(inputs); + report.print(outputFile); + } + + /** + * Gathers the input recalibration reports into a single report. 
+ * + * @param inputs Input recalibration GATK reports + * @return gathered recalibration GATK report + */ + public static GATKReport gatherReport(final List inputs) { + final SortedSet allReadGroups = new TreeSet(); + final LinkedHashMap> inputReadGroups = new LinkedHashMap>(); + + // Get the read groups from each input report + for (final File input : inputs) { + final Set readGroups = RecalibrationReport.getReadGroups(input); + inputReadGroups.put(input, readGroups); + allReadGroups.addAll(readGroups); + } + + // Log the read groups that are missing from specific inputs + for (Map.Entry> entry: inputReadGroups.entrySet()) { + final File input = entry.getKey(); + final Set readGroups = entry.getValue(); + if (allReadGroups.size() != readGroups.size()) { + // Since this is not completely unexpected, more than debug, but less than a proper warning. + logger.info(MISSING_READ_GROUPS + ": " + input.getAbsolutePath()); + for (final Object readGroup: CollectionUtils.subtract(allReadGroups, readGroups)) { + logger.info(" " + readGroup); + } + } + } + + RecalibrationReport generalReport = null; + for (File input : inputs) { + final RecalibrationReport inputReport = new RecalibrationReport(input, allReadGroups); + if( inputReport.isEmpty() ) { continue; } + + if (generalReport == null) + generalReport = inputReport; + else + generalReport.combine(inputReport); + } + if (generalReport == null) + throw new ReviewedGATKException(EMPTY_INPUT_LIST); + + generalReport.calculateQuantizedQualities(); + + return generalReport.createGATKReport(); + } +} diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRReadTransformer.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRReadTransformer.java new file mode 100644 index 000000000..9aaae3b69 --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BQSRReadTransformer.java @@ -0,0 +1,78 @@ +/* +* Copyright 2012-2016 
Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.engine.WalkerManager; +import org.broadinstitute.gatk.engine.iterators.ReadTransformer; +import org.broadinstitute.gatk.engine.walkers.Walker; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; + +/** + * A ReadTransformer that applies BQSR on the fly to reads + * + * User: rpoplin + * Date: 2/13/12 + */ +public class BQSRReadTransformer extends ReadTransformer { + private boolean enabled; + private BaseRecalibration bqsr = null; + + @Override + public OrderingConstraint getOrderingConstraint() { return OrderingConstraint.MUST_BE_FIRST; } + + @Override + public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { + this.enabled = engine.hasBQSRArgumentSet(); + if ( enabled ) { + // TODO -- See important note below about applying BQSR to a reduced BAM file: + // If it is important to make sure that BQSR is not applied (as opposed to having the covariates computed) against a reduced bam file, + // we need to figure out how to make this work. The problem is that the ReadTransformers are initialized before the ReadDataSource + // inside the GenomeAnalysisEngine, so we generate a NPE when trying to retrieve the SAMFileHeaders. Ultimately, I don't think this is + // a necessary check anyways since we disallow running BaseRecalibrator on reduced bams (so we can't generate the recal tables to use here). + // Although we could add this check to the apply() method below, it's kind of ugly and inefficient. 
+ // The call here would be: RecalUtils.checkForInvalidRecalBams(engine.getSAMFileHeaders(), engine.getArguments().ALLOW_BQSR_ON_REDUCED_BAMS); + final BQSRArgumentSet args = engine.getBQSRArgumentSet(); + this.bqsr = new BaseRecalibration(args.getRecalFile(), args.getQuantizationLevels(), args.shouldDisableIndelQuals(), args.getPreserveQscoresLessThan(), args.shouldEmitOriginalQuals(), args.getGlobalQScorePrior(), args.getStaticQuantizedQuals(), args.getRoundDown()); + } + final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class); + return mode.ApplicationTime(); + } + + @Override + public boolean enabled() { + return enabled; + } + + /** + * initialize a new BQSRReadTransformer that applies BQSR on the fly to incoming reads. + */ + @Override + public GATKSAMRecord apply(GATKSAMRecord read) { + bqsr.recalibrateRead(read); + return read; + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibration.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibration.java similarity index 57% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibration.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibration.java index a40320421..f5c4381c1 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibration.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibration.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. 
with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. 
LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. 
OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. 
+* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.engine.recalibration; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizer.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizer.java similarity index 66% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizer.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizer.java index 130776437..8799c2d7e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizer.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizer.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.engine.recalibration; diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/QuantizationInfo.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/QuantizationInfo.java new file mode 100644 index 000000000..77eace868 --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/QuantizationInfo.java @@ -0,0 +1,125 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.utils.report.GATKReportTable; +import org.broadinstitute.gatk.utils.MathUtils; +import org.broadinstitute.gatk.utils.QualityUtils; +import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; + +import java.util.Arrays; +import java.util.List; + +/** + * Class that encapsulates the information necessary for quality score quantization for BQSR + * + * @author carneiro + * @since 3/26/12 + */ +public class QuantizationInfo { + private List quantizedQuals; + private List empiricalQualCounts; + private int quantizationLevels; + + private QuantizationInfo(List quantizedQuals, List empiricalQualCounts, int quantizationLevels) { + this.quantizedQuals = quantizedQuals; + this.empiricalQualCounts = empiricalQualCounts; + this.quantizationLevels = quantizationLevels; + } + + public QuantizationInfo(List quantizedQuals, List empiricalQualCounts) { + this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals)); + } + + public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) { + final Long [] qualHistogram = new Long[QualityUtils.MAX_SAM_QUAL_SCORE +1]; // create a histogram with the empirical quality distribution + for (int i = 0; i < qualHistogram.length; i++) + qualHistogram[i] = 0L; + + final NestedIntegerArray qualTable = recalibrationTables.getQualityScoreTable(); // get the quality score table + + for (final RecalDatum value : qualTable.getAllValues()) { + final RecalDatum datum = value; + final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) + qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key + } + empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities + 
quantizeQualityScores(quantizationLevels); + + this.quantizationLevels = quantizationLevels; + } + + + public void quantizeQualityScores(int nLevels) { + QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels + quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC) + } + + public void noQuantization() { + this.quantizationLevels = QualityUtils.MAX_SAM_QUAL_SCORE; + for (int i = 0; i < this.quantizationLevels; i++) + quantizedQuals.set(i, (byte) i); + } + + public List getQuantizedQuals() { + return quantizedQuals; + } + + public int getQuantizationLevels() { + return quantizationLevels; + } + + public GATKReportTable generateReportTable(boolean sortByCols) { + GATKReportTable quantizedTable; + if(sortByCols) { + quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN); + } else { + quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); + } + quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME); + quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); + quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); + + for (int qual = 0; qual <= QualityUtils.MAX_SAM_QUAL_SCORE; qual++) { + quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual); + quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); + quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); + } + return quantizedTable; + } + + private static int calculateQuantizationLevels(List quantizedQuals) { + byte lastByte = -1; + int quantizationLevels = 0; + for (byte q : quantizedQuals) { + if (q != lastByte) { + quantizationLevels++; + 
lastByte = q; + } + } + return quantizationLevels; + } +} diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariates.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariates.java new file mode 100644 index 000000000..6e866f973 --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariates.java @@ -0,0 +1,150 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.apache.log4j.Logger; +import org.broadinstitute.gatk.utils.LRUCache; +import org.broadinstitute.gatk.utils.recalibration.EventType; + +/** + * The object temporarily held by a read that describes all of it's covariates. 
+ * + * In essence, this is an array of CovariateValues, but it also has some functionality to deal with the optimizations of the NestedHashMap + * + * @author Mauricio Carneiro + * @since 2/8/12 + */ +public class ReadCovariates { + private final static Logger logger = Logger.getLogger(ReadCovariates.class); + + /** + * How big should we let the LRU cache grow + */ + private static final int LRU_CACHE_SIZE = 500; + + /** + * Use an LRU cache to keep cache of keys (int[][][]) arrays for each read length we've seen. + * The cache allows us to avoid the expense of recreating these arrays for every read. The LRU + * keeps the total number of cached arrays to less than LRU_CACHE_SIZE. + * + * This is a thread local variable, so the total memory required may grow to N_THREADS x LRU_CACHE_SIZE + */ + private final static ThreadLocal> keysCache = new ThreadLocal>() { + @Override protected LRUCache initialValue() { + return new LRUCache(LRU_CACHE_SIZE); + } + }; + + /** + * The keys cache is only valid for a single covariate count. Normally this will remain constant for the analysis. + * If running multiple analyses (or the unit test suite), it's necessary to clear the cache. 
+ */ + public static void clearKeysCache() { + keysCache.remove(); + } + + /** + * Our keys, indexed by event type x read length x covariate + */ + private final int[][][] keys; + + /** + * The index of the current covariate, used by addCovariate + */ + private int currentCovariateIndex = 0; + + public ReadCovariates(final int readLength, final int numberOfCovariates) { + final LRUCache cache = keysCache.get(); + final int[][][] cachedKeys = cache.get(readLength); + if ( cachedKeys == null ) { + // There's no cached value for read length so we need to create a new int[][][] array + if ( logger.isDebugEnabled() ) logger.debug("Keys cache miss for length " + readLength + " cache size " + cache.size()); + keys = new int[EventType.values().length][readLength][numberOfCovariates]; + cache.put(readLength, keys); + } else { + keys = cachedKeys; + } + } + + public void setCovariateIndex(final int index) { + currentCovariateIndex = index; + } + + /** + * Update the keys for mismatch, insertion, and deletion for the current covariate at read offset + * + * NOTE: no checks are performed on the number of covariates, for performance reasons. If the count increases + * after the keysCache has been accessed, this method will throw an ArrayIndexOutOfBoundsException. This currently + * only occurs in the testing harness, and we don't anticipate that it will become a part of normal runs. 
+ * + * @param mismatch the mismatch key value + * @param insertion the insertion key value + * @param deletion the deletion key value + * @param readOffset the read offset, must be >= 0 and < the read length used to create this ReadCovariates + */ + public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) { + keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch; + keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion; + keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion; + } + + /** + * Get the keys for all covariates at read position for error model + * + * @param readPosition the offset into the read + * @param errorModel the event type whose keys are requested + * @return the array of covariate keys stored for that position and event type + */ + public int[] getKeySet(final int readPosition, final EventType errorModel) { + return keys[errorModel.ordinal()][readPosition]; + } + + public int[][] getKeySet(final EventType errorModel) { + return keys[errorModel.ordinal()]; + } + + // ---------------------------------------------------------------------- + // + // routines for testing + // + // ---------------------------------------------------------------------- + + protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); } + protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); } + protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); } + + protected int[] getMismatchesKeySet(final int readPosition) { + return getKeySet(readPosition, EventType.BASE_SUBSTITUTION); + } + + protected int[] getInsertionsKeySet(final int readPosition) { + return getKeySet(readPosition, EventType.BASE_INSERTION); + } + + protected int[] getDeletionsKeySet(final int readPosition) { + return getKeySet(readPosition, EventType.BASE_DELETION); + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatum.java 
b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatum.java similarity index 63% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatum.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatum.java index b8fd11431..be012dbaa 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatum.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatum.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.engine.recalibration; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumNode.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumNode.java similarity index 67% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumNode.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumNode.java index 30da17680..589052753 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumNode.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumNode.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.engine.recalibration; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalUtils.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalUtils.java similarity index 84% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalUtils.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalUtils.java index e41144f4e..bef0a0111 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalUtils.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalUtils.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. 
-* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.engine.recalibration; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationArgumentCollection.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationArgumentCollection.java similarity index 70% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationArgumentCollection.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationArgumentCollection.java index 47c7dc6c6..8b26de65e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationArgumentCollection.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationArgumentCollection.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.engine.recalibration; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReport.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReport.java similarity index 66% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReport.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReport.java index eabdc668e..7392001ea 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReport.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReport.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.engine.recalibration; diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTables.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTables.java new file mode 100644 index 000000000..db2456af2 --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTables.java @@ -0,0 +1,143 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import com.google.java.contract.Ensures; +import org.broadinstitute.gatk.utils.collections.LoggingNestedIntegerArray; +import org.broadinstitute.gatk.utils.recalibration.EventType; +import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate; +import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; + +import java.io.PrintStream; +import java.util.ArrayList; + +/** + * Utility class to facilitate on-the-fly base quality score recalibration. + * + * User: ebanks + * Date: 6/20/12 + */ + +public final class RecalibrationTables { + public enum TableType { + READ_GROUP_TABLE, + QUALITY_SCORE_TABLE, + OPTIONAL_COVARIATE_TABLES_START; + } + + private final ArrayList> tables; + private final int qualDimension; + private final int eventDimension = EventType.values().length; + private final int numReadGroups; + private final PrintStream log; + + public RecalibrationTables(final Covariate[] covariates) { + this(covariates, covariates[TableType.READ_GROUP_TABLE.ordinal()].maximumKeyValue() + 1, null); + } + + public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) { + this(covariates, numReadGroups, null); + } + + public RecalibrationTables(final Covariate[] covariates, final int numReadGroups, final PrintStream log) { + tables = new ArrayList>(covariates.length); + for ( int i = 0; i < covariates.length; i++ ) + tables.add(i, null); // initialize so we can set below + + qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.ordinal()].maximumKeyValue() + 1; + this.numReadGroups = numReadGroups; + this.log = log; + + tables.set(TableType.READ_GROUP_TABLE.ordinal(), + log == null ? 
new NestedIntegerArray(numReadGroups, eventDimension) : + new LoggingNestedIntegerArray(log, "READ_GROUP_TABLE", numReadGroups, eventDimension)); + + tables.set(TableType.QUALITY_SCORE_TABLE.ordinal(), makeQualityScoreTable()); + + for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < covariates.length; i++) + tables.set(i, + log == null ? new NestedIntegerArray(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) : + new LoggingNestedIntegerArray(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + 1), + numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension)); + } + + @Ensures("result != null") + public NestedIntegerArray getReadGroupTable() { + return getTable(TableType.READ_GROUP_TABLE.ordinal()); + } + + @Ensures("result != null") + public NestedIntegerArray getQualityScoreTable() { + return getTable(TableType.QUALITY_SCORE_TABLE.ordinal()); + } + + @Ensures("result != null") + public NestedIntegerArray getTable(final int index) { + return tables.get(index); + } + + @Ensures("result >= 0") + public int numTables() { + return tables.size(); + } + + /** + * @return true if all the tables contain no RecalDatums + */ + public boolean isEmpty() { + for( final NestedIntegerArray table : tables ) { + if( !table.getAllValues().isEmpty() ) { return false; } + } + return true; + } + + /** + * Allocate a new quality score table, based on requested parameters + * in this set of tables, without any data in it. The return result + * of this table is suitable for acting as a thread-local cache + * for quality score values + * @return a newly allocated, empty read group x quality score table + */ + public NestedIntegerArray makeQualityScoreTable() { + return log == null + ? 
new NestedIntegerArray(numReadGroups, qualDimension, eventDimension) + : new LoggingNestedIntegerArray(log, "QUALITY_SCORE_TABLE", numReadGroups, qualDimension, eventDimension); + } + + /** + * Merge all of the tables from toMerge into this set of tables + */ + public void combine(final RecalibrationTables toMerge) { + if ( numTables() != toMerge.numTables() ) + throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes"); + + for ( int i = 0; i < numTables(); i++ ) { + final NestedIntegerArray myTable = this.getTable(i); + final NestedIntegerArray otherTable = toMerge.getTable(i); + RecalUtils.combineTables(myTable, otherTable); + } + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ContextCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ContextCovariate.java similarity index 52% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ContextCovariate.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ContextCovariate.java index 6037eafe9..6ec459c1b 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ContextCovariate.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ContextCovariate.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE").
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.gatk.engine.recalibration.covariates; diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/Covariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/Covariate.java new file mode 100644 index 000000000..40639a339 --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/Covariate.java @@ -0,0 +1,118 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +import org.broadinstitute.gatk.engine.recalibration.ReadCovariates; +import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Oct 30, 2009 + * + * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read. 
+ * In general most error checking and adjustments to the data are done before the call to the covariates getValue methods in order to speed up the code. + * This unfortunately muddies the code, but most of these corrections can be done per read while the covariates get called per base, resulting in a big speed up. + */ + +public interface Covariate { + + /** + * Initialize any member variables using the command-line arguments passed to the walker + * + * @param RAC the recalibration argument collection + */ + public void initialize(final RecalibrationArgumentCollection RAC); + + /** + * Calculates covariate values for all positions in the read. + * + * @param read the read to calculate the covariates on. + * @param values the object to record the covariate values for every base in the read. + */ + public void recordValues(final GATKSAMRecord read, final ReadCovariates values); + + /** + * Used to get the covariate's value from input (Recalibration Report) file during on-the-fly recalibration + * + * @param str the key in string type (read from the csv) + * @return the key in it's correct type. + */ + public Object getValue(final String str); + + /** + * Converts the internal representation of the key to String format for file output. + * + * @param key the long representation of the key + * @return a string representation of the key + */ + public String formatKey(final int key); + + /** + * Converts an Object key into a long key using only the lowest numberOfBits() bits + * + * Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates + * the getValues method already returns all values in long format. 
+ * + * @param value the object corresponding to the covariate + * @return a long representation of the object + */ + public int keyFromValue(final Object value); + + /** + * Returns the maximum value possible for any key representing this covariate + * + * @return the maximum value possible for any key representing this covariate + */ + public int maximumKeyValue(); +} + diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java similarity index 53% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java index 5cfb9b933..7a3a0c586 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.gatk.engine.recalibration.covariates; diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ExperimentalCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ExperimentalCovariate.java new file mode 100644 index 000000000..da7cec0be --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ExperimentalCovariate.java @@ -0,0 +1,55 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface ExperimentalCovariate extends Covariate {} diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/QualityScoreCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/QualityScoreCovariate.java new file mode 100644 index 000000000..d7271d0aa --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/QualityScoreCovariate.java @@ -0,0 +1,103 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +import org.broadinstitute.gatk.engine.recalibration.ReadCovariates; +import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection; +import org.broadinstitute.gatk.utils.QualityUtils; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Nov 3, 2009 + * + * The Reported Quality Score covariate. 
+ */ + +public class QualityScoreCovariate implements RequiredCovariate { + + // Nothing to initialize here: this covariate does not read any of the command-line arguments in RAC + @Override + public void initialize(final RecalibrationArgumentCollection RAC) {} + + @Override + public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { + final byte[] baseQualities = read.getBaseQualities(); + final byte[] baseInsertionQualities = read.getBaseInsertionQualities(); + final byte[] baseDeletionQualities = read.getBaseDeletionQualities(); + + for (int i = 0; i < baseQualities.length; i++) { + values.addCovariate((int)baseQualities[i], (int)baseInsertionQualities[i], (int)baseDeletionQualities[i], i); + } + } + + // Used to get the covariate's value from input csv file during on-the-fly recalibration; the text is parsed as a Byte + @Override + public final Object getValue(final String str) { + return Byte.parseByte(str); + } + + @Override + public String formatKey(final int key) { + return String.format("%d", key); + } + + @Override + public int keyFromValue(final Object value) { + return (value instanceof String) ? (int)Byte.parseByte((String) value) : (int)(Byte) value; + } + + @Override + public int maximumKeyValue() { + return QualityUtils.MAX_SAM_QUAL_SCORE; + } +} \ No newline at end of file diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ReadGroupCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ReadGroupCovariate.java new file mode 100644 index 000000000..a63fbe0ee --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/ReadGroupCovariate.java @@ -0,0 +1,164 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc.
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection; +import org.broadinstitute.gatk.engine.recalibration.ReadCovariates; +import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Oct 30, 2009 + * + * The Read Group covariate. 
+ */ + +public class ReadGroupCovariate implements RequiredCovariate { + + private final HashMap<String, Integer> readGroupLookupTable = new HashMap<String, Integer>(); + private final HashMap<Integer, String> readGroupReverseLookupTable = new HashMap<Integer, String>(); + private int nextId = 0; + private String forceReadGroup; + + // Initialize any member variables using the command-line arguments passed to the walkers (here: the optional forced read group name) + @Override + public void initialize(final RecalibrationArgumentCollection RAC) { + forceReadGroup = RAC.FORCE_READGROUP; + } + + @Override + public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { + final String readGroupId = readGroupValueFromRG(read.getReadGroup()); + final int key = keyForReadGroup(readGroupId); + + final int l = read.getReadLength(); + for (int i = 0; i < l; i++) + values.addCovariate(key, key, key, i); + } + + @Override + public final Object getValue(final String str) { + return str; + } + + @Override + public synchronized String formatKey(final int key) { + // This method is synchronized so that we don't attempt to do a get() + // from the reverse lookup table while that table is being updated + return readGroupReverseLookupTable.get(key); + } + + @Override + public int keyFromValue(final Object value) { + return keyForReadGroup((String) value); + } + + /** + * Get the mapping from read group names to integer key values for all read groups in this covariate + * @return a set of mappings from read group names -> integer key values + */ + public Set<Map.Entry<String, Integer>> getKeyMap() { + return readGroupLookupTable.entrySet(); + } + + private int keyForReadGroup(final String readGroupId) { + // Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR), + // synchronize only the table updates. + + // Before entering the synchronized block, check to see if this read group is not in our tables. + // If it's not, either we will have to insert it, OR another thread will insert it first.
+ // This preliminary check avoids doing any synchronization most of the time. NOTE(review): this containsKey() and the final get() read a java.util.HashMap outside the lock while put() can run inside it; HashMap requires external synchronization for that -- confirm this is acceptable, or move the tables to a concurrent map. + if ( ! readGroupLookupTable.containsKey(readGroupId) ) { + + synchronized ( this ) { + + // Now we need to make sure the key is STILL not there, since another thread may have come along + // and inserted it while we were waiting to enter this synchronized block! + if ( ! readGroupLookupTable.containsKey(readGroupId) ) { + readGroupLookupTable.put(readGroupId, nextId); + readGroupReverseLookupTable.put(nextId, readGroupId); + nextId++; + } + } + } + + return readGroupLookupTable.get(readGroupId); + } + + @Override + public synchronized int maximumKeyValue() { + // Synchronized so that we don't query table size while the tables are being updated + return readGroupLookupTable.size() - 1; + } + + /** + * If the sample has a PU tag annotation, return that. If not, return the read group id. + * + * @param rg the read group record + * @return platform unit or readgroup id + */ + private String readGroupValueFromRG(final GATKSAMReadGroupRecord rg) { + if ( forceReadGroup != null ) + return forceReadGroup; + + final String platformUnit = rg.getPlatformUnit(); + return platformUnit == null ?
rg.getId() : platformUnit; + } + +} + + diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatCovariate.java similarity index 51% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatCovariate.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatCovariate.java index 2102f1f6c..45091e12a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatCovariate.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatCovariate.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1.
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.engine.recalibration.covariates; diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatLengthCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatLengthCovariate.java new file mode 100644 index 000000000..5d24c2e7b --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatLengthCovariate.java @@ -0,0 +1,48 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; + +public class RepeatLengthCovariate extends RepeatCovariate { + + @Requires({"repeatLength>=0", "repeatFromUnitAndLength != null"}) + @Ensures("result != null") + protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) { + return String.format("%d",repeatLength); + } + + @Override + public synchronized int maximumKeyValue() { + // NOTE(review): synchronized presumably dates from the table-size query below; the current return does not read any table + //return repeatLookupTable.size() - 1; + // this covariate's value is the repeat length alone (see getCovariateValueFromUnitAndLength above), + // so the bound is 1+MAX_REPEAT_LENGTH and does not involve MAX_STR_UNIT_LENGTH + return (1+MAX_REPEAT_LENGTH); + } + +} diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitAndLengthCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitAndLengthCovariate.java new file mode 100644 index 000000000..0d61492ba --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitAndLengthCovariate.java @@ -0,0 +1,49 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software.
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; + + +public class RepeatUnitAndLengthCovariate extends RepeatCovariate { + + @Requires({"repeatLength>=0", "repeatFromUnitAndLength != null"}) + @Ensures("result != null") + protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) { + return new String(repeatFromUnitAndLength) + String.format("%d",repeatLength); + } + + @Override + public synchronized int maximumKeyValue() { + // NOTE(review): synchronized presumably dates from the table-size query below; the current return does not read any table + //return repeatLookupTable.size() - 1; + // max possible values of covariate: for repeat unit, length is up to MAX_STR_UNIT_LENGTH, + // so we have 4^MAX_STR_UNIT_LENGTH * MAX_REPEAT_LENGTH possible values; 1<<(2*n) below is 4^n, and 1 is added to the product + return (1<<(2*MAX_STR_UNIT_LENGTH)) * MAX_REPEAT_LENGTH +1; + } + +} diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitCovariate.java new file mode 100644 index 000000000..2744f64dc --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RepeatUnitCovariate.java @@ -0,0 +1,52 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc.
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +/** + * Created with IntelliJ IDEA. 
+ * User: rpoplin + * Date: 11/3/12 + */ + +public class RepeatUnitCovariate extends RepeatCovariate { + + protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) { + return new String(repeatFromUnitAndLength); + + } + + + @Override + public synchronized int maximumKeyValue() { + // NOTE(review): synchronized presumably dates from the table-size query below; the current return does not read any table + //return repeatLookupTable.size() - 1; + // max possible values of covariate: for repeat unit, length is up to MAX_STR_UNIT_LENGTH, + // so the bound is 4^MAX_STR_UNIT_LENGTH (written as 1<<(2*MAX_STR_UNIT_LENGTH)) plus 1; MAX_REPEAT_LENGTH is not a factor here + return (1<<(2*MAX_STR_UNIT_LENGTH)) +1; + } + + +} diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RequiredCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RequiredCovariate.java new file mode 100644 index 000000000..b20086a39 --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/RequiredCovariate.java @@ -0,0 +1,55 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface RequiredCovariate extends Covariate {} diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/StandardCovariate.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/StandardCovariate.java new file mode 100644 index 000000000..572fa501b --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/StandardCovariate.java @@ -0,0 +1,55 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface StandardCovariate extends Covariate {} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibrationUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibrationUnitTest.java new file mode 100644 index 000000000..f73296793 --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/BaseRecalibrationUnitTest.java @@ -0,0 +1,90 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.QualityUtils; +import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class BaseRecalibrationUnitTest { + + @Test + public void repeatedAndUnorderedFixedQualities() { + // Test both repeated quals, and quals that aren't input in order + List quantizedQualsOrdered = Arrays.asList(11, 19); + List quantizedQualsUnordered = Arrays.asList(19, 11, 19, 19); + + // Unordered and Ordered qmapping should be identical + byte[] qmappingUnordered = BaseRecalibration.constructStaticQuantizedMapping(quantizedQualsUnordered, true); + byte[] qmappingOrdered = BaseRecalibration.constructStaticQuantizedMapping(quantizedQualsOrdered, true); + Assert.assertEquals(qmappingOrdered.length, qmappingUnordered.length); + for(int i = 0 ; i < qmappingUnordered.length ; i++) { + Assert.assertEquals(qmappingOrdered[i], qmappingUnordered[i]); + } + } + + @Test + public void nearestVsRoundDown() { + List fixedQuantizedQuals = Arrays.asList(10, 20, 30); + + byte[] qmappingRoundDown = BaseRecalibration.constructStaticQuantizedMapping(fixedQuantizedQuals, true); + byte[] qmappingRoundNearest = BaseRecalibration.constructStaticQuantizedMapping(fixedQuantizedQuals, false); + + // Depending on rounding strategy, bin 19 should round to 10 or 20 + Assert.assertEquals(qmappingRoundDown[19], 10); + Assert.assertEquals(qmappingRoundNearest[19], 20); + + // Regarless of rounding strategy, bin 21 should always round down to 20 + Assert.assertEquals(qmappingRoundDown[21], 20); + Assert.assertEquals(qmappingRoundNearest[21], 20); + } + + @Test + public void onlyOneFixedQualUsed() { + // Set all qualities to singleQual value (except for those below 
MIN_USABLE_Q_SCORE) + int singleQual = 10; + List fixedQuantizedQuals = Arrays.asList(singleQual); + + byte[] qmapping = BaseRecalibration.constructStaticQuantizedMapping(fixedQuantizedQuals, true); + + for(int i = 0 ; i < qmapping.length ; i++) { + if(i >= QualityUtils.MIN_USABLE_Q_SCORE) { + Assert.assertEquals(qmapping[i], singleQual); + } + else { + // Make sure that all values less than MIN_USABLE_Q_SCORE are preserved + Assert.assertEquals(qmapping[i], i); + } + } + } +} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ContextCovariateUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ContextCovariateUnitTest.java new file mode 100644 index 000000000..361ef390b --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ContextCovariateUnitTest.java @@ -0,0 +1,95 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.engine.recalibration.covariates.ContextCovariate; +import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate; +import org.broadinstitute.gatk.utils.clipping.ClippingRepresentation; +import org.broadinstitute.gatk.utils.clipping.ReadClipper; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.broadinstitute.gatk.utils.sam.ReadUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +/** + * @author Mauricio Carneiro + * @since 3/1/12 + */ +public class ContextCovariateUnitTest { + ContextCovariate covariate; + RecalibrationArgumentCollection RAC; + + @BeforeClass + public void init() { + RAC = new RecalibrationArgumentCollection(); + covariate = new ContextCovariate(); + covariate.initialize(RAC); + } + + @BeforeMethod + public void initCache() { + ReadCovariates.clearKeysCache(); + } + + @Test(enabled = true) + public void testSimpleContexts() { + GATKSAMRecord read = ReadUtils.createRandomRead(1000); + GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); + ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); + covariate.recordValues(read, readCovariates); + + verifyCovariateArray(readCovariates.getMismatchesKeySet(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate); + verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate); + verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate); 
+ } + + public static void verifyCovariateArray(int[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { + for (int i = 0; i < values.length; i++) + Assert.assertEquals(contextCovariate.formatKey(values[i][0]), expectedContext(read, i, contextSize)); + + } + + public static String expectedContext (GATKSAMRecord read, int offset, int contextSize) { + final String bases = stringFrom(read.getReadBases()); + String expectedContext = null; + if (offset - contextSize + 1 >= 0) { + String context = bases.substring(offset - contextSize + 1, offset + 1); + if (!context.contains("N")) + expectedContext = context; + } + return expectedContext; + } + + private static String stringFrom(byte[] array) { + String s = ""; + for (byte value : array) + s += (char) value; + return s; + } + +} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/CycleCovariateUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/CycleCovariateUnitTest.java new file mode 100644 index 000000000..4f6bf2aab --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/CycleCovariateUnitTest.java @@ -0,0 +1,114 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.broadinstitute.gatk.engine.recalibration.covariates.CycleCovariate; +import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.broadinstitute.gatk.utils.sam.ReadUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +/** + * @author Mauricio Carneiro + * @since 3/1/12 + */ +public class CycleCovariateUnitTest { + CycleCovariate covariate; + RecalibrationArgumentCollection RAC; + + @BeforeClass + public void init() { + RAC = new RecalibrationArgumentCollection(); + covariate = new CycleCovariate(); + covariate.initialize(RAC); + } + + @BeforeMethod + public void initCache() { + ReadCovariates.clearKeysCache(); + } + + @Test(enabled = true) + public void testSimpleCycles() { + short readLength = 10; + GATKSAMRecord read = ReadUtils.createRandomRead(readLength); + read.setReadPairedFlag(true); + read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); + read.getReadGroup().setPlatform("illumina"); + + ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), 1, (short) 1); + + read.setReadNegativeStrandFlag(true); + covariate.recordValues(read, 
readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), readLength, -1); + + read.setSecondOfPairFlag(true); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), -readLength, 1); + + read.setReadNegativeStrandFlag(false); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), -1, -1); + } + + private void verifyCovariateArray(int[][] values, int init, int increment) { + for (short i = 0; i < values.length; i++) { + short actual = Short.decode(covariate.formatKey(values[i][0])); + int expected = init + (increment * i); + Assert.assertEquals(actual, expected); + } + } + + @Test(enabled = true, expectedExceptions={UserException.class}) + public void testMoreThanMaxCycleFails() { + int readLength = RAC.MAXIMUM_CYCLE_VALUE + 1; + GATKSAMRecord read = ReadUtils.createRandomRead(readLength); + read.setReadPairedFlag(true); + read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); + read.getReadGroup().setPlatform("illumina"); + + ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); + covariate.recordValues(read, readCovariates); + } + + @Test(enabled = true) + public void testMaxCyclePasses() { + int readLength = RAC.MAXIMUM_CYCLE_VALUE; + GATKSAMRecord read = ReadUtils.createRandomRead(readLength); + read.setReadPairedFlag(true); + read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); + read.getReadGroup().setPlatform("illumina"); + + ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); + covariate.recordValues(read, readCovariates); + } +} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizerUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizerUnitTest.java new file mode 100644 index 000000000..3d128f133 --- /dev/null +++ 
b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/QualQuantizerUnitTest.java @@ -0,0 +1,169 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration; + + +// the imports for unit testing. 
+ + +import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.QualityUtils; +import org.broadinstitute.gatk.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class QualQuantizerUnitTest extends BaseTest { + @BeforeSuite + public void before() { + + } + + // -------------------------------------------------------------------------------- + // + // merge case Provider + // + // -------------------------------------------------------------------------------- + + private class QualIntervalTestProvider extends TestDataProvider { + final QualQuantizer.QualInterval left, right; + int exError, exTotal, exQual; + double exErrorRate; + + private QualIntervalTestProvider(int leftE, int leftN, int rightE, int rightN, int exError, int exTotal) { + super(QualIntervalTestProvider.class); + + QualQuantizer qq = new QualQuantizer(0); + left = qq.new QualInterval(10, 10, leftN, leftE, 0); + right = qq.new QualInterval(11, 11, rightN, rightE, 0); + + this.exError = exError; + this.exTotal = exTotal; + this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1)); + this.exQual = QualityUtils.errorProbToQual(this.exErrorRate); + } + } + + @DataProvider(name = "QualIntervalTestProvider") + public Object[][] makeQualIntervalTestProvider() { + new QualIntervalTestProvider(10, 100, 10, 1000, 20, 1100); + new QualIntervalTestProvider(0, 100, 10, 900, 10, 1000); + new QualIntervalTestProvider(10, 900, 0, 100, 10, 1000); + new QualIntervalTestProvider(0, 0, 10, 100, 10, 100); + new QualIntervalTestProvider(1, 10, 9, 90, 10, 100); + new QualIntervalTestProvider(1, 10, 9, 100000, 10, 100010); + new QualIntervalTestProvider(1, 10, 9, 1000000, 10,1000010); + + return QualIntervalTestProvider.getTests(QualIntervalTestProvider.class); + } + + 
@Test(dataProvider = "QualIntervalTestProvider") + public void testQualInterval(QualIntervalTestProvider cfg) { + QualQuantizer.QualInterval merged = cfg.left.merge(cfg.right); + Assert.assertEquals(merged.nErrors, cfg.exError); + Assert.assertEquals(merged.nObservations, cfg.exTotal); + Assert.assertEquals(merged.getErrorRate(), cfg.exErrorRate); + Assert.assertEquals(merged.getQual(), cfg.exQual); + } + + @Test + public void testMinInterestingQual() { + for ( int q = 0; q < 15; q++ ) { + for ( int minQual = 0; minQual <= 10; minQual ++ ) { + QualQuantizer qq = new QualQuantizer(minQual); + QualQuantizer.QualInterval left = qq.new QualInterval(q, q, 100, 10, 0); + QualQuantizer.QualInterval right = qq.new QualInterval(q+1, q+1, 1000, 100, 0); + + QualQuantizer.QualInterval merged = left.merge(right); + boolean shouldBeFree = q+1 <= minQual; + if ( shouldBeFree ) + Assert.assertEquals(merged.getPenalty(), 0.0); + else + Assert.assertTrue(merged.getPenalty() > 0.0); + } + } + } + + + // -------------------------------------------------------------------------------- + // + // High-level case Provider + // + // -------------------------------------------------------------------------------- + + private class QuantizerTestProvider extends TestDataProvider { + final List nObservationsPerQual = new ArrayList(); + final int nLevels; + final List expectedMap; + + private QuantizerTestProvider(final List nObservationsPerQual, final int nLevels, final List expectedMap) { + super(QuantizerTestProvider.class); + + for ( int x : nObservationsPerQual ) + this.nObservationsPerQual.add((long)x); + this.nLevels = nLevels; + this.expectedMap = expectedMap; + } + + @Override + public String toString() { + return String.format("QQTest nLevels=%d nObs=[%s] map=[%s]", + nLevels, Utils.join(",", nObservationsPerQual), Utils.join(",", expectedMap)); + } + } + + @DataProvider(name = "QuantizerTestProvider") + public Object[][] makeQuantizerTestProvider() { + List allQ2 = Arrays.asList(0, 
0, 1000, 0, 0); + + new QuantizerTestProvider(allQ2, 5, Arrays.asList(0, 1, 2, 3, 4)); + new QuantizerTestProvider(allQ2, 1, Arrays.asList(2, 2, 2, 2, 2)); + + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 0, 1000), 2, Arrays.asList(2, 2, 2, 2, 4)); + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 1, 1000), 2, Arrays.asList(2, 2, 2, 4, 4)); + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 10, 1000), 2, Arrays.asList(2, 2, 2, 2, 4)); + + return QuantizerTestProvider.getTests(QuantizerTestProvider.class); + } + + @Test(dataProvider = "QuantizerTestProvider", enabled = true) + public void testQuantizer(QuantizerTestProvider cfg) { + QualQuantizer qq = new QualQuantizer(cfg.nObservationsPerQual, cfg.nLevels, 0); + logger.warn("cfg: " + cfg); + for ( int i = 0; i < cfg.expectedMap.size(); i++) { + int expected = cfg.expectedMap.get(i); + int observed = qq.originalToQuantizedMap.get(i); + //logger.warn(String.format(" qq map: %s : %d => %d", i, expected, observed)); + Assert.assertEquals(observed, expected); + } + } +} \ No newline at end of file diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariatesUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariatesUnitTest.java new file mode 100644 index 000000000..419df6655 --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadCovariatesUnitTest.java @@ -0,0 +1,122 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. 
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.engine.recalibration.covariates.*; +import org.broadinstitute.gatk.utils.Utils; +import org.broadinstitute.gatk.utils.recalibration.EventType; +import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.broadinstitute.gatk.utils.sam.ReadUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.Random; + +/** + * @author carneiro + * @since 4/21/12 + */ +public class ReadCovariatesUnitTest { + + @BeforeMethod + public void init() { + ReadCovariates.clearKeysCache(); + } + + @Test(enabled = false) + public void testCovariateGeneration() { + final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); + final String RGID = "id"; + + ReadGroupCovariate rgCov = new ReadGroupCovariate(); + QualityScoreCovariate qsCov = new QualityScoreCovariate(); + ContextCovariate coCov = new ContextCovariate(); + CycleCovariate cyCov = new CycleCovariate(); + + rgCov.initialize(RAC); + qsCov.initialize(RAC); + coCov.initialize(RAC); + cyCov.initialize(RAC); + + Covariate[] requestedCovariates = new Covariate[4]; + requestedCovariates[0] = rgCov; + requestedCovariates[1] = qsCov; + requestedCovariates[2] = coCov; + requestedCovariates[3] = cyCov; + + final int NUM_READS = 100; + final Random rnd = Utils.getRandomGenerator(); + + final String[] readGroups = {"RG1", "RG2", "RGbla"}; + for (int idx = 0; idx < NUM_READS; idx++) { + for (final String rgs : readGroups) { + final int length = 10 + rnd.nextInt(100); // random read length, at least 10 bp long + final GATKSAMRecord read = ReadUtils.createRandomRead(length, false); + final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(rgs); + rg.setPlatform("illumina"); + read.setReadGroup(rg); + read.setReadNegativeStrandFlag(rnd.nextBoolean()); + final byte[] mQuals = 
read.getBaseQualities(EventType.BASE_SUBSTITUTION); + final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION); + final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION); + ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); + + // check that the length is correct + Assert.assertEquals(rc.getMismatchesKeySet().length, length); + Assert.assertEquals(rc.getInsertionsKeySet().length, length); + Assert.assertEquals(rc.getDeletionsKeySet().length, length); + + for (int i = 0; i < length; i++) { + // check that read group is always the same + Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), rgs); + Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), rgs); + Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), rgs); + + // check quality score + Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]); + Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]); + Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]); + + // check context + Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); + + // check cycle + Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1)); + Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1)); + Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1)); + } + + } + + } + + } + +} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadGroupCovariateUnitTest.java 
b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadGroupCovariateUnitTest.java new file mode 100644 index 000000000..6f32807bd --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/ReadGroupCovariateUnitTest.java @@ -0,0 +1,99 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.engine.recalibration.covariates.ReadGroupCovariate; +import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.broadinstitute.gatk.utils.sam.ReadUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +/** + * @author Mauricio Carneiro + * @since 3/1/12 + */ +public class ReadGroupCovariateUnitTest { + ReadGroupCovariate covariate; + RecalibrationArgumentCollection RAC; + + @BeforeClass + public void init() { + RAC = new RecalibrationArgumentCollection(); + covariate = new ReadGroupCovariate(); + covariate.initialize(RAC); + } + + @BeforeMethod + public void initCache() { + ReadCovariates.clearKeysCache(); + } + + @Test(enabled = true) + public void testSingleRecord() { + final String expected = "SAMPLE.1"; + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID"); + rg.setPlatformUnit(expected); + runTest(rg, expected, covariate); + } + + @Test(enabled = true) + public void testMissingPlatformUnit() { + final String expected = "MY.7"; + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(expected); + runTest(rg, expected, covariate); + } + + @Test(enabled = true) + public void testForceReadgroup() { + final RecalibrationArgumentCollection forcedRAC = new RecalibrationArgumentCollection(); + forcedRAC.FORCE_READGROUP = "FOO"; + final ReadGroupCovariate forcedCovariate = new ReadGroupCovariate(); + forcedCovariate.initialize(forcedRAC); + + final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("NOT_FOO"); + runTest(rg, "FOO", forcedCovariate); + } + + private static void runTest(final GATKSAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) { + GATKSAMRecord read = ReadUtils.createRandomRead(10); + read.setReadGroup(rg); + ReadCovariates 
readCovariates = new ReadCovariates(read.getReadLength(), 1); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected, covariate); + + } + + private static void verifyCovariateArray(final int[][] values, final String expected, final ReadGroupCovariate covariate) { + for (int[] value : values) { + String actual = covariate.formatKey(value[0]); + Assert.assertEquals(actual, expected); + } + } + +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumUnitTest.java similarity index 53% rename from protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumUnitTest.java rename to public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumUnitTest.java index 592e3cdf6..77a12cae4 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalDatumUnitTest.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.gatk.engine.recalibration; diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalUtilsUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalUtilsUnitTest.java new file mode 100644 index 000000000..5ee87549f --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalUtilsUnitTest.java @@ -0,0 +1,152 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.Utils; +import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +public final class RecalUtilsUnitTest extends BaseTest { + private class Row { + int rg, qual, ne, no; + + private Row(final Row copy) { + this(copy.rg, copy.qual, copy.ne, copy.no); + } + + private Row(int rg, int qual, int ne, int no) { + this.rg = rg; + this.qual = qual; + this.ne = ne; + this.no = no; + } + + @Override + public String toString() { + return "Row{" + + "" + rg + + ", " + qual + + ", " + ne + + ", " + no + + '}'; + } + } + + @DataProvider(name = "CombineTablesProvider") + public Object[][] createCombineTablesProvider() { + List tests = new ArrayList(); + + final List rows = new ArrayList(); + for ( final int rg : Arrays.asList(0, 1) ) { + for ( final int qual : Arrays.asList(0, 1) ) { + rows.add(new Row(rg, qual, 1, 10)); + } + } + + logger.warn("Number of rows " + rows.size()); + + List> permutations = new LinkedList>(); + permutations.addAll(Utils.makePermutations(rows, 1, false)); + permutations.addAll(Utils.makePermutations(rows, 2, false)); + permutations.addAll(Utils.makePermutations(rows, 3, false)); + + // adding 1 row to 2 + for ( final List table1 : permutations ) { + for ( final Row table2 : rows ) { + tests.add(new Object[]{table1, Arrays.asList(table2)}); + } + } + + // adding 2 rows to 1 + for ( final List table1 : permutations 
) { + for ( final Row table2 : rows ) { + tests.add(new Object[]{Arrays.asList(table2), table1}); + } + } + + for ( final List table1 : permutations ) { + for ( final List table2 : permutations ) { + tests.add(new Object[]{table1, table2}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "CombineTablesProvider") + public void testCombineTables(final List table1, final List table2) { + final NestedIntegerArray nia1 = makeTable(table1); + final NestedIntegerArray nia2 = makeTable(table2); + final List expectedRows = makeExpected(table1, table2); + final NestedIntegerArray expected = makeTable(expectedRows); + RecalUtils.combineTables(nia1, nia2); + + Assert.assertEquals(nia1.getDimensions(), expected.getDimensions()); + Assert.assertEquals(nia1.getAllValues().size(), expected.getAllValues().size()); + + for ( final NestedIntegerArray.Leaf leaf : expected.getAllLeaves() ) { + final RecalDatum actual = nia1.get(leaf.keys); + Assert.assertEquals(actual.getNumMismatches(), leaf.value.getNumMismatches()); + Assert.assertEquals(actual.getNumObservations(), leaf.value.getNumObservations()); + } + } + + public List makeExpected(final List table1, final List table2) { + final List combined = new LinkedList(); + for ( final Row t1 : table1 ) combined.add(new Row(t1)); + for ( final Row t2 : table2 ) { + combine(combined, t2); + } + return combined; + } + + private void combine(final List combined, final Row row) { + for ( final Row c : combined ) { + if ( c.rg == row.rg && c.qual == row.qual ) { + c.ne += row.ne; + c.no += row.no; + return; + } + } + + combined.add(new Row(row)); + } + + public NestedIntegerArray makeTable(final List rows) { + final NestedIntegerArray x = new NestedIntegerArray(3, 3); + for ( final Row r : rows ) + x.put(new RecalDatum((long)r.no, (double)r.ne, (byte)10), r.rg, r.qual); + return x; + } +} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReportUnitTest.java 
b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReportUnitTest.java new file mode 100644 index 000000000..ba49ec082 --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationReportUnitTest.java @@ -0,0 +1,150 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.engine.recalibration.covariates.*; +import org.broadinstitute.gatk.utils.recalibration.EventType; +import org.broadinstitute.gatk.utils.QualityUtils; +import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; +import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.broadinstitute.gatk.utils.sam.ReadUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * @author carneiro + * @since 4/21/12 + */ +public class RecalibrationReportUnitTest { + @BeforeMethod + public void init() { + ReadCovariates.clearKeysCache(); + } + + private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { + final Random random = new Random(); + final int nObservations = random.nextInt(maxObservations); + final int nErrors = Math.min(random.nextInt(maxErrors), nObservations); + final int qual = random.nextInt(QualityUtils.MAX_SAM_QUAL_SCORE); + return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual); + } + + @Test + public void testOutput() { + final int length = 100; + + List quals = new ArrayList(QualityUtils.MAX_SAM_QUAL_SCORE + 1); + List counts = new ArrayList(QualityUtils.MAX_SAM_QUAL_SCORE + 1); + + for (int i = 0; i<= QualityUtils.MAX_SAM_QUAL_SCORE; i++) { + quals.add((byte) i); + counts.add(1L); + } + + final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts); + final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); + + quantizationInfo.noQuantization(); + final List requiredCovariates = new LinkedList(); + final List optionalCovariates = new LinkedList(); + + final ReadGroupCovariate rgCovariate = new ReadGroupCovariate(); + rgCovariate.initialize(RAC); + requiredCovariates.add(rgCovariate); + + final 
QualityScoreCovariate qsCovariate = new QualityScoreCovariate(); + qsCovariate.initialize(RAC); + requiredCovariates.add(qsCovariate); + + final ContextCovariate cxCovariate = new ContextCovariate(); + cxCovariate.initialize(RAC); + optionalCovariates.add(cxCovariate); + final CycleCovariate cyCovariate = new CycleCovariate(); + cyCovariate.initialize(RAC); + optionalCovariates.add(cyCovariate); + + final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; + int covariateIndex = 0; + for (final Covariate cov : requiredCovariates) + requestedCovariates[covariateIndex++] = cov; + for (final Covariate cov : optionalCovariates) + requestedCovariates[covariateIndex++] = cov; + + final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("id"); + rg.setPlatform("illumina"); + final GATKSAMRecord read = ReadUtils.createRandomRead(length, false); + read.setReadGroup(rg); + final byte [] readQuals = new byte[length]; + for (int i = 0; i < length; i++) + readQuals[i] = 20; + read.setBaseQualities(readQuals); + + final int expectedKeys = expectedNumberOfKeys(length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE); + int nKeys = 0; // keep track of how many keys were produced + final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); + + final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates); + final NestedIntegerArray rgTable = recalibrationTables.getReadGroupTable(); + final NestedIntegerArray qualTable = recalibrationTables.getQualityScoreTable(); + + for (int offset = 0; offset < length; offset++) { + + for (EventType errorMode : EventType.values()) { + + final int[] covariates = rc.getKeySet(offset, errorMode); + final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 
10000 : 100000; + + rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal()); + qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal()); + nKeys += 2; + for (int j = 0; j < optionalCovariates.size(); j++) { + final NestedIntegerArray covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j); + final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j]; + if ( covValue >= 0 ) { + covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal()); + nKeys++; + } + } + } + } + Assert.assertEquals(nKeys, expectedKeys); + } + + private static int expectedNumberOfKeys (int readLength, int indelContextSize, int mismatchesContextSize) { + final int numCovariates = 4; + final int numTables = 3; + final int mismatchContextPadding = mismatchesContextSize - 1; + final int indelContextPadding = 2 * (indelContextSize - 1); + final int indelCyclePadding = 2 * (2 * CycleCovariate.CUSHION_FOR_INDELS); + + return (numCovariates * numTables * readLength) - mismatchContextPadding - indelContextPadding - indelCyclePadding; + } + +} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTablesUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTablesUnitTest.java new file mode 100644 index 000000000..8d82a24bd --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTablesUnitTest.java @@ -0,0 +1,177 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. 
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate; +import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.collections.NestedIntegerArray; +import org.broadinstitute.gatk.utils.recalibration.EventType; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.List; + +public final class RecalibrationTablesUnitTest extends BaseTest { + private RecalibrationTables tables; + private Covariate[] covariates; + private int numReadGroups = 6; + final byte qualByte = 1; + final List combineStates = Arrays.asList(0, 1, 2); + + @BeforeMethod + private void makeTables() { + covariates = RecalibrationTestUtils.makeInitializedStandardCovariates(); + tables = new RecalibrationTables(covariates, numReadGroups); + fillTable(tables); + } + + private void fillTable(final RecalibrationTables tables) { + for ( int iterations = 0; iterations < 10; iterations++ ) { + for ( final EventType et : EventType.values() ) { + for ( final int rg : combineStates) { + final double error = rg % 2 == 0 ? 
1 : 0; + RecalUtils.incrementDatumOrPutIfNecessary(tables.getReadGroupTable(), qualByte, error, rg, et.ordinal()); + for ( final int qual : combineStates) { + RecalUtils.incrementDatumOrPutIfNecessary(tables.getQualityScoreTable(), qualByte, error, rg, qual, et.ordinal()); + for ( final int cycle : combineStates) + RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(2), qualByte, error, rg, qual, cycle, et.ordinal()); + for ( final int context : combineStates) + RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(3), qualByte, error, rg, qual, context, et.ordinal()); + } + } + } + } + } + + @Test + public void basicTest() { + final Covariate qualCov = covariates[1]; + final Covariate cycleCov = covariates[2]; + final Covariate contextCov = covariates[3]; + + Assert.assertEquals(tables.numTables(), covariates.length); + + Assert.assertNotNull(tables.getReadGroupTable()); + Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal())); + testDimensions(tables.getReadGroupTable(), numReadGroups); + + Assert.assertNotNull(tables.getQualityScoreTable()); + Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal())); + testDimensions(tables.getQualityScoreTable(), numReadGroups, qualCov.maximumKeyValue() + 1); + + Assert.assertNotNull(tables.getTable(2)); + testDimensions(tables.getTable(2), numReadGroups, qualCov.maximumKeyValue() + 1, cycleCov.maximumKeyValue() + 1); + + Assert.assertNotNull(tables.getTable(3)); + testDimensions(tables.getTable(3), numReadGroups, qualCov.maximumKeyValue() + 1, contextCov.maximumKeyValue() + 1); + } + + private void testDimensions(final NestedIntegerArray table, final int ... 
dimensions) { + final int[] dim = new int[dimensions.length+1]; + System.arraycopy(dimensions, 0, dim, 0, dimensions.length); + dim[dimensions.length] = EventType.values().length; + Assert.assertEquals(table.getDimensions().length, dim.length); + + for ( int i = 0; i < dim.length; i++ ) { + Assert.assertEquals(table.getDimensions()[i], dim[i], "Table dimensions not expected at dim " + i); + } + } + + @Test + public void basicMakeQualityScoreTable() { + final Covariate qualCov = covariates[1]; + final NestedIntegerArray copy = tables.makeQualityScoreTable(); + testDimensions(copy, numReadGroups, qualCov.maximumKeyValue()+1); + Assert.assertEquals(copy.getAllValues().size(), 0); + } + + @Test + public void testCombine1() { + final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups); + fillTable(merged); + + merged.combine(tables); + + for ( int i = 0; i < tables.numTables(); i++ ) { + NestedIntegerArray table = tables.getTable(i); + NestedIntegerArray mergedTable = merged.getTable(i); + + Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size()); + for ( final NestedIntegerArray.Leaf leaf : table.getAllLeaves() ) { + final RecalDatum mergedValue = mergedTable.get(leaf.keys); + Assert.assertNotNull(mergedValue); + Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() * 2); + Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() * 2); + } + } + } + + @Test + public void testCombineEmptyOther() { + final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups); + + merged.combine(tables); + + for ( int i = 0; i < tables.numTables(); i++ ) { + NestedIntegerArray table = tables.getTable(i); + NestedIntegerArray mergedTable = merged.getTable(i); + + Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size()); + for ( final NestedIntegerArray.Leaf leaf : table.getAllLeaves() ) { + final RecalDatum mergedValue = 
mergedTable.get(leaf.keys); + Assert.assertNotNull(mergedValue); + Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations()); + Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches()); + } + } + } + + @Test + public void testCombinePartial() { + final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups); + for ( final int rg : combineStates) { + RecalUtils.incrementDatumOrPutIfNecessary(merged.getTable(3), qualByte, 1, rg, 0, 0, 0); + } + + merged.combine(tables); + for ( int i = 0; i < tables.numTables(); i++ ) { + NestedIntegerArray table = tables.getTable(i); + NestedIntegerArray mergedTable = merged.getTable(i); + + Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size()); + for ( final NestedIntegerArray.Leaf leaf : table.getAllLeaves() ) { + final RecalDatum mergedValue = mergedTable.get(leaf.keys); + Assert.assertNotNull(mergedValue); + + final int delta = i == 3 && leaf.keys[1] == 0 && leaf.keys[2] == 0 && leaf.keys[3] == 0 ? 1 : 0; + Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() + delta); + Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() + delta); + } + } + } +} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTestUtils.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTestUtils.java new file mode 100644 index 000000000..ae92e4139 --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RecalibrationTestUtils.java @@ -0,0 +1,48 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. 
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.recalibration; + +import org.broadinstitute.gatk.engine.recalibration.covariates.*; + +/** + * Created with IntelliJ IDEA. + * User: depristo + * Date: 12/23/12 + * Time: 1:06 PM + * To change this template use File | Settings | File Templates. 
+ */ +public class RecalibrationTestUtils { + public static Covariate[] makeInitializedStandardCovariates() { + final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); + final Covariate[] covariates = new Covariate[4]; + covariates[0] = new ReadGroupCovariate(); + covariates[1] = new QualityScoreCovariate(); + covariates[2] = new ContextCovariate(); + covariates[3] = new CycleCovariate(); + for ( Covariate cov : covariates ) cov.initialize(RAC); + return covariates; + } +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RepeatCovariatesUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RepeatCovariatesUnitTest.java similarity index 50% rename from protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RepeatCovariatesUnitTest.java rename to public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RepeatCovariatesUnitTest.java index 652bc1761..1ae101d97 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/engine/recalibration/RepeatCovariatesUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/recalibration/RepeatCovariatesUnitTest.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.gatk.engine.recalibration; From aecaa6d38e4115450ed15f73cf1895e116a4a9e8 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Mon, 9 Nov 2015 12:48:40 -0500 Subject: [PATCH 56/82] Allow GenotypeGVCFs to emit ref sites. --- .../walkers/genotyper/GenotypingEngine.java | 12 ++- .../genotyper/UnifiedGenotypingEngine.java | 3 +- .../afcalc/DiploidExactAFCalculator.java | 3 +- .../genotyper/afcalc/ExactAFCalculator.java | 15 ++- .../walkers/variantutils/GenotypeGVCFs.java | 97 ++++++++++--------- .../GenotypeGVCFsIntegrationTest.java | 17 ++-- .../SelectVariantsIntegrationTest.java | 4 +- .../variant/GATKVariantContextUtils.java | 2 +- 8 files changed, 87 insertions(+), 66 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java index 91c27cdd0..451b49ab5 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java @@ -244,8 +244,11 @@ public abstract class GenotypingEngine genotypeLikelihoods = getGLs(vc.getGenotypes(), true); + final ArrayList genotypeLikelihoods = getGLs(vc.getGenotypes(), true, vc.hasAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)); final int numSamples = 
genotypeLikelihoods.size()-1; final int numChr = 2*numSamples; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java index 7089cbb77..9a888f934 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java @@ -125,11 +125,22 @@ abstract class ExactAFCalculator extends AFCalculator { } /** - * Unpack GenotypesContext into arraylist of doubel values + * Unpack GenotypesContext into arraylist of double values * @param GLs Input genotype context * @return ArrayList of doubles corresponding to GL vectors */ protected static ArrayList getGLs(final GenotypesContext GLs, final boolean includeDummy) { + return getGLs(GLs, includeDummy, false); + } + + /** + * Unpack GenotypesContext into arraylist of double values + * @param GLs Input genotype context + * @param keepUninformative Don't filter out uninformative genotype likelihoods (i.e. 
all log likelihoods near 0) + * This is useful for VariantContexts with a NON_REF allele + * @return ArrayList of doubles corresponding to GL vectors + */ + protected static ArrayList getGLs(final GenotypesContext GLs, final boolean includeDummy, final boolean keepUninformative) { final ArrayList genotypeLikelihoods = new ArrayList<>(GLs.size() + 1); if ( includeDummy ) genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy @@ -137,7 +148,7 @@ abstract class ExactAFCalculator extends AFCalculator { if ( sample.hasLikelihoods() ) { final double[] gls = sample.getLikelihoods().getAsVector(); - if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) + if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL || keepUninformative ) genotypeLikelihoods.add(gls); } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java index 681bb07c3..feb2fbb2e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java @@ -68,6 +68,7 @@ import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AS_StandardAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode; import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotypingEngine; import 
org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider; @@ -132,7 +133,6 @@ import java.util.*; @Reference(window=@Window(start=-10,stop=10)) @SuppressWarnings("unused") public class GenotypeGVCFs extends RodWalker implements AnnotatorCompatible, TreeReducible { - /** * The gVCF files to merge together */ @@ -212,15 +212,10 @@ public class GenotypeGVCFs extends RodWalker vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants); - final GATKVariantContextUtils.GenotypeMergeType mergeType; - if(uniquifySamples) { - mergeType = GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY; - } - else - mergeType = GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE; - + final GATKVariantContextUtils.GenotypeMergeType mergeType = uniquifySamples ? + GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY : GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE; final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, mergeType)); - // create the annotation engine + annotationEngine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, Collections.emptyList(), this, toolkit); // create the genotyping engine @@ -255,15 +250,18 @@ public class GenotypeGVCFs extends RodWalker vcsAtThisLocus = tracker.getPrioritizedValue(variants, loc); + final Byte refBase = INCLUDE_NON_VARIANTS ? ref.getBase() : null; + final boolean removeNonRefSymbolicAllele = !INCLUDE_NON_VARIANTS; + final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(vcsAtThisLocus, loc, + refBase, removeNonRefSymbolicAllele, uniquifySamples, annotationEngine); + return combinedVC == null ? 
null : regenotypeVC(tracker, ref, combinedVC); } /** @@ -275,65 +273,67 @@ public class GenotypeGVCFs extends RodWalker 0 ) { + result = genotypingEngine.calculateGenotypes(originalVC); + } + + if (result == null || (!isProperlyPolymorphic(result) && !INCLUDE_NON_VARIANTS)) { + return null; + } + + result = addGenotypingAnnotations(originalVC.getAttributes(), result); //At this point we should already have DP and AD annotated - VariantContext result = annotationEngine.finalizeAnnotations(rawResult, originalVC); + result = annotationEngine.finalizeAnnotations(result, originalVC); //do trimming after allele-specific annotation reduction or the mapping is difficult result = GATKVariantContextUtils.reverseTrimAlleles(result); - // if it turned monomorphic then we either need to ignore or fix such sites - boolean createRefGTs = false; - if ( result.isMonomorphicInSamples() ) { - if ( !INCLUDE_NON_VARIANTS ) - return null; - createRefGTs = true; - } // Re-annotate and fix/remove some of the original annotations. // Note that the order of these actions matters and is different for polymorphic and monomorphic sites. // For polymorphic sites we need to make sure e.g. the SB tag is sent to the annotation engine and then removed later. // For monomorphic sites we need to make sure e.g. the hom ref genotypes are created and only then are passed to the annotation engine. // We could theoretically make 2 passes to re-create the genotypes, but that gets extremely expensive with large sample sizes. 
- if ( createRefGTs ) { + if (result.isPolymorphicInSamples()) { + result = annotationEngine.annotateContext(tracker, ref, null, result); + result = new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, false)).make(); + } else if (INCLUDE_NON_VARIANTS) { result = new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, true)).make(); result = annotationEngine.annotateContext(tracker, ref, null, result); } else { - result = annotationEngine.annotateContext(tracker, ref, null, result); - result = new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, false)).make(); + return null; } - return result; } /** - * Determines whether the provided VariantContext has real alternate alleles + * Determines whether the provided VariantContext has real alternate alleles. + * + * There is a bit of a hack to handle the NON_REF case because it is not defined in htsjdk.Allele + * We check for this as a biallelic symbolic allele. * * @param vc the VariantContext to evaluate * @return true if it has proper alternate alleles, false otherwise */ private boolean isProperlyPolymorphic(final VariantContext vc) { - return ( vc != null && - !vc.getAlternateAlleles().isEmpty() && - (!vc.isBiallelic() || - (!vc.getAlternateAllele(0).equals(Allele.SPAN_DEL) && - !vc.getAlternateAllele(0).equals(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED)) - ) - ); + //obvious cases + if (vc == null || vc.getAlternateAlleles().isEmpty()) { + return false; + } else if (vc.isBiallelic()) { + return !(vc.getAlternateAllele(0).equals(Allele.SPAN_DEL) || + vc.getAlternateAllele(0).equals(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED) || + vc.isSymbolic()); + } else { + return true; + } } /** @@ -435,6 +435,9 @@ public class GenotypeGVCFs extends RodWalker Date: Thu, 31 Mar 2016 22:46:42 -0400 Subject: [PATCH 57/82] Fixed bug in which consecutive SPAN_DELS were merged into a ** MNP.# ---
.../tools/walkers/phasing/PhasingUtils.java | 4 ++-- .../ReadBackedPhasingIntegrationTest.java | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java index 6d66dc015..8dbf65730 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java @@ -297,8 +297,8 @@ class PhasingUtils { */ static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) { // Can only merge "simple" base strings (i.e., SNPs or MNPs, but not indels): - final boolean vc1CanBeMerged = vc1.isSNP() || vc1.isMNP(); - final boolean vc2CanBeMerged = vc2.isSNP() || vc2.isMNP(); + final boolean vc1CanBeMerged = (vc1.isSNP() || vc1.isMNP()) && !vc1.hasAllele(Allele.SPAN_DEL); + final boolean vc2CanBeMerged = (vc2.isSNP() || vc2.isMNP()) && !vc2.hasAllele(Allele.SPAN_DEL); if (!vc1CanBeMerged || !vc2CanBeMerged) return false; diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 9dfc49913..7539b23e2 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -70,7 +70,6 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { " --no_cmdline_in_header"; } - @Test public void test1() { 
WalkerTestSpec spec = new WalkerTestSpec( @@ -170,4 +169,22 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { Arrays.asList("630816da701b9ea8674c23c91fa61bec")); executeTest("Merge SNPs if on the same read", spec); } + + @Test + public void testDontMergeSpanningDeletions() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T ReadBackedPhasing" + + " -R " + b37KGReferenceWithDecoy + + " -I " + privateTestDir + "phasing_test_with_span_del_1.bam" + + " -I " + privateTestDir + "phasing_test_with_span_del_2.bam" + + " -I " + privateTestDir + "phasing_test_with_span_del_3.bam" + + " -I " + privateTestDir + "phasing_test_with_span_del_4.bam" + + " --variant " + privateTestDir + "phasing_test_with_span_del.vcf" + + " -enableMergeToMNP" + + " -o %s" + + " --no_cmdline_in_header", + 1, + Arrays.asList("b334de5ad35665f0d65034197ac05b32")); + executeTest("Don't merge symbolic SPAN_DEL (*) alleles (into the nonexistent ** MNP).", spec); + } } From 58a471b0ed8a59ee49c19ccf0209278d23578334 Mon Sep 17 00:00:00 2001 From: droazen Date: Fri, 6 May 2016 12:46:27 -0400 Subject: [PATCH 58/82] Add "GATK4 Implications" section to GATK3 pull request template --- .github/PULL_REQUEST_TEMPLATE.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5852348ec..75309b858 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -23,4 +23,8 @@ Never delete this, it is our record that procedure was followed. If you find tha - [ ] Suggest a reviewer or ask your team lead to suggest one - [ ] Final (thumbsup) from the reviewer(s) -Once everything is checked off, you can go ahead and merge the PR. Don't forget to also delete the branch. 
\ No newline at end of file +#### GATK4 Implications +- [ ] If your fix/change is applicable to GATK4 as well, and is reasonably small and self-contained (< ~50 lines or so), port the change to GATK4 and open a PR against https://github.com/broadinstitute/gatk or https://github.com/broadinstitute/gatk-protected as appropriate, or at least make a "best effort" attempt to do so. +- [ ] If your fix/change cannot yet be ported to GATK4 because the tool in question hasn't been ported yet, or has only been partially ported, or it would be difficult/burdensome to port the change, or you tried to port the change and failed, then add the ticket to our list of [GATK3 PRs to be eventually ported to GATK4](https://docs.google.com/document/d/1DjEHw57k5h0i8MZRGYPlQA3InvURKwQ7pCoi_Eigc4M/edit) + +Once everything is checked off, you can go ahead and merge the PR. Don't forget to also delete the branch. From 831360fd0b041958d72cd55e8cd9a1c42251fcff Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Tue, 10 May 2016 10:47:43 -0400 Subject: [PATCH 59/82] Don't output program tag --- .../gatk/queue/qscripts/examples/ExamplePrintReads.scala | 4 ++++ .../gatk/queue/extensions/gatk/QueueFeaturesQueueTest.scala | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/examples/ExamplePrintReads.scala b/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/examples/ExamplePrintReads.scala index a629da59f..6603ddf8c 100644 --- a/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/examples/ExamplePrintReads.scala +++ b/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/examples/ExamplePrintReads.scala @@ -44,6 +44,9 @@ class ExamplePrintReads extends QScript { @Argument(doc="One or more genomic intervals over which to operate", shortName="L", required=false) var 
intervals: Seq[String] = Nil + @Argument(doc = "Don't output a program tag", shortName = "npt", required = false) + var noPGTag: Boolean = _ + def script() { val printReads = new PrintReads printReads.reference_sequence = referenceFile @@ -52,6 +55,7 @@ class ExamplePrintReads extends QScript { printReads.input_file :+= bamFile printReads.out = outFile printReads.intervalsString = intervals + printReads.no_pg_tag = noPGTag add(printReads) } } diff --git a/public/gatk-queue-extensions-public/src/test/scala/org/broadinstitute/gatk/queue/extensions/gatk/QueueFeaturesQueueTest.scala b/public/gatk-queue-extensions-public/src/test/scala/org/broadinstitute/gatk/queue/extensions/gatk/QueueFeaturesQueueTest.scala index f44e458c3..f352a3966 100644 --- a/public/gatk-queue-extensions-public/src/test/scala/org/broadinstitute/gatk/queue/extensions/gatk/QueueFeaturesQueueTest.scala +++ b/public/gatk-queue-extensions-public/src/test/scala/org/broadinstitute/gatk/queue/extensions/gatk/QueueFeaturesQueueTest.scala @@ -42,7 +42,8 @@ class QueueFeaturesQueueTest { " -S " + QueueTest.publicQScriptsPackageDir + "examples/ExamplePrintReads.scala", " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + BaseTest.publicTestDir + "exampleBAM_with_unmapped.bam", - " -out " + testOut).mkString + " -out " + testOut, + " -npt ").mkString spec.fileMD5s += testOut -> "3134a6c732d7f235373095586bc7d470" QueueTest.executeTest(spec) @@ -55,7 +56,8 @@ class QueueFeaturesQueueTest { " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + BaseTest.publicTestDir + "exampleBAM_with_unmapped.bam", " -L chr1", - " -out " + testOut2).mkString + " -out " + testOut2, + " -npt ").mkString spec2.fileMD5s += testOut2 -> "aa33e589879c4baf6a470d22da76d885" QueueTest.executeTest(spec2) } From 71a3447772fae79636e84233482c47d9fabfcef6 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 12 May 2016 15:26:04 -0400 Subject: [PATCH 61/82] Yf tumor only (#1298) * tumor only pipeline * a few new 
wrappers for picard tools --- .../queue/extensions/picard/FilterVcf.scala | 86 +++++++++++++++++++ .../extensions/picard/MakeSitesOnlyVcf.scala | 68 +++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/FilterVcf.scala create mode 100644 public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/MakeSitesOnlyVcf.scala diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/FilterVcf.scala b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/FilterVcf.scala new file mode 100644 index 000000000..971f67a28 --- /dev/null +++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/FilterVcf.scala @@ -0,0 +1,86 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.queue.extensions.picard + +import java.io.File + +import htsjdk.samtools.ValidationStringency +import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction +import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output} + +/** + * Reads a VCF/VCF.gz/BCF and filters it quickly based on some simple hard filters + */ +class FilterVcf extends JavaCommandLineFunction { + analysisName = "FilterVcf" + javaMainClass = "picard.vcf.FilterVcf" + + @Input(doc = "The input VCF files to filter.", shortName = "input", fullName = "input_vcf_file", required = true) + var input: File = _ + + @Output(doc = "The output VCF which will have it's FILTER field updated", required = false) + var output: File = _ + + + @Argument(doc = "The minimum allele balance acceptable before filtering a site. Allele balance is calculated for heterozygotes as " + + "the number of bases supporting the least-represented allele over the total number of base observations. Different heterozygous " + + "genotypes at the same locus are measured independently.
The locus is filtered if any allele balance is below the limit.", required = false) + var minAb: Option[Double] = _ + + @Argument(doc = "The minimum sequencing depth supporting a genotype before the genotype will be filtered out.", required = false) + var minDp: Option[Double] = _ + + @Argument(doc = "The minimum genotype quality that must be achieved for a sample otherwise the genotype will be filtered out.", required = false) + var minGQ: Option[Double] = _ + + @Argument(doc = "The maximum phred scaled fisher strand value before a site will be filtered out.", required = false) + var maxFs: Option[Double] = _ + + @Argument(doc = "The minimum QD value to accept or otherwise filter out the variant.", required = false) + var minQd: Option[Double] = _ + + var validationStringency = ValidationStringency.SILENT + var compressionLevel: Option[Int] = None + var createIndex: Option[Boolean] = None + var maxRecordsInRam: Option[Int] = None + var assumeSorted: Option[Boolean] = None + + override def commandLine = super.commandLine + + required("INPUT=", input, spaceSeparated = false) + + required("TMP_DIR=" + jobTempDir) + + optional("OUTPUT=", output, spaceSeparated = false) + + optional("MIN_AB=", minAb, spaceSeparated = false) + + optional("MIN_DP=", minDp, spaceSeparated = false) + + optional("MIN_GQ=", minGQ, spaceSeparated = false) + + optional("MIN_FS=", maxFs, spaceSeparated = false) + + optional("MIN_QD=", minQd, spaceSeparated = false) + + optional("COMPRESSION_LEVEL=", compressionLevel, spaceSeparated = false) + + optional("VALIDATION_STRINGENCY=", validationStringency, spaceSeparated = false) + + optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false) + + optional("ASSUME_SORTED=", assumeSorted, spaceSeparated = false) + + optional("CREATE_INDEX=", createIndex, spaceSeparated = false) + +} diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/MakeSitesOnlyVcf.scala 
b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/MakeSitesOnlyVcf.scala new file mode 100644 index 000000000..794bee0b9 --- /dev/null +++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/MakeSitesOnlyVcf.scala @@ -0,0 +1,68 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.queue.extensions.picard + +import java.io.File + +import htsjdk.samtools.ValidationStringency +import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction +import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output} + +/** + * "Reads a VCF/VCF.gz/BCF and removes all genotype information from it while retaining all site level information, + * including annotations based on genotypes (e.g. AN, AF). 
Output can be any supported variant format including .vcf, .vcf.gz or .bcf." + */ +class MakeSitesOnlyVcf extends JavaCommandLineFunction { + analysisName = "MakeSitesOnlyVcf" + javaMainClass = "picard.vcf.MakeSitesOnlyVcf" + + @Input(doc = "The input VCF files to analyze.", shortName = "input", fullName = "input_vcf_file", required = true) + var input: File = _ + + @Output(doc = "The output VCF which will not have any genotypes, but will keep the INFO field intact.", required = false) + var output: File = _ + + @Argument(shortName = "S", doc = "Optionally one or more samples to retain when building the \'sites-only\' VCF.", required = false) + var samples: List[String] = _ + + var validationStringency = ValidationStringency.SILENT + var compressionLevel: Option[Int] = None + var createIndex: Option[Boolean] = None + var maxRecordsInRam: Option[Int] = None + var assumeSorted: Option[Boolean] = None + + override def commandLine = super.commandLine + + required("INPUT=", input, spaceSeparated = false) + + required("TMP_DIR=" + jobTempDir) + + optional("OUTPUT=", output, spaceSeparated = false) + + repeat("SAMPLE=", samples, spaceSeparated = false) + + optional("COMPRESSION_LEVEL=", compressionLevel, spaceSeparated = false) + + optional("VALIDATION_STRINGENCY=", validationStringency, spaceSeparated = false) + + optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false) + + optional("ASSUME_SORTED=", assumeSorted, spaceSeparated = false) + + optional("CREATE_INDEX=", createIndex, spaceSeparated = false) + +} From 35a06879f1c8dd0fe795fe1858b11f17296b9551 Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Tue, 26 Apr 2016 13:56:05 -0400 Subject: [PATCH 63/82] Move htsjdk and picard to version 2.3.0 --- .../genotyper/ConsensusAlleleCounter.java | 2 +- .../walkers/genotyper/GenotypingEngine.java | 6 +- .../walkers/genotyper/UnifiedGenotyper.java | 4 +- .../VariantAnnotatorIntegrationTest.java | 52 +++++++-------- .../cancer/m2/MuTect2IntegrationTest.java | 12 ++--
.../VariantFiltrationIntegrationTest.java | 2 +- .../UnifiedGenotyperEngineUnitTest.java | 2 +- ...perGeneralPloidySuite1IntegrationTest.java | 8 +-- ...perGeneralPloidySuite2IntegrationTest.java | 6 +- ...dGenotyperIndelCallingIntegrationTest.java | 22 +++---- .../UnifiedGenotyperIntegrationTest.java | 32 ++++----- ...GenotyperNormalCallingIntegrationTest.java | 16 ++--- ...lexAndSymbolicVariantsIntegrationTest.java | 10 +-- .../HaplotypeCallerGVCFIntegrationTest.java | 66 +++++++++---------- .../HaplotypeCallerIntegrationTest.java | 64 +++++++++--------- .../PhaseByTransmissionIntegrationTest.java | 2 +- .../ReadBackedPhasingIntegrationTest.java | 6 +- ...ntRecalibrationWalkersIntegrationTest.java | 6 +- ...lateGenotypePosteriorsIntegrationTest.java | 10 +-- .../CombineGVCFsIntegrationTest.java | 28 ++++---- .../CombineVariantsIntegrationTest.java | 2 +- .../GenotypeGVCFsIntegrationTest.java | 63 +++++++++--------- ...ftAlignAndTrimVariantsIntegrationTest.java | 10 +-- .../SelectVariantsIntegrationTest.java | 30 ++++----- ...SelectVariantsParallelIntegrationTest.java | 4 +- public/external-example/pom.xml | 2 +- .../engine/EngineFeaturesIntegrationTest.java | 8 +-- .../engine/arguments/CramIntegrationTest.java | 8 +-- .../arguments/IntervalIntegrationTest.java | 2 +- public/gatk-root/pom.xml | 10 +-- .../readutils/PrintReadsIntegrationTest.java | 2 +- .../variantutils/VCFIntegrationTest.java | 2 +- .../1.2-20140817/cofoja-1.2-20140817.pom | 4 +- 33 files changed, 251 insertions(+), 252 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/ConsensusAlleleCounter.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/ConsensusAlleleCounter.java index 4bfd6cc89..feee51aea 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/ConsensusAlleleCounter.java +++ 
b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/ConsensusAlleleCounter.java @@ -278,7 +278,7 @@ public class ConsensusAlleleCounter { builder.noGenotypes(); if (doMultiAllelicCalls) { vcs.add(builder.make()); - if (vcs.size() >= GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED) + if (vcs.size() >= GenotypeLikelihoods.MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED) break; } else if (curCnt > maxAlleleCnt) { maxAlleleCnt = curCnt; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java index a5e5d8a87..83c7ed533 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java @@ -407,14 +407,14 @@ public abstract class GenotypingEngine, Unif UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, sampleNameSet, logger)); // check for a bad max alleles value - if ( UAC.genotypeArgs.MAX_ALTERNATE_ALLELES > GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED) - throw new UserException.BadArgumentValue("max_alternate_alleles", "the maximum possible value is " + GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED); + if ( UAC.genotypeArgs.MAX_ALTERNATE_ALLELES > GenotypeLikelihoods.MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED) + throw new UserException.BadArgumentValue("max_alternate_alleles", "the maximum possible value is " + GenotypeLikelihoods.MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED); // warn the user for misusing EMIT_ALL_SITES if ( UAC.outputMode == OutputMode.EMIT_ALL_SITES && diff --git 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java index 94c3db651..0029f78a6 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -99,7 +99,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("832861ecdc6344cfb7097d02903ffd4d")); + Arrays.asList("b63baada372925a76c3f279e16eb631d")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -107,7 +107,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("d584454019f21729757ed7685ff8d02c")); + Arrays.asList("6f5856bc2d31f8aae4131717e5ab0b16")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -133,7 +133,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 
1:10,020,000-10,021,000", 1, - Arrays.asList("95a4be70c46496fb6791fdd0c3bcf8a3")); + Arrays.asList("7ec5470f742f80cdfbfb203213bea8cc")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -141,7 +141,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("fe6a7fdd3f7c1c77b31c2676aeb0ed8d")); + Arrays.asList("afc47e4f253d0999961f26920be8e834")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -150,7 +150,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testExcludeAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "-XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4995f77bedf7f476a15d46b5a8a392bd")); + Arrays.asList("a8a87e2a67436e14ed32ce9d355a3440")); executeTest("test exclude annotations", spec); } @@ -183,7 +183,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("de96b1e62f414b107430d179a154534d")); + Arrays.asList("fc7958261af93681fde73c1fc6b578a0")); executeTest("test overwriting header", spec); } @@ -191,7 +191,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoReads() { WalkerTestSpec spec = new 
WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("489a09a8531d9c8ef683ad8cc81db3e8")); + Arrays.asList("2d535a48ec1c66aa9c707f6d498fc81d")); executeTest("not passing it any reads", spec); } @@ -199,7 +199,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --dbsnp " + b36dbSNP129 + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("a7af6774ea1f7622d999cae1b7f8ea32")); + Arrays.asList("a2a2f9ce8e6f9c933ad46906719ce402")); executeTest("getting DB tag with dbSNP", spec); } @@ -207,7 +207,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testMultipleIdsWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1, - Arrays.asList("de8cfffe3b61b7c8832096a399e9d954")); + Arrays.asList("ed351765d63d92b1913784fa47b3d859")); executeTest("adding multiple IDs with dbSNP", spec); } @@ -215,7 +215,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf" + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("25443af7099f7de184b8dcdfb659f62e")); + Arrays.asList("114d8300ec0fac613bb2e82f8951adc0")); executeTest("getting DB tag with HM3", spec); } @@ -223,7 +223,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithTwoComps() { 
WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf --comp:foo " + privateTestDir + "fakeHM3.vcf " + STANDARD_ANNOTATIONS + " --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("ea9b10d2b82a7846c01a017f6f3bb57e")); + Arrays.asList("af59185b6a03d4147d2755019dcc6bf9")); executeTest("getting DB tag with 2 comps", spec); } @@ -239,7 +239,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf" + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("f26d1f849cceca0ab115737f8db670ae")); + Arrays.asList("7bcbd8ad8388f371d1a990fde67d3273")); executeTest("using expression", spec); } @@ -247,7 +247,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpressionAlleleMisMatch() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resourceAlleleConcordance --resource:foo " + privateTestDir + "targetAnnotations.vcf" + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty-mod.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty-mod.vcf", 1, - Arrays.asList("6f288c4b672ac3a22cb2385981f51d75")); + Arrays.asList("76f716569e33d88914e479b70e08ac88")); executeTest("using expression allele mismatch", spec); } @@ -255,7 +255,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpressionMultiAllele() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations-multiAllele.vcf" + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty-multiAllele.vcf -E foo.AF -E foo.AC -L " + privateTestDir + 
"vcfexample3empty-multiAllele.vcf", 1, - Arrays.asList("af92a439f092f45da10adac0f9c8fc8f")); + Arrays.asList("0e3fc86349f5fd28159d00d22d278e84")); executeTest("using expression with multi-alleles", spec); } @@ -264,7 +264,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { /* The order of filters in the output seems platform-dependent. May need to change htsjdk to make the order consistent across platforms. [Sato] */ WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resource:foo " + privateTestDir + "annotationResourceWithFilter.vcf" + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty-multiAllele.vcf -E foo.FILTER -L " + privateTestDir + "vcfexample3empty-multiAllele.vcf", 1, - Arrays.asList("d0a381375a407dd454637f78bb5b194f")); + Arrays.asList("6fe67e72232165a829fde7c3b12c2275")); executeTest("annotate a vcf with the FILTER field of another vcf", spec); } @@ -272,7 +272,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpressionWithID() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf" + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1, - Arrays.asList("58a86fe8a34c92127eb33e36107941dd")); + Arrays.asList("94b03ee63604ab8d61aacfd3297c5dca")); executeTest("using expression with ID", spec); } @@ -300,7 +300,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + "snpEff2.0.5.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429", 1, - Arrays.asList("6618f3ae9dc6d4ce6ebd4eb8f9495103") + Arrays.asList("db0c5f273583a54d0cefc4b3c01aae9a") ); executeTest("Testing SnpEff annotations", spec); } @@ -313,7 +313,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { "--snpEffFile " 
+ privateTestDir + "snpEff_unsupported_version_gatk_mode.vcf " + "-L 1:10001292-10012424", 1, - Arrays.asList("7533645a3791ce30d7407f789e1ffbb0") + Arrays.asList("5f952cfcd25653edcffd3916d52e94ec") ); executeTest("Testing SnpEff annotations (unsupported version, GATK mode)", spec); } @@ -326,14 +326,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { "--snpEffFile " + privateTestDir + "snpEff_unsupported_version_no_gatk_mode.vcf " + "-L 1:10001292-10012424", 1, - Arrays.asList("0e201a91a2b2b130debcd5dd7d9328ab") + Arrays.asList("86fb28c2e3886eda194026b3b7d07c77") ); executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec); } @Test(enabled = true) public void testTDTAnnotation() { - final String MD5 = "9532ca341b52be650b35e32d7c765030"; + final String MD5 = "cf65fdfbcd822279e84326989b2f6378"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1, @@ -344,7 +344,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test(enabled = true) public void testChromosomeCountsPed() { - final String MD5 = "4ab0b4245ba2c5c62424775879f51379"; + final String MD5 = "71045f20e4cd9f5fdaa367f6d7324e59"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1, @@ -354,7 +354,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test(enabled = true) public void testInbreedingCoeffPed() { - final String MD5 = 
"5cbf01dc895bff5a8dd9f6c46e0958c6"; + final String MD5 = "0cf7c115316950abc0213935b20a653a"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1, @@ -364,7 +364,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test(enabled = true) public void testAlleleTrimming() { - final String MD5 = "90f9ee6c34c0820435dce7a0d63b4c1e"; + final String MD5 = "5db0cd72ad0ffc711afb95df58de31fa"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "alleleTrim.vcf.gz" + " -L 1:26608870-26608875 -no_cmdline_in_header --resource:exac " + privateTestDir + "exacAlleleTrim.vcf.gz -E exac.AC_Adj" + @@ -423,7 +423,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testStrandAlleleCountsBySample() { - final String MD5 = "dca6c2b416076ca89769a322cae65bb8"; + final String MD5 = "564aeeefad92353d66dbb2a2222d5108"; final WalkerTestSpec spec = new WalkerTestSpec( "-T HaplotypeCaller --disableDithering " + String.format("-R %s -I %s ", REF, CEUTRIO_BAM) + @@ -510,7 +510,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19ReferenceWithChrPrefixInChromosomeNames + " -A HomopolymerRun --variant:vcf " + privateTestDir + "problem_del.vcf " + "-U ALLOW_SEQ_DICT_INCOMPATIBILITY -L chr18:44382010-44384010 --reference_window_stop 59 --no_cmdline_in_header -o %s", 1, - Arrays.asList("bda55495578147b2390d850d7fb25a12")); + Arrays.asList("f3166721ef0380636590b3e860aa06af")); executeTest("Testing testHomopolymerRunWindow", spec); } @@ -519,7 +519,7 @@ public class 
VariantAnnotatorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19ReferenceWithChrPrefixInChromosomeNames + " -A HomopolymerRun --variant:vcf " + privateTestDir + "problem_del.vcf " + "-U ALLOW_SEQ_DICT_INCOMPATIBILITY -L chr18:44382010-44384010 --no_cmdline_in_header -o %s", 1, - Arrays.asList("e20b12fd45f37a7bb31a2f2e91983477")); + Arrays.asList("3dba997d03779781c82a25ace69c838e")); executeTest("Testing HomopolymerRunTooBig", spec); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java index 8a7c54d35..bfd0e4911 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2IntegrationTest.java @@ -121,7 +121,7 @@ public class MuTect2IntegrationTest extends WalkerTest { @Test public void testMicroRegression() { - M2Test(CCLE_MICRO_TUMOR_BAM, CCLE_MICRO_NORMAL_BAM, CCLE_MICRO_INTERVALS_FILE, "", "617054c6d056cad7448a463cb8d04a55"); + M2Test(CCLE_MICRO_TUMOR_BAM, CCLE_MICRO_NORMAL_BAM, CCLE_MICRO_INTERVALS_FILE, "", "a7658ccfb75bf1ce8d3d3cfbf3b552f0"); } /** @@ -131,7 +131,7 @@ public class MuTect2IntegrationTest extends WalkerTest { */ @Test public void testTruePositivesDream3() { - M2Test(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, DREAM3_TP_INTERVALS_FILE, "", "f856432679e43445d2939772be4326cf"); + M2Test(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, DREAM3_TP_INTERVALS_FILE, "", "91dee82a13275e5568f5d2e680e3162b"); } /** @@ -140,7 +140,7 @@ public class MuTect2IntegrationTest extends WalkerTest { @Test public void testTruePositivesDream3TrackedDropped() { M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "", - 
"ee4786de29532ffd745048c449a9772a", + "4f1337df1de5dd4468e2d389403ca785", "b536e76870326b4be01b8d6b83c1cf1c"); } @@ -150,7 +150,7 @@ public class MuTect2IntegrationTest extends WalkerTest { */ @Test public void testFalsePositivesDream3() { - M2Test(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, DREAM3_FP_INTERVALS_FILE, "", "11357aa543e7c6b2725cd330adba23a0"); + M2Test(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, DREAM3_FP_INTERVALS_FILE, "", "6be3fc318e2c22a28098f58b76c9a5a1"); } /** @@ -158,7 +158,7 @@ public class MuTect2IntegrationTest extends WalkerTest { */ @Test public void testContaminationCorrection() { - M2Test(CCLE_MICRO_TUMOR_BAM, CCLE_MICRO_NORMAL_BAM, CCLE_MICRO_INTERVALS_FILE, "-contamination 0.1", "1df41f2dd6d4715ae1b423bf295ec7c5"); + M2Test(CCLE_MICRO_TUMOR_BAM, CCLE_MICRO_NORMAL_BAM, CCLE_MICRO_INTERVALS_FILE, "-contamination 0.1", "b1010a6614b0332c41fd6da9d5f6b14e"); } /** @@ -166,7 +166,7 @@ public class MuTect2IntegrationTest extends WalkerTest { */ @Test public void testTumorOnly(){ - m2TumorOnlyTest(CCLE_MICRO_TUMOR_BAM, "2:166000000-167000000", "", "8439d9a673b3a57aa5893af600125d3b"); + m2TumorOnlyTest(CCLE_MICRO_TUMOR_BAM, "2:166000000-167000000", "", "bb0cddfdc29500fbea68a0913d6706a3"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java index e594dce5b..61d92f0bb 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java @@ -189,7 +189,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + 
" --filterExpression 'FS > 60.0' --filterName SNP_FS -V " + privateTestDir + "unfilteredForFiltering.vcf", 1, - Arrays.asList("0febd66699fcd7f521377d1d0d0016fb")); + Arrays.asList("b9fa012770831c984101d23420ef0c38")); executeTest("testUnfilteredBecomesFilteredAndPass", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java index 9d0984c81..ff8d7f99a 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java @@ -133,7 +133,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest { } final VariantContext vc = new VariantContextBuilder("test", "chr1", 1000, 1000, alleles).make(); final boolean result = ugEngine.hasTooManyAlternativeAlleles(vc); - Assert.assertTrue(result == (vc.getNAlleles() > GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED)); + Assert.assertTrue(result == (vc.getNAlleles() > GenotypeLikelihoods.MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED)); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 0f7f9fb67..6e21e96d1 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ 
-69,17 +69,17 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testSNP_ACS_Pools() { - executor.PC_LSV_Test_short("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES", "LSV_SNP_ACS", "SNP", "bf6012b6e7dec2d44b2bcb402c98c95e"); + executor.PC_LSV_Test_short("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES", "LSV_SNP_ACS", "SNP", "ebdf749d404aaef298780a53059a4f93"); } @Test(enabled = true) public void testBOTH_GGA_Pools() { - executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "1b834608171f67e52a4e7617458a3ba6"); + executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c3826794a250e32b0497353ceb1deb26"); } @Test(enabled = true) public void testINDEL_GGA_Pools() { - executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "5b9e08bb141c48f826dc513066cb8a13"); + executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "4eb0d8018da6612cd434491f338ed5a4"); } @Test(enabled = true) @@ -88,6 +88,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe //TODO the old MD5 is kept for the record. //TODO this should be revisit once we get into addressing inaccuracies by the independent allele approach. 
// executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b5ff7530827f4b9039a58bdc8a3560d2"); - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "988d421354869ec3b17f90bad695757a"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "c2fb9b05027c2b0ac9e338d9ddda69b1"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index e683b34b7..51d7a5a65 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -63,16 +63,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1ad2b57af06e90679ce6476900d9cbbe"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","e22846de4567f576e08e00edda2931d0"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - executor.PC_MT_Test(CEUTRIO_BAM, "-A AlleleCountBySample -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","b9f5d1db0d2f5eb00eeb72ea29130de6"); + executor.PC_MT_Test(CEUTRIO_BAM, "-A AlleleCountBySample -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","9757563c5e32f59cb47eb9b16f1016ac"); } @Test(enabled = true) 
public void testMT_SNP_GGA_sp10() { - executor.PC_MT_Test(CEUTRIO_BAM, String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "d90e41081b4a910a50116ff18c311245"); + executor.PC_MT_Test(CEUTRIO_BAM, String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "37f8ccc683dc525c25dddc4f8dad505c"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 79385a81f..9e524e347 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -78,7 +78,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("8893a0ef99744757333ec6a85c31b753")); + Arrays.asList("32bece91e170d623092817738faddb4e")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -92,7 +92,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("7fca7fe87afba84c1bece29ce7a402ef")); + Arrays.asList("897c6063236fcd7242c2ff5982585648")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -105,7 +105,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - 
Arrays.asList("343307f432d2026e3c4cc44f64aad06f")); + Arrays.asList("dd66e5f8a6e43be0e473251185a4f38a")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -115,7 +115,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("618d6cdd823c9f88c03b58fc7ebf47b4")); + Arrays.asList("aa56ed44e77162efce45c936c485769e")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -125,7 +125,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("889e43aa0d2c9ce07064817268c965a4")); + Arrays.asList("a4b6434c59c4b119e480ddafc86de234")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -140,7 +140,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("8052ee2044f3c5687859aadee2fe2a75")); + Arrays.asList("f9d848fe5e6e6762e0dd5b5d925f74f4")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -150,7 +150,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends 
WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1, - Arrays.asList("6e8319e65fef1059c2092c05e6916257")); + Arrays.asList("41aa49e9c15198a006a1bc8e9638d6ec")); executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec); } @@ -162,7 +162,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 20:10,000,000-10,100,000", 1, - Arrays.asList("745eb3eefa93aca72f724aab4734c7ef")); + Arrays.asList("e883a8863bdd44c559c4440183c87078")); executeTest(String.format("test UG with base indel quality scores"), spec); } @@ -181,7 +181,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("8861c367dd20505e120af174a9d4f9f4")); + Arrays.asList("2a82d1586b2148e8d902da5cf8538210")); executeTest("test minIndelFraction 0.0", spec); } @@ -189,7 +189,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("a3bb6d95764d6f5a7d2f78e09699d4ef")); + Arrays.asList("3184a3f58b3aeafcd97280af708a04bb")); executeTest("test minIndelFraction 0.25", spec); } @@ -197,7 +197,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction100() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 1", 1, - Arrays.asList("c7f190f7cea34a7f9c931cd8de110a48")); + 
Arrays.asList("990b838ef13b8ccf257eb6cbcc7c7741")); executeTest("test minIndelFraction 1.0", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 822be96b5..7d45eddb5 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinBaseQualityScore() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1, - Arrays.asList("b4201a4d30b6ed5c6fc80248676be5ff")); + Arrays.asList("52a3064863b97e43d8df878edc29275c")); executeTest("test min_base_quality_score 26", spec); } @@ -94,7 +94,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("06d166ecf40563c234902d1b340a3ff1")); + Arrays.asList("31790ea2bd2fa7e5cec78e3ffbc98c81")); executeTest("test SLOD", spec); } @@ -102,7 +102,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + 
"NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("0538b7363d271f1f37d73dc3bd5b0071")); + Arrays.asList("2f2d7dd623446fc3cae62a44a016c16d")); executeTest("test NDA", spec); } @@ -110,7 +110,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("f54dafe8e9ac1402d230e4d83766d6cf")); + Arrays.asList("7645a06b917efb1ee799bf21bdf08bc4")); executeTest("test using comp track", spec); } @@ -124,17 +124,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "dfd11f2f33dc4da50a2725bfb6f64dba"); + testOutputParameters("-sites_only", "3c0e109190cfbe41d24e7726cc8fe6e3"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "a9c30d2bb179ea7a69dd0e7367fcd465"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f6937cc8ec068f2d38b5d277a92be34b"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "791fbabf4a8a7a1d0735d57c4c6b4a5a"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "1cddd7b1e730765c2b7b55d8a1d69b4c"); } private void testOutputParameters(final String args, final String md5) { @@ -148,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - 
Arrays.asList("38a2f8e5542f78c324e89d09eb545f07")); + Arrays.asList("5c7d237e666439edb0ef8c697e37933c")); executeTest("test confidence 1", spec1); } @@ -156,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNoPrior() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.33333 -inputPrior 0.33333", 1, - Arrays.asList("6f96d079f5889d8555880e6cb614a41d")); + Arrays.asList("24b550bbc3c9f0577e069b3fd3122d52")); executeTest("test no prior 1", spec1); } @@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testUserPrior() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.001 -inputPrior 0.495", 1, - Arrays.asList("6e48c44b58960954e7e49810d9e3b3ad")); + Arrays.asList("f60b6705daec1059ce3e533bf8e44c89")); executeTest("test user prior 1", spec1); } @@ -174,7 +174,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void emitPLsAtAllSites() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1, - Arrays.asList("04630efda71e0e804cfa4b2e7461c083")); + Arrays.asList("ae778a64323abe0da5194f0b936f48aa")); // GDA: TODO: BCF encoder/decoder doesn't seem to support non-standard values in genotype fields. 
IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail spec1.disableShadowBCF(); @@ -190,12 +190,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "3e6328e822a92911e591733438fe023c" ); + testHeterozosity( 0.01, "6b8bdde9d303139806c5177fae53b1fd" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "e87e2ad4a6d86ba9cc627b397af9c681" ); + testHeterozosity( 1.0 / 1850, "b1604d1ba68dfe2fcfb861ef6420a8ba" ); } private void testHeterozosity(final double arg, final String md5) { @@ -274,7 +274,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("9363de1caf569d565e0c377fe234f170")); + Arrays.asList("7ed55f70feeacf8ecc6b36f0d741dfc7")); executeTest(String.format("test multiple technologies"), spec); } @@ -293,7 +293,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("5e9d48c296d50d553ae13f283b75e1ee")); + Arrays.asList("90224ac1c9e2ce9b77fee8dd6e044efe")); executeTest(String.format("test calling with BAQ"), spec); } @@ -310,7 +310,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " + "-A SnpEff", 1, - Arrays.asList("52bac28a85b85d9596d668d3f934f792")); + Arrays.asList("e99f100fe71bb7f328b485204c16f14a")); executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 39bd498f2..bdd3eb5c0 100644 --- 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -70,7 +70,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("46dd44e69ff078a48057ff08d278a293")); + Arrays.asList("c759b04ed0d948bda95008e29f3f5c2d")); executeTest("test MultiSample Pilot1", spec); } @@ -78,7 +78,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("a19e5f74606cd980c3083d41d23b4acb")); + Arrays.asList("c35cadefda8a9f13c497f84193f9f841")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -86,7 +86,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("fe3aec195ceb9180b6ffc474a4bb37b3")); + Arrays.asList("06b4ce476bf444305c8d76a765c5ddb6")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", 
spec2); } @@ -94,7 +94,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("4cc795e0e2ec1ce31fd6545fb24a850e")); + Arrays.asList("281db46f39e3367f207838c620a82bd2")); executeTest("test SingleSample Pilot2", spec); } @@ -102,7 +102,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("0de8363482204fe2aedbe612782d1049")); + Arrays.asList("94ca1e00d4fad9c5279271c2779ff797")); executeTest("test Multiple SNP alleles", spec); } @@ -110,7 +110,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testBadRead() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, - Arrays.asList("67dbab7d307d02b3d879eca8bd15a573")); + Arrays.asList("1d5c55b2df63eb24832de3486c020453")); executeTest("test bad read", spec); } @@ -118,7 +118,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " 
--no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("e86ff5eb488f29aa77606309b2fd4fcb")); + Arrays.asList("87dbae957b1df41c1938906e23a88c5e")); executeTest("test reverse trim", spec); } @@ -126,7 +126,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("d8b205bce4addb4ceff0b5a7ec36e3fe")); + Arrays.asList("c5aff2572ce09c413e7f5c9e1b3f92d6")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 9d09dd253..0a62bcdcb 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -72,7 +72,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "39a1dfa5143e04f75ecd2b24f9c69578"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "32ce23b3830f5f2c693161b40de8b15e"); } private void HCTestSymbolicVariants(String bam, String args, 
String md5) { @@ -84,7 +84,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa // TODO -- need a better symbolic allele test @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "8a83ab27177f0b7adf50031f061f9cd7"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "e158212aba1d7d229563db11b08b7974"); } private void HCTestComplexGGA(String bam, String args, String md5) { @@ -96,13 +96,13 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "af3a490e5f41e890c59927426ac0fe9a"); + "8f8680bd8e1549ad88691c9c8af9977c"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "12b1826ee23243c64a002c3cbdfa569a"); + "82b53501bc3254def885e09866377e7c"); } private void HCTestComplexConsensusMode(String bam, String args, String md5) { @@ -114,7 +114,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleConsensusModeComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", - "9765065daf3a008bd92c755d882c5f07"); + "353f1895047b15b1fec22b559c9da0c1"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 02699eb6a..4ff188851 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ 
b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -84,12 +84,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { //TODO this might need to be addressed at some point. //TODO the following test is commented out for the record //tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "7f09c261950bf86e435edfa69ed2ec71"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "1c1ca2d76bc5d7dd45a5e7aef756ad8f"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7ddfd33f2efcd2617d896826434fb43c"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "30accd7a171e9a13969fa543fde3fed1"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "20d896bbf750739f937ccd2fb152d902"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "99dd66a5b36ba9e2d1b0f408ecfbb50b"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "72d90fe0f5641b373744f75a21d4d14c"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "8d30370465d74fd549d76dd31adc4c0c"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "b7a5f4e40d5ebaf5f6c46a3d4355c817"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "0f5e6f2584649a1b7386d94e3dc60f91"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "2e81881e92061ad4eb29025ffdc129c7"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "2c67bdc08c8784f2114c2039270b9766"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "63fa5841a21e2c13f1e1a8e2d4ea3380"}); 
return tests.toArray(new Object[][]{}); } @@ -103,13 +103,13 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "9b2914cf121c411aad3a65cfdc98c1d4"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "259110565155a3a27ab3e98113bedef8"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "8058ce63339701d33425769217bffed1"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "3abdde9d9ddbe9ec7cef28c65844a409"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "f5f0cef38a529e1542a8fd8bcce7ab1e"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "3ae2c7e570855f6d6ca58ddd1089a970"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "22e03f01e91177011ac028d2347751ba"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "cb3f16bc10e1cc75f2093bec92145d18"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "63ff771eed3e62340c8938b4963d0add"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "1122a0b3849f42d1c4a654f93b660e1b"}); - final String NA12878bandedResolutionMD5 = "0f848ee7240ca48827bdfb85b455c1ad"; + final String NA12878bandedResolutionMD5 = "8d4a51af32cd13ba4b3e33dd00c58398"; tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878", 
ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); @@ -126,12 +126,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "1f99396292974df55ef6497be79b1917"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "eabb44785fd4a84ade3c674a0bda16d9"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "904feadac0e4a2a0c6b2cc7e55718f3b"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "b430a401310b3812346d7496f9c62011"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "6e481747c50734b7fb0c4de39405044f"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "cff5e49456d5ecc0dafd31cd014def4f"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "0c87e26fdd7ab5629eb33f36833e3607"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90b25f3050435c9e67aa0ee325c24167"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "5f329540dc5c4556ab029d0e2cfcabcb"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "6ad7855dbf6dda2060aa93a3ee010b3e"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "a0be095ed902a8acdb80fb56ca4e8fb4"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "8123d8b68b6fa77ef084f292e191622a"}); return tests.toArray(new Object[][]{}); } @@ -144,12 +144,12 @@ public class 
HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50156c819a0096fa22ed1b9749affecc"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d91aace2100dc040659d77f366053a2e"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d86dc51757e69aa3f2608fbbfaa90069"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "27c73b8b1ec384a880bf60daf0b0f80e"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "a6c70203e43d62b42c4b751fe9018410"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "7ebbbcab78d090d70a24819093812748"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "0820ae1d19ba0a2da25737ded8e2c96f"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "70ee4e60d9f86b63aaab09075a71ddd3"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "700d79df3b0b481444e81471204e242e"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "09d1ae38586465b98dea0a0e432a7146"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "228e1d2ec2e729a5f79c37f3f2557708"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "2fc7020457dde4439b4133c098d9ab9b"}); return tests.toArray(new Object[][]{}); } @@ -275,7 +275,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testWrongGVCFNonVariantRecordOrderBugFix() { final String commandLine = 
String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1c3d390b467a9b0e1e307419796142fd")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("6facd3d2cf9f52877182d627cef1c872")); spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); } @@ -292,7 +292,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testNoCallGVCFMissingPLsBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("c5a017f1cbd60219506be76f30fc4468")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("d55ccf214fd5095e6d586c1547cb1a7a")); spec.disableShadowBCF(); executeTest("testNoCallGVCFMissingPLsBugFix", spec); } @@ -325,7 +325,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testAlleleSpecificAnnotations() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard 
--disableDithering", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("169c2145c9981b8a1bb1d64a6d776d66")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("6f6b2fa85cd1bae7f8f72e144fe56c96")); spec.disableShadowBCF(); executeTest(" testAlleleSpecificAnnotations", spec); } @@ -334,7 +334,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testASMQMateRankSumAnnotation() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A AS_MQMateRankSumTest --disableDithering", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("75b78770469c5aaa73f1c95db8fda574")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9613ec1ec93547cfb0651673e914bee4")); spec.disableShadowBCF(); executeTest(" testASMQMateRankSumAnnotation", spec); } @@ -343,7 +343,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testASInsertSizeRankSum() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering -A AS_InsertSizeRankSum", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, 
privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fd0fcf44b1121c15e9d38de4171002c")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("a8765c11b9130c815aae4e06c1f90e45")); spec.disableShadowBCF(); executeTest(" testASInsertSizeRankSum", spec); } @@ -352,7 +352,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testHaplotypeCallerMultiAllelicNonRef() { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A StrandAlleleCountsBySample", b37KGReference, privateTestDir + "multiallelic-nonref.bam", "2:47641259-47641859", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("2bd863f0b54b9c9a5014097cd3d3f61a")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("87f9203f5ac637080469052b702c61c7")); spec.disableShadowBCF(); executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec); } @@ -361,7 +361,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testHaplotypeCallerMaxNumPLValues() { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 70", b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 
Arrays.asList("7ea93210277fa4b590790a81c4b3994b")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("a4b5c40b1993573c5efd992f3f0db8a9")); spec.disableShadowBCF(); executeTest("testHaplotypeCallerMaxNumPLValues", spec); } @@ -378,7 +378,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 30 -log %s", b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER, logFileName); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9d69cb9dc67e0d0ee9863767428e6841")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("19f5398e4013c06b52c0085fe0b3469e")); spec.disableShadowBCF(); executeTest("testHaplotypeCallerMaxNumPLValuesExceededWithWarnLogLevel", spec); // Make sure the "Maximum allowed number of PLs exceeded" messages are in the log @@ -403,7 +403,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 30 -log %s", b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER, logFileName); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9d69cb9dc67e0d0ee9863767428e6841")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 
Arrays.asList("19f5398e4013c06b52c0085fe0b3469e")); spec.disableShadowBCF(); executeTest("testHaplotypeCallerMaxNumPLValuesExceededWithDebugLogLevel", spec); // Make sure the "Maximum allowed number of PLs exceeded" messages are in the log diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index e02ea56b6..19d4a675b 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -106,92 +106,92 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeBAMOutFlags() throws IOException { - HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "1ad8934dc0ea624ffeb89d3e877176b2", "6a81bbefa6c4ed7a6b8d2c3e0e5a4756"); + HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "08943fb76d1cd5b5b8815e3991754911", "6a81bbefa6c4ed7a6b8d2c3e0e5a4756"); } @Test public void testHaplotypeCallerMultiSample() throws IOException { - HCTest(CEUTRIO_BAM, "", "5d9b06d7bf88bb18aa3e8fa6322dff89"); + HCTest(CEUTRIO_BAM, "", "ad472fbd63864caacf5bc018dcae9df9"); } @Test public void testHaplotypeCallerSingleSample() throws IOException { - HCTest(NA12878_BAM, "", "b35ba8b7ec8ddfe8268b82e77709c9ca"); + HCTest(NA12878_BAM, "", "c04293cb8466a1a217bce4ef419bdabe"); } @Test public void testHaplotypeCallerMultiSampleHaploid() throws IOException { - HCTest(CEUTRIO_BAM, "-ploidy 1", "84bd5a2ac22aa47b4436ecaf656b73b1"); + HCTest(CEUTRIO_BAM, "-ploidy 1", "7ee30877f0153257afdc691c638e7684"); } @Test public void testHaplotypeCallerSingleSampleHaploid() throws IOException { - HCTest(NA12878_BAM, 
"-ploidy 1", "6e9855e22ed78a60eaaf0a06ed967b77"); + HCTest(NA12878_BAM, "-ploidy 1", "9ca97bb743a369a1abb1e61168d63d69"); } @Test public void testHaplotypeCallerSingleSampleTetraploid() throws IOException { - HCTest(NA12878_BAM, "-ploidy 4", "894b56fa3afd35e240a7db394da7b4ef"); + HCTest(NA12878_BAM, "-ploidy 4", "5098645e8b570bc4521570654fa91806"); } @Test public void testHaplotypeCallerMinBaseQuality() throws IOException { - HCTest(NA12878_BAM, "-mbq 15", "b35ba8b7ec8ddfe8268b82e77709c9ca"); + HCTest(NA12878_BAM, "-mbq 15", "c04293cb8466a1a217bce4ef419bdabe"); } @Test public void testHaplotypeCallerMinBaseQualityHaploid() throws IOException { - HCTest(NA12878_BAM, "-mbq 15 -ploidy 1", "6e9855e22ed78a60eaaf0a06ed967b77"); + HCTest(NA12878_BAM, "-mbq 15 -ploidy 1", "9ca97bb743a369a1abb1e61168d63d69"); } @Test public void testHaplotypeCallerMinBaseQualityTetraploid() throws IOException { - HCTest(NA12878_BAM, "-mbq 15 -ploidy 4", "894b56fa3afd35e240a7db394da7b4ef"); + HCTest(NA12878_BAM, "-mbq 15 -ploidy 4", "5098645e8b570bc4521570654fa91806"); } @Test public void testHaplotypeCallerGraphBasedSingleSample() throws IOException { - HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "94dc1ca52ebe24ab118b13558e667253"); + HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "ba0dc5f416d69558cb5dd3e0a0a5a084"); } @Test public void testHaplotypeCallerGraphBasedMultiSampleHaploid() throws IOException { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "8aae4fe36d14158ea2cbf5db151f39a1"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "129bca18bb9eec23004b2d28aa541de2"); } @Test public void testHaplotypeCallerGraphBasedMultiSample() throws IOException { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "ea4365e2dd6ae627d6347f2a53ceeaef"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "2b89c9e102a049e223bc0d91156a08a3"); } @Test public void testHaplotypeCallerSingleSampleWithDbsnp() throws IOException { - HCTest(NA12878_BAM, "-D " + 
b37dbSNP132, "626abc84e03d4488b781cfc5a5f50290"); + HCTest(NA12878_BAM, "-D " + b37dbSNP132, "ff8e142f491b06e17e64e3a5d59737a7"); } @Test public void testHaplotypeCallerMultiSampleGGA() throws IOException { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" + " -isr INTERSECTION -L " + GGA_INTERVALS_FILE, - "03a9fa3a1c7163c68807d8c713d8040c"); + "6d3cea3ee76b6eba14c1dfe230cff96b"); } @Test public void testHaplotypeCallerMultiSampleGGAHaploid() throws IOException { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -isr INTERSECTION -L 20:10080000-10100000", - "6120365c05e74a8e1e1c44d739cbf8cd"); + "e60065998227f4ba8002165fb1729a71"); } @Test public void testHaplotypeCallerMultiSampleGGATetraploid() throws IOException { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -isr INTERSECTION -L 20:10080000-10100000", - "a3a547a7bf7e488651733da08fd22049"); + "9315d146a66c7baf3b615eb480c54dc1"); } @Test public void testHaplotypeCallerInsertionOnEdgeOfContig() throws IOException { - HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "da1f6b9a7e5913910531b00f3b35ce06"); + HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "60e98012fbad5f429b3b2abc3a7aa454"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -202,7 +202,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "a048ea02eeb9610660e31f36e6114bf4"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "3625167f0e788d409c7eab1898d5eafe"); } private void HCTestNearbySmallIntervals(String bam, String args, String 
md5) { @@ -239,7 +239,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "796f4a44a29fc3c1a1461f90aef45846"); + HCTestNearbySmallIntervals(NA12878_BAM, "", "591a58f16104fbb83ccf81c97cef931a"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -249,14 +249,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("188ce2d74ee42f4187c7b41a01a193bb")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("eb79b4c0bf9142c955f0a4501e9e6d8f")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("034c6b151dfde537d5843f70880bf8a4")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8bddb7f343302ed20bc549df4b82825a")); executeTest("HCTestStructuralIndels: ", spec); } @@ -311,7 +311,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller 
--disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("5430b91902813cf64f5bb781b25d76c6")); + Arrays.asList("b56895e6d28ea0b9dadeecd0ff61687e")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -320,7 +320,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,100,000-11,000,000 -D " + b37dbSNP132 + " -L " + hg19Intervals + " -isr INTERSECTION", 1, - Arrays.asList("8b775d84ea981f1e8207fa69a92b5e1f")); + Arrays.asList("7b52164df8bf76d789836f990bd6066a")); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); } @@ -328,7 +328,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGSGraphBased() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("f6f5b7e9348fc6dccd63110de0371d78")); + Arrays.asList("096826325215f79fe70661d984ae45a4")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -337,7 +337,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering 
--pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 + " -L " + hg19Intervals + " -isr INTERSECTION", 1, - Arrays.asList("82cd0142146521fd659905737dc6192c")); + Arrays.asList("ff3b24412090ce7693d66d750ae84ac9")); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); } @@ -360,7 +360,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestAggressivePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,270,000-10,300,000", 1, - Arrays.asList("c43caebd09b69dbc9d1e4b5f5f449712")); + Arrays.asList("c2dab66ad3740320004874c83051bbfc")); executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec); } @@ -368,7 +368,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestConservativePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " " + ALWAYS_LOAD_VECTOR_HMM + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,270,000-10,300,000", 1, - Arrays.asList("54e7595f6d82a69566f4c0163045688d")); + Arrays.asList("a8ea15ac136042891434ccb0b3c3b686")); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); } @@ -397,7 +397,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testLackSensitivityDueToBadHaplotypeSelectionFix() { final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s 
-I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list"); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("8d44a2e1967e6e844f221960d6ea42bb")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5514cfbcf12954bb12c725b77eaac248")); spec.disableShadowBCF(); executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec); } @@ -406,7 +406,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testMissingKeyAlternativeHaplotypesBugFix() { final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s -L %s --no_cmdline_in_header ", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list"); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cf8912f84153b2357a4d1ddb361f786f")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e6d8c32585906122a6407cb40261d00d")); spec.disableShadowBCF(); executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec); } @@ -429,7 +429,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { // but please make sure that both outputs get the same variant, // alleles all with DBSNP ids // We test here that change in active region size does not have an effect in placement of indels. 
- final String md5 = "6121d05f96eca3b1dbe3a881d968b6c5"; + final String md5 = "87b687b5476eb38b11db6a156b4066c8"; final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList(md5)); executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec); final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList(md5)); @@ -483,12 +483,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerTandemRepeatAnnotator() throws IOException{ - HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A TandemRepeatAnnotator -XA MappingQualityZero -XA SpanningDeletions", "57eed4eec7b49efa1269fd8d58bde718"); + HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A TandemRepeatAnnotator -XA MappingQualityZero -XA SpanningDeletions", "34328c475325b7dfaa57ab5920478e0c"); } @Test public void testHBaseCountsBySample() throws IOException{ - HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A BaseCountsBySample", "5e5d064f2bb90e05e1ab28a087d8469d"); + HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A BaseCountsBySample", "f5ad4e03c0faaa806ee6ae536af8a479"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 1a66f0467..6a1aea7ba 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -88,7 +88,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("af979bcb353edda8dee2127605c71daf","3934b5de598024496a5de0ec35bde5b0") + 
Arrays.asList("af979bcb353edda8dee2127605c71daf","7ed46f6aa4565b5012acde17119a1a31") ); executeTest("testTrueNegativeMV", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 7539b23e2..0638804d6 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -151,7 +151,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { " -o %s" + " --no_cmdline_in_header", 1, - Arrays.asList("b251b4378fda9784f2175c7e3d80f032")); + Arrays.asList("0b47205ebdf2cf752ad91ff49e82c401")); executeTest("Do not merge unphased SNPs", spec); } @@ -166,7 +166,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { " -o %s" + " --no_cmdline_in_header", 1, - Arrays.asList("630816da701b9ea8674c23c91fa61bec")); + Arrays.asList("1419d9292d6e5db6282204826cb53bf7")); executeTest("Merge SNPs if on the same read", spec); } @@ -184,7 +184,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { " -o %s" + " --no_cmdline_in_header", 1, - Arrays.asList("b334de5ad35665f0d65034197ac05b32")); + Arrays.asList("8a3cd58dd6b1d04ab8c699f4e328dff4")); executeTest("Don't merge symbolic SPAN_DEL (*) alleles (into the nonexistent ** MNP).", spec); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index f94dbca8e..1e4d0681a 100644 --- 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -197,7 +197,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", "3ad7f55fb3b072f373cbce0b32b66df4", // tranches "48c21792897bdbb9adcc64886d03c5d1", // recal file - "0bd2067f831e5388b790e7bb7f45d98f"); // cut VCF + "93a6e7ab6cbd6ae24a5b2a6f0fd29d92"); // cut VCF @DataProvider(name = "VRBCFTest") public Object[][] createVRBCFTest() { @@ -359,7 +359,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -tranchesFile " + privateTestDir + "VQSR.AStest.snps.tranches" + " -recalFile " + privateTestDir + "VQSR.AStest.snps.recal"; - final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("cd087f2824fac5fe04c6c50cbdab1fab")); + final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("dbcf0cb5c2a0eb2312e6ca7d4e3aeeda")); final List outputFiles = executeTest("testApplyRecalibrationAlleleSpecificSNPmode", spec).getFirst(); setPDFsForDeletion(outputFiles); } @@ -377,7 +377,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -tranchesFile " + privateTestDir + "VQSR.AStest.indels.tranches" + " -recalFile " + privateTestDir + "VQSR.AStest.indels.recal"; - final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("b0d14f1c0647f46819018cd378036024")); + final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("38d4b8e89dbf8acb6a36dfa1bb55c54c")); final List outputFiles = executeTest("testApplyRecalibrationAlleleSpecificINDELmode", spec).getFirst(); setPDFsForDeletion(outputFiles); } diff --git 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java index 525320be4..9c2594ed0 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java @@ -74,7 +74,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest { " -L 20:10,000,000-10,001,432" + " -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf", 1, - Arrays.asList("3e60ca3e04fecf5d6004c08d6f7503ca")); + Arrays.asList("43fa27382e654081af69ea05bd26e281")); executeTest("testUsingDiscoveredAF", spec); } @@ -89,7 +89,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest { " -L 20:10,000,000-10,001,432" + " -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf", 1, - Arrays.asList("1cca249ebc2599c7f24210d4f3204049")); + Arrays.asList("d63893f530fb749505ec685a5c57ff69")); executeTest("testMissingPriors", spec); } @@ -103,7 +103,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest { " -V " + validationDataLocation + "NA12878.Jan2013.haplotypeCaller.subset.indels.vcf" + " -supporting " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf", 1, - Arrays.asList("cc59ceb6dab620a353edf03ef14090f1")); + Arrays.asList("a5d7bcad5a2a194441d00eb6574b8300")); executeTest("testInputINDELs", spec); } @@ -117,7 +117,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest { " -V " + CEUtrioTest + " -supporting " + CEUtrioPopPriorsTest, 1, - 
Arrays.asList("c7d35ce5f3675528fc484baa1c5df7b4")); + Arrays.asList("98bf63fd2ae3fa1cc42e66fa6b4f50f5")); executeTest("testFamilyPriors", spec); } @@ -131,7 +131,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest { " -V " + getThreeMemberNonTrioTest + " -skipPop", 1, - Arrays.asList("c523b99da1f7e0c0ea4090b916ae7379")); + Arrays.asList("e71420099fcfc824f1cf92ff2010b69e")); executeTest("testFamilyPriors", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java index dcd6357d5..97ba4fc07 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -100,7 +100,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" + " -L " + privateTestDir + "tetraploid-gvcfs.intervals", 1, - Arrays.asList("f3538bcaf27f5e8b036d4c1f8734e4c2")); + Arrays.asList("787aca81ad51cd40267f92f3309fa47e")); executeTest("combineSingleSamplePipelineGVCF", spec); } @@ -112,7 +112,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" + " -L " + privateTestDir + "tetraploid-gvcfs.intervals", 1, - Arrays.asList("32cd060d6662bdc835f70a848d48fb0e")); + Arrays.asList("d52f018643ffed072f43dfd4d33ca082")); executeTest("combineSingleSamplePipelineGVCF", spec); } @@ -190,7 +190,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { @Test public void testMD5s() throws Exception { final String cmd = baseTestString(" -L 1:69485-69791"); - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, 
Arrays.asList("82fa951ce741451267dbf30335e0f71d")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("021ec495e70044039d092ebd5ef4b82a")); spec.disableShadowBCF(); executeTest("testMD5s", spec); } @@ -198,7 +198,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { @Test public void testBasepairResolutionOutput() throws Exception { final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution"); - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("8e1bfa842d53f86d46b2166574c0c66c")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d5bc938a26cd197d9b1c80cb8dfefbba")); spec.disableShadowBCF(); executeTest("testBasepairResolutionOutput", spec); } @@ -206,7 +206,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { @Test public void testBreakBlocks() throws Exception { final String cmd = baseTestString(" -L 1:69485-69791 --breakBandsAtMultiplesOf 5"); - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("76d78f83c7db247ce12087d6118dc5df")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("31c2f26e6b172a957a3f504734df5eff")); spec.disableShadowBCF(); executeTest("testBreakBlocks", spec); } @@ -217,7 +217,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.1.g.vcf -V " + privateTestDir + "spanningDel.2.g.vcf", 1, - Arrays.asList("cb46cb8fd6506ab3e80bd50f9231643c")); + Arrays.asList("097160606e65547722a1726e031529ec")); spec.disableShadowBCF(); executeTest("testSpanningDeletions", spec); } @@ -228,7 +228,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.many.g.vcf", 1, - Arrays.asList("5aeb14d64b9103b62d053aeb6158e5de")); + Arrays.asList("f517c2d361defeaac245916c835811d5")); 
spec.disableShadowBCF(); executeTest("testMultipleSpanningDeletionsForOneSample", spec); } @@ -239,7 +239,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.many.haploid.g.vcf", 1, - Arrays.asList("3fca32a67922bf30f72fe066fe7159fe")); + Arrays.asList("0a8d217b2833070dcaa3bbc1e7602b1c")); spec.disableShadowBCF(); executeTest("testMultipleSpanningDeletionsForOneSampleHaploid", spec); } @@ -250,7 +250,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.many.tetraploid.g.vcf", 1, - Arrays.asList("6891eaaef2991d7f967c7876fd2e4f5c")); + Arrays.asList("3fd437ad1f9e18303fec517653a30b6d")); spec.disableShadowBCF(); executeTest("testMultipleSpanningDeletionsForOneSampleTetraploid", spec); } @@ -259,7 +259,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { public void testWrongReferenceBaseBugFix() throws Exception { final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf" + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input2.vcf") + " -o %s --no_cmdline_in_header"); - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("224c3d6e06f7ce4bdb55411b2e376577")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("129879954e5b453d30326f100cbc2e83")); spec.disableShadowBCF(); executeTest("testWrongReferenceBaseBugFix",spec); @@ -268,7 +268,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { @Test public void testBasepairResolutionInput() throws Exception { final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, 
Arrays.asList("d3244d99e9423b45099a220f19fac516")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d5b4abe639081e6bf9c8970ca8405dbe")); spec.disableShadowBCF(); executeTest("testBasepairResolutionInput", spec); } @@ -277,7 +277,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { public void testAlleleSpecificAnnotations() throws Exception { final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -V " + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("cbb2571eeb95e661acee8f9e1d1cbfbd")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("f6a7fa62c33de963c55262820effe44a")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations", spec); } @@ -286,7 +286,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { public void testASMateRankSumAnnotation() throws Exception { final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest -V " + privateTestDir + "NA12878.AS.MateRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.MateRankSum.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("005bf0087480cce364c20d67aab5ad59")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("2c264ed0057c93276c647f55998c4f25")); spec.disableShadowBCF(); executeTest("testASMateRankSumAnnotation", spec); } @@ -295,7 +295,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { public void testASInsertSizeRankSumAnnotation() throws Exception { final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -V " + privateTestDir + "NA12878.AS.InsertSizeRankSum.chr20snippet.g.vcf -V " + privateTestDir + 
"NA12891.AS.InsertSizeRankSum.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("309ddf2a8b0c431cdabec8dafa4ab3a0")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("ff5ca958e81e406cfe010d5649b5c0d1")); spec.disableShadowBCF(); executeTest("testASInsertSizeRankSumAnnotation", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java index 7f0888c53..eddf113ac 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -207,7 +207,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { + " -R " + b37KGReference + " -V " + privateTestDir + "combineVariantsLeavesRecordsUnfiltered.vcf", 1, - Arrays.asList("11aab642395645589e48edee1fb179e2")); + Arrays.asList("0f221847e76521250de1abcba535e49c")); cvExecuteTest("combineLeavesUnfilteredRecordsUnfiltered: ", spec, false); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index e0de90a10..59d146167 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -84,7 +84,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { WalkerTestSpec spec = 
new WalkerTestSpec( baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf", b37KGReference), 1, - Arrays.asList("beebc536d20d69a45c6f56fbb041c9bc")); + Arrays.asList("8d9788afd0de26bd9d9e55dd0e9fc3ed")); executeTest("testUpdatePGT", spec); } @@ -94,7 +94,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf -A StrandAlleleCountsBySample -log " + logFileName, b37KGReference), 1, - Arrays.asList("527d513874a787821daf54b8fc8a33e3")); + Arrays.asList("5dd4698da963a423446bb1e183eb75aa")); executeTest("testUpdatePGTStrandAlleleCountsBySample", spec); File file = new File(logFileName); @@ -109,7 +109,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-11,000,000", b37KGReference), 1, - Arrays.asList("24ea3dd1f13b6636cf51aea7d5a4ce06")); + Arrays.asList("d3fab0d45f0054b71aa1d031876a4bbb")); executeTest("combineSingleSamplePipelineGVCF", spec); } @@ -121,7 +121,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" + " -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference), 1, - Arrays.asList("3708b0d993a683e8c7421f60d7123cf4")); + Arrays.asList("64fa89f20ee25df21ad20ce4ada7e7ad")); executeTest("combineSingleSamplePipelineGVCF", spec); } @@ -133,7 +133,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" + " -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference), 1, - Arrays.asList("7d7a65ea549fcd30553766ad4333f9e2")); + Arrays.asList("b1d93f4cd93093c208be2c9842f38d12")); executeTest("combineSingleSamplePipelineGVCF", spec); } @@ -145,7 +145,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + 
"combine.single.sample.pipeline.3.vcf" + " --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference), 1, - Arrays.asList("65497a0711a33d131a165c9cfc8bc3cf")); + Arrays.asList("c2f30f25ba4a84e38c04aa49b95694e8")); executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec); } @@ -158,7 +158,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-20,000,000", b37KGReference), 1, - Arrays.asList("70dcdc1a111ebd048d32e7e61a9b7052")); + Arrays.asList("54a86ade63b84c87ff4e537e276987fc")); executeTest("combineSingleSamplePipelineGVCFHierarchical", spec); } @@ -170,7 +170,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference), 1, - Arrays.asList("f911428f0f3bfba9b1d96a6b5ace3dee")); + Arrays.asList("1a2728e7295a6ffca6c2ba5a01af3593")); executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec); } @@ -180,7 +180,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference + " -V " + privateTestDir + "gvcfExample1.vcf", 1, - Arrays.asList("84ad9c6e7582dbcc693deacdeff5984a")); + Arrays.asList("9ff344a5ab87a2c3b128e435e2e86db0")); executeTest("testJustOneSample", spec); } @@ -191,14 +191,14 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V " + privateTestDir + "gvcfExample1.vcf" + " -V " + privateTestDir + "gvcfExample2.vcf", 1, - Arrays.asList("54b76f721811c9c7958e849c40b8d4e2")); + Arrays.asList("0c07ed795562ea96eab427e63a970384")); executeTest("testSamplesWithDifferentLs", spec); } @Test(enabled = true) public void testNoPLsException() { // Test with input files with (1) 0/0 and (2) ./. 
- final String md5 = "276159213ddaaf82cd0e640cc7a77fc4"; + final String md5 = "2f3d71272fdac19ac861cc7159edfb08"; WalkerTestSpec spec1 = new WalkerTestSpec( "-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference + " --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf", @@ -218,7 +218,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseBPResolutionString("-nda"), 1, - Arrays.asList("3c9c84b78e7d3b358c8cb7e29a2d302b")); + Arrays.asList("ce064429e6cbcaa956d52ef22e102f2f")); executeTest("testNDA", spec); } @@ -227,7 +227,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseBPResolutionString("-maxAltAlleles 1"), 1, - Arrays.asList("87ed70b8f910b662aa67e8ed1b2ed174")); + Arrays.asList("1f1c0605fc8a500c9646132e0d7420a0")); executeTest("testMaxAltAlleles", spec); } @@ -236,7 +236,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseBPResolutionString("-stand_call_conf 300 -stand_emit_conf 100"), 1, - Arrays.asList("1d98fb542a39090db3a8f89ae232e1e5")); + Arrays.asList("0283e784ed49bc2dce32a26137c43409")); executeTest("testStandardConf", spec); } @@ -259,7 +259,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { final WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -V " + gVCF.getAbsolutePath(), b37KGReference), 1, - Arrays.asList("d8eb4a64a2ae7e7dad1efc4fe8b4b3ed")); + Arrays.asList("34d76dc8dabc6a97e6d8f5365d7531e5")); spec.disableShadowBCF(); //TODO: Remove when BaseTest.assertAttributesEquals() works with SAC executeTest("testStrandAlleleCountsBySample", spec); } @@ -276,7 +276,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:combined2 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" + " --uniquifySamples", b37KGReference), 1, - 
Arrays.asList("1c552a9d76a1bbba4b92a94532f54a1a")); + Arrays.asList("16d7374502fa3cf99863d15d31b5ef86")); executeTest("testUniquifiedSamples", spec); } @@ -448,7 +448,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { } - private static final String simpleSpanningDeletionsMD5 = "85c14341171548997e4503f7b5a9253f"; + private static final String simpleSpanningDeletionsMD5 = "4629c2f02ff58c111828269091cded82"; @Test(enabled = true) public void testSpanningDeletionsMD5() { @@ -478,7 +478,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.1.g.vcf -V " + privateTestDir + "spanningDel.2.g.vcf -V " + privateTestDir + "spanningDel.3.g.vcf", 1, - Arrays.asList("6c5761ffb7a0c5252f3f5048d52f500e")); + Arrays.asList("7fe5364565585d31a0bb6a9dfa4a01d4")); spec.disableShadowBCF(); executeTest("testMultipleSpanningDeletionsMD5", spec); } @@ -489,7 +489,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.delOnly.g.vcf", 1, - Arrays.asList("c8414446dbac9a3639bfc2f347cc2c1d")); + Arrays.asList("057f9368f380bf3c12b539a749deac61")); spec.disableShadowBCF(); executeTest("testSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles", spec); } @@ -500,7 +500,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.depr.delOnly.g.vcf", 1, - Arrays.asList("d1d8c3db65905b4ef79f960f9565ca94")); + Arrays.asList("e8f5186718050fe0784416e41425563f")); spec.disableShadowBCF(); executeTest("testSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles", spec); } @@ -523,7 +523,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + 
privateTestDir + "ad-bug-input.vcf", 1, - Arrays.asList("a8dcb9024e3701449ec2a1fe75e0d057")); + Arrays.asList("5ed5cb6aac68aa8943dc45b8b90eb508")); spec.disableShadowBCF(); executeTest("testBadADPropagationHaploidBugTest", spec); } @@ -534,7 +534,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "261_S01_raw_variants_gvcf.vcf", 1, - Arrays.asList("01a9eee63801d46de8fcf1d6f80f8359")); + Arrays.asList("37eec6aedd26aa3430a15d90d7f8a011")); spec.disableShadowBCF(); executeTest("testSAC", spec); } @@ -545,7 +545,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + " -V " + privateTestDir + "tetraploid-multisample-sac.g.vcf", 1, - Arrays.asList("8c79a16f6a524d49ff402b8c0b39b396")); + Arrays.asList("76532a74d4ba49f23362c149ad31a229")); spec.disableShadowBCF(); executeTest("testSACMultisampleTetraploid", spec); } @@ -556,8 +556,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { baseTestString(" -V " + privateTestDir + "set.zero.RGQs.no.call.sample1.g.vcf" + " -V " + privateTestDir + "set.zero.RGQs.no.call.sample2.g.vcf" + " -L chr16:1279274-1279874 -allSites", hg19ReferenceWithChrPrefixInChromosomeNames), - 1, - Arrays.asList("6505d305441b4e6ff975a40ef5d352b5")); + Arrays.asList("b7106be316e43ca04204b78038f65c9f")); executeTest("testSetZeroRGQsToNoCall", spec); } @@ -565,7 +564,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testAlleleSpecificAnnotations() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V " + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d4dd179d8a53c4a550d4a22cc9ef1aa8")); + final 
WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("89712a9fe5b6db16be2257be2b0b4759")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations", spec); } @@ -574,7 +573,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testASMateRankSumAnnotation() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest --disableDithering -V " + privateTestDir + "NA12878.AS.MateRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.MateRankSum.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("515771fa88b0c4bc2a40e9c233806fb1")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("8e41a139600ab58a67910cdc60053726")); spec.disableShadowBCF(); executeTest("testASMateRankSumAnnotation", spec); } @@ -583,7 +582,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testASInsertSizeRankSumAnnotation() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V " + privateTestDir + "NA12878.AS.InsertSizeRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.InsertSizeRankSum.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c8a096f2533d06c28ec115a24ffb7ca0")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("b1334fbfbf21934aac1c1eda0b5062d5")); spec.disableShadowBCF(); executeTest("testASInsertSizeRankSumAnnotation", spec); } @@ -596,7 +595,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testAlleleSpecificAnnotations_oneSample() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V " + privateTestDir + "NA12878.AS.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new 
WalkerTestSpec(cmd, 1, Arrays.asList("558486182a84d0a274534fc00fee326e")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("7d86260e91fe74588e01339a2064b59c")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations_oneSample", spec); } @@ -606,7 +605,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testAlleleSpecificAnnotations_elevenSamples() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V " + privateTestDir + "multiSamples.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("84b5723c9c8eeb5549aaceb4fd4053b5")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a889fe6775575513e84905b4fa98f8b3")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations_elevenSamples", spec); } @@ -615,7 +614,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testMonomorphicVCwithAlt() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -G AS_Standard -o %s --no_cmdline_in_header --disableDithering -V " + privateTestDir + "monomorphicGVCwithAlt.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("ddf0a386c007b797fce3eb4ddc204216")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("8bf329a40637623515972dcc0e09a49e")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations", spec); } @@ -624,7 +623,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testFractionInformativeReads() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -G AS_Standard -o %s --no_cmdline_in_header -A FractionInformativeReads --disableDithering -V " + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, 
Collections.singletonList("70fda103f68709e32d691393e9228a9b")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("b338bf1807791b23255b8cb1947c01b2")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java index 368f97751..1bcbc50e3 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariantsIntegrationTest.java @@ -136,7 +136,7 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "forHardLeftAlignVariantsTest.vcf --no_cmdline_in_header -split", 1, - Arrays.asList("1158324223c312e4767fcefe9dde2fe1")); + Arrays.asList("58c09033814d41fab5da4c152eab7fa2")); executeTest("test left alignment with hard multiple alleles", spec); } @@ -145,7 +145,7 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "forHardLeftAlignVariantsTest.vcf --dontTrimAlleles --no_cmdline_in_header -split", 1, - Arrays.asList("923cb1d06e2d0d9a98cda8f8f637d108")); + Arrays.asList("6d22a6b78d24ee2329b91f27a91751cf")); executeTest("test left alignment with hard multiple alleles, don't trim", spec); } @@ -154,7 +154,7 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new 
WalkerTestSpec( "-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt.vcf --no_cmdline_in_header -split", 1, - Arrays.asList("f7be485b0cc7b8db75b7139f31c0708d")); + Arrays.asList("0acb354a2c28e250ef2853c1e0a0fafb")); executeTest("test left alignment of multiple alleles with genoptypes", spec); } @@ -163,7 +163,7 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt-het-noref.vcf --no_cmdline_in_header -split", 1, - Arrays.asList("cd686641ab7fe491a0acc7ff07535192")); + Arrays.asList("a335913de938082061d6bbb863626ee2")); executeTest("test left alignment of multiple alleles with genoptypes, including het-noref", spec); } @@ -172,7 +172,7 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt.vcf --no_cmdline_in_header -split -keepOriginalAC", 1, - Arrays.asList("5da4ca9705fbb63446c1d317f7b6cae0")); + Arrays.asList("67657ee509665fd0d7a2c9024981ba92")); executeTest("test left alignment of multiple alleles with genoptypes, keep original AC", spec); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java index 59f1ea955..b434d97d3 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -369,7 +369,7 @@ public class 
SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants --keepOriginalDP -R " + b37KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("ce5168e2eadee2550188892b1ea444be") + Arrays.asList("9ad02f0df308eecb0634b3cd386956e9") ); executeTest("testKeepOriginalDP--" + testFile, spec); @@ -395,7 +395,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("cc33eb41a821d9aebdfb99d309854db0") + Arrays.asList("c78a65b41edbdd386211042e8f65220b") ); executeTest("testNoGTs--" + testFile, spec); @@ -408,7 +408,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants -trimAlternates --variant " + testfile, 1, - Arrays.asList("b86340de516d6c37cc3a2eeb3bfb4821") + Arrays.asList("c963ca96d543ecccab8055295d2a4dab") ); executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec); } @@ -420,7 +420,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { "-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header " + "-sn SAMPLE-CC -sn SAMPLE-CT -sn SAMPLE-CA --excludeNonVariants --variant " + testfile, 1, - Arrays.asList("7807bb2bf8c70963f65a97f30c8deb39") + Arrays.asList("7f5484a74ab648608228eafea96f8ad3") ); executeTest("test multi allelic annotation ordering --" + testfile, spec); } @@ -471,19 +471,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { final String testFile = privateTestDir + "forHardLeftAlignVariantsTest.vcf"; final String cmd = "-T SelectVariants -R " + b37KGReference + " -sn NA12878 -env -trimAlternates " + "-V " + testFile + " -o %s 
--no_cmdline_in_header"; - WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("354cd7aa25791465d0f4c7d53b81a3a3")); + WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("21d83006b012eeea84c6612976348d3c")); executeTest("testAlleleTrimming", spec); } @DataProvider(name="unusedAlleleTrimmingProvider") public Object[][] unusedAlleleTrimmingProvider() { return new Object[][] { - { privateTestDir+"forHardLeftAlignVariantsTest.vcf", "-trimAlternates", "354cd7aa25791465d0f4c7d53b81a3a3"}, - { privateTestDir+"forHardLeftAlignVariantsTest.vcf", "", "5e81af1825aa207b0a352f5eeb5db700"}, - { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT", "339cca608ff18a355abc629bca448043"}, - { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -env", "3e8e2ebbc576ceee717a7ce80e23dd35"}, - { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -trimAlternates", "2cbf4c8c991777254145aacf19cba508"}, - { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -env -trimAlternates", "14538e17d5aca22c655c42e130f8cebc"} + { privateTestDir+"forHardLeftAlignVariantsTest.vcf", "-trimAlternates", "21d83006b012eeea84c6612976348d3c"}, + { privateTestDir+"forHardLeftAlignVariantsTest.vcf", "", "8fc0c8a7de6bb579e1534b936f844090"}, + { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT", "595392b623b0869f1d87e46edf3de122"}, + { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -env", "bba873b8eeeb4c01199140c37deb6f6b"}, + { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -trimAlternates", "93858f706dac876a8581f6b89bb85cc5"}, + { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -env -trimAlternates", "5d831401367eb8b0ab49ffa34e0dd278"} }; } @@ -655,7 +655,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R 
"+b37KGReference + " -mv -mvq 0 --variant " + testFile + " -ped " + pedFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("f7fe7cbc84b3f2dfadcc40e19eeeb1f9")); + Arrays.asList("c68779547b28dfef39792598df8a93e9")); executeTest("testMendelianViolationSelection--" + testFile, spec); } @@ -668,7 +668,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R "+b37KGReference + " -mv -mvq 0 -invMv --variant " + testFile + " -ped " + pedFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("865418a69ee57be4432f248c027e6aff")); + Arrays.asList("0ac6fda76228080bdb39c0e698440718")); executeTest("testInvertMendelianViolationSelection--" + testFile, spec); } @@ -758,7 +758,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -trimAlternates", 1, - Arrays.asList("c9d297e7758bf5681270029401cc97c2")); + Arrays.asList("d3bb7ea37a7c9dce8b34bf2020961619")); spec.disableShadowBCF(); executeTest("testSACSimpleDiploid", spec); } @@ -770,7 +770,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates", 1, - Arrays.asList("a8c23f4d6f93806a34d432dd2c7a0449")); + Arrays.asList("67a92b4d4174ff41f6f88ddf5ab6e422")); spec.disableShadowBCF(); executeTest("testSACDiploid", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsParallelIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsParallelIntegrationTest.java index 396805970..45677ffee 100644 --- 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsParallelIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsParallelIntegrationTest.java @@ -95,12 +95,12 @@ public class SelectVariantsParallelIntegrationTest extends WalkerTest { { // new tests on b37 using testdir VCF final String testfile = privateTestDir + "NA12878.hg19.example1.vcf"; final String args = "-select 'DP > 30' -V " + testfile; - new ParallelSelectTestProvider(b37KGReference, args, "b899cebdd30e6641437489b746301797", nt); + new ParallelSelectTestProvider(b37KGReference, args, "64f9258e9e3024b6361abbeeeefafee9", nt); } { // AD and PL decoding race condition final String testfile = privateTestDir + "race_condition.vcf"; final String args = "-env -trimAlternates -sn SAMPLE -L 1:1-10,000,000 -V " + testfile; - new ParallelSelectTestProvider(b37KGReference, args, "ace613ed2e4929f448d30d85110d6ced", nt); + new ParallelSelectTestProvider(b37KGReference, args, "f289f22aacf1a5638a9fb6b32c5cf6fb", nt); } } diff --git a/public/external-example/pom.xml b/public/external-example/pom.xml index 17919f365..9aa973f0a 100644 --- a/public/external-example/pom.xml +++ b/public/external-example/pom.xml @@ -136,7 +136,7 @@ org.apache.maven.plugins maven-shade-plugin - 2.1 + 2.4.3 ${gatk.shade.phase} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java index df03b0b33..aa04e905f 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java @@ -758,8 +758,8 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { @DataProvider(name = "vcfFeaturesData") public 
Object[][] getVCFFeaturesData() { return new Object[][]{ - {"--sites_only", "6ef742ee6d9bcbc7b23f928c0e8a1d0e"}, - {"--bcf", "cdea454ac0af9e9228147f9c2ed1e0a6"} + {"--sites_only", "99c07e55fc44694087af6a7d4795e1be"}, + {"--bcf", "b300dd7a1300c55640d9f2e42520d086"} }; } @@ -775,8 +775,8 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { @DataProvider(name = "vcfFormatHandlingData") public Object[][] getVCFFormatHandlingData() { return new Object[][]{ - {true, "870f39e19ec89c8a09f7eca0f5c4bcb9"}, - {false, "baf9a1755d3b4e0ed25b03233e99ca91"} + {true, "7517264dd6eb0c1ac5ca8dfd103c94fb"}, + {false, "4c69e6ae1d506ba3c029de3229de407b"} }; } diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java index 62d358f1e..244f67daf 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java @@ -41,16 +41,16 @@ public class CramIntegrationTest extends WalkerTest { return new Object[][] { {"PrintReads", "exampleBAM.bam", "", "cram", ""}, // Bypass MD5 check since the CRAM header stores the file name {"PrintReads", "exampleCRAM.cram", "", "cram", ""}, - {"PrintReads", "exampleCRAM.cram", "", "bam", "247805098718dd74b8a871796424d359"}, - {"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "a5b26631cd89f86f6184bcac7bc9c9ca"}, + {"PrintReads", "exampleCRAM.cram", "", "bam", "e7834d5992a69143d7c463275213bbf8"}, + {"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "d362fbf30a2c77a2653f1c8eb2dd8fc1"}, {"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"}, {"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, {"CountReads", "exampleCRAM.cram", "", "txt", 
"4fbafd6948b6529caa2b78e476359875"}, {"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, - {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "24dbd14b60220461f47ec5517962cb7f"}, + {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "a11bd125b69f651aaa2ae68c8ccab22f"}, {"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"}, {"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"}, - {"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "84bee5063d8fa0d07e7c3ff7e825ae3a"}, + {"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "9e3e8b5a58dfcb50f5b270547c01d56a"}, {"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"}, {"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"}, }; diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/IntervalIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/IntervalIntegrationTest.java index 0f4789cfe..6ba25ec8e 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/IntervalIntegrationTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/IntervalIntegrationTest.java @@ -89,7 +89,7 @@ public class IntervalIntegrationTest extends WalkerTest { File baseOutputFile = createTempFile("testMultipleIntervalInclusionOnCRAM", ".cram"); spec.setOutputFileLocation(baseOutputFile); spec.addAuxFile("", createTempFileFromBase(baseOutputFile.getAbsolutePath())); // Bypass MD5 check since the CRAM header stores the file name - spec.addAuxFile("ebbe6e311b6bb240554ec96ed9809216", 
createTempFileFromBase(baseOutputFile.getAbsolutePath() + ".bai")); + spec.addAuxFile("4bd9185ce1c7d2e97e8c131b77f76aef", createTempFileFromBase(baseOutputFile.getAbsolutePath() + ".bai")); executeTest("testMultipleIntervalInclusionOnCRAM", spec); } diff --git a/public/gatk-root/pom.xml b/public/gatk-root/pom.xml index e94b307be..c10fa86ec 100644 --- a/public/gatk-root/pom.xml +++ b/public/gatk-root/pom.xml @@ -24,8 +24,8 @@ UTF-8 ${sourceEncoding} ${sourceEncoding} - 1.7 - 1.7 + 1.8 + 1.8 yyyy/MM/dd HH:mm:ss ${project.basedir}/../.. true @@ -44,8 +44,8 @@ org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter - 2.0.0 - 2.0.0 + 2.3.0 + 2.3.0 @@ -373,7 +373,7 @@ org.apache.maven.plugins maven-shade-plugin - 2.1 + 2.4.3 org.apache.maven.plugins diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/readutils/PrintReadsIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/readutils/PrintReadsIntegrationTest.java index fe5f47a58..8f848aa01 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/readutils/PrintReadsIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/readutils/PrintReadsIntegrationTest.java @@ -66,7 +66,7 @@ public class PrintReadsIntegrationTest extends WalkerTest { // See: GATKBAMIndex.getStartOfLastLinearBin(), BAMScheduler.advance(), IntervalOverlapFilteringIterator.advance() {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, "", "0b58c903f54e8543a8b2ce1439aa769b")}, {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L 1", "5b1154cc81dba6bcfe76188e4df8d79c")}, - {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.cram"}, " -L 1:10001 -L 
GL000192.1:500204", "e9caf8a0e6ec947cdcbdfc48a4292eb5")}, + {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.cram"}, " -L 1:10001 -L GL000192.1:500204", "a84efdc3d4a8d6329b5f0b494dd280d2")}, {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L unmapped", "cbd3d1d50c8674f79033aa8c36aa3cd1")}, {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L 1 -L unmapped", "5b1154cc81dba6bcfe76188e4df8d79c")}, {new PRTest(b37KGReference, new String[]{"oneReadAllInsertion.bam"}, "", "e212d1799ae797e781b17e630656a9a1")}, diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VCFIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VCFIntegrationTest.java index 397626023..d7ba67fd0 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VCFIntegrationTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VCFIntegrationTest.java @@ -55,7 +55,7 @@ public class VCFIntegrationTest extends WalkerTest { @Test(enabled = true) public void testReadingAndWritingWitHNoChanges() { - String md5ofInputVCF = "3dc9ac85f2c0541df9bc57b4d81f480b"; + String md5ofInputVCF = "a492a97a8d18ace8eeef02a6fe5f179b"; String testVCF = privateTestDir + "vcf4.1.example.vcf"; String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; diff --git a/public/repo/com/google/code/cofoja/cofoja/1.2-20140817/cofoja-1.2-20140817.pom b/public/repo/com/google/code/cofoja/cofoja/1.2-20140817/cofoja-1.2-20140817.pom index 92fbea553..fb3102be3 100644 --- a/public/repo/com/google/code/cofoja/cofoja/1.2-20140817/cofoja-1.2-20140817.pom +++ b/public/repo/com/google/code/cofoja/cofoja/1.2-20140817/cofoja-1.2-20140817.pom @@ -80,9 +80,9 @@ com.sun tools - 1.5.0 + 1.8.0 system - ${toolsjar} + ${java.home}/lib/tools.jar 
From 6d5b643a9d89a5274baab4359596b38b81dfbede Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Tue, 17 May 2016 08:42:49 -0400 Subject: [PATCH 64/82] Fix tagging syntax Syntax was given as -V:format,name but should be -V:name,format --- .../gatk/tools/walkers/variantutils/CombineVariants.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariants.java index f89509603..8a11da65a 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariants.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariants.java @@ -72,9 +72,9 @@ import java.util.*; * * *

By default, the input sets will be named variants, variants2, variants3, and so on. You can override this by - * providing an explicit name tag for each input, using the syntax " -V:format,name". Each input tagged in this + * providing an explicit name tag for each input, using the syntax " -V:name,format". Each input tagged in this * way will be labeled as such in the output (i.e., set=name rather than set=variants2). For example, you could specify - * a set of control samples as " -V:vcf,control my_control_samples.vcf", and the resulting VCF records would contain + * a set of control samples as " -V:control,vcf my_control_samples.vcf", and the resulting VCF records would contain * the annotation "set=control" in the INFO field. It is strongly recommended to provide explicit names in this way * when a rod priority list is provided.

* @@ -123,8 +123,8 @@ import java.util.*; * *

Caveats

*
    - *
  • This tool is not intended to manipulate GVCFS! To combine GVCF files output by HaplotypeCaller, use CombineGVCFs.
  • - *
  • To join intermediate VCFs produced by running jobs in parallel by interval (e.g. by chromosome), use CatVariants.
  • + *
  • This tool is not intended to manipulate GVCFS! To combine GVCF files output for different samples by HaplotypeCaller, use CombineGVCFs.
  • + *
  • To join intermediate VCFs produced by running jobs in parallel by interval (e.g. by chromosome) from the same sample, use CatVariants.
  • *
* *

Additional notes

From d611c458060e217969f3327ed5b75bf45ae792bf Mon Sep 17 00:00:00 2001 From: Mark Fleharty Date: Fri, 22 Apr 2016 14:28:02 -0400 Subject: [PATCH 65/82] Adding OtherArgumentRequired option to allow an argument to require an additional argument. --- .../gatk/ArgumentDefinitionField.java | 19 +++++ .../queue/extensions/gatk/ArgumentField.java | 7 +- .../gatk/utils/commandline/Argument.java | 9 +++ .../utils/commandline/ArgumentDefinition.java | 20 +++++ .../commandline/ArgumentTypeDescriptor.java | 1 + .../gatk/utils/commandline/Input.java | 7 ++ .../gatk/utils/commandline/Output.java | 7 ++ .../gatk/utils/commandline/ParsingEngine.java | 36 ++++++++- .../commandline/ParsingEngineUnitTest.java | 74 +++++++++++++------ 9 files changed, 156 insertions(+), 24 deletions(-) diff --git a/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentDefinitionField.java b/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentDefinitionField.java index a1db9cb87..4de050f3c 100644 --- a/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentDefinitionField.java @@ -53,6 +53,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override protected String getShortName() { return escape(argumentDefinition.shortName); } @Override protected boolean isRequired() { return argumentDefinition.required; } @Override protected String getExclusiveOf() { return escape(argumentDefinition.exclusiveOf); } + @Override protected String getOtherArgumentRequired() { return escape(argumentDefinition.otherArgumentRequired); } @Override protected String getValidation() { return escape(argumentDefinition.validation); } protected boolean isFlag() { return argumentDefinition.isFlag; } protected boolean 
isMultiValued() { return argumentDefinition.isMultiValued; } @@ -250,6 +251,15 @@ public abstract class ArgumentDefinitionField extends ArgumentField { exclusiveOf.append(escape(argumentDefinition.fullName)).append("String"); return exclusiveOf.toString(); } + + @Override + protected String getOtherArgumentRequired() { + StringBuilder otherArgumentRequired = new StringBuilder(super.getOtherArgumentRequired()); + if (otherArgumentRequired.length() > 0) + otherArgumentRequired.append(","); + otherArgumentRequired.append(escape(argumentDefinition.fullName)).append("String"); + return otherArgumentRequired.toString(); + } } // if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) @@ -277,6 +287,15 @@ public abstract class ArgumentDefinitionField extends ArgumentField { exclusiveOf.append(escape(argumentDefinition.fullName)); return exclusiveOf.toString(); } + + @Override + protected String getOtherArgumentRequired() { + StringBuilder otherArgumentRequired = new StringBuilder(super.getOtherArgumentRequired()); + if (otherArgumentRequired.length() > 0) + otherArgumentRequired.append(","); + otherArgumentRequired.append(escape(argumentDefinition.fullName)); + return otherArgumentRequired.toString(); + } } // if (argumentDefinition.ioType == ArgumentIOType.INPUT) diff --git a/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentField.java b/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentField.java index 99ba25e46..9012e1d56 100644 --- a/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentField.java +++ b/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentField.java @@ -56,7 +56,8 @@ public abstract class ArgumentField { public final String getArgumentAddition() { return String.format("%n" + "/** 
%s */%n" + - "@%s(fullName=\"%s\", shortName=\"%s\", doc=\"%s\", required=%s, exclusiveOf=\"%s\", validation=\"%s\")%n" + + "@%s(fullName=\"%s\", shortName=\"%s\", doc=\"%s\", required=%s, exclusiveOf=\"%s\", " + + "otherArgumentRequired=\"%s\", validation=\"%s\")%n" + "%s%svar %s: %s = %s%n" + "%s", getDoc(), @@ -66,6 +67,7 @@ public abstract class ArgumentField { getDoc(), isRequired(), getExclusiveOf(), + getOtherArgumentRequired(), getValidation(), getGatherAnnotation(), getPrivacy(), getFieldName(), getFieldType(), getDefaultValue(), getDefineAddition()); @@ -97,6 +99,9 @@ public abstract class ArgumentField { /** @return A comma separated list of arguments that may be substituted for this field. */ protected String getExclusiveOf() { return ""; } + /** @return A string containing an argument that is necessary for the current argument to work. */ + protected String getOtherArgumentRequired() { return ""; } + /** @return A validation string for the argument. */ protected String getValidation() { return ""; } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Argument.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Argument.java index 20c51fa78..3d43446c5 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Argument.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Argument.java @@ -81,6 +81,15 @@ public @interface Argument { */ String exclusiveOf() default ""; + /** + * Does this command-line argument require other arguments to go with it? + * Should be a string containing the name of the required argument. + * This option only supports a single required argument. + * @return A string with the other argument that this + * argument should require in order to work. + */ + String otherArgumentRequired() default ""; + /** * Provide a regexp-based validation string. * @return Non-empty regexp for validation, blank otherwise. 
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ArgumentDefinition.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ArgumentDefinition.java index 6ad8d4329..8fd0c998d 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ArgumentDefinition.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ArgumentDefinition.java @@ -96,6 +96,11 @@ public class ArgumentDefinition { */ public final String exclusiveOf; + /** + * Does this argument require another argument? + */ + public final String otherArgumentRequired; + /** * Can we validate this regular expression? */ @@ -119,6 +124,7 @@ public class ArgumentDefinition { * @param isHidden Whether or not this argument should be hidden from the command-line argument system. * @param componentType For multivalued arguments the type of the components. * @param exclusiveOf Whether this command line argument is mutually exclusive of other arguments. + * @param otherArgumentRequired Other argument the current argument requires in order to work. * @param validation A regular expression for command-line argument validation. * @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null. 
*/ @@ -133,6 +139,7 @@ public class ArgumentDefinition { boolean isHidden, Class componentType, String exclusiveOf, + String otherArgumentRequired, String validation, List validOptions) { this.ioType = ioType; @@ -146,6 +153,7 @@ public class ArgumentDefinition { this.isHidden = isHidden; this.componentType = componentType; this.exclusiveOf = exclusiveOf; + this.otherArgumentRequired = otherArgumentRequired; this.validation = validation; this.validOptions = validOptions; @@ -177,6 +185,7 @@ public class ArgumentDefinition { boolean isHidden, Class componentType, String exclusiveOf, + String otherArgumentRequired, String validation, List validOptions) { @@ -210,6 +219,7 @@ public class ArgumentDefinition { this.isHidden = isHidden; this.componentType = componentType; this.exclusiveOf = exclusiveOf; + this.otherArgumentRequired = otherArgumentRequired; this.validation = validation; this.validOptions = validOptions; } @@ -275,6 +285,16 @@ public class ArgumentDefinition { return exclusiveOf.trim().length() > 0 ? exclusiveOf.trim() : null; } + /** + * Specifies other argument that is required to be used in conjunction with this argument. + * @param annotation Original field annotation. + * @return A required argument, or null if none are present. + */ + public static String getOtherArgumentRequired( Annotation annotation ) { + String otherArgumentRequired = ((String) CommandLineUtils.getValue(annotation, "otherArgumentRequired")).trim(); + return otherArgumentRequired.length() > 0 ? otherArgumentRequired : null; + } + /** * A regular expression which can be used for validation. * @param annotation Original field annotation. 
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ArgumentTypeDescriptor.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ArgumentTypeDescriptor.java index 000c6d19f..66a10ef00 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ArgumentTypeDescriptor.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ArgumentTypeDescriptor.java @@ -166,6 +166,7 @@ public abstract class ArgumentTypeDescriptor { source.isHidden(), makeRawTypeIfNecessary(getCollectionComponentType(source.field)), ArgumentDefinition.getExclusiveOf(argumentAnnotation), + ArgumentDefinition.getOtherArgumentRequired(argumentAnnotation), ArgumentDefinition.getValidationRegex(argumentAnnotation), getValidOptions(source) ); } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Input.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Input.java index 03d62de68..fe72b481a 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Input.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Input.java @@ -75,6 +75,13 @@ public @interface Input { */ String exclusiveOf() default ""; + /** + * Does this command-line argument require another argument. + * This will be the other argument required (presently only supports one). + * @return A string with the other required argument. + */ + String otherArgumentRequired() default ""; + /** * Provide a regexp-based validation string. * @return Non-empty regexp for validation, blank otherwise. 
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Output.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Output.java index 1398604d7..1bb164c95 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Output.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/Output.java @@ -82,6 +82,13 @@ public @interface Output { */ String exclusiveOf() default ""; + /** + * Does this command-line argument require another argument. + * This will be the other argument required (presently only supports one). + * @return A string with the other required argument. + */ + String otherArgumentRequired() default ""; + /** * Provide a regexp-based validation string. * @return Non-empty regexp for validation, blank otherwise. diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ParsingEngine.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ParsingEngine.java index 65b544933..d85ab5467 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ParsingEngine.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/commandline/ParsingEngine.java @@ -270,7 +270,8 @@ public class ParsingEngine { InvalidArgumentValue, ValueMissingArgument, TooManyValuesForArgument, - MutuallyExclusive } + MutuallyExclusive, + OtherArgumentRequired} /** * Validates the list of command-line argument matches. 
@@ -372,6 +373,26 @@ public class ParsingEngine { if( !invalidPairs.isEmpty() ) throw new ArgumentsAreMutuallyExclusiveException( invalidPairs ); } + + // Find sets of options that have other required arguments + if( !skipValidationOf.contains(ValidationType.OtherArgumentRequired)) { + Collection missingRequiredPairs = new ArrayList(); + for( ArgumentMatch argumentMatch: argumentMatches.findSuccessfulMatches()) { + if(argumentMatch.definition.otherArgumentRequired != null) { + boolean otherRequiredArgumentSeen = false; + + for(ArgumentMatch otherRequiredMatch: argumentMatches.findSuccessfulMatches()) { + if(argumentMatch.definition.otherArgumentRequired.equals(otherRequiredMatch.label)) { + otherRequiredArgumentSeen = true; + } + } + if(!otherRequiredArgumentSeen) { + missingRequiredPairs.add(argumentMatch); + throw new OtherRequiredArgumentMissingException(missingRequiredPairs); + } + } + } + } } /** @@ -814,6 +835,19 @@ class ArgumentsAreMutuallyExclusiveException extends ArgumentException { } +class OtherRequiredArgumentMissingException extends ArgumentException { + public OtherRequiredArgumentMissingException( Collection arguments ) { + super( formatArguments(arguments) ); + } + + private static String formatArguments( Collection arguments ) { + StringBuilder sb = new StringBuilder(); + for( ArgumentMatch argument: arguments ) + sb.append( String.format("%nArguments '%s' and '%s' are required to go together.", argument.definition.fullName, argument.definition.otherArgumentRequired ) ); + return sb.toString(); + } +} + /** * An exception for when an argument doesn't match an of the enumerated options for that var type diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/commandline/ParsingEngineUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/commandline/ParsingEngineUnitTest.java index 7528eb46f..4e0db008c 100644 --- 
a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/commandline/ParsingEngineUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/commandline/ParsingEngineUnitTest.java @@ -86,7 +86,7 @@ public class ParsingEngineUnitTest extends BaseTest { MultiCharShortNameArgProvider argProvider = new MultiCharShortNameArgProvider(); parsingEngine.loadArgumentsIntoObject( argProvider ); - Assert.assertEquals(argProvider.outputFile,"out.txt","Argument is not correctly initialized"); + Assert.assertEquals(argProvider.outputFile, "out.txt", "Argument is not correctly initialized"); } @@ -114,11 +114,11 @@ public class ParsingEngineUnitTest extends BaseTest { final String[] commandLine = new String[] {" --input_file ", "na12878.bam"}; parsingEngine.addArgumentSource( InputFileArgProvider.class ); - parsingEngine.parse( commandLine ); + parsingEngine.parse(commandLine); parsingEngine.validate(); InputFileArgProvider argProvider = new InputFileArgProvider(); - parsingEngine.loadArgumentsIntoObject( argProvider ); + parsingEngine.loadArgumentsIntoObject(argProvider); Assert.assertEquals(argProvider.inputFile,"na12878.bam","Argument is not correctly initialized"); } @@ -167,7 +167,7 @@ public class ParsingEngineUnitTest extends BaseTest { AllLociArgProvider argProvider = new AllLociArgProvider(); parsingEngine.loadArgumentsIntoObject( argProvider ); - Assert.assertTrue(argProvider.allLoci,"Argument is not correctly initialized"); + Assert.assertTrue(argProvider.allLoci, "Argument is not correctly initialized"); } private class AllLociArgProvider { @@ -214,12 +214,12 @@ public class ParsingEngineUnitTest extends BaseTest { public void enumMixedCaseTest() { final String[] commandLine = new String[] { "--test_enum", "oNe" }; - parsingEngine.addArgumentSource( EnumArgProvider.class ); - parsingEngine.parse( commandLine ); + parsingEngine.addArgumentSource(EnumArgProvider.class); + parsingEngine.parse(commandLine); parsingEngine.validate(); 
EnumArgProvider argProvider = new EnumArgProvider(); - parsingEngine.loadArgumentsIntoObject( argProvider ); + parsingEngine.loadArgumentsIntoObject(argProvider); Assert.assertEquals(argProvider.testEnum, TestEnum.ONE, "Enum value is not correct"); } @@ -228,12 +228,12 @@ public class ParsingEngineUnitTest extends BaseTest { public void enumDefaultTest() { final String[] commandLine = new String[] {}; - parsingEngine.addArgumentSource( EnumArgProvider.class ); - parsingEngine.parse( commandLine ); + parsingEngine.addArgumentSource(EnumArgProvider.class); + parsingEngine.parse(commandLine); parsingEngine.validate(); EnumArgProvider argProvider = new EnumArgProvider(); - parsingEngine.loadArgumentsIntoObject( argProvider ); + parsingEngine.loadArgumentsIntoObject(argProvider); Assert.assertEquals(argProvider.testEnum, TestEnum.THREE, "Enum value is not correct"); } @@ -300,7 +300,7 @@ public class ParsingEngineUnitTest extends BaseTest { final String[] commandLine = new String[0]; parsingEngine.addArgumentSource( RequiredArgProvider.class ); - parsingEngine.parse( commandLine ); + parsingEngine.parse(commandLine); parsingEngine.validate(); } @@ -345,10 +345,10 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.addArgumentSource( RequiredArgProvider.class ); parsingEngine.parse( commandLine ); - parsingEngine.validate( EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument) ); + parsingEngine.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument)); RequiredArgProvider argProvider = new RequiredArgProvider(); - parsingEngine.loadArgumentsIntoObject(argProvider ); + parsingEngine.loadArgumentsIntoObject(argProvider); Assert.assertNull(argProvider.value, "Value should have remain unset"); } @@ -396,7 +396,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test(expectedExceptions= ReviewedGATKException.class) public void duplicateShortNameTest() { - parsingEngine.addArgumentSource( DuplicateShortNameProvider.class 
); + parsingEngine.addArgumentSource(DuplicateShortNameProvider.class); } @@ -444,7 +444,7 @@ public class ParsingEngineUnitTest extends BaseTest { final String[] commandLine = new String[] {"--value","1","--value","2","--value","3"}; parsingEngine.addArgumentSource( RequiredArgProvider.class ); - parsingEngine.parse( commandLine ); + parsingEngine.parse(commandLine); parsingEngine.validate(); } @@ -491,7 +491,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void correctDefaultArgNameTest() { - parsingEngine.addArgumentSource( CamelCaseArgProvider.class ); + parsingEngine.addArgumentSource(CamelCaseArgProvider.class); DefinitionMatcher matcher = ArgumentDefinitions.FullNameDefinitionMatcher; ArgumentDefinition definition = parsingEngine.argumentDefinitions.findArgumentDefinition("myarg", matcher); @@ -510,7 +510,7 @@ public class ParsingEngineUnitTest extends BaseTest { final String[] commandLine = new String[] {"--mybool", "true"}; parsingEngine.addArgumentSource( BooleanArgProvider.class ); - parsingEngine.parse( commandLine ); + parsingEngine.parse(commandLine); parsingEngine.validate(); } @@ -524,14 +524,14 @@ public class ParsingEngineUnitTest extends BaseTest { public void validParseForAnalysisTypeTest() { final String[] commandLine = new String[] {"--analysis_type", "Pileup" }; - parsingEngine.addArgumentSource( AnalysisTypeArgProvider.class ); - parsingEngine.parse( commandLine ); - parsingEngine.validate( EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument) ); + parsingEngine.addArgumentSource(AnalysisTypeArgProvider.class); + parsingEngine.parse(commandLine); + parsingEngine.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument)); AnalysisTypeArgProvider argProvider = new AnalysisTypeArgProvider(); - parsingEngine.loadArgumentsIntoObject( argProvider ); + parsingEngine.loadArgumentsIntoObject(argProvider); - Assert.assertEquals(argProvider.Analysis_Name,"Pileup","Argument is not correctly 
initialized"); + Assert.assertEquals(argProvider.Analysis_Name, "Pileup", "Argument is not correctly initialized"); } private class AnalysisTypeArgProvider { @@ -578,6 +578,36 @@ public class ParsingEngineUnitTest extends BaseTest { Integer bar; } + @Test(expectedExceptions=OtherRequiredArgumentMissingException.class) + public void otherArgumentRequiredTestWithoutRequiredArguments() { + String[] commandLine = new String[] {"--foo","5"}; + + parsingEngine.addArgumentSource( OtherArgumentRequiredArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + } + + @Test + public void otherArgumentRequiredTestWithRequiredArguments() { + String[] commandLine = new String[] {"--foo","5", "--bar", "6"}; + + parsingEngine.addArgumentSource( OtherArgumentRequiredArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + } + + @SuppressWarnings("unused") + private class OtherArgumentRequiredArgProvider { + @Argument(doc="foo", otherArgumentRequired="bar") + Integer foo; + + @Argument(doc="bar",required=false) + Integer bar; + + @Argument(doc="OtherIrrelevantArgument", required = false) + Integer OtherIrrelevantArgument; + } + @Test(expectedExceptions=InvalidArgumentValueException.class) public void argumentValidationTest() { // Passing only foo should work fine... From bf4b1a5421f69e1b6239ea4e6d2c90a3862862b0 Mon Sep 17 00:00:00 2001 From: Samuel Lee Date: Tue, 12 Apr 2016 17:35:26 -0400 Subject: [PATCH 66/82] Changed calls for GQ=0 from 0/0 to ./. for HaplotypeCaller in normal mode. 
--- .../HaplotypeCallerGenotypingEngine.java | 29 +++++++++++++++++-- ...lexAndSymbolicVariantsIntegrationTest.java | 2 +- .../HaplotypeCallerGVCFIntegrationTest.java | 6 ++-- .../HaplotypeCallerIntegrationTest.java | 10 +++++++ 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 4a3f12b44..90fe73575 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -238,8 +238,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine clearedGTs = new ArrayList<>(oldGTs.size()); + for ( final Genotype oldGT : oldGTs ) { + // set GT to no-call when GQ is 0 + if (oldGT.hasGQ() && oldGT.getGQ() == 0) { + final int ploidy = oldGT.getPloidy(); + final List noCallAlleles = GATKVariantContextUtils.noCallAlleles(ploidy); + final Genotype noCallGT = new GenotypeBuilder().alleles(noCallAlleles).make(); + clearedGTs.add(noCallGT); + } else { + clearedGTs.add(oldGT); + } + } + return new VariantContextBuilder(VC).genotypes(clearedGTs).make(); + } } \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 0a62bcdcb..42649af05 100644 --- 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -72,7 +72,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "32ce23b3830f5f2c693161b40de8b15e"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "88255eda0e29e4a6e128ddb7177a03ab"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 4ff188851..2105366c0 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -126,7 +126,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "0c87e26fdd7ab5629eb33f36833e3607"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "8bf132d73cf6b0851ae73c6799f19ba9"}); tests.add(new Object[]{NA12878_PCRFREE, 
ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90b25f3050435c9e67aa0ee325c24167"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "5f329540dc5c4556ab029d0e2cfcabcb"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "6ad7855dbf6dda2060aa93a3ee010b3e"}); @@ -144,10 +144,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "0820ae1d19ba0a2da25737ded8e2c96f"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "6662cfc41393257dfd6c39f1af1e3843"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "70ee4e60d9f86b63aaab09075a71ddd3"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "700d79df3b0b481444e81471204e242e"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "09d1ae38586465b98dea0a0e432a7146"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "af0fe243e3b96e59097187cd16ba1597"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "228e1d2ec2e729a5f79c37f3f2557708"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "2fc7020457dde4439b4133c098d9ab9b"}); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 19d4a675b..168dde631 100644 --- 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -490,5 +490,15 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testHBaseCountsBySample() throws IOException{ HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A BaseCountsBySample", "f5ad4e03c0faaa806ee6ae536af8a479"); } + + @Test + public void testSetZeroGQsToNoCall() throws IOException{ + final File testBAM = new File(privateTestDir + "set.zero.GQs.no.call.bam"); + final String md5 = "b90da12d97fce42f5127bcb6cad07b09"; + final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 8:17312375-17312975 ", REF, testBAM) + + " --no_cmdline_in_header -o %s"; + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5)); + executeTest("testSetZeroGQsToNoCall", spec); + } } From f5456a3761d198c03edf570400c2f47e4dff1909 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Wed, 18 May 2016 00:25:13 -0400 Subject: [PATCH 67/82] Fixed M2 max alt alleles threshold evaluation error Also clarified some argument docs --- .../cancer/m2/M2ArgumentCollection.java | 23 +++++++++++-------- .../gatk/tools/walkers/cancer/m2/MuTect2.java | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/M2ArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/M2ArgumentCollection.java index e3e9e5d31..6aca6a35f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/M2ArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/M2ArgumentCollection.java @@ -62,30 +62,32 @@ public 
class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection /** * Artifact detection mode is used to prepare a panel of normals. This maintains the specified tumor LOD threshold, - * but disables the remaining pragmatic filters. See M2 usage examples for more information. + * but disables the remaining pragmatic filters. See usage examples above for more information. */ @Advanced @Argument(fullName = "artifact_detection_mode", required = false, doc="Enable artifact detection for creating panels of normals") public boolean ARTIFACT_DETECTION_MODE = false; /** - * This is the tumor LOD threshold to output the variant in the VCF, although it may be filtered + * This is the LOD threshold that a variant must pass in the tumor to be emitted to the VCF. Note that the variant may pass this threshold yet still be annotated as FILTERed based on other criteria. */ @Argument(fullName = "initial_tumor_lod", required = false, doc = "Initial LOD threshold for calling tumor variant") public double INITIAL_TUMOR_LOD_THRESHOLD = 4.0; - + /** + * This is the LOD threshold corresponding to the minimum amount of reference evidence in the normal for a variant to be considered somatic and emitted in the VCF + */ @Argument(fullName = "initial_normal_lod", required = false, doc = "Initial LOD threshold for calling normal variant") public double INITIAL_NORMAL_LOD_THRESHOLD = 0.5; /** - * Only variants with tumor LODs exceeding this thresholds can pass filtration + * Only variants with tumor LODs exceeding this threshold can pass filtering. */ @Argument(fullName = "tumor_lod", required = false, doc = "LOD threshold for calling tumor variant") public double TUMOR_LOD_THRESHOLD = 6.3; /** - * This is a measure of the minimum evidence to show that a variant observed in the tumor is not also present in its normal + * This is a measure of the minimum evidence to support that a variant observed in the tumor is not also present in the normal. 
*/ @Argument(fullName = "normal_lod", required = false, doc = "LOD threshold for calling normal non-germline") public double NORMAL_LOD_THRESHOLD = 2.2; @@ -98,19 +100,22 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection public double NORMAL_DBSNP_LOD_THRESHOLD = 5.5; /** - * This argument is used for the M2 internal "alt_allele_in_normal" filter + * This argument is used for the internal "alt_allele_in_normal" filter. + * A variant will PASS the filter if the value tested is lower or equal to the threshold value. It will FAIL the filter if the value tested is greater than the max threshold value. **/ @Argument(fullName = "max_alt_alleles_in_normal_count", required = false, doc="Threshold for maximum alternate allele counts in normal") - public int MAX_ALT_ALLELES_IN_NORMAL_COUNT = 2; + public int MAX_ALT_ALLELES_IN_NORMAL_COUNT = 1; /** - * This argument is used for the M2 internal "alt_allele_in_normal" filter + * This argument is used for the internal "alt_allele_in_normal" filter. + * A variant will PASS the filter if the value tested is lower or equal to the threshold value. It will FAIL the filter if the value tested is greater than the max threshold value. */ @Argument(fullName = "max_alt_alleles_in_normal_qscore_sum", required = false, doc="Threshold for maximum alternate allele quality score sum in normal") public int MAX_ALT_ALLELES_IN_NORMAL_QSCORE_SUM = 20; /** - * This argument is used for the M2 internal "alt_allele_in_normal" filter + * This argument is used for the internal "alt_allele_in_normal" filter. + * A variant will PASS the filter if the value tested is lower or equal to the threshold value. It will FAIL the filter if the value tested is greater than the max threshold value. 
*/ @Argument(fullName = "max_alt_allele_in_normal_fraction", required = false, doc="Threshold for maximum alternate allele fraction in normal") public double MAX_ALT_ALLELE_IN_NORMAL_FRACTION = 0.03; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java index 79299e87a..3c9bc8402 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java @@ -794,7 +794,7 @@ public class MuTect2 extends ActiveRegionWalker, Integer> i } } - if ( (normalAltCounts >= MTAC.MAX_ALT_ALLELES_IN_NORMAL_COUNT || normalF >= MTAC.MAX_ALT_ALLELE_IN_NORMAL_FRACTION ) && normalAltQualityScoreSum > MTAC.MAX_ALT_ALLELES_IN_NORMAL_QSCORE_SUM) { + if ( (normalAltCounts > MTAC.MAX_ALT_ALLELES_IN_NORMAL_COUNT || normalF > MTAC.MAX_ALT_ALLELE_IN_NORMAL_FRACTION ) && normalAltQualityScoreSum > MTAC.MAX_ALT_ALLELES_IN_NORMAL_QSCORE_SUM) { filters.add(GATKVCFConstants.ALT_ALLELE_IN_NORMAL_FILTER_NAME); } else { From 644076b1e13cfdd35465db267632857336c26fda Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Wed, 4 May 2016 16:43:33 -0400 Subject: [PATCH 68/82] Add fix and test for finalizing MQ annotation at BP resolution for variant and ref samples Addresses issue #1356 --- .../tools/walkers/annotator/RMSAnnotation.java | 10 ++++++++-- .../GenotypeGVCFsIntegrationTest.java | 17 ++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RMSAnnotation.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RMSAnnotation.java index 7196e7ee6..f6307f3f7 100644 --- 
a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RMSAnnotation.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RMSAnnotation.java @@ -212,8 +212,14 @@ public abstract class RMSAnnotation extends InfoFieldAnnotation implements Reduc numOfReads += Integer.parseInt(vc.getAttributeAsString(VCFConstants.DEPTH_KEY, "-1")); if(vc.hasGenotypes()) { for(Genotype gt : vc.getGenotypes()) { - if(gt.isHomRef() && gt.hasExtendedAttribute("MIN_DP")) //site-level DP contribution will come from MIN_DP for gVCF-called reference variants - numOfReads -= Integer.parseInt(gt.getExtendedAttribute("MIN_DP").toString()); + if(gt.isHomRef()) { + //site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution + if (gt.hasExtendedAttribute("MIN_DP")) + numOfReads -= Integer.parseInt(gt.getExtendedAttribute("MIN_DP").toString()); + else if (gt.hasDP()) + numOfReads -= gt.getDP(); + } + } } return numOfReads; diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index 59d146167..b93f5f4a9 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -109,7 +109,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-11,000,000", b37KGReference), 1, - Arrays.asList("d3fab0d45f0054b71aa1d031876a4bbb")); + Arrays.asList("c9edd4ca8c2801c4681322087d82e781")); executeTest("combineSingleSamplePipelineGVCF", spec); } @@ 
-158,7 +158,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-20,000,000", b37KGReference), 1, - Arrays.asList("54a86ade63b84c87ff4e537e276987fc")); + Arrays.asList("f48114bc6348cdc9dc4f0960f5dcf5f8")); executeTest("combineSingleSamplePipelineGVCFHierarchical", spec); } @@ -170,10 +170,21 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference), 1, - Arrays.asList("1a2728e7295a6ffca6c2ba5a01af3593")); + Arrays.asList("f88841deb5c2ce4f3bbea1e914a13898")); executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec); } + @Test(enabled = true) + public void combineBPresGVCFs() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -V " + privateTestDir + "NA12891.BPres.g.vcf" + + " -V " + privateTestDir + "NA12892.BPres.g.vcf" + + " -L 20:10433000-10436909", b37KGReference), + 1, + Arrays.asList("f342872f485e6978501facc78c354078")); + executeTest("combineBPresGVCFs", spec); + } + @Test(enabled = true) public void testJustOneSample() { WalkerTestSpec spec = new WalkerTestSpec( From e1fadae1394ccfb062dd083f3c94b87603014ef7 Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Wed, 25 May 2016 17:23:26 -0400 Subject: [PATCH 69/82] Fix error in InfiniteRandomMatingPopulationModel.getLikelihoodsCalculator Same issue noticed in GATK4 [here](https://github.com/broadinstitute/gatk/issues/1856) --- .../walkers/genotyper/InfiniteRandomMatingPopulationModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/InfiniteRandomMatingPopulationModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/InfiniteRandomMatingPopulationModel.java index 
f1bdfa082..9de5e202a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/InfiniteRandomMatingPopulationModel.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/InfiniteRandomMatingPopulationModel.java @@ -141,7 +141,7 @@ public class InfiniteRandomMatingPopulationModel implements GenotypingModel { } private GenotypeLikelihoodCalculator getLikelihoodsCalculator(final int samplePloidy, final int alleleCount) { - if (samplePloidy >= cacheAlleleCountCapacity) + if (samplePloidy >= cachePloidyCapacity) return GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount); else if (alleleCount >= cacheAlleleCountCapacity) return GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount); From 25fa25b61864ff6202c5dd7cc30caff54d880b35 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 26 May 2016 06:42:45 -0400 Subject: [PATCH 70/82] Added option to validate gvcf (for ValidateVariants) (#1379) * with option --gvcf CLP will now put extra checks that a gvcf must adhere to (existence of a NON_REF allele at every variant, and that the variants in total cover the entire requested intervals, or the whole genome if no intervals have been specified) * works on gvcf produced by HC when using either GVCF or BP_RESOLUTION mode * added positive and negative tests --- .../ValidateVariantsIntegrationTest.java | 78 +++++++++++- .../variantutils/ValidateVariants.java | 119 +++++++++++++----- .../broadinstitute/gatk/utils/GenomeLoc.java | 4 +- 3 files changed, 168 insertions(+), 33 deletions(-) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java index d9da8b821..8d4ccac28 100644 --- 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -51,16 +51,19 @@ package org.broadinstitute.gatk.tools.walkers.variantutils; +import htsjdk.samtools.util.TestUtil; import org.apache.commons.io.FileUtils; import org.apache.log4j.Level; import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.broadinstitute.gatk.utils.io.IOUtils; import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; public class ValidateVariantsIntegrationTest extends WalkerTest { @@ -279,7 +282,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("longAlleles-wrongLength.vcf", "ALL", "1", b37KGReference) + " --reference_window_stop 208 -U ALLOW_SEQ_DICT_INCOMPATIBILITY ", - 0, Arrays.asList(EMPTY_MD5)); + 0, Collections.singletonList(EMPTY_MD5)); executeTest("test to allow wrong header contig length, not checking dictionary incompatibility", spec); } @@ -288,7 +291,78 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("longAlleles-wrongLength.vcf", "ALL", "1", b37KGReference) + " --reference_window_stop 208 -U ", - 0, Arrays.asList(EMPTY_MD5)); + 0, Collections.singletonList(EMPTY_MD5)); executeTest("test to allow wrong header contig length, no compatibility checks", spec); } + + @Test + public void testGoodGvcf() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("NA12891.AS.chr20snippet.g.vcf", "ALL", "20:10433000-10437000", b37KGReference) + " -gvcf --reference_window_stop 208 -U ", + 0, 
Collections.singletonList("d41d8cd98f00b204e9800998ecf8427e")); + executeTest("tests correct gvcf", spec); + } + + @Test + public void testGoodGvcfExcludingAlleles() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("NA12891.AS.chr20snippet.g.vcf", "-ALLELES", "20:10433000-10437000", b37KGReference) + " -gvcf --reference_window_stop 208 -U ", + 0, Collections.singletonList("d41d8cd98f00b204e9800998ecf8427e")); + executeTest("tests correct gvcf", spec); + } + + + @Test(expectedExceptions = RuntimeException.class ) + public void testBadGvcfMissingNON_REF() { + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("NA12891.AS.chr20snippet.BAD_MISSING_NON_REF.g.vcf", "-ALLELES", "20:10433000-10437000", b37KGReference) + " -gvcf --reference_window_stop 208 -U ", + 0, Collections.singletonList(EMPTY_MD5)); + executeTest("tests capture of missing NON_REF allele", spec); + } + + @Test(expectedExceptions = RuntimeException.class ) + public void testBadGvcfRegions() { + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("diploid-gvcf.bad-IncompleteRegion.vcf", "-ALLELES", "20:10433000-10437000", b37KGReference) + " -gvcf --reference_window_stop 208 -U ", + 0, Collections.singletonList(EMPTY_MD5)); + executeTest("tests capture of non-complete region", spec); + } + + @Test(expectedExceptions = RuntimeException.class ) + public void testNonOverlappingRegions() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("NA12891.AS.chr20snippet_BAD_INCOMPLETE_REGION.g.vcf", "-ALLELES", "Y:4966254-4967190", b37KGReference) + " -gvcf --reference_window_stop 208 -U ", + 0, Collections.singletonList(EMPTY_MD5)); + executeTest("tests capture of non-complete region", spec); + } + + @Test + public void testNonOverlappingRegionsBP_RESOLUTION() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("gvcf.basepairResolution.vcf", "-ALLELES", "20:10000000-10010000", b37KGReference) + " -gvcf --reference_window_stop 208 -U ", + 0, 
Collections.singletonList(EMPTY_MD5)); + executeTest("tests capture of non-complete region, on BP_RESOLUTION gvcf", spec); + } + @Test + public void testCorrectCreationOfBlocks() throws IOException { + final File tempDir = IOUtils.tempDir("RefBlocks", "test", new File(privateTestDir)); + tempDir.mkdir(); + tempDir.deleteOnExit(); + final File output = File.createTempFile("RefBlocks", ".g.vcf", tempDir); + String baseIntervals = " 1:1-100 -L 5:1-200 "; + String intervalString = " -L " + baseIntervals; + final WalkerTestSpec hc = new WalkerTestSpec("-T HaplotypeCaller " + intervalString + " -I " + privateTestDir + "NA12878.4.snippet.bam " + + " -R /humgen/1kg/reference/human_g1k_v37_decoy.fasta -ERC GVCF -o " + output, Collections.singletonList(EMPTY_MD5)); + executeTest("running hc", hc); + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(tempDir.getName() + "/" + output.getName(), "-ALLELES", baseIntervals, b37KGReference) + " -gvcf --reference_window_stop 208 -U ", + 0, Collections.singletonList(EMPTY_MD5)); + executeTest("testing the correct creation of reference blocks", spec); + + TestUtil.recursiveDelete(tempDir); + } + } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java index e34c78111..653bdaf50 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java @@ -26,23 +26,26 @@ package org.broadinstitute.gatk.tools.walkers.variantutils; import htsjdk.tribble.TribbleException; -import org.broadinstitute.gatk.utils.commandline.Argument; -import org.broadinstitute.gatk.utils.commandline.ArgumentCollection; -import org.broadinstitute.gatk.engine.CommandLineGATK; -import 
org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; -import org.broadinstitute.gatk.engine.arguments.StandardVariantContextInputArgumentCollection; -import org.broadinstitute.gatk.utils.contexts.AlignmentContext; -import org.broadinstitute.gatk.utils.contexts.ReferenceContext; -import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; -import org.broadinstitute.gatk.engine.walkers.Reference; -import org.broadinstitute.gatk.engine.walkers.RodWalker; -import org.broadinstitute.gatk.engine.walkers.Window; -import org.broadinstitute.gatk.utils.exceptions.UserException; -import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; -import org.broadinstitute.gatk.utils.help.HelpConstants; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFConstants; +import org.broadinstitute.gatk.engine.CommandLineGATK; +import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; +import org.broadinstitute.gatk.engine.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.gatk.engine.walkers.Reference; +import org.broadinstitute.gatk.engine.walkers.RodWalker; +import org.broadinstitute.gatk.engine.walkers.Window; +import org.broadinstitute.gatk.utils.GenomeLoc; +import org.broadinstitute.gatk.utils.GenomeLocSortedSet; +import org.broadinstitute.gatk.utils.commandline.Argument; +import org.broadinstitute.gatk.utils.commandline.ArgumentCollection; +import org.broadinstitute.gatk.utils.contexts.AlignmentContext; +import org.broadinstitute.gatk.utils.contexts.ReferenceContext; +import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; +import org.broadinstitute.gatk.utils.help.HelpConstants; +import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; +import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; import java.io.File; import java.util.*; @@ -122,7 +125,7 @@ 
import java.util.*; */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=0,stop=100)) -public class ValidateVariants extends RodWalker { +public class ValidateVariants extends RodWalker { // Log message for a reference allele that is too long protected static final String REFERENCE_ALLELE_TOO_LONG_MSG = "Reference allele is too long"; @@ -184,12 +187,21 @@ public class ValidateVariants extends RodWalker { /** * By default, even filtered records are validated. */ - @Argument(fullName = "doNotValidateFilteredRecords", shortName = "doNotValidateFilteredRecords", doc = "skip validation on filtered records", required = false) + @Argument(fullName = "doNotValidateFilteredRecords", shortName = "doNotValidateFilteredRecords", doc = "skip validation on filtered records", required = false, exclusiveOf = "VALIDATE_GVCF") protected Boolean DO_NOT_VALIDATE_FILTERED = false; @Argument(fullName = "warnOnErrors", shortName = "warnOnErrors", doc = "just emit warnings on errors instead of terminating the run at the first instance", required = false) protected Boolean WARN_ON_ERROR = false; + /** + * Validate this file as a gvcf. In particular, every variant must include a NON_REF allele, and + * every base in the territory under consideration must be covered by a variant (or a reference block). + * If you specified intervals (using -L or -XL) to restrict analysis to a subset of genomic regions, + * those intervals will need to be covered in a valid gvcf. 
+ */ + @Argument(fullName = "validateGVCF", shortName = "gvcf", doc = "Validate this file as a GVCF", required = false, exclusiveOf = "DO_NOT_VALIDATE_FILTERED") + protected Boolean VALIDATE_GVCF = false; + private long numErrors = 0; private File file = null; @@ -208,29 +220,60 @@ public class ValidateVariants extends RodWalker { referenceWindowStop = getToolkit().getArguments().reference_window_stop; } - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null ) - return 0; + return null; + + int lastVcEnd = -1; Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); - for ( VariantContext vc : VCs ) - validate(vc, tracker, ref); + if (VALIDATE_GVCF && VCs.size() > 1) { + logger.error("GVCF validation can only be performed one file at a time. Validation results are invalid."); + } + for ( VariantContext vc : VCs ) { + validate(vc, tracker, ref, VALIDATE_GVCF); + lastVcEnd = vc.getEnd(); + } - return VCs.size(); + return GenomeLoc.setStop(ref.getLocus(), lastVcEnd); } - public Integer reduceInit() { return 0; } + @Override + public GenomeLocSortedSet reduce(GenomeLoc value, GenomeLocSortedSet sum) { + sum.add(value, true); + return sum; + } - public Integer reduce(Integer value, Integer sum) { return sum+value; } + @Override + public GenomeLocSortedSet reduceInit() { + return new GenomeLocSortedSet(getToolkit().getGenomeLocParser()); + } - public void onTraversalDone(Integer result) { - if ( numErrors == 0 ) - System.out.println("Successfully validated the input file. 
Checked " + result + " records with no failures."); + @Override + public void onTraversalDone(GenomeLocSortedSet result) { + if (VALIDATE_GVCF) { + final GenomeLocSortedSet uncoveredIntervals = getToolkit().getIntervals().subtractRegions(result); + if (uncoveredIntervals.coveredSize() > 0) { + final UserException e = new UserException.FailsStrictValidation(file, "A GVCF must cover the entire region. Found " + uncoveredIntervals.coveredSize() + + " loci with no VariantContext covering it. The first uncovered segment is:" + + uncoveredIntervals.iterator().next()); + + if (WARN_ON_ERROR) { + numErrors++; + logger.warn("***** " + e.getMessage() + " *****"); + } else { + throw e; + } + } + } + + if (numErrors == 0) + System.out.println("Successfully validated the input file. Checked " + result.size() + " records with no failures."); else System.out.println("Found " + numErrors + " records with failures."); } - private void validate(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref) { + private void validate(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, boolean gvcf) { if ( DO_NOT_VALIDATE_FILTERED && vc.isFiltered() ) return; @@ -241,7 +284,7 @@ public class ValidateVariants extends RodWalker { // reference length is greater than the reference window stop before and after expansion if ( refLength > 100 && refLength > referenceWindowStop ) { logger.info(String.format("%s (%d) at position %s:%d; skipping that record. 
Set --referenceWindowStop >= %d", - REFERENCE_ALLELE_TOO_LONG_MSG, refLength, vc.getChr(), vc.getStart(), refLength)); + REFERENCE_ALLELE_TOO_LONG_MSG, refLength, vc.getContig(), vc.getStart(), refLength)); return; } @@ -252,7 +295,7 @@ public class ValidateVariants extends RodWalker { // get the RS IDs Set rsIDs = null; if ( tracker.hasValues(dbsnp.dbsnp) ) { - rsIDs = new HashSet(); + rsIDs = new HashSet<>(); for ( VariantContext rsID : tracker.getValues(dbsnp.dbsnp, ref.getLocus()) ) rsIDs.addAll(Arrays.asList(rsID.getID().split(VCFConstants.ID_FIELD_SEPARATOR))); } @@ -260,6 +303,10 @@ public class ValidateVariants extends RodWalker { try { for (final ValidationType t : validationTypes) applyValidationType(vc, reportedRefAllele, observedRefAllele, rsIDs, t); + + if (gvcf) { + ValidateGVCFVariant(vc); + } } catch (TribbleException e) { if ( WARN_ON_ERROR ) { numErrors++; @@ -279,6 +326,13 @@ public class ValidateVariants extends RodWalker { * @return never {@code null} but perhaps an empty set. */ private Collection calculateValidationTypesToApply(final List excludeTypes) { + + if (VALIDATE_GVCF && !excludeTypes.contains(ValidationType.ALLELES)) { + // Note: in a future version allele validation might be OK for GVCFs, if that happens + // this will be more complicated. + logger.warn("GVCF format is currently incompatible with allele validation. Not validating Alleles."); + excludeTypes.add(ValidationType.ALLELES); + } if (excludeTypes.isEmpty()) return Collections.singleton(ValidationType.ALL); final Set excludeTypeSet = new LinkedHashSet<>(excludeTypes); @@ -295,6 +349,13 @@ public class ValidateVariants extends RodWalker { } } + private void ValidateGVCFVariant(final VariantContext vc) { + if (!vc.hasAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)) { + throw new TribbleException.InternalCodecException(String.format("In a GVCF all records must contain a %s allele. 
Offending record: %s", + GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME, vc.toStringWithoutGenotypes())); + } + } + private void applyValidationType(VariantContext vc, Allele reportedRefAllele, Allele observedRefAllele, Set rsIDs, ValidationType t) { switch( t ) { case ALL: diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/GenomeLoc.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/GenomeLoc.java index ec70eff8c..c64d5798f 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/GenomeLoc.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/GenomeLoc.java @@ -540,7 +540,7 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome * * @return a newly allocated GenomeLoc as loc but with start == start */ - public GenomeLoc setStart(GenomeLoc loc, int start) { + public static GenomeLoc setStart(GenomeLoc loc, int start) { return new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop()); } @@ -554,7 +554,7 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome * * @return a newly allocated GenomeLoc as loc but with stop == stop */ - public GenomeLoc setStop(GenomeLoc loc, int stop) { + public static GenomeLoc setStop(GenomeLoc loc, int stop) { return new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop); } From 9d32dec9cd919fb6e4c56c5db982bcd753111225 Mon Sep 17 00:00:00 2001 From: Valentin Ruano Rubio Date: Mon, 25 Apr 2016 01:15:46 -0400 Subject: [PATCH 71/82] Fix for the sum(AD) > DP bug. 
Closes issue #1340 --- ...GenotypeCalculationArgumentCollection.java | 5 +- .../genotyper/GenotypingLikelihoods.java | 4 + .../walkers/genotyper/VariantCallContext.java | 2 +- .../genotyper/afcalc/AFCalculator.java | 7 + .../HaplotypeCallerGenotypingEngine.java | 246 ++++++++++++++---- .../HaplotypeCallerGVCFIntegrationTest.java | 2 +- .../HaplotypeCallerIntegrationTest.java | 4 +- .../broadinstitute/gatk/utils/MathUtils.java | 4 +- .../gatk/utils/genotyper/ReadLikelihoods.java | 231 ++++++++++------ 9 files changed, 361 insertions(+), 144 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java index 3c9da90d2..d8c10145f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java @@ -61,6 +61,9 @@ import java.util.List; public class GenotypeCalculationArgumentCollection implements Cloneable{ + + public static final String MAX_ALTERNATE_ALLELES_SHORT_NAME = "maxAltAlleles"; + /** * Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping. * Using this argument instructs the genotyper to annotate (in the INFO field) the number of alternate alleles that were originally discovered at the site. @@ -122,7 +125,7 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{ * As of GATK 2.2 the genotyper can handle a very large number of events, so the default maximum has been increased to 6. 
*/ @Advanced - @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) + @Argument(fullName = "max_alternate_alleles", shortName = MAX_ALTERNATE_ALLELES_SHORT_NAME, doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 6; /** diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingLikelihoods.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingLikelihoods.java index c247862b5..c0e62c20a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingLikelihoods.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingLikelihoods.java @@ -54,9 +54,13 @@ package org.broadinstitute.gatk.tools.walkers.genotyper; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.GenotypeLikelihoods; import org.broadinstitute.gatk.utils.genotyper.AlleleList; +import org.broadinstitute.gatk.utils.genotyper.AlleleListUtils; +import org.broadinstitute.gatk.utils.genotyper.IndexedAlleleList; import org.broadinstitute.gatk.utils.genotyper.SampleList; +import java.util.ArrayList; import java.util.List; +import java.util.Set; /** * Genotyping Likelihoods collection. 
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/VariantCallContext.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/VariantCallContext.java index d7f36975e..252bec476 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/VariantCallContext.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/VariantCallContext.java @@ -69,7 +69,7 @@ public class VariantCallContext extends VariantContext { // Should this site be emitted? public boolean shouldEmit = true; - VariantCallContext(VariantContext vc, boolean confidentlyCalledP) { + public VariantCallContext(VariantContext vc, boolean confidentlyCalledP) { super(vc); this.confidentlyCalled = confidentlyCalledP; } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java index fa91a2f2b..5680e5f15 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java @@ -53,6 +53,9 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import org.apache.log4j.Logger; +import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingLikelihoods; +import org.broadinstitute.gatk.utils.SimpleTimer; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.variantcontext.GenotypesContext; @@ -128,6 +131,7 @@ public abstract class AFCalculator implements Cloneable { * @return result (for 
programming convenience) */ public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] log10AlleleFrequencyPriors) { + if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null"); if ( vc.getNAlleles() == 1 ) throw new IllegalArgumentException("VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc); if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null"); @@ -135,6 +139,9 @@ public abstract class AFCalculator implements Cloneable { // reset the result, so we can store our new result there final StateTracker stateTracker = getStateTracker(true,maximumAlternativeAlleles); + //TODO All implementations of the reduce-scope seems to employ a bad criterion to + //TODO decide what alleles to keep. This must be changed eventually. + //TODO issue {@see https://github.com/broadinstitute/gsa-unstable/issues/1376} final VariantContext vcWorking = reduceScope(vc,defaultPloidy, maximumAlternativeAlleles); callTimer.start(); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 90fe73575..cad0697da 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -51,19 +51,19 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller; +import com.google.common.annotations.VisibleForTesting; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import 
htsjdk.samtools.util.StringUtil; import htsjdk.variant.variantcontext.*; +import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection; +import org.broadinstitute.gatk.utils.*; import org.broadinstitute.gatk.utils.contexts.ReferenceContext; -import org.broadinstitute.gatk.utils.genotyper.AlleleList; import org.broadinstitute.gatk.utils.genotyper.IndexedAlleleList; import org.broadinstitute.gatk.utils.genotyper.SampleList; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.genotyper.*; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; -import org.broadinstitute.gatk.utils.GenomeLoc; -import org.broadinstitute.gatk.utils.GenomeLocParser; -import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; import org.broadinstitute.gatk.utils.haplotype.EventMap; @@ -82,6 +82,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine 0"}) @Ensures("result != null") // TODO - can this be refactored? this is hard to follow! 
- public CalledHaplotypes assignGenotypeLikelihoods( final List haplotypes, + CalledHaplotypes assignGenotypeLikelihoods( final List haplotypes, final ReadLikelihoods readLikelihoods, final Map> perSampleFilteredReadList, final byte[] ref, @@ -241,9 +242,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine mergeMap = new LinkedHashMap<>(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) { @@ -256,39 +254,25 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine it = allelesToDrop.iterator(); + final StringBuilder builder = new StringBuilder(); + for (int i = 0; i < MAX_DROPPED_ALTERNATIVE_ALLELES_TO_LOG; i++) { + builder.append(it.next().toString()).append(", "); + } + allelesToDropString = builder.append(it.next().toString()).append(" and ").append(allelesToDrop.size() - 20).append(" more").toString(); + } + logger.warn(String.format("location %s: too many alternative alleles found (%d) larger than the maximum requested with -%s (%d), the following will be dropped: %s.", location, + readAlleleLikelihoods.alleleCount() - 1, GenotypeCalculationArgumentCollection.MAX_ALTERNATE_ALLELES_SHORT_NAME, configuration.genotypeArgs.MAX_ALTERNATE_ALLELES, + allelesToDropString)); + readAlleleLikelihoods.dropAlleles(allelesToDrop); + } + + /** + * Returns the set of alleles that should be dropped in order to bring down the number + * of alternative alleles to the maximum allowed. + * + *

+ * The alleles that are put forward for removal are those with the lowest estimated allele count. + *

+ *

+ * Allele counts are estimated herein as the weighted average count + * across samples and phased genotypes where the weight is the genotype likelihood; we apply + * a uniform prior to all genotype configurations. + *

+ *

+ * In case of a tie, unlikely for non-trivial likelihoods, we keep the alleles with the lower index. + *

+ * + * @param genotypeLikelihoods target genotype likelihoods. + * @param maxAlternativeAlleles maximum number of alternative alleles allowed. + * @return never {@code null}. + */ + private Set excessAlternativeAlleles(final GenotypingLikelihoods genotypeLikelihoods, final int maxAlternativeAlleles) { + final int alleleCount = genotypeLikelihoods.alleleCount(); + final int excessAlternativeAlleleCount = Math.max(0, alleleCount - 1 - maxAlternativeAlleles); + if (excessAlternativeAlleleCount <= 0) { + return Collections.emptySet(); + } + + final double log10NumberOfAlleles = MathUtils.Log10Cache.get(alleleCount); // log10(Num of Alleles); e.g. log10(2) for diploids. + final double[] log10EstimatedACs = new double[alleleCount]; // where we store the AC estimates. + // Set allele counts to 0 (i.e. exp(-Inf)) at the start. + Arrays.fill(log10EstimatedACs, Double.NEGATIVE_INFINITY); + + for (int i = 0; i < genotypeLikelihoods.sampleCount(); i++) { + final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(genotypeLikelihoods.samplePloidy(i), alleleCount); + final int numberOfUnphasedGenotypes = calculator.genotypeCount(); + // unphased genotype log10 likelihoods + final double[] log10Likelihoods = genotypeLikelihoods.sampleLikelihoods(i).getAsVector(); + // total number of phased genotypes for all possible combinations of allele counts. 
+ final double log10NumberOfPhasedGenotypes = calculator.ploidy() * log10NumberOfAlleles; + for (int j = 0; j < numberOfUnphasedGenotypes; j++) { + final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(j); + // given the current unphased genotype, how many phased genotypes there are: + final double log10NumberOfPhasedGenotypesForThisUnphasedGenotype = alleleCounts.log10CombinationCount(); + final double log10GenotypeLikelihood = log10Likelihoods[j]; + for (int k = 0; k < alleleCounts.distinctAlleleCount(); k++) { + final int alleleIndex = alleleCounts.alleleIndexAt(k); + final int alleleCallCount = alleleCounts.alleleCountAt(k); + final double log10AlleleCount = MathUtils.Log10Cache.get(alleleCallCount); + final double log10Weight = log10GenotypeLikelihood + log10NumberOfPhasedGenotypesForThisUnphasedGenotype + - log10NumberOfPhasedGenotypes; + // update the allele AC adding the contribution of this unphased genotype at this sample. + log10EstimatedACs[alleleIndex] = MathUtils.log10sumLog10(log10EstimatedACs[alleleIndex], + log10Weight + log10AlleleCount); + } + } + } + + final PriorityQueue lessFrequentFirst = new PriorityQueue<>(alleleCount, new Comparator() { + @Override + public int compare(final Allele a1, final Allele a2) { + final int index1 = genotypeLikelihoods.alleleIndex(a1); + final int index2 = genotypeLikelihoods.alleleIndex(a2); + final double freq1 = log10EstimatedACs[index1]; + final double freq2 = log10EstimatedACs[index2]; + if (freq1 != freq2) { + return Double.compare(freq1, freq2); + } else { + return Integer.compare(index2, index1); + } + } + }); + + for (int i = 1; i < alleleCount; i++) { + lessFrequentFirst.add(genotypeLikelihoods.alleleAt(i)); + } + + final Set result = new HashSet<>(excessAlternativeAlleleCount); + for (int i = 0; i < excessAlternativeAlleleCount; i++) { + result.add(lessFrequentFirst.remove()); + } + return result; + } + /** * Tries to phase the individual alleles based on pairwise comparisons to 
the other alleles based on all called haplotypes * @@ -325,13 +451,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine> constructHaplotypeMapping(final List originalCalls, + @VisibleForTesting + static Map> constructHaplotypeMapping(final List originalCalls, final Set calledHaplotypes) { final Map> haplotypeMap = new HashMap<>(originalCalls.size()); for ( final VariantContext call : originalCalls ) { // don't try to phase if there is not exactly 1 alternate allele if ( ! isBiallelic(call) ) { - haplotypeMap.put(call, Collections.emptySet()); + haplotypeMap.put(call, Collections.emptySet()); continue; } @@ -362,10 +489,11 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine originalCalls, - final Map> haplotypeMap, - final int totalAvailableHaplotypes, - final Map> phaseSetMapping) { + @VisibleForTesting + static int constructPhaseSetMapping(final List originalCalls, + final Map> haplotypeMap, + final int totalAvailableHaplotypes, + final Map> phaseSetMapping) { final int numCalls = originalCalls.size(); int uniqueCounter = 0; @@ -457,9 +585,10 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine constructPhaseGroups(final List originalCalls, - final Map> phaseSetMapping, - final int indexTo) { + @VisibleForTesting + static List constructPhaseGroups(final List originalCalls, + final Map> phaseSetMapping, + final int indexTo) { final List phasedCalls = new ArrayList<>(originalCalls); // if we managed to find any phased groups, update the VariantContexts @@ -561,6 +690,10 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine(new HashSet<>(call.getAlleles()))); + } + // Skim the filtered map based on the location so that we do not add filtered read that are going to be removed // right after a few lines of code bellow. 
final Map> overlappingFilteredReads = overlappingFilteredReads(perSampleFilteredReadList, loc); @@ -687,15 +820,12 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine readLikelihoods, final VariantContext mergedVC, final List noCallAlleles ) { - final List vcAlleles = mergedVC.getAlleles(); - final AlleleList alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles); - final GenotypingLikelihoods likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods)); + private GenotypesContext calculateGLsForThisEvent(final ReadLikelihoods readLikelihoods, final List noCallAlleles) { + final GenotypingLikelihoods likelihoods = genotypingModel.calculateLikelihoods(readLikelihoods, new GenotypingData<>(ploidyModel, readLikelihoods)); final int sampleCount = samples.sampleCount(); final GenotypesContext result = GenotypesContext.create(sampleCount); for (int s = 0; s < sampleCount; s++) @@ -709,7 +839,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine haplotypes ) { + private static void cleanUpSymbolicUnassembledEvents(final List haplotypes) { final List haplotypesToRemove = new ArrayList<>(); for( final Haplotype h : haplotypes ) { for( final VariantContext vc : h.getEventMap().getVariantContexts() ) { @@ -742,9 +872,9 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1); final Event refEvent = new Event(null); - eventMapper.put(refEvent, new ArrayList()); + eventMapper.put(refEvent, new ArrayList<>()); for( final VariantContext vc : eventsAtThisLoc ) { - eventMapper.put(new Event(vc), new ArrayList()); + eventMapper.put(new Event(vc), new ArrayList<>()); } for( final Haplotype h : haplotypes ) { @@ -764,11 +894,12 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine generateVCsFromAlignment( final Haplotype haplotype, 
final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd ) { + @VisibleForTesting + static Map generateVCsFromAlignment(final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd) { return new EventMap(haplotype, ref, refLoc, sourceNameToAdd); } - protected static boolean containsVCWithMatchingAlleles( final List list, final VariantContext vcToTest ) { + private static boolean containsVCWithMatchingAlleles( final List list, final VariantContext vcToTest ) { for( final VariantContext vc : list ) { if( vc.hasSameAllelesAs(vcToTest) ) { return true; @@ -780,7 +911,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine implements SampleList, AlleleList */ private int referenceAlleleIndex = -1; + /** + * Index of the non-ref allele if any, otherwise - 1. + */ + private int nonRefAlleleIndex = -1; + /** * Caches the read-list per sample list returned by {@link #sampleReads} */ @@ -138,6 +145,7 @@ public class ReadLikelihoods implements SampleList, AlleleList readListBySampleIndex = new List[sampleCount]; valuesBySampleIndex = new double[sampleCount][][]; referenceAlleleIndex = findReferenceAllele(alleles); + nonRefAlleleIndex = findNonRefAllele(alleles); readIndexBySampleIndex = new Object2IntMap[sampleCount]; @@ -219,6 +227,15 @@ public class ReadLikelihoods implements SampleList, AlleleList return -1; } + private int findNonRefAllele(final AlleleList alleles) { + final int alleleCount = alleles.alleleCount(); + for (int i = alleleCount - 1; i >= 0; i--) { + if (alleles.alleleAt(i).equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)) + return i; + } + return - 1; + } + /** * Returns the index of a sample within the likelihood collection. * @@ -419,16 +436,21 @@ public class ReadLikelihoods implements SampleList, AlleleList } - /** - * Search the best allele for a read. - * - * @param sampleIndex including sample index. - * @param readIndex target read index. 
- * - * @return never {@code null}, but with {@link BestAllele#allele allele} == {@code null} - * if non-could be found. - */ private BestAllele searchBestAllele(final int sampleIndex, final int readIndex, final boolean canBeReference) { + return searchBestAllele(sampleIndex, readIndex, canBeReference, alleles); + } + + /** + * Search the best allele for a read. + * + * @param sampleIndex including sample index. + * @param readIndex target read index. + * + * @return never {@code null}, but with {@link BestAllele#allele allele} == {@code null} + * if non-could be found. + */ + private BestAllele searchBestAllele(final int sampleIndex, final int readIndex, final boolean canBeReference, + final AlleleList allelesToConsider) { final int alleleCount = alleles.alleleCount(); if (alleleCount == 0 || (alleleCount == 1 && referenceAlleleIndex == 0 && !canBeReference)) return new BestAllele(sampleIndex,readIndex,-1,Double.NEGATIVE_INFINITY,Double.NEGATIVE_INFINITY); @@ -441,6 +463,10 @@ public class ReadLikelihoods implements SampleList, AlleleList for (int a = bestAlleleIndex + 1; a < alleleCount; a++) { if (!canBeReference && referenceAlleleIndex == a) continue; + if (nonRefAlleleIndex == a) + continue; + if (allelesToConsider.alleleIndex(alleles.alleleAt(a)) < 0) + continue; final double candidateLikelihood = sampleValues[a][readIndex]; if (candidateLikelihood > bestLikelihood) { bestAlleleIndex = a; @@ -501,6 +527,7 @@ public class ReadLikelihoods implements SampleList, AlleleList alleleList = null; int referenceIndex = this.referenceAlleleIndex; + int nonRefIndex = this.nonRefAlleleIndex; @SuppressWarnings("unchecked") final A[] newAlleles = (A[]) new Allele[newAlleleCount]; for (int a = 0; a < oldAlleleCount; a++) @@ -511,19 +538,20 @@ public class ReadLikelihoods implements SampleList, AlleleList if (referenceIndex != -1) throw new IllegalArgumentException("there cannot be more than one reference allele"); referenceIndex = newIndex; + } else if 
(allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)) { + if (nonRefAlleleIndex != -1) + throw new IllegalArgumentException(String.format("there cannot be more than one %s allele", GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME)); + nonRefIndex = newIndex; } newAlleles[newIndex++] = allele; } alleles = new IndexedAlleleList<>(newAlleles); - if (referenceIndex != -1) - referenceAlleleIndex = referenceIndex; - final int sampleCount = samples.sampleCount(); for (int s = 0; s < sampleCount; s++) { final int sampleReadCount = readsBySampleIndex[s].length; - final double[][] newValuesBySampleIndex = Arrays.copyOf(valuesBySampleIndex[s],newAlleleCount); + final double[][] newValuesBySampleIndex = Arrays.copyOf(valuesBySampleIndex[s], newAlleleCount); for (int a = oldAlleleCount; a < newAlleleCount; a++) { newValuesBySampleIndex[a] = new double[sampleReadCount]; if (defaultLikelihood != 0.0) @@ -531,8 +559,86 @@ public class ReadLikelihoods implements SampleList, AlleleList } valuesBySampleIndex[s] = newValuesBySampleIndex; } + + if (referenceIndex != -1) + referenceAlleleIndex = referenceIndex; + if (nonRefIndex != -1) { + nonRefAlleleIndex = nonRefIndex; + updateNonRefAlleleLikelihoods(); + } } + /** + * Modify this likelihood collection dropping some of its alleles. + * @param allelesToDrop set of alleles to be dropped. + * @throws IllegalArgumentException if {@code allelesToDrop} is {@code null} or contain elements that are + * not alleles in this collection. 
+ */ + public void dropAlleles(final Set allelesToDrop) { + if (allelesToDrop == null) { + throw new IllegalArgumentException("the input allele to drop set cannot be null"); + } + if (allelesToDrop.isEmpty()) { + return; + } + final boolean[] indicesToDrop = new boolean[alleles.alleleCount()]; + for (final A allele : allelesToDrop) { + final int index = alleles.alleleIndex(allele); + if (index < 0) { + throw new IllegalArgumentException("unknown allele: " + allele); + } + indicesToDrop[index] = true; + } + + @SuppressWarnings("unchecked") + final A[] newAlleles = (A[]) new Allele[alleles.alleleCount() - allelesToDrop.size()]; + final int[] newAlleleIndices = new int[newAlleles.length]; + int nextIndex = 0; + for (int i = 0; i < alleles.alleleCount(); i++) { + if (indicesToDrop[i]) { + continue; + } + newAlleleIndices[nextIndex] = i; + newAlleles[nextIndex++] = alleles.alleleAt(i); + } + for (int i = 0; i < samples.sampleCount(); i++) { + final double[][] oldSampleValues = valuesBySampleIndex[i]; + final double[][] newSampleValues = new double[newAlleles.length][]; + for (int j = 0; j < newAlleles.length; j++) { + newSampleValues[j] = oldSampleValues[newAlleleIndices[j]]; + } + valuesBySampleIndex[i] = newSampleValues; + } + alleleList = Collections.unmodifiableList(Arrays.asList(newAlleles)); + alleles = new IndexedAlleleList<>(alleleList); + if (nonRefAlleleIndex >= 0) { + nonRefAlleleIndex = findNonRefAllele(alleles); + updateNonRefAlleleLikelihoods(); + } + if (referenceAlleleIndex >= 0) + referenceAlleleIndex = findReferenceAllele(alleles); + } + + /** + * Drop all the alleles not present in the subset passed as a parameter. + * @param subset the alleles to retain. + * @throws IllegalArgumentException if {@code alleles} is {@code null} or contain alleles unknown to this likelihood + * collection. 
+ */ + public void retainAlleles(final Set subset) { + if (alleles == null) { + throw new IllegalArgumentException("the retain subset must not be null"); + } else if (!alleles().containsAll(subset) || subset.size() > alleleCount()) { + throw new IllegalArgumentException("some of the alleles to retain are not present in the read-likelihoods collection"); + } else if (subset.isEmpty() || (subset.size() == 1 && subset.contains(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE))) { + throw new IllegalArgumentException("there must be at least one allele to retain"); + } else { + final Set allelesToDrop = new HashSet<>(alleles()); + allelesToDrop.removeAll(subset); + dropAlleles(allelesToDrop); + } + } + /** * Likelihood matrix between a set of alleles and reads. * @param the allele-type. @@ -543,13 +649,13 @@ public class ReadLikelihoods implements SampleList, AlleleList * List of reads in the matrix sorted by their index therein. * @return never {@code null}. */ - public List reads(); + List reads(); /** * List of alleles in the matrix sorted by their index in the collection. * @return never {@code null}. */ - public List alleles(); + List alleles(); /** * Set the likelihood of a read given an allele through their indices. @@ -561,7 +667,7 @@ public class ReadLikelihoods implements SampleList, AlleleList * @throws IllegalArgumentException if {@code alleleIndex} or {@code readIndex} * are not valid allele and read indices respectively. */ - public void set(final int alleleIndex, final int readIndex, final double value); + void set(final int alleleIndex, final int readIndex, final double value); /** * Returns the likelihood of a read given a haplotype. @@ -575,7 +681,7 @@ public class ReadLikelihoods implements SampleList, AlleleList * @return the requested likelihood, whatever value was provided using {@link #set(int,int,double) set} * or 0.0 if none was set. 
*/ - public double get(final int alleleIndex, final int readIndex); + double get(final int alleleIndex, final int readIndex); /** * Queries the index of an allele in the matrix. @@ -586,7 +692,7 @@ public class ReadLikelihoods implements SampleList, AlleleList * @return -1 if such allele does not exist, otherwise its index which 0 or greater. */ @SuppressWarnings("unused") - public int alleleIndex(final A allele); + int alleleIndex(final A allele); /** * Queries the index of a read in the matrix. @@ -599,19 +705,19 @@ public class ReadLikelihoods implements SampleList, AlleleList * which is 0 or greater. */ @SuppressWarnings("unused") - public int readIndex(final GATKSAMRecord read); + int readIndex(final GATKSAMRecord read); /** * Number of allele in the matrix. * @return never negative. */ - public int alleleCount(); + int alleleCount(); /** * Number of reads in the matrix. * @return never negative. */ - public int readCount(); + int readCount(); /** * Returns the allele given its index. @@ -621,7 +727,7 @@ public class ReadLikelihoods implements SampleList, AlleleList * @throws IllegalArgumentException if {@code alleleIndex} is not a valid allele index. * @return never {@code null}. */ - public A alleleAt(final int alleleIndex); + A alleleAt(final int alleleIndex); /** * Returns the allele given its index. @@ -631,7 +737,7 @@ public class ReadLikelihoods implements SampleList, AlleleList * @throws IllegalArgumentException if {@code readIndex} is not a valid read index. * @return never {@code null}. */ - public GATKSAMRecord readAt(final int readIndex); + GATKSAMRecord readAt(final int readIndex); /** @@ -640,7 +746,7 @@ public class ReadLikelihoods implements SampleList, AlleleList * @param dest the destination array. 
* @param offset the copy offset within the destination allele */ - public void copyAlleleLikelihoods(final int alleleIndex, final double[] dest, final int offset); + void copyAlleleLikelihoods(final int alleleIndex, final double[] dest, final int offset); } /** @@ -905,7 +1011,6 @@ public class ReadLikelihoods implements SampleList, AlleleList * * @throws IllegalArgumentException the location cannot be {@code null} nor unmapped. */ - @SuppressWarnings("unused") public void filterToOnlyOverlappingUnclippedReads(final GenomeLoc location) { if (location == null) throw new IllegalArgumentException("the location cannot be null"); @@ -921,7 +1026,6 @@ public class ReadLikelihoods implements SampleList, AlleleList final int alleleCount = alleles.alleleCount(); final IntArrayList removeIndices = new IntArrayList(10); for (int s = 0; s < sampleCount; s++) { - int readRemoveCount = 0; final GATKSAMRecord[] sampleReads = readsBySampleIndex[s]; final int sampleReadCount = sampleReads.length; for (int r = 0; r < sampleReadCount; r++) @@ -932,28 +1036,6 @@ public class ReadLikelihoods implements SampleList, AlleleList } } - // Compare the read coordinates to the location of interest. - private boolean readOverlapsLocation(final String contig, final int locStart, - final int locEnd, final GATKSAMRecord read) { - final boolean overlaps; - - if (read.getReadUnmappedFlag()) - overlaps = false; - else if (!read.getReferenceName().equals(contig)) - overlaps = false; - else { - int alnStart = read.getAlignmentStart(); - int alnStop = read.getAlignmentEnd(); - if (alnStart > alnStop) { // Paranoia? based on GLP.createGenomeLoc(Read) this can happen?. - final int end = alnStart; - alnStart = alnStop; - alnStop = end; - } - overlaps = !(alnStop < locStart || alnStart > locEnd); - } - return overlaps; - } - /** * Removes those read that the best possible likelihood given any allele is just too low. 
* @@ -991,7 +1073,7 @@ public class ReadLikelihoods implements SampleList, AlleleList } // Check whether the read is poorly modelled. - protected boolean readIsPoorlyModelled(final int sampleIndex, final int readIndex, final GATKSAMRecord read, final double maxErrorRatePerBase) { + private boolean readIsPoorlyModelled(final int sampleIndex, final int readIndex, final GATKSAMRecord read, final double maxErrorRatePerBase) { final double maxErrorsForRead = Math.min(2.0, Math.ceil(read.getReadLength() * maxErrorRatePerBase)); final double log10QualPerBase = -4.0; final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase; @@ -1089,37 +1171,25 @@ public class ReadLikelihoods implements SampleList, AlleleList throw new IllegalArgumentException("non-ref allele cannot be null"); if (!nonRefAllele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)) throw new IllegalArgumentException("the non-ref allele is not valid"); - // Already present? - if (alleles.alleleIndex(nonRefAllele) != -1) - return; - - final int oldAlleleCount = alleles.alleleCount(); - final int newAlleleCount = oldAlleleCount + 1; - @SuppressWarnings("unchecked") - final A[] newAlleles = (A[]) new Allele[newAlleleCount]; - for (int a = 0; a < oldAlleleCount; a++) - newAlleles[a] = alleles.alleleAt(a); - newAlleles[oldAlleleCount] = nonRefAllele; - alleles = new IndexedAlleleList<>(newAlleles); - alleleList = null; // remove the cached alleleList. - - final int sampleCount = samples.sampleCount(); - for (int s = 0; s < sampleCount; s++) - addNonReferenceAlleleLikelihoodsPerSample(oldAlleleCount, newAlleleCount, s); + addMissingAlleles(Collections.singleton(nonRefAllele), Double.NEGATIVE_INFINITY); + updateNonRefAlleleLikelihoods(); } - // Updates per-sample structures according to the addition of the NON_REF allele. 
- private void addNonReferenceAlleleLikelihoodsPerSample(final int alleleCount, final int newAlleleCount, final int sampleIndex) { - final double[][] sampleValues = valuesBySampleIndex[sampleIndex] = Arrays.copyOf(valuesBySampleIndex[sampleIndex], newAlleleCount); - final int sampleReadCount = readsBySampleIndex[sampleIndex].length; + public void updateNonRefAlleleLikelihoods() { + updateNonRefAlleleLikelihoods(alleles); + } - final double[] nonRefAlleleLikelihoods = sampleValues[alleleCount] = new double [sampleReadCount]; - Arrays.fill(nonRefAlleleLikelihoods,Double.NEGATIVE_INFINITY); - for (int r = 0; r < sampleReadCount; r++) { - final BestAllele bestAllele = searchBestAllele(sampleIndex,r,true); - final double secondBestLikelihood = Double.isInfinite(bestAllele.confidence) ? bestAllele.likelihood - : bestAllele.likelihood - bestAllele.confidence; - nonRefAlleleLikelihoods[r] = secondBestLikelihood; + public void updateNonRefAlleleLikelihoods(final AlleleList allelesToConsider) { + if (nonRefAlleleIndex < 0) + return; + for (int s = 0; s < samples.sampleCount(); s++) { + final double[][] sampleValues = valuesBySampleIndex[s]; + for (int r = 0; r < sampleValues[0].length; r++) { + final BestAllele bestAllele = searchBestAllele(s, r, true, allelesToConsider); + final double secondBestLikelihood = Double.isInfinite(bestAllele.confidence) ? bestAllele.likelihood + : bestAllele.likelihood - bestAllele.confidence; + sampleValues[nonRefAlleleIndex][r] = secondBestLikelihood; + } } } @@ -1170,9 +1240,9 @@ public class ReadLikelihoods implements SampleList, AlleleList * * @return never {@code null}. */ - public static ReadLikelihoods fromPerAlleleReadLikelihoodsMap(final AlleleList alleleList, final Map map) { + @VisibleForTesting + static ReadLikelihoods fromPerAlleleReadLikelihoodsMap(final AlleleList alleleList, final Map map) { - //TODO add test code for this method. 
// First we need to create the read-likelihood collection with all required alleles, samples and reads. final SampleList sampleList = new IndexedSampleList(map.keySet()); final int alleleCount = alleleList.alleleCount(); @@ -1228,7 +1298,8 @@ public class ReadLikelihoods implements SampleList, AlleleList * @param sampleIndex the target sample. * @return never {@code null}, perhaps empty. */ - public Map> readsByBestAlleleMap(final int sampleIndex) { + @VisibleForTesting + Map> readsByBestAlleleMap(final int sampleIndex) { checkSampleIndex(sampleIndex); final int alleleCount = alleles.alleleCount(); final int sampleReadCount = readsBySampleIndex[sampleIndex].length; From 3b7e0c56bfa96c8b88cc785df3b2f435a614ea39 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Wed, 18 May 2016 00:03:36 -0400 Subject: [PATCH 72/82] Moved post-IR MQ reverter filter to public --- ...OriginalMQAfterIndelRealignmentFilter.java | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOriginalMQAfterIndelRealignmentFilter.java diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOriginalMQAfterIndelRealignmentFilter.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOriginalMQAfterIndelRealignmentFilter.java new file mode 100644 index 000000000..f979f0ad8 --- /dev/null +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOriginalMQAfterIndelRealignmentFilter.java @@ -0,0 +1,68 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. 
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.filters; + +import htsjdk.samtools.SAMRecord; +import org.broadinstitute.gatk.utils.commandline.Argument; + +/** + * Revert the MQ of reads that were modified by IndelRealigner + * + *

IndelRealigner systematically adds +10 to the MQ of the reads it realigns. In some cases, that brings the resulting MQ to a value higher than MQ 60, which is the normal cap for MQ values. Since many downstream tools assume that MQ is <= 60, this may potentially cause problems.

+ * + *

This read filter makes it possible to revert the MQ values of all the reads touched by IndelRealigner. It works by subtracting 10 from the MQ of all reads that have an "OC" tag, which stands for "original CIGAR" and is added by IndelRealigner to any read that it realigns.

+ * + *

Usage example

+ * + *

Enable the filter

+ *
+ *     java -jar GenomeAnalysisTK.jar \
+ *         -T ToolName \
+ *         -R reference.fasta \
+ *         -I input.bam \
+ *         -o output.file \
+ *         -rf ReassignOriginalMQAfterIndelRealignmentFilter
+ * 
+ * + * + * + *

Caveat

+ * + *

There is currently no way to tell programmatically that a file has already been processed with this filter, so you should check the header manually before applying this filter. Running it multiple times on the same BAM file would levy an unjustified penalty on realigned reads.

+ * + * @author ami + * @since 1/30/15. + */ +public class ReassignOriginalMQAfterIndelRealignmentFilter extends ReadFilter { + + public boolean filterOut(SAMRecord rec) { + final String ORIGINAL_CIGAR_TAG = "OC"; + + if (rec.getAttribute(ORIGINAL_CIGAR_TAG) != null) + rec.setMappingQuality(rec.getMappingQuality() - 10); + return false; + } +} From c4a06ad20a1adf5cd5bdf2211cbae07e41a79ba3 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Fri, 27 May 2016 11:45:28 -0400 Subject: [PATCH 73/82] Move indel realignment to public --- .../tools/walkers/indels/LeftAlignIndels.java | 142 ------------- .../gatk/tools/walkers/indels/ReadBin.java | 128 ----------- .../ConstrainedMateFixingManagerUnitTest.java | 198 ------------------ .../indels/IndelRealignerIntegrationTest.java | 187 ----------------- .../indels/IndelRealignerLargeScaleTest.java | 108 ---------- .../indels/IndelRealignerUnitTest.java | 87 -------- .../tools/walkers/indels/ReadBinUnitTest.java | 117 ----------- ...RealignerTargetCreatorIntegrationTest.java | 159 -------------- .../RealignerTargetCreatorLargeScaleTest.java | 85 -------- .../indels/ConstrainedMateFixingManager.java | 62 ++---- .../tools/walkers/indels/IndelRealigner.java | 69 ++---- .../tools/walkers/indels/LeftAlignIndels.java | 116 ++++++++++ .../gatk/tools/walkers/indels/ReadBin.java | 102 +++++++++ .../indels/RealignerTargetCreator.java | 67 ++---- .../ConstrainedMateFixingManagerUnitTest.java | 172 +++++++++++++++ .../indels/IndelRealignerIntegrationTest.java | 161 ++++++++++++++ .../indels/IndelRealignerLargeScaleTest.java | 82 ++++++++ .../indels/IndelRealignerUnitTest.java | 61 ++++++ .../tools/walkers/indels/ReadBinUnitTest.java | 91 ++++++++ ...RealignerTargetCreatorIntegrationTest.java | 133 ++++++++++++ .../RealignerTargetCreatorLargeScaleTest.java | 59 ++++++ 21 files changed, 1038 insertions(+), 1348 deletions(-) delete mode 100644 
protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/LeftAlignIndels.java delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBin.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManagerUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerIntegrationTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerLargeScaleTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBinUnitTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorIntegrationTest.java delete mode 100644 protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorLargeScaleTest.java rename {protected/gatk-tools-protected => public/gatk-tools-public}/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManager.java (65%) rename {protected/gatk-tools-protected => public/gatk-tools-public}/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java (88%) create mode 100644 public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/LeftAlignIndels.java create mode 100644 public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBin.java rename {protected/gatk-tools-protected => public/gatk-tools-public}/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreator.java (65%) create mode 100644 
public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManagerUnitTest.java create mode 100644 public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerIntegrationTest.java create mode 100644 public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerLargeScaleTest.java create mode 100644 public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerUnitTest.java create mode 100644 public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBinUnitTest.java create mode 100644 public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorIntegrationTest.java create mode 100644 public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorLargeScaleTest.java diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/LeftAlignIndels.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/LeftAlignIndels.java deleted file mode 100644 index 484529373..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/LeftAlignIndels.java +++ /dev/null @@ -1,142 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import htsjdk.samtools.Cigar; -import htsjdk.samtools.SAMRecord; -import org.broadinstitute.gatk.utils.commandline.Output; -import org.broadinstitute.gatk.engine.CommandLineGATK; -import org.broadinstitute.gatk.utils.contexts.ReferenceContext; -import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter; -import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; -import org.broadinstitute.gatk.engine.walkers.ReadWalker; -import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; -import org.broadinstitute.gatk.utils.help.HelpConstants; -import org.broadinstitute.gatk.utils.sam.AlignmentUtils; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; - - -/** - * Left-align indels within reads in a bam file - * - *

This tool left-aligns any indels within read cigars in order to standardize representation when there are multiple valid - * representations possible (i.e. where the same indel can be placed at multiple positions and still represent the same haplotype). - * The standard convention is to place an indel at the left-most position possible, but this is not always followed, so - * this tool can be used to correct the representation of indels.

- * - *

Note

- *

This is only really needed when calling variants with legacy locus-based tools such as UnifiedGenotyper. With more - * sophisticated tools (like HaplotypeCaller) that involve reconstructing haplotypes (eg through reassembly), the problem - * of multiple valid representations is handled internally and does not need to be corrected explicitly.

- * - *

Input

- *

- * A bam file with mapped reads. - *

- * - *

Output

- *

- * A bam file in which indels have been left-aligned where appropriate. - *

- * - *

Usage example

- *
- * java -jar GenomeAnalysisTK.jar \
- *   -R reference.fasta \
- *   -T LeftAlignIndels \
- *   -I reads.bam \
- *   -o output_with_leftaligned_indels.bam
- * 
- * - */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} ) -public class LeftAlignIndels extends ReadWalker { - - @Output(required=false, doc="Output bam") - protected GATKSAMFileWriter writer = null; - - public void initialize() {} - - private void emit(final SAMRecord read) { - if ( writer != null ) - writer.addAlignment(read); - } - - public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { - // we can not deal with screwy records - if ( read.getReadUnmappedFlag() || read.getCigar().numCigarElements() == 0 ) { - emit(read); - return 0; - } - - // move existing indels (for 1 indel reads only) to leftmost position within identical sequence - int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read); - if ( numBlocks == 2 ) { - Cigar newCigar = AlignmentUtils.leftAlignIndel(IndelRealigner.unclipCigar(read.getCigar()), ref.getBases(), read.getReadBases(), 0, 0, true); - newCigar = IndelRealigner.reclipCigar(newCigar, read); - read.setCigar(newCigar); - } - - emit(read); - return 1; - } - - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer value, Integer sum) { - return sum + value; - } - - public void onTraversalDone(Integer result) {} -} \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBin.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBin.java deleted file mode 100644 index 532041688..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBin.java +++ /dev/null @@ -1,128 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. 
with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. 
LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. 
OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. 
BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import com.google.java.contract.Requires; -import org.broadinstitute.gatk.utils.GenomeLoc; -import org.broadinstitute.gatk.utils.GenomeLocParser; -import org.broadinstitute.gatk.utils.HasGenomeLocation; -import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; - -import java.util.ArrayList; -import java.util.List; - -/** -* User: carneiro -* Date: 2/16/13 -* Time: 11:15 PM -*/ -class ReadBin implements HasGenomeLocation { - - private final ArrayList reads = new ArrayList(); - private byte[] reference = null; - private GenomeLoc loc = null; - private final GenomeLocParser parser; - private final int referencePadding; - - public ReadBin(final GenomeLocParser parser, final int referencePadding) { - this.parser = parser; - this.referencePadding = referencePadding; - } - - // Return false if we can't process this read bin because the reads are not correctly overlapping. - // This can happen if e.g. there's a large known indel with no overlapping reads. - public void add(GATKSAMRecord read) { - - final int readStart = read.getSoftStart(); - final int readStop = read.getSoftEnd(); - if ( loc == null ) - loc = parser.createGenomeLoc(read.getReferenceName(), readStart, Math.max(readStop, readStart)); // in case it's all an insertion - else if ( readStop > loc.getStop() ) - loc = parser.createGenomeLoc(loc.getContig(), loc.getStart(), readStop); - - reads.add(read); - } - - public List getReads() { - return reads; - } - - @Requires("referenceReader.isUppercasingBases()") - public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) { - // set up the reference if we haven't done so yet - if ( reference == null ) { - // first, pad the reference to handle deletions in narrow windows (e.g. 
those with only 1 read) - int padLeft = Math.max(loc.getStart()- referencePadding, 1); - int padRight = Math.min(loc.getStop()+ referencePadding, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength()); - loc = parser.createGenomeLoc(loc.getContig(), loc.getContigIndex(), padLeft, padRight); - reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); - } - - return reference; - } - - public GenomeLoc getLocation() { - return loc; - } - - public int size() { - return reads.size(); - } - - public void clear() { - reads.clear(); - reference = null; - loc = null; - } - -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManagerUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManagerUnitTest.java deleted file mode 100644 index 420ccceef..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManagerUnitTest.java +++ /dev/null @@ -1,198 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. 
-* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. 
-* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.util.ProgressLoggerInterface; -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.utils.GenomeLocParser; -import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.List; - - -public class ConstrainedMateFixingManagerUnitTest extends BaseTest { - - private static SAMFileHeader header; - private static GenomeLocParser genomeLocParser; - - @BeforeClass - public void beforeClass() { - header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 10000); - genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); - } - - @Test - public void testSecondaryAlignmentsDoNotInterfere() { - final List properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 10, 30, true, false); - final GATKSAMRecord read1 = properReads.get(0); - read1.setAlignmentStart(8); // move the read - read1.setFlags(99); // first in proper pair, mate negative strand - - final GATKSAMRecord read2Primary = properReads.get(1); - read2Primary.setFlags(147); // second in pair, mate unmapped, not primary alignment - - Assert.assertEquals(read1.getInferredInsertSize(), 21); - - final GATKSAMRecord read2NonPrimary = new GATKSAMRecord(read2Primary); - read2NonPrimary.setFlags(393); // second in proper pair, on reverse strand - - final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(null, genomeLocParser, 1000, 1000, 1000); - manager.addRead(read1, true, false); - manager.addRead(read2NonPrimary, false, false); - manager.addRead(read2Primary, false, false); - - Assert.assertEquals(manager.getNReadsInQueue(), 3); - - for ( final 
SAMRecord read : manager.getReadsInQueueForTesting() ) { - if ( read.getFirstOfPairFlag() ) { - Assert.assertEquals(read.getFlags(), 99); - Assert.assertEquals(read.getInferredInsertSize(), 23); - } else if ( read.getNotPrimaryAlignmentFlag() ) { - Assert.assertEquals(read.getFlags(), 393); - Assert.assertEquals(read.getInferredInsertSize(), -21); - } else { - Assert.assertEquals(read.getFlags(), 147); - Assert.assertEquals(read.getInferredInsertSize(), -23); - } - } - } - - @Test - public void testSecondaryAlignmentsDoNotCauseAccidentalRemovalOfMate() { - final List properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 530, 1594, true, false); - final GATKSAMRecord read1 = properReads.get(0); - read1.setFlags(99); // first in proper pair, mate negative strand - - final GATKSAMRecord read2Primary = properReads.get(1); - read2Primary.setFlags(147); // second in pair, mate unmapped, not primary alignment - read2Primary.setAlignmentStart(1596); // move the read - - final GATKSAMRecord read2NonPrimary = new GATKSAMRecord(read2Primary); - read2NonPrimary.setReadName("foo"); - read2NonPrimary.setFlags(393); // second in proper pair, on reverse strand - read2NonPrimary.setAlignmentStart(451); - read2NonPrimary.setMateAlignmentStart(451); - - final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(null, genomeLocParser, 10000, 200, 10000); - manager.addRead(read2NonPrimary, false, false); - manager.addRead(read1, false, false); - - for ( int i = 0; i < ConstrainedMateFixingManager.EMIT_FREQUENCY; i++ ) - manager.addRead(ArtificialSAMUtils.createArtificialRead(header, "foo" + i, 0, 1500, 10), false, false); - - Assert.assertTrue(manager.forMateMatching.containsKey("foo")); - } - - @Test - public void testSupplementaryAlignmentsDoNotCauseBadMateFixing() { - final List properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 1000, 2000, true, false); - final GATKSAMRecord read1 = properReads.get(0); - read1.setFlags(99); // first in pair, 
negative strand - - final GATKSAMRecord read2 = properReads.get(1); - read2.setFlags(161); // second in pair, mate negative strand - - final GATKSAMRecord read2Supp = new GATKSAMRecord(read2); - read2Supp.setReadName("foo"); - read2Supp.setFlags(2209); // second in pair, mate negative strand, supplementary - read2Supp.setAlignmentStart(100); - read2Supp.setMateAlignmentStart(1000); - - final DummyWriter writer = new DummyWriter(); - final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(writer, genomeLocParser, 10000, 200, 10000); - manager.addRead(read2Supp, false, false); - manager.addRead(read1, false, false); - manager.addRead(read2, false, false); - manager.close(); // "write" the reads to our dummy writer - - // check to make sure that none of the mate locations were changed, which is the problem brought to us by a user - for ( final SAMRecord read : writer.reads ) { - final int start = read.getAlignmentStart(); - switch (start) { - case 100: - Assert.assertEquals(read.getMateAlignmentStart(), 1000); - break; - case 1000: - Assert.assertEquals(read.getMateAlignmentStart(), 2000); - break; - case 2000: - Assert.assertEquals(read.getMateAlignmentStart(), 1000); - break; - default: - Assert.assertTrue(false, "We saw a read located at the wrong position"); - } - } - } - - private class DummyWriter implements SAMFileWriter { - - public List reads; - - public DummyWriter() { reads = new ArrayList<>(10); } - - public void addAlignment(final SAMRecord alignment) { reads.add(alignment);} - - public SAMFileHeader getFileHeader() { return null; } - - public void setProgressLogger(final ProgressLoggerInterface progress) {} - - public void close() {} - } -} \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerIntegrationTest.java 
deleted file mode 100644 index 98afc3bfc..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerIntegrationTest.java +++ /dev/null @@ -1,187 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. 
LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. 
Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import org.broadinstitute.gatk.engine.walkers.WalkerTest; -import org.broadinstitute.gatk.utils.exceptions.UserException; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - -public class IndelRealignerIntegrationTest extends WalkerTest { - - private static final String mainTestBam = validationDataLocation + "indelRealignerTest.pilot1.ceu.fixed.fixmates.bam"; - private static final String mainTestIntervals = validationDataLocation + "indelRealignerTest.pilot1.ceu.intervals"; - private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf"; - private static final String baseCommandPrefix = "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 "; - private static final String baseCommand = baseCommandPrefix + "-o %s "; - private static final String base_md5 = "ab7407d2299d9ba73449cea376eeb9c4"; - private static final String base_md5_with_SW_or_VCF = "fa57bd96b83038ac6a70e58e11bf5364"; - - @Test - public void testDefaults() { - - WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommand, - 1, - Arrays.asList(base_md5)); - 
executeTest("test realigner defaults", spec1); - - WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommand + "-known " + knownIndels, - 1, - Arrays.asList(base_md5_with_SW_or_VCF)); - executeTest("test realigner defaults with VCF", spec2); - } - - @Test - public void testKnownsOnly() { - WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels, - 1, - Arrays.asList("c42b6f3e1270e43cce2b6f75b6a38f30")); - executeTest("realigner known indels only from VCF", spec1); - } - - @Test - public void testUseSW() { - WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommand + "--consensusDeterminationModel USE_SW -known " + knownIndels, - 1, - Arrays.asList(base_md5_with_SW_or_VCF)); - executeTest("realigner use SW from VCF", spec1); - } - - @Test - public void testLods() { - HashMap e = new HashMap(); - e.put("-LOD 60", base_md5); - e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "0c4597e48b4e194de32ebe494704ea6b" ); - - for ( Map.Entry entry : e.entrySet() ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + entry.getKey(), - 1, - Arrays.asList(entry.getValue())); - executeTest(String.format("realigner [%s]", entry.getKey()), spec); - } - } - - @Test - public void testLongRun() { - WalkerTestSpec spec = new WalkerTestSpec( - "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 -o %s", - 1, - Arrays.asList("19e6859b9ef09c7e0a79a19626908b17")); - executeTest("realigner long run", spec); - } - - @Test - public void testNoTags() { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand + "--noOriginalAlignmentTags --consensusDeterminationModel USE_SW", - 1, - Arrays.asList("8f5684359d7b26acaacfa657ef395a0c")); - executeTest("realigner no output tags", spec); - } - - @Test 
- public void testStats() { - WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommandPrefix + "-stats %s -o /dev/null", - 1, - Arrays.asList("7ed8d4eed635613fd031598a5c9ef5a3")); - executeTest("realigner stats", spec1); - - WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommandPrefix + "-LOD 60 -stats %s -o /dev/null", - 1, - Arrays.asList("e8b02bfc5debec55fe936a38c59463cc")); - executeTest("realigner stats", spec2); - } - - @Test - public void testMaxReadsInMemory() { - HashMap e = new HashMap(); - e.put("--maxReadsInMemory 10000", "236c64f2da0047534b44444d9d699378"); - e.put( "--maxReadsInMemory 40000", base_md5 ); - - for ( Map.Entry entry : e.entrySet() ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + entry.getKey(), - 1, - Arrays.asList(entry.getValue())); - executeTest(String.format("realigner [%s]", entry.getKey()), spec); - } - } - - @Test - public void testNWayOut() { - WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommandPrefix + " -nWayOut .clean.bam ", - 1, - Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); - executeTest("test realigner nWayOut", spec1); - } - - @Test - public void testBadCigarStringDoesNotFail() { - // Just making sure the test runs without an error, don't care about the MD5 value - WalkerTestSpec spec = new WalkerTestSpec( - "-T IndelRealigner -R " + b37KGReference + " -I " + privateTestDir + "Realigner.error.bam -L 19:5787200-5787300 -targetIntervals 19:5787205-5787300 -o %s", - 1, - Arrays.asList("")); - executeTest("test bad cigar string does not fail", spec); - } -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerLargeScaleTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerLargeScaleTest.java deleted file mode 100644 index 217e06631..000000000 --- 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerLargeScaleTest.java +++ /dev/null @@ -1,108 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. 
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. 
Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import org.broadinstitute.gatk.engine.walkers.WalkerTest; -import org.testng.annotations.Test; - -import java.util.ArrayList; - -public class IndelRealignerLargeScaleTest extends WalkerTest { - @Test( timeOut = 18000000 ) - public void testHighCoverage() { - WalkerTestSpec spec = new WalkerTestSpec( - - "-R " + b36KGReference + - " -T IndelRealigner" + - " -I " + validationDataLocation + "indelRealignerTest.pilot1.veryHighCoverage.bam" + - " -L 20:49,500-55,500" + - " -o /dev/null" + - " -targetIntervals " + validationDataLocation + "indelRealignerTest.pilot1.ceu.intervals", - 0, - new ArrayList(0)); - executeTest("testIndelRealignerHighCoverage", spec); - } - - @Test( timeOut = 18000000 ) - public void testRealigner() { - WalkerTestSpec spec1 = new WalkerTestSpec( - - "-R " + hg18Reference + - " -T IndelRealigner" + - " -LOD 5" + - " -maxConsensuses 100" + - " -greedy 100" + - " -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + - " -o /dev/null" + - " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + - " -L chr1:1-5,650,000" + - " -targetIntervals " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.realigner.intervals", - 0, - new ArrayList(0)); - executeTest("testIndelRealignerWholeGenome", spec1); - 
- WalkerTestSpec spec2 = new WalkerTestSpec( - "-R " + hg18Reference + - " -T IndelRealigner" + - " -LOD 5" + - " -maxConsensuses 100" + - " -greedy 100" + - " -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + - " -o /dev/null" + - " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + - " -L chr1:1-150,000,000" + - " -targetIntervals " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.realigner.intervals", - 0, - new ArrayList(0)); - executeTest("testIndelRealignerWholeExome", spec2); - } -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerUnitTest.java deleted file mode 100644 index bb3d66c1b..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerUnitTest.java +++ /dev/null @@ -1,87 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. 
ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import htsjdk.samtools.reference.IndexedFastaSequenceFile; -import htsjdk.samtools.SAMFileHeader; -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.FileNotFoundException; - -public class IndelRealignerUnitTest extends BaseTest { - - private SAMFileHeader header; - - @BeforeClass - public void setup() throws FileNotFoundException { - final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); - header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary()); - } - - @Test - public void realignAtContigBorderTest() { - final int contigEnd = header.getSequence(0).getSequenceLength(); - final GATKSAMRecord 
read = ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, contigEnd - 1, 2); - read.setCigarString("2M"); - Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), false); - read.setCigarString("1M1D1M"); - Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), true); - } - -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBinUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBinUnitTest.java deleted file mode 100644 index 492f4e63f..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBinUnitTest.java +++ /dev/null @@ -1,117 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. 
ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import org.broadinstitute.gatk.utils.GenomeLoc; -import org.broadinstitute.gatk.utils.GenomeLocParser; -import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.broadinstitute.gatk.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * User: carneiro - * Date: 2/16/13 - * Time: 11:48 PM - */ -public class ReadBinUnitTest { - private GenomeLocParser parser; - private ReadBin readBin; - - private final int readLength = 100; // all reads will have the same size - private final int referencePadding = 10; // standard reference padding - - @BeforeClass - public void init() { - parser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader().getSequenceDictionary()); - readBin = new ReadBin(parser, referencePadding); - } - - @DataProvider(name = "reads") - public Object[][] reads() { - - return 
new Object[][]{ - {"20S80M", 80}, - {"80M20S", 1}, - {"20S60M20S", 50}, - {"50I", 60}, - {"100M", 500} - }; - } - - /** - * Tests the GenomeLoc variable in the ReadBin after adding arbitrary reads - * - * @param cigarString the read's cigar string - * @param alignmentStart the read's alignment start - */ - @Test(enabled = true, dataProvider = "reads") - public void testAddingReads(String cigarString, int alignmentStart) { - final GATKSAMRecord read = createReadAndAddToBin(cigarString, alignmentStart); - final GenomeLoc readLoc = parser.createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getSoftStart(), Math.max(read.getSoftStart(), read.getSoftEnd())); - Assert.assertEquals(readBin.getLocation(), readLoc); - readBin.clear(); - } - - public GATKSAMRecord createReadAndAddToBin(String cigarString, int alignmentStart) { - final GATKSAMRecord read = ReadUtils.createRandomRead(readLength); - read.setCigarString(cigarString); - read.setAlignmentStart(alignmentStart); - readBin.add(read); - return read; - } -} - - diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorIntegrationTest.java deleted file mode 100644 index be41b6a96..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ /dev/null @@ -1,159 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import htsjdk.samtools.reference.ReferenceSequenceFile; -import htsjdk.samtools.util.Interval; -import htsjdk.samtools.util.IntervalList; -import org.broadinstitute.gatk.engine.walkers.WalkerTest; -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.utils.GenomeLoc; -import org.broadinstitute.gatk.utils.GenomeLocParser; -import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.gatk.utils.interval.IntervalUtils; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -public class RealignerTargetCreatorIntegrationTest extends WalkerTest { - - @DataProvider(name = "intervals1") - public Object[][] intervals1() { - String arguments = "-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000"; - return new Object[][]{ - {"test standard nt=1", arguments}, - {"test standard nt=4", "-nt 4 " + arguments} - }; - } - - @DataProvider(name = "intervals2") - public Object[][] intervals2() { - String arguments = "-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000"; - return new Object[][]{ - {"test with dbsnp nt=1", arguments}, - {"test with dbsnp nt=4", "-nt 4 " + arguments} - }; - } - - @Test(dataProvider = "intervals1") - public void testIntervals1(String testName, String arguments) { - String md5 = "3f0b63a393104d0c4158c7d1538153b8"; - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5)); - executeTest(testName, spec); - } - - 
@Test(dataProvider = "intervals2") - public void testIntervals2(String testName, String arguments) { - String md5 = "d073237694175c75d37bd4f40b8c64db"; - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5)); - executeTest(testName, spec); - } - - @Test - public void testKnownsOnly() { - WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -R " + b36KGReference + " --known " + privateTestDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L " + privateTestDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -o %s", - 1, - Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96")); - executeTest("test rods only", spec3); - } - - @Test() - public void testBadCigarStringDoesNotFail() { - // Just making sure the test runs without an error, don't care about the MD5 value - WalkerTestSpec spec = new WalkerTestSpec( - "-T RealignerTargetCreator -R " + b37KGReference + " -I " + privateTestDir + "Realigner.error.bam -L 19:5787200-5787300 -o %s", - 1, - Arrays.asList("")); - executeTest("test bad cigar string string does not fail", spec); - } - - @Test(dataProvider = "intervals1") - public void testTargetListAgainstIntervalList(String testName, String arguments) throws IOException { - final List md5 = Collections.emptyList(); - final File targetListFile = createTempFile("RTCTest", ".targets"); - final File intervalListFile = createTempFile("RTCTest", ".interval_list"); - - WalkerTest.WalkerTestSpec targetListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5); - WalkerTest.WalkerTestSpec intervalListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5); - - targetListSpec.setOutputFileLocation(targetListFile); - intervalListSpec.setOutputFileLocation(intervalListFile); - - executeTest(testName + " (compare target-list and interval-list output)", targetListSpec); - executeTest(testName + " (compare target-list and interval-list output)", intervalListSpec); - - final ReferenceSequenceFile seq = 
new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference)); - final GenomeLocParser hg19GenomeLocParser = new GenomeLocParser(seq); - final List targetList = IntervalUtils.intervalFileToList(hg19GenomeLocParser, - targetListFile.getAbsolutePath()); - final List targetListResult = new ArrayList<>(); - for ( GenomeLoc target : targetList ) { - targetListResult.add(new Interval(target.getContig(), target.getStart(), target.getStop())); - } - - final List intervalListResult = IntervalList.fromFile(intervalListFile).getIntervals(); - - Assert.assertFalse(targetListResult.isEmpty()); - Assert.assertFalse(intervalListResult.isEmpty()); - Assert.assertEquals(targetListResult, intervalListResult); - } -} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorLargeScaleTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorLargeScaleTest.java deleted file mode 100644 index cfe634acf..000000000 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorLargeScaleTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.gatk.tools.walkers.indels; - -import org.broadinstitute.gatk.engine.walkers.WalkerTest; -import org.testng.annotations.Test; - -import java.util.ArrayList; - -public class RealignerTargetCreatorLargeScaleTest extends WalkerTest { - @Test( timeOut = 18000000 ) - public void testRealignerTargetCreator() { - - WalkerTestSpec spec1 = new WalkerTestSpec( - "-R " + hg18Reference + - " -T RealignerTargetCreator" + - " --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + - " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + - " -L chr1:1-50,000,000" + - " -o /dev/null", - 0, - new ArrayList(0)); - executeTest("testRealignerTargetCreatorWholeGenome", spec1); - - WalkerTestSpec spec2 = new WalkerTestSpec( - "-R " + hg18Reference + - " -T RealignerTargetCreator" + - " --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + - " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + - " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + - " -o /dev/null", - 0, - new ArrayList(0)); - executeTest("testRealignerTargetCreatorWholeExome", spec2); - } -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManager.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManager.java similarity index 65% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManager.java rename to public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManager.java index eb45b5b76..766371358 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManager.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManager.java @@ -1,52 +1,26 @@ /* 
-* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. 
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. 
Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.tools.walkers.indels; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java similarity index 88% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java rename to public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java index 99c3b5c46..afc710c9d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ package org.broadinstitute.gatk.tools.walkers.indels; @@ -101,10 +75,11 @@ import java.util.*; * it is impossible to place reads on the reference genome such at mismatches are minimized across all reads. Consequently, even when some reads are * correctly mapped with indels, reads covering the indel near just the start or end of the read are often incorrectly mapped with respect the true indel, * also requiring realignment. Local realignment serves to transform regions with misalignments due to indels into clean reads containing a consensus - * indel suitable for standard variant discovery approaches. Unlike most mappers, this walker uses the full alignment context to determine whether an - * appropriate alternate reference (i.e. indel) exists. Following local realignment, the GATK tool Unified Genotyper can be used to sensitively and - * specifically identify indels. + * indel suitable for standard variant discovery approaches. *

+ *

Note that indel realignment is no longer necessary for variant discovery if you plan to use a variant caller that performs a haplotype assembly + * step, such as HaplotypeCaller or MuTect2. However, it is still required when using legacy callers such as UnifiedGenotyper or the original MuTect.

+ * *

There are 2 steps to the realignment process:

*
    *
  1. Determining (small) suspicious intervals which are likely in need of realignment (see the RealignerTargetCreator tool)
  2. diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/LeftAlignIndels.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/LeftAlignIndels.java new file mode 100644 index 000000000..58202f059 --- /dev/null +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/LeftAlignIndels.java @@ -0,0 +1,116 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import htsjdk.samtools.Cigar; +import htsjdk.samtools.SAMRecord; +import org.broadinstitute.gatk.utils.commandline.Output; +import org.broadinstitute.gatk.engine.CommandLineGATK; +import org.broadinstitute.gatk.utils.contexts.ReferenceContext; +import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter; +import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; +import org.broadinstitute.gatk.engine.walkers.ReadWalker; +import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; +import org.broadinstitute.gatk.utils.help.HelpConstants; +import org.broadinstitute.gatk.utils.sam.AlignmentUtils; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; + + +/** + * Left-align indels within reads in a bam file + * + *

    This tool left-aligns any indels within read cigars in order to standardize representation when there are multiple valid + * representations possible (i.e. where the same indel can be placed at multiple positions and still represent the same haplotype). + * The standard convention is to place an indel at the left-most position possible, but this is not always followed, so + * this tool can be used to correct the representation of indels.

    + * + *

    Note

    + *

    This is only really needed when calling variants with legacy locus-based tools such as UnifiedGenotyper. With more + * sophisticated tools (like HaplotypeCaller) that involve reconstructing haplotypes (e.g. through haplotype assembly), the problem + * of multiple valid representations is handled internally and does not need to be corrected explicitly.

    + * + *

    Input

    + *

    + * A bam file with mapped reads. + *

    + * + *

    Output

    + *

    + * A bam file in which indels have been left-aligned where appropriate. + *

    + * + *

    Usage example

    + *
    + * java -jar GenomeAnalysisTK.jar \
    + *   -R reference.fasta \
    + *   -T LeftAlignIndels \
    + *   -I reads.bam \
    + *   -o output_with_leftaligned_indels.bam
    + * 
    + * + */ +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} ) +public class LeftAlignIndels extends ReadWalker { + + @Output(required=false, doc="Output bam") + protected GATKSAMFileWriter writer = null; + + public void initialize() {} + + private void emit(final SAMRecord read) { + if ( writer != null ) + writer.addAlignment(read); + } + + public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { + // we can not deal with screwy records + if ( read.getReadUnmappedFlag() || read.getCigar().numCigarElements() == 0 ) { + emit(read); + return 0; + } + + // move existing indels (for 1 indel reads only) to leftmost position within identical sequence + int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read); + if ( numBlocks == 2 ) { + Cigar newCigar = AlignmentUtils.leftAlignIndel(IndelRealigner.unclipCigar(read.getCigar()), ref.getBases(), read.getReadBases(), 0, 0, true); + newCigar = IndelRealigner.reclipCigar(newCigar, read); + read.setCigar(newCigar); + } + + emit(read); + return 1; + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer value, Integer sum) { + return sum + value; + } + + public void onTraversalDone(Integer result) {} +} \ No newline at end of file diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBin.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBin.java new file mode 100644 index 000000000..a7eae01e1 --- /dev/null +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBin.java @@ -0,0 +1,102 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. 
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import com.google.java.contract.Requires; +import org.broadinstitute.gatk.utils.GenomeLoc; +import org.broadinstitute.gatk.utils.GenomeLocParser; +import org.broadinstitute.gatk.utils.HasGenomeLocation; +import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; + +import java.util.ArrayList; +import java.util.List; + +/** +* User: carneiro +* Date: 2/16/13 +* Time: 11:15 PM +*/ +class ReadBin implements HasGenomeLocation { + + private final ArrayList reads = new ArrayList(); + private byte[] reference = null; + private GenomeLoc loc = null; + private final GenomeLocParser parser; + private final int referencePadding; + + public ReadBin(final GenomeLocParser parser, final int referencePadding) { + this.parser = parser; + this.referencePadding = referencePadding; + } + + // Adds a read to this bin: the first read creates the bin's location from its soft start/end, and later reads extend the stop when their soft end lies beyond it. + // NOTE: nothing is returned and no overlap check is made here, so the non-overlapping case (e.g. a large known indel with no overlapping reads) is not detected by this method. + public void add(GATKSAMRecord read) { + + final int readStart = read.getSoftStart(); + final int readStop = read.getSoftEnd(); + if ( loc == null ) + loc = parser.createGenomeLoc(read.getReferenceName(), readStart, Math.max(readStop, readStart)); // in case it's all an insertion + else if ( readStop > loc.getStop() ) + loc = parser.createGenomeLoc(loc.getContig(), loc.getStart(), readStop); + + reads.add(read); + } + + public List getReads() { + return reads; + } + + @Requires("referenceReader.isUppercasingBases()") + public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) { + // set up the reference if we haven't done so yet + if ( reference == null ) { + // first, pad the reference to handle deletions in narrow windows (e.g.
those with only 1 read) + int padLeft = Math.max(loc.getStart()- referencePadding, 1); + int padRight = Math.min(loc.getStop()+ referencePadding, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength()); + loc = parser.createGenomeLoc(loc.getContig(), loc.getContigIndex(), padLeft, padRight); + reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); + } + + return reference; + } + + public GenomeLoc getLocation() { + return loc; + } + + public int size() { + return reads.size(); + } + + public void clear() { + reads.clear(); + reference = null; + loc = null; + } + +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreator.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreator.java similarity index 65% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreator.java rename to public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreator.java index 9475526bc..6d4e86252 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreator.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreator.java @@ -1,52 +1,26 @@ /* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. 
LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. * -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: * -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. * -* 7. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.gatk.tools.walkers.indels; @@ -95,9 +69,10 @@ import java.util.TreeSet; * it is impossible to place reads on the reference genome such that mismatches are minimized across all reads. Consequently, even when some reads are * correctly mapped with indels, reads covering the indel near just the start or end of the read are often incorrectly mapped with respect the true indel, * also requiring realignment. Local realignment serves to transform regions with misalignments due to indels into clean reads containing a consensus - * indel suitable for standard variant discovery approaches. Unlike most mappers, this tool uses the full alignment context to determine whether an - * appropriate alternate reference (i.e. indel) exists. + * indel suitable for standard variant discovery approaches. *

    + *

Note that indel realignment is no longer necessary for variant discovery if you plan to use a variant caller that performs a haplotype assembly + * step, such as HaplotypeCaller or MuTect2. However, it is still required when using legacy callers such as UnifiedGenotyper or the original MuTect.

    *

    There are 2 steps to the realignment process:

    *
      *
    1. Determining (small) suspicious intervals which are likely in need of realignment (RealignerTargetCreator)
    2. diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManagerUnitTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManagerUnitTest.java new file mode 100644 index 000000000..7be6c1ca8 --- /dev/null +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ConstrainedMateFixingManagerUnitTest.java @@ -0,0 +1,172 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.util.ProgressLoggerInterface; +import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.GenomeLocParser; +import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + + +public class ConstrainedMateFixingManagerUnitTest extends BaseTest { + + private static SAMFileHeader header; + private static GenomeLocParser genomeLocParser; + + @BeforeClass + public void beforeClass() { + header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, 10000); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + } + + @Test + public void testSecondaryAlignmentsDoNotInterfere() { + final List properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 10, 30, true, false); + final GATKSAMRecord read1 = properReads.get(0); + read1.setAlignmentStart(8); // move the read + read1.setFlags(99); // first in proper pair, mate negative strand + + final GATKSAMRecord read2Primary = properReads.get(1); + read2Primary.setFlags(147); // second in proper pair, on reverse strand + + Assert.assertEquals(read1.getInferredInsertSize(), 21); + + final GATKSAMRecord read2NonPrimary = new GATKSAMRecord(read2Primary); + read2NonPrimary.setFlags(393); // second in pair, mate unmapped, not primary alignment + + final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(null, genomeLocParser, 1000, 1000, 1000); + manager.addRead(read1, true, false); + manager.addRead(read2NonPrimary, false, false); + manager.addRead(read2Primary, false, false); + + Assert.assertEquals(manager.getNReadsInQueue(), 3); + + for ( final
SAMRecord read : manager.getReadsInQueueForTesting() ) { + if ( read.getFirstOfPairFlag() ) { + Assert.assertEquals(read.getFlags(), 99); + Assert.assertEquals(read.getInferredInsertSize(), 23); + } else if ( read.getNotPrimaryAlignmentFlag() ) { + Assert.assertEquals(read.getFlags(), 393); + Assert.assertEquals(read.getInferredInsertSize(), -21); + } else { + Assert.assertEquals(read.getFlags(), 147); + Assert.assertEquals(read.getInferredInsertSize(), -23); + } + } + } + + @Test + public void testSecondaryAlignmentsDoNotCauseAccidentalRemovalOfMate() { + final List properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 530, 1594, true, false); + final GATKSAMRecord read1 = properReads.get(0); + read1.setFlags(99); // first in proper pair, mate negative strand + + final GATKSAMRecord read2Primary = properReads.get(1); + read2Primary.setFlags(147); // second in proper pair, on reverse strand + read2Primary.setAlignmentStart(1596); // move the read + + final GATKSAMRecord read2NonPrimary = new GATKSAMRecord(read2Primary); + read2NonPrimary.setReadName("foo"); + read2NonPrimary.setFlags(393); // second in pair, mate unmapped, not primary alignment + read2NonPrimary.setAlignmentStart(451); + read2NonPrimary.setMateAlignmentStart(451); + + final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(null, genomeLocParser, 10000, 200, 10000); + manager.addRead(read2NonPrimary, false, false); + manager.addRead(read1, false, false); + + for ( int i = 0; i < ConstrainedMateFixingManager.EMIT_FREQUENCY; i++ ) + manager.addRead(ArtificialSAMUtils.createArtificialRead(header, "foo" + i, 0, 1500, 10), false, false); + + Assert.assertTrue(manager.forMateMatching.containsKey("foo")); + } + + @Test + public void testSupplementaryAlignmentsDoNotCauseBadMateFixing() { + final List properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 1000, 2000, true, false); + final GATKSAMRecord read1 = properReads.get(0); + read1.setFlags(99); // first in proper pair, mate on
negative strand + + final GATKSAMRecord read2 = properReads.get(1); + read2.setFlags(161); // second in pair, mate negative strand + + final GATKSAMRecord read2Supp = new GATKSAMRecord(read2); + read2Supp.setReadName("foo"); + read2Supp.setFlags(2209); // second in pair, mate negative strand, supplementary + read2Supp.setAlignmentStart(100); + read2Supp.setMateAlignmentStart(1000); + + final DummyWriter writer = new DummyWriter(); + final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(writer, genomeLocParser, 10000, 200, 10000); + manager.addRead(read2Supp, false, false); + manager.addRead(read1, false, false); + manager.addRead(read2, false, false); + manager.close(); // "write" the reads to our dummy writer + + // check to make sure that none of the mate locations were changed, which is the problem brought to us by a user + for ( final SAMRecord read : writer.reads ) { + final int start = read.getAlignmentStart(); + switch (start) { + case 100: + Assert.assertEquals(read.getMateAlignmentStart(), 1000); + break; + case 1000: + Assert.assertEquals(read.getMateAlignmentStart(), 2000); + break; + case 2000: + Assert.assertEquals(read.getMateAlignmentStart(), 1000); + break; + default: + Assert.assertTrue(false, "We saw a read located at the wrong position"); + } + } + } + + private class DummyWriter implements SAMFileWriter { + + public List reads; + + public DummyWriter() { reads = new ArrayList<>(10); } + + public void addAlignment(final SAMRecord alignment) { reads.add(alignment);} + + public SAMFileHeader getFileHeader() { return null; } + + public void setProgressLogger(final ProgressLoggerInterface progress) {} + + public void close() {} + } +} \ No newline at end of file diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerIntegrationTest.java new file mode
100644 index 000000000..81b2c457c --- /dev/null +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerIntegrationTest.java @@ -0,0 +1,161 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import org.broadinstitute.gatk.engine.walkers.WalkerTest; +import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +public class IndelRealignerIntegrationTest extends WalkerTest { + + private static final String mainTestBam = validationDataLocation + "indelRealignerTest.pilot1.ceu.fixed.fixmates.bam"; + private static final String mainTestIntervals = validationDataLocation + "indelRealignerTest.pilot1.ceu.intervals"; + private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf"; + private static final String baseCommandPrefix = "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 "; + private static final String baseCommand = baseCommandPrefix + "-o %s "; + private static final String base_md5 = "ab7407d2299d9ba73449cea376eeb9c4"; + private static final String base_md5_with_SW_or_VCF = "fa57bd96b83038ac6a70e58e11bf5364"; + + @Test + public void testDefaults() { + + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommand, + 1, + Arrays.asList(base_md5)); + executeTest("test realigner defaults", spec1); + + WalkerTestSpec spec2 = new WalkerTestSpec( + baseCommand + "-known " + knownIndels, + 1, + Arrays.asList(base_md5_with_SW_or_VCF)); + executeTest("test realigner defaults with VCF", spec2); + } + + @Test + public void testKnownsOnly() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels, + 1, + Arrays.asList("c42b6f3e1270e43cce2b6f75b6a38f30")); + executeTest("realigner known indels only from VCF", spec1); + } + + @Test + public void testUseSW() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommand + "--consensusDeterminationModel USE_SW -known " + knownIndels,
+ 1, + Arrays.asList(base_md5_with_SW_or_VCF)); + executeTest("realigner use SW from VCF", spec1); + } + + @Test + public void testLods() { + HashMap e = new HashMap(); + e.put("-LOD 60", base_md5); /* same expected MD5 as the default run in testDefaults */ + e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "0c4597e48b4e194de32ebe494704ea6b" ); + + for ( Map.Entry entry : e.entrySet() ) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommand + entry.getKey(), + 1, + Arrays.asList(entry.getValue())); + executeTest(String.format("realigner [%s]", entry.getKey()), spec); + } + } + + @Test + public void testLongRun() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 -o %s", + 1, + Arrays.asList("19e6859b9ef09c7e0a79a19626908b17")); + executeTest("realigner long run", spec); + } + + @Test + public void testNoTags() { + WalkerTestSpec spec = new WalkerTestSpec( + baseCommand + "--noOriginalAlignmentTags --consensusDeterminationModel USE_SW", + 1, + Arrays.asList("8f5684359d7b26acaacfa657ef395a0c")); + executeTest("realigner no output tags", spec); + } + + @Test + public void testStats() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommandPrefix + "-stats %s -o /dev/null", + 1, + Arrays.asList("7ed8d4eed635613fd031598a5c9ef5a3")); + executeTest("realigner stats", spec1); + + WalkerTestSpec spec2 = new WalkerTestSpec( + baseCommandPrefix + "-LOD 60 -stats %s -o /dev/null", + 1, + Arrays.asList("e8b02bfc5debec55fe936a38c59463cc")); + executeTest("realigner stats", spec2); /* NOTE(review): reuses the test name of spec1 above */ + } + + @Test + public void testMaxReadsInMemory() { + HashMap e = new HashMap(); + e.put("--maxReadsInMemory 10000", "236c64f2da0047534b44444d9d699378"); + e.put( "--maxReadsInMemory 40000", base_md5 ); /* same expected MD5 as the default run in testDefaults */ + + for ( Map.Entry entry : e.entrySet() ) { + WalkerTest.WalkerTestSpec
spec = new WalkerTest.WalkerTestSpec( + baseCommand + entry.getKey(), + 1, + Arrays.asList(entry.getValue())); + executeTest(String.format("realigner [%s]", entry.getKey()), spec); + } + } + + @Test + public void testNWayOut() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommandPrefix + " -nWayOut .clean.bam ", + 1, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); /* this value is the MD5 of zero-length input */ + executeTest("test realigner nWayOut", spec1); + } + + @Test + public void testBadCigarStringDoesNotFail() { + // Just making sure the test runs without an error, don't care about the MD5 value + WalkerTestSpec spec = new WalkerTestSpec( + "-T IndelRealigner -R " + b37KGReference + " -I " + privateTestDir + "Realigner.error.bam -L 19:5787200-5787300 -targetIntervals 19:5787205-5787300 -o %s", + 1, + Arrays.asList("")); + executeTest("test bad cigar string does not fail", spec); + } +} diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerLargeScaleTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerLargeScaleTest.java new file mode 100644 index 000000000..649644712 --- /dev/null +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerLargeScaleTest.java @@ -0,0 +1,82 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software.
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import org.broadinstitute.gatk.engine.walkers.WalkerTest; +import org.testng.annotations.Test; + +import java.util.ArrayList; + +public class IndelRealignerLargeScaleTest extends WalkerTest { + @Test( timeOut = 18000000 ) /* TestNG timeOut is in milliseconds: 18,000,000 ms = 5 hours */ + public void testHighCoverage() { + WalkerTestSpec spec = new WalkerTestSpec( + + "-R " + b36KGReference + + " -T IndelRealigner" + + " -I " + validationDataLocation + "indelRealignerTest.pilot1.veryHighCoverage.bam" + + " -L 20:49,500-55,500" + + " -o /dev/null" + + " -targetIntervals " + validationDataLocation + "indelRealignerTest.pilot1.ceu.intervals", + 0, + new ArrayList(0)); + executeTest("testIndelRealignerHighCoverage", spec); + } + + @Test( timeOut = 18000000 ) /* TestNG timeOut is in milliseconds: 18,000,000 ms = 5 hours */ + public void testRealigner() { + WalkerTestSpec spec1 = new WalkerTestSpec( + + "-R " + hg18Reference + + " -T IndelRealigner" + + " -LOD 5" + + " -maxConsensuses 100" + + " -greedy 100" + + " -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -o /dev/null" + + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + + " -L chr1:1-5,650,000" + + " -targetIntervals " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.realigner.intervals", + 0, + new ArrayList(0)); + executeTest("testIndelRealignerWholeGenome", spec1); + + WalkerTestSpec spec2 = new WalkerTestSpec( + "-R " + hg18Reference + + " -T IndelRealigner" + + " -LOD 5" + + " -maxConsensuses 100" + + " -greedy 100" + + " -known " + GATKDataLocation +
"dbsnp_132.hg18.vcf" + + " -o /dev/null" + + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + + " -L chr1:1-150,000,000" + + " -targetIntervals " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.realigner.intervals", + 0, + new ArrayList(0)); + executeTest("testIndelRealignerWholeExome", spec2); + } +} diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerUnitTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerUnitTest.java new file mode 100644 index 000000000..801db5269 --- /dev/null +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealignerUnitTest.java @@ -0,0 +1,61 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import htsjdk.samtools.reference.IndexedFastaSequenceFile; +import htsjdk.samtools.SAMFileHeader; +import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; + +public class IndelRealignerUnitTest extends BaseTest { + + private SAMFileHeader header; + + @BeforeClass + public void setup() throws FileNotFoundException { + final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); + header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary()); + } + + @Test + public void realignAtContigBorderTest() { + final int contigEnd = header.getSequence(0).getSequenceLength(); + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, contigEnd - 1, 2); /* NOTE(review): assumes the 4th argument is the alignment start and the 5th the read length - confirm */ + read.setCigarString("2M"); /* covers 2 reference bases; expected to stay within the contig */ + Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), false); + read.setCigarString("1M1D1M"); /* the deletion widens the reference span to 3 bases; expected to run past contigEnd */ + Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), true); + } + +} diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBinUnitTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBinUnitTest.java new file mode 100644 index 000000000..d843374d2 --- /dev/null +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/ReadBinUnitTest.java @@ -0,0 +1,91 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc.
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import org.broadinstitute.gatk.utils.GenomeLoc; +import org.broadinstitute.gatk.utils.GenomeLocParser; +import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.broadinstitute.gatk.utils.sam.ReadUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +/** + * User: carneiro + * Date: 2/16/13 + * Time: 11:48 PM + */ +public class ReadBinUnitTest { + private GenomeLocParser parser; + private ReadBin readBin; + + private final int readLength = 100; // all reads will have the same size + private final int referencePadding = 10; // standard reference padding + + @BeforeClass + public void init() { + parser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader().getSequenceDictionary()); + readBin = new ReadBin(parser, referencePadding); + } + + @DataProvider(name = "reads") + public Object[][] reads() { + + return new Object[][]{ + {"20S80M", 80}, + {"80M20S", 1}, + {"20S60M20S", 50}, + {"50I", 60}, /* insertion-only CIGAR: consumes no reference bases; NOTE(review): its length (50) differs from readLength (100) */ + {"100M", 500} + }; + } + + /** + * Tests the GenomeLoc variable in the ReadBin after adding arbitrary reads + * + * @param cigarString the read's cigar string + * @param alignmentStart the read's alignment start + */ + @Test(enabled = true, dataProvider = "reads") + public void testAddingReads(String cigarString, int alignmentStart) { + final GATKSAMRecord read = createReadAndAddToBin(cigarString, alignmentStart); + final GenomeLoc readLoc = parser.createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getSoftStart(), Math.max(read.getSoftStart(), read.getSoftEnd())); /* Math.max keeps stop >= start; presumably needed for the reference-less "50I" read - confirm */ + Assert.assertEquals(readBin.getLocation(), readLoc); + readBin.clear(); + } + + public GATKSAMRecord createReadAndAddToBin(String cigarString, int alignmentStart) { + final GATKSAMRecord read = ReadUtils.createRandomRead(readLength); +
read.setCigarString(cigarString); + read.setAlignmentStart(alignmentStart); + readBin.add(read); + return read; + } +} + + diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorIntegrationTest.java new file mode 100644 index 000000000..e4b7485d2 --- /dev/null +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -0,0 +1,133 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import htsjdk.samtools.reference.ReferenceSequenceFile; +import htsjdk.samtools.util.Interval; +import htsjdk.samtools.util.IntervalList; +import org.broadinstitute.gatk.engine.walkers.WalkerTest; +import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.GenomeLoc; +import org.broadinstitute.gatk.utils.GenomeLocParser; +import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.gatk.utils.interval.IntervalUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class RealignerTargetCreatorIntegrationTest extends WalkerTest { + + @DataProvider(name = "intervals1") + public Object[][] intervals1() { + String arguments = "-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000"; + return new Object[][]{ + {"test standard nt=1", arguments}, + {"test standard nt=4", "-nt 4 " + arguments} + }; + } + + @DataProvider(name = "intervals2") + public Object[][] intervals2() { + String arguments = "-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000"; + return new Object[][]{ + {"test with dbsnp nt=1", arguments}, + {"test with dbsnp nt=4", "-nt 4 " + arguments} + }; + } + + @Test(dataProvider = "intervals1") + public void testIntervals1(String testName, String arguments) { + String md5 = "3f0b63a393104d0c4158c7d1538153b8"; + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5)); + executeTest(testName, spec); + } + + 
@Test(dataProvider = "intervals2") + public void testIntervals2(String testName, String arguments) { + String md5 = "d073237694175c75d37bd4f40b8c64db"; + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5)); + executeTest(testName, spec); + } + + @Test + public void testKnownsOnly() { + WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( + "-T RealignerTargetCreator -R " + b36KGReference + " --known " + privateTestDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L " + privateTestDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -o %s", + 1, + Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96")); + executeTest("test rods only", spec3); + } + + @Test() + public void testBadCigarStringDoesNotFail() { + // Just making sure the test runs without an error, don't care about the MD5 value + WalkerTestSpec spec = new WalkerTestSpec( + "-T RealignerTargetCreator -R " + b37KGReference + " -I " + privateTestDir + "Realigner.error.bam -L 19:5787200-5787300 -o %s", + 1, + Arrays.asList("")); + executeTest("test bad cigar string string does not fail", spec); + } + + @Test(dataProvider = "intervals1") + public void testTargetListAgainstIntervalList(String testName, String arguments) throws IOException { + final List md5 = Collections.emptyList(); + final File targetListFile = createTempFile("RTCTest", ".targets"); + final File intervalListFile = createTempFile("RTCTest", ".interval_list"); + + WalkerTest.WalkerTestSpec targetListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5); + WalkerTest.WalkerTestSpec intervalListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5); + + targetListSpec.setOutputFileLocation(targetListFile); + intervalListSpec.setOutputFileLocation(intervalListFile); + + executeTest(testName + " (compare target-list and interval-list output)", targetListSpec); + executeTest(testName + " (compare target-list and interval-list output)", intervalListSpec); + + final ReferenceSequenceFile seq = 
new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference)); + final GenomeLocParser hg19GenomeLocParser = new GenomeLocParser(seq); + final List targetList = IntervalUtils.intervalFileToList(hg19GenomeLocParser, + targetListFile.getAbsolutePath()); + final List targetListResult = new ArrayList<>(); + for ( GenomeLoc target : targetList ) { + targetListResult.add(new Interval(target.getContig(), target.getStart(), target.getStop())); + } + + final List intervalListResult = IntervalList.fromFile(intervalListFile).getIntervals(); + + Assert.assertFalse(targetListResult.isEmpty()); + Assert.assertFalse(intervalListResult.isEmpty()); + Assert.assertEquals(targetListResult, intervalListResult); + } +} diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorLargeScaleTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorLargeScaleTest.java new file mode 100644 index 000000000..8b90ef419 --- /dev/null +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/indels/RealignerTargetCreatorLargeScaleTest.java @@ -0,0 +1,59 @@ +/* +* Copyright 2012-2016 Broad Institute, Inc. +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.tools.walkers.indels; + +import org.broadinstitute.gatk.engine.walkers.WalkerTest; +import org.testng.annotations.Test; + +import java.util.ArrayList; + +public class RealignerTargetCreatorLargeScaleTest extends WalkerTest { + @Test( timeOut = 18000000 ) + public void testRealignerTargetCreator() { + + WalkerTestSpec spec1 = new WalkerTestSpec( + "-R " + hg18Reference + + " -T RealignerTargetCreator" + + " --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + + " -L chr1:1-50,000,000" + + " -o /dev/null", + 0, + new ArrayList(0)); + executeTest("testRealignerTargetCreatorWholeGenome", spec1); + + WalkerTestSpec spec2 = new WalkerTestSpec( + "-R " + hg18Reference + + " -T RealignerTargetCreator" + + " --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + + " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + + " -o /dev/null", + 0, + new ArrayList(0)); + executeTest("testRealignerTargetCreatorWholeExome", spec2); + } +} From 81d4eaea29beb68710e0a2862c1572fc34a23dac Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Fri, 27 May 2016 11:49:48 -0400 Subject: [PATCH 74/82] Fixed warnings in LAATV and ValidateVariants --- .../tools/walkers/variantutils/LeftAlignAndTrimVariants.java | 2 +- .../gatk/tools/walkers/variantutils/ValidateVariants.java | 2 +- 2 files changed, 
2 insertions(+), 2 deletions(-) diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariants.java index 834c19753..c169a2844 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariants.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/LeftAlignAndTrimVariants.java @@ -249,7 +249,7 @@ public class LeftAlignAndTrimVariants extends RodWalker { // ignore if the reference length is greater than the reference window stop before and after expansion if ( refLength > MAX_INDEL_LENGTH && refLength > referenceWindowStop ) { - logger.info(String.format("%s (%d) at position %s:%d; skipping that record. Set --referenceWindowStop >= %d", + logger.info(String.format("%s (%d) at position %s:%d; skipping that record. Set --reference_window_stop >= %d", REFERENCE_ALLELE_TOO_LONG_MSG, refLength, vc.getChr(), vc.getStart(), refLength)); return 0; } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java index 653bdaf50..6c9912f06 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java @@ -283,7 +283,7 @@ public class ValidateVariants extends RodWalker { // reference length is greater than the reference window stop before and after expansion if ( refLength > 100 && refLength > referenceWindowStop ) { - logger.info(String.format("%s (%d) at position %s:%d; skipping that record. 
Set --referenceWindowStop >= %d", + logger.info(String.format("%s (%d) at position %s:%d; skipping that record. Set --reference_window_stop >= %d", REFERENCE_ALLELE_TOO_LONG_MSG, refLength, vc.getContig(), vc.getStart(), refLength)); return; } From 3cc7d7e56daee63338b013e5471a8d052cab4e3c Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Fri, 27 May 2016 11:59:54 -0400 Subject: [PATCH 75/82] Hide deletion arguments in ASEReadCounter --- .../gatk/tools/walkers/rnaseq/ASEReadCounter.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java index b9f8a5591..634566b25 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java @@ -90,7 +90,7 @@ import java.util.List; * *

      Caveat

      *
        - *
      • This tool will only process biallelic sites. If your callset contains multiallelic sites, they will be ignored. + *
      • This tool will only process biallelic SNP sites. If your callset contains multiallelic sites, they will be ignored. * Optionally, you can subset your callset to just biallelic variants using e.g. * SelectVariants * with the option "-restrictAllelesTo BIALLELIC".
      • @@ -139,19 +139,20 @@ public class ASEReadCounter extends LocusWalker { public CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS_REQUIRE_SAME_BASE; /** - * Available options are csv, table, rtable. By default, the format is an r-readable table. + * Available options are csv, table, rtable. By default, the format is rtable (an r-readable table). */ @Argument(fullName = "outputFormat", doc = "Format of the output file, can be CSV, TABLE, RTABLE", required = false) public OUTPUT_FORMAT outputFormat = OUTPUT_FORMAT.RTABLE; + // Hiding these arguments pending reevaluation (currently don't seem to work and aren't tested) /** * Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output. */ - @Advanced + @Hidden @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false) public boolean includeDeletions = false; - @Advanced + @Hidden @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) public boolean ignoreDeletionSites = false; From 5b80c92cb701c94b21dc6df17ba5396643750294 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Fri, 27 May 2016 12:19:59 -0400 Subject: [PATCH 76/82] Improved article linking in error messages --- .../engine/filters/MalformedReadFilter.java | 2 +- .../gatk/utils/exceptions/UserException.java | 32 +++++++++---------- .../gatk/utils/help/HelpConstants.java | 4 +-- .../gatk/utils/sam/ReadUtils.java | 6 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java index bfb7adebd..95a65633f 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java +++ 
b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java @@ -219,7 +219,7 @@ public class MalformedReadFilter extends ReadFilter { if (! filterReadsWithNCigar && !allowNCigars) { throw new UserException.UnsupportedCigarOperatorException( CigarOperator.N,read, - "If you are working with RNA-Seq data, see " + HelpConstants.articlePost("3891") + " for guidance. " + "If you are working with RNA-Seq data, see " + HelpConstants.articlePost(3891) + " for guidance. " + "If you choose to disregard those instructions, or for other uses, you have the option of either " + "filtering out all reads with operator " + CigarOperator.N + " in their CIGAR string" + " (add --" + FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME + " to your command line) or overriding this check (add -U " diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/exceptions/UserException.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/exceptions/UserException.java index 859437fb7..b29fb2a41 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/exceptions/UserException.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/exceptions/UserException.java @@ -50,7 +50,7 @@ public class UserException extends ReviewedGATKException { /** * The URL where people can get help messages. 
Printed when an error occurs */ - public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest"; + public static final String PHONE_HOME_DOCS_URL = HelpConstants.articlePost(1250); public UserException(String msg) { super(msg); } public UserException(String msg, Throwable e) { super(msg, e); } @@ -136,7 +136,7 @@ public class UserException extends ReviewedGATKException { public static class BadTmpDir extends UserException { public BadTmpDir(String message) { - super(String.format("An error occurred while working with the tmp directory %s. You can specify -Djava.io.tmpdir=X on the command line (before the -jar argument) where X is a directory path, to use a more appropriate temporary directory. The exact error was %s", System.getProperties().get("java.io.tmpdir"), message)); + super(String.format("An error occurred while working with the tmp directory %s. You can specify -Djava.io.tmpdir=X on the command line (before the -jar argument) where X is a directory path, to use a more appropriate temporary directory. Note that this is a JVM argument, not a GATK argument. The exact error was %s", System.getProperties().get("java.io.tmpdir"), message)); } } @@ -160,7 +160,7 @@ public class UserException extends ReviewedGATKException { public static class ErrorWritingBamFile extends UserException { public ErrorWritingBamFile(String message) { - super(String.format("An error occurred when trying to write the BAM file. Usually this happens when there is not enough space in the directory to which the data is being written (generally the temp directory) or when your system's open file handle limit is too small. Your system administrator can help you resolve these issues. If you know what temporary directory to use, you can specify it by adding -Djava.io.tmpdir=X to the command line (before the -jar argument), where X is the directory path. 
The exact error was %s", message)); + super(String.format("An error occurred when trying to write the BAM file. Usually this happens when there is not enough space in the directory to which the data is being written (generally the temp directory) or when your system's open file handle limit is too small. Your system administrator can help you resolve these issues. If you know what temporary directory to use, you can specify it by adding -Djava.io.tmpdir=X to the command line (before the -jar argument), where X is the directory path. Note that this is a JVM argument, not a GATK argument. The exact error was %s", message)); } } @@ -225,20 +225,20 @@ public class UserException extends ReviewedGATKException { public static class MissortedBAM extends UserException { public MissortedBAM(SAMFileHeader.SortOrder order, File file, SAMFileHeader header) { - super(String.format("Missorted input SAM/BAM/CRAM files: %s must be sorted in %s order but order was: %s. Please see " + HelpConstants.forumPost("discussion/1317/collected-faqs-about-input-files-for-sequence-read-data-bam-cram") + "for more information.", file, order, header.getSortOrder())); + super(String.format("Missorted input SAM/BAM/CRAM files: %s must be sorted in %s order but order was: %s. Please see " + HelpConstants.articlePost(1317) + "for more information.", file, order, header.getSortOrder())); } public MissortedBAM(SAMFileHeader.SortOrder order, String message) { - super(String.format("Missorted input SAM/BAM/CRAM files: files are not sorted in %s order. Please see " + HelpConstants.forumPost("discussion/1317/collected-faqs-about-input-files-for-sequence-read-data-bam-cram") + "for more information. Error details: %s", order, message)); + super(String.format("Missorted input SAM/BAM/CRAM files: files are not sorted in %s order. Please see " + HelpConstants.articlePost(1317) + "for more information. 
Error details: %s", order, message)); } public MissortedBAM(SAMFileHeader.SortOrder order, SAMRecord read, String message) { - super(String.format("Missorted input SAM/BAM/CRAM file %s: file sorted in %s order but %s is required. Please see " + HelpConstants.forumPost("discussion/1317/collected-faqs-about-input-files-for-sequence-read-data-bam-cram") + "for more information. Error details: %s", + super(String.format("Missorted input SAM/BAM/CRAM file %s: file sorted in %s order but %s is required. Please see " + HelpConstants.articlePost(1317) + "for more information. Error details: %s", read.getFileSource().getReader(), read.getHeader().getSortOrder(), order, message)); } public MissortedBAM(String message) { - super(String.format("Missorted input SAM/BAM/CRAM files. Please see " + HelpConstants.forumPost("discussion/1317/collected-faqs-about-input-files-for-sequence-read-data-bam-cram") + "for more information. Error details: %s", message)); + super(String.format("Missorted input SAM/BAM/CRAM files. Please see " + HelpConstants.articlePost(1317) + "for more information. Error details: %s", message)); } } @@ -252,7 +252,7 @@ public class UserException extends ReviewedGATKException { } public MalformedBAM(String source, String message) { - super(String.format("SAM/BAM/CRAM file %s is malformed. Please see " + HelpConstants.forumPost("discussion/1317/collected-faqs-about-input-files-for-sequence-read-data-bam-cram") + "for more information. Error details: %s", source, message)); + super(String.format("SAM/BAM/CRAM file %s is malformed. Please see " + HelpConstants.articlePost(1317) + "for more information. Error details: %s", source, message)); } } @@ -262,7 +262,7 @@ public class UserException extends ReviewedGATKException { } public MisencodedBAM(String source, String message) { - super(String.format("SAM/BAM/CRAM file %s appears to be using the wrong encoding for quality scores: %s. 
Please see https://www.broadinstitute.org/gatk/guide?id=6470 for more details and options related to this error.", source, message)); + super(String.format("SAM/BAM/CRAM file %s appears to be using the wrong encoding for quality scores: %s. Please see " + HelpConstants.articlePost(6470) + " for more details and options related to this error.", source, message)); } } @@ -294,13 +294,13 @@ public class UserException extends ReviewedGATKException { public static class ReadMissingReadGroup extends MalformedBAM { public ReadMissingReadGroup(final SAMRecord read) { - super(read, String.format("Read %s is missing the read group (RG) tag, which is required by the GATK. Please see " + HelpConstants.forumPost("discussion/59/companion-utilities-replacereadgroups to fix this problem"), read.getReadName())); + super(read, String.format("Read %s is missing the read group (RG) tag, which is required by the GATK. Please see " + HelpConstants.articlePost(59), read.getReadName())); } } public static class ReadHasUndefinedReadGroup extends MalformedBAM { public ReadHasUndefinedReadGroup(final SAMRecord read, final String rgID) { - super(read, String.format("Read %s uses a read group (%s) that is not defined in the BAM header, which is not valid. Please see " + HelpConstants.forumPost("discussion/59/companion-utilities-replacereadgroups to fix this problem"), read.getReadName(), rgID)); + super(read, String.format("Read %s uses a read group (%s) that is not defined in the BAM header, which is not valid. Please see " + HelpConstants.articlePost(59), read.getReadName(), rgID)); } } @@ -312,7 +312,7 @@ public class UserException extends ReviewedGATKException { public static class MissortedFile extends UserException { public MissortedFile(File file, String message, Exception e) { - super(String.format("Missorted input file: %s is must be sorted in coordinate order. 
Please see " + HelpConstants.forumPost("discussion/1317/collected-faqs-about-input-files-for-sequence-read-data-bam-cram") + "for more information. Error details: %s and got error %s", file, message, getMessage(e))); + super(String.format("Missorted input file: %s is must be sorted in coordinate order. Please see " + HelpConstants.articlePost(1317) + "for more information. Error details: %s and got error %s", file, message, getMessage(e))); } } @@ -366,14 +366,14 @@ public class UserException extends ReviewedGATKException { public static class IncompatibleSequenceDictionaries extends UserException { public IncompatibleSequenceDictionaries(String message, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) { - super(String.format("Input files %s and %s have incompatible contigs. Please see " + HelpConstants.forumPost("discussion/63/input-files-have-incompatible-contigs") + "for more information. Error details: %s.\n %s contigs = %s\n %s contigs = %s", + super(String.format("Input files %s and %s have incompatible contigs. Please see " + HelpConstants.articlePost(63) + "for more information. Error details: %s.\n %s contigs = %s\n %s contigs = %s", name1, name2, message, name1, ReadUtils.prettyPrintSequenceRecords(dict1), name2, ReadUtils.prettyPrintSequenceRecords(dict2))); } } public static class LexicographicallySortedSequenceDictionary extends UserException { public LexicographicallySortedSequenceDictionary(String name, SAMSequenceDictionary dict) { - super(String.format("Lexicographically sorted human genome sequence detected in %s. Please see " + HelpConstants.articlePost("1328") + "for more information. Error details: %s contigs = %s", + super(String.format("Lexicographically sorted human genome sequence detected in %s. Please see " + HelpConstants.articlePost(1328) + "for more information. 
Error details: %s contigs = %s", name, name, ReadUtils.prettyPrintSequenceRecords(dict))); } } @@ -409,7 +409,7 @@ public class UserException extends ReviewedGATKException { public MissingReferenceFaiFile( final File indexFile, final File fastaFile ) { super(String.format("Fasta index file %s for reference %s does not exist. Please see %s for help creating it.", indexFile.getAbsolutePath(), fastaFile.getAbsolutePath(), - HelpConstants.forumPost("discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference"))); + HelpConstants.articlePost(1601))); } } @@ -417,7 +417,7 @@ public class UserException extends ReviewedGATKException { public MissingReferenceDictFile( final File dictFile, final File fastaFile ) { super(String.format("Fasta dict file %s for reference %s does not exist. Please see %s for help creating it.", dictFile.getAbsolutePath(), fastaFile.getAbsolutePath(), - HelpConstants.forumPost("discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference"))); + HelpConstants.articlePost(1601))); } } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java index 6c7068b19..279af20ed 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java @@ -62,8 +62,8 @@ public class HelpConstants { return GATK_FORUM_URL + post; } - public static String articlePost(String id) { - return GATK_ARTICLE_URL + "?id=" + id; + public static String articlePost(Integer id) { + return GATK_ARTICLE_URL + "?id=" + id.toString(); } /** diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ReadUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ReadUtils.java index 0cd84f5d9..75617e87d 100644 --- 
a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ReadUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ReadUtils.java @@ -66,7 +66,7 @@ public class ReadUtils { public static Set getSAMFileSamples(final SAMFileHeader header) { if ( header == null ) { throw new IllegalArgumentException("Missing SAM file header. " + - "For more information on read groups, see " + HelpConstants.articlePost("6472")); + "For more information on read groups, see " + HelpConstants.articlePost(6472)); } // get all of the unique sample names @@ -74,13 +74,13 @@ public class ReadUtils { final List readGroups = header.getReadGroups(); if ( readGroups == null ) { throw new UserException("SAM file header is missing the Read Group (@RG). " + - "For more information on read groups, see " + HelpConstants.articlePost("6472")); + "For more information on read groups, see " + HelpConstants.articlePost(6472)); } for ( final SAMReadGroupRecord readGroup : readGroups ) { final String sample = readGroup.getSample(); if ( sample == null ) { throw new UserException("SAM file header is missing the sample field (SM) in the Read Group (@RG). 
" + - "For more information on read groups, see " + HelpConstants.articlePost("6472")); + "For more information on read groups, see " + HelpConstants.articlePost(6472)); } samples.add(readGroup.getSample()); } From efbbbb1bd94b5b1e181ec7ac305d333402eb9f43 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Fri, 27 May 2016 13:48:27 -0400 Subject: [PATCH 77/82] Add M2 to the HC annotations check --- .../gatk/tools/walkers/annotator/AnnotationUtils.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AnnotationUtils.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AnnotationUtils.java index 70ad7c132..c8f322b08 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AnnotationUtils.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AnnotationUtils.java @@ -66,6 +66,7 @@ import org.broadinstitute.gatk.tools.walkers.variantutils.GenotypeGVCFs; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller; +import org.broadinstitute.gatk.tools.walkers.cancer.m2.MuTect2; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import java.util.ArrayList; @@ -75,7 +76,7 @@ import java.util.Set; public class AnnotationUtils { - public static final String ANNOTATION_HC_WARN_MSG = " annotation will not be calculated, must be called from HaplotypeCaller"; + public static final String ANNOTATION_HC_WARN_MSG = " annotation will not be calculated, must be called from HaplotypeCaller or MuTect2"; public static final int WARNINGS_LOGGED_SIZE = 3; /** @@ -154,7 +155,7 @@ public class AnnotationUtils { throw new ReviewedGATKException("Warnings logged 
array must have at least " + WARNINGS_LOGGED_SIZE + " elements, but has " + warningsLogged.length); } - if ( !(walker instanceof HaplotypeCaller) ) { + if ( !(walker instanceof HaplotypeCaller) && !(walker instanceof MuTect2)) { if ( !warningsLogged[0] ) { logger.warn(annotation + ANNOTATION_HC_WARN_MSG + ", not " + walker.getClass().getSimpleName()); warningsLogged[0] = true; From 0769c8ae3e24243c5563320ee40bf2417ec8a7b7 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Fri, 27 May 2016 15:02:43 -0400 Subject: [PATCH 78/82] Remove Phone Home --- .../gatk/engine/CommandLineExecutable.java | 57 -- .../gatk/engine/GenomeAnalysisEngine.java | 4 - .../arguments/GATKArgumentCollection.java | 37 - .../gatk/engine/phonehome/GATKRunReport.java | 786 ------------------ .../engine/crypt/GATKKeyIntegrationTest.java | 157 ---- .../phonehome/GATKRunReportUnitTest.java | 358 -------- .../gatk/engine/walkers/WalkerTest.java | 4 - 7 files changed, 1403 deletions(-) delete mode 100644 public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/phonehome/GATKRunReport.java delete mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java delete mode 100644 public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/phonehome/GATKRunReportUnitTest.java diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineExecutable.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineExecutable.java index 56cfac40b..73dc0749c 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineExecutable.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineExecutable.java @@ -34,15 +34,10 @@ import org.broadinstitute.gatk.engine.filters.ReadFilter; import org.broadinstitute.gatk.engine.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterArgumentTypeDescriptor; 
import org.broadinstitute.gatk.engine.io.stubs.VCFWriterArgumentTypeDescriptor; -import org.broadinstitute.gatk.engine.phonehome.GATKRunReport; import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet; import org.broadinstitute.gatk.engine.walkers.Walker; -import org.broadinstitute.gatk.engine.crypt.CryptUtils; -import org.broadinstitute.gatk.engine.crypt.GATKKey; -import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.text.ListFileUtils; -import java.security.PublicKey; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -85,9 +80,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram { Walker walker = engine.getWalkerByName(getAnalysisName()); try { - // Make sure a valid GATK user key is present, if required. - authorizeGATKRun(); - engine.setArguments(getArgumentCollection()); // File lists can require a bit of additional expansion. Set these explicitly by the engine. @@ -119,9 +111,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram { } engine.execute(); - generateGATKRunReport(walker); } catch ( Exception e ) { - generateGATKRunReport(walker, e); throw e; } @@ -129,53 +119,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return 0; } - /** - * Authorizes this run of the GATK by checking for a valid GATK user key, if required. - * Currently, a key is required only if running with the -et NO_ET or -et STDOUT options. - */ - private void authorizeGATKRun() { - if ( getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.NO_ET || - getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) { - if ( getArgumentCollection().gatkKeyFile == null ) { - throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. 
" + - "Please see " + UserException.PHONE_HOME_DOCS_URL + - " for more information and instructions on how to obtain a key."); - } - else { - PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey(); - GATKKey gatkUserKey = new GATKKey(gatkPublicKey, getArgumentCollection().gatkKeyFile); - - if ( ! gatkUserKey.isValid() ) { - throw new UserException.KeySignatureVerificationException(getArgumentCollection().gatkKeyFile); - } - } - } - } - - /** - * Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled. - * This report will be written to either STDOUT or to the run repository, depending on the options - * for -et. - * - * @param e the exception, can be null if no exception occurred - */ - private void generateGATKRunReport(Walker walker, Exception e) { - if ( getArgumentCollection().phoneHomeType != GATKRunReport.PhoneHomeOption.NO_ET ) { - GATKRunReport report = new GATKRunReport(walker, e, engine, getArgumentCollection().phoneHomeType ); - report.postReport(getArgumentCollection().phoneHomeType); - } - } - - /** - * Convenience method for fully parameterized generateGATKRunReport when an exception has - * not occurred - * - * @param walker - */ - private void generateGATKRunReport(Walker walker) { - generateGATKRunReport(walker, null); - } - /** * Subclasses of CommandLinePrograms can provide their own types of command-line arguments. * @return A collection of type descriptors generating implementation-dependent placeholders. 
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java index 9f5ccaf93..469e5491d 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java @@ -49,7 +49,6 @@ import org.broadinstitute.gatk.engine.io.OutputTracker; import org.broadinstitute.gatk.engine.io.stubs.Stub; import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode; -import org.broadinstitute.gatk.engine.phonehome.GATKRunReport; import org.broadinstitute.gatk.utils.io.ReferenceBacked; import org.broadinstitute.gatk.utils.refdata.tracks.IndexDictionaryUtils; import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder; @@ -248,9 +247,6 @@ public class GenomeAnalysisEngine { * @return the value of this traversal. 
*/ public Object execute() { - // first thing is to make sure the AWS keys can be decrypted - GATKRunReport.checkAWSAreValid(); - //HeapSizeMonitor monitor = new HeapSizeMonitor(); //monitor.start(); setStartTime(new java.util.Date()); diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java index 9b74d9c57..1b4548d38 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java @@ -31,7 +31,6 @@ import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.utils.downsampling.DownsampleType; import org.broadinstitute.gatk.utils.downsampling.DownsamplingMethod; -import org.broadinstitute.gatk.engine.phonehome.GATKRunReport; import org.broadinstitute.gatk.engine.samples.PedigreeValidationType; import org.broadinstitute.gatk.utils.QualityUtils; import org.broadinstitute.gatk.utils.baq.BAQ; @@ -74,42 +73,6 @@ public class GATKArgumentCollection { @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false, minValue = 0) public Integer readBufferSize = null; - // -------------------------------------------------------------------------------------------------------------- - // - // GATKRunReport options - // - // -------------------------------------------------------------------------------------------------------------- - - /** - * By default, GATK generates a run report that is uploaded to a cloud-based service. This report contains basic - * statistics about the run (which tool was used, whether the run was successful etc.) that help us for debugging - * and development. 
Up to version 3.3-0 the run report contains a record of the username and hostname associated - * with the run, but it does **NOT** contain any information that could be used to identify patient data. - * Nevertheless, if your data is subject to stringent confidentiality clauses (no outside communication) or if your - * run environment is not connected to the internet, you can disable the reporting system by seeting this option to - * "NO_ET". You will also need to request a key using the online request form on our website (see FAQs). - */ - @Argument(fullName = "phone_home", shortName = "et", doc="Run reporting mode", required = false) - public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.AWS; - /** - * Please see the "phone_home" argument below and the online documentation FAQs for more details on the key system - * and how to request a key. - */ - @Argument(fullName = "gatk_key", shortName = "K", doc="GATK key file required to run with -et NO_ET", required = false) - public File gatkKeyFile = null; - - /** - * The GATKRunReport supports tagging GATK runs with an arbitrary tag that can be - * used to group together runs during later analysis (as of GATK 2.2) . One use of this capability is to tag - * runs as GATK performance tests, so that the performance of the GATK over time can be assessed from the logs - * directly. - * - * Note that the tags do not conform to any ontology, so you are free to use any tags that you might find - * meaningful. 
- */ - @Argument(fullName = "tag", shortName = "tag", doc="Tag to identify this GATK run as part of a group of runs", required = false) - public String tag = "NA"; - // -------------------------------------------------------------------------------------------------------------- // // General features diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/phonehome/GATKRunReport.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/phonehome/GATKRunReport.java deleted file mode 100644 index 0186823a5..000000000 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/phonehome/GATKRunReport.java +++ /dev/null @@ -1,786 +0,0 @@ -/* -* Copyright 2012-2016 Broad Institute, Inc. -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -package org.broadinstitute.gatk.engine.phonehome; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.broadinstitute.gatk.engine.CommandLineGATK; -import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; -import org.broadinstitute.gatk.engine.walkers.Walker; -import org.broadinstitute.gatk.utils.Utils; -import org.broadinstitute.gatk.engine.crypt.CryptUtils; -import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import org.broadinstitute.gatk.utils.io.IOUtils; -import org.broadinstitute.gatk.utils.io.Resource; -import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor; -import org.jets3t.service.S3Service; -import org.jets3t.service.S3ServiceException; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; -import org.jets3t.service.security.AWSCredentials; -import org.simpleframework.xml.Element; -import org.simpleframework.xml.Serializer; -import org.simpleframework.xml.core.Persister; - -import java.io.*; -import java.security.NoSuchAlgorithmException; -import java.security.PublicKey; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Date; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - - -/** - * A detailed description of a GATK run, and error if applicable. Simply create a GATKRunReport - * with the constructor, providing the walker that was run and the fully instantiated GenomeAnalysisEngine - * after the run finishes and the GATKRunReport will collect all of the report information - * into this object. Call postReport to write out the report, as an XML document, to either STDOUT, - * a file (in which case the output is gzipped), or with no arguments the report will be posted to the - * GATK run report database. 
- * - * @author depristo - * @since 2010 - */ -public class GATKRunReport { - protected static final String REPORT_BUCKET_NAME = "broad.gsa.gatk.run.reports"; - protected static final String TEST_REPORT_BUCKET_NAME = "broad.gsa.gatk.run.reports.test"; - protected final static String AWS_ACCESS_KEY_MD5 = "34d4a26eb2062b3f06e833b28f9a38c6"; - protected final static String AWS_SECRET_KEY_MD5 = "83f2332eec99ef1d7425d5dc5d4b514a"; - - private static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss"); - - /** - * our log - */ - protected static final Logger logger = Logger.getLogger(GATKRunReport.class); - - /** - * Default value for the number of milliseconds before an S3 put operation is timed-out. - * Can be overridden via a constructor argument. - */ - private static final long S3_DEFAULT_PUT_TIME_OUT_IN_MILLISECONDS = 30 * 1000; - - /** - * Number of milliseconds before an S3 put operation is timed-out. - */ - private long s3PutTimeOutInMilliseconds = S3_DEFAULT_PUT_TIME_OUT_IN_MILLISECONDS; - - // ----------------------------------------------------------------- - // elements captured for the report - // ----------------------------------------------------------------- - - @Element(required = false, name = "id") - private String id; - - @Element(required = false, name = "exception") - private GATKRunReportException mException; - - @Element(required = true, name = "start-time") - private String startTime = "ND"; - - @Element(required = true, name = "end-time") - private String endTime; - - @Element(required = true, name = "run-time") - private long runTime = 0; - - @Element(required = true, name = "walker-name") - private String walkerName; - - @Element(required = true, name = "svn-version") - private String svnVersion; - - @Element(required = true, name = "total-memory") - private long totalMemory; - - @Element(required = true, name = "max-memory") - private long maxMemory; - - @Element(required = true, name = "user-name") - private String 
userName; - - @Element(required = true, name = "host-name") - private String hostName; - - @Element(required = true, name = "java") - private String javaVersion; - - @Element(required = true, name = "machine") - private String machine; - - @Element(required = true, name = "iterations") - private long nIterations; - - @Element(required = true, name = "tag") - private String tag; - - @Element(required = true, name = "num-threads") - private int numThreads; - @Element(required = true, name = "percent-time-running") - private String percentTimeRunning; - @Element(required = true, name = "percent-time-waiting") - private String percentTimeWaiting; - @Element(required = true, name = "percent-time-blocking") - private String percentTimeBlocking; - @Element(required = true, name = "percent-time-waiting-for-io") - private String percentTimeWaitingForIO; - - /** The error message, if one occurred, or null if none did */ - public String errorMessage = null; - /** The error that occurred, if one did, or null if none did */ - public Throwable errorThrown = null; - - /** - * How should the GATK report its usage? - */ - public enum PhoneHomeOption { - /** Disable phone home */ - NO_ET, - /** Forces the report to go to S3 */ - AWS, - /** Force output to STDOUT. 
For debugging only */ - STDOUT - } - - /** - * To allow us to deserial reports from XML - */ - private GATKRunReport() { } - - /** - * Read a GATKRunReport from the serialized XML representation in String reportAsXML - * @param stream an input stream containing a serialized XML report - * @return a reconstituted GATKRunReport from reportAsXML - * @throws Exception if parsing fails for any reason - */ - @Ensures("result != null") - protected static GATKRunReport deserializeReport(final InputStream stream) throws Exception { - final Serializer serializer = new Persister(); - return serializer.read(GATKRunReport.class, stream); - } - - /** - * Create a new GATKRunReport from a report on S3 - * - * Assumes that s3Object has already been written to S3, and this function merely - * fetches it from S3 and deserializes it. The access keys must have permission to - * GetObject from S3. - * - * @param downloaderAccessKey AWS access key with permission to GetObject from bucketName - * @param downloaderSecretKey AWS secret key with permission to GetObject from bucketName - * @param bucketName the name of the bucket holding the report - * @param s3Object the s3Object we wrote to S3 in bucketName that we want to get back and decode - * @return a deserialized report derived from s3://bucketName/s3Object.getName() - * @throws Exception - */ - @Ensures("result != null") - protected static GATKRunReport deserializeReport(final String downloaderAccessKey, - final String downloaderSecretKey, - final String bucketName, - final S3Object s3Object) throws Exception { - final S3Service s3Service = initializeAWSService(downloaderAccessKey, downloaderSecretKey); - - // Retrieve the whole data object we created previously - final S3Object objectComplete = s3Service.getObject(bucketName, s3Object.getName()); - - // Read the data from the object's DataInputStream using a loop, and print it out. 
- return deserializeReport(new GZIPInputStream(objectComplete.getDataInputStream())); - } - - /** - * Create a new RunReport and population all of the fields with values from the walker and engine. - * Allows the S3 put timeout to be explicitly set. - * - * @param walker the GATK walker that we ran - * @param e the exception caused by running this walker, or null if we completed successfully - * @param engine the GAE we used to run the walker, so we can fetch runtime, args, etc - * @param type the GATK phone home setting - * @param s3PutTimeOutInMilliseconds number of milliseconds to wait before timing out an S3 put operation - */ - public GATKRunReport(final Walker walker, final Exception e, final GenomeAnalysisEngine engine, final PhoneHomeOption type, - final long s3PutTimeOutInMilliseconds) { - this(walker, e, engine, type); - this.s3PutTimeOutInMilliseconds = s3PutTimeOutInMilliseconds; - } - - /** - * Create a new RunReport and population all of the fields with values from the walker and engine. - * Leaves the S3 put timeout set to the default value of S3_DEFAULT_PUT_TIME_OUT_IN_MILLISECONDS. - * - * @param walker the GATK walker that we ran - * @param e the exception caused by running this walker, or null if we completed successfully - * @param engine the GAE we used to run the walker, so we can fetch runtime, args, etc - * @param type the GATK phone home setting - */ - public GATKRunReport(final Walker walker, final Exception e, final GenomeAnalysisEngine engine, final PhoneHomeOption type) { - if ( type == PhoneHomeOption.NO_ET ) - throw new ReviewedGATKException("Trying to create a run report when type is NO_ET!"); - - logger.debug("Aggregating data for run report"); - - // what did we run? 
- id = org.apache.commons.lang.RandomStringUtils.randomAlphanumeric(32); - walkerName = engine.getWalkerName(walker.getClass()); - svnVersion = CommandLineGATK.getVersionNumber(); - - // runtime performance metrics - Date end = new java.util.Date(); - endTime = DATE_FORMAT.format(end); - if ( engine.getStartTime() != null ) { // made it this far during initialization - startTime = DATE_FORMAT.format(engine.getStartTime()); - runTime = (end.getTime() - engine.getStartTime().getTime()) / 1000L; // difference in seconds - } - - // deal with memory usage - Runtime.getRuntime().gc(); // call GC so totalMemory is ~ used memory - maxMemory = Runtime.getRuntime().maxMemory(); - totalMemory = Runtime.getRuntime().totalMemory(); - - // we can only do some operations if an error hasn't occurred - if ( engine.getCumulativeMetrics() != null ) { - // it's possible we aborted so early that these data structures arent initialized - nIterations = engine.getCumulativeMetrics().getNumIterations(); - } - - tag = engine.getArguments().tag; - - // user and hostname -- information about the runner of the GATK - userName = System.getProperty("user.name"); - hostName = Utils.resolveHostname(); - - // basic java information - javaVersion = Utils.join("-", Arrays.asList(System.getProperty("java.vendor"), System.getProperty("java.version"))); - machine = Utils.join("-", Arrays.asList(System.getProperty("os.name"), System.getProperty("os.arch"))); - - // if there was an exception, capture it - this.mException = e == null ? 
null : new GATKRunReportException(e); - - numThreads = engine.getTotalNumberOfThreads(); - percentTimeRunning = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.USER_CPU); - percentTimeBlocking = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.BLOCKING); - percentTimeWaiting = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.WAITING); - percentTimeWaitingForIO = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.WAITING_FOR_IO); - } - - /** - * Get the random alpha-numeric ID of this GATKRunReport - * @return a non-null string ID - */ - @Ensures("result != null") - public String getID() { - return id; - } - - /** - * Return a string representing the percent of time the GATK spent in state, if possible. Otherwise return NA - * - * @param engine the GATK engine whose threading efficiency info we will use - * @param state the state whose occupancy we wish to know - * @return a string representation of the percent occupancy of state, or NA is not possible - */ - @Requires({"engine != null", "state != null"}) - @Ensures("result != null") - private String getThreadEfficiencyPercent(final GenomeAnalysisEngine engine, final ThreadEfficiencyMonitor.State state) { - final ThreadEfficiencyMonitor tem = engine.getThreadEfficiencyMonitor(); - return tem == null ? 
"NA" : String.format("%.2f", tem.getStatePercent(state)); - } - - /** - * Get a filename (no path) appropriate for this report - * - * @return a non-null string filename - */ - @Ensures("result != null") - protected String getReportFileName() { - return getID() + ".report.xml.gz"; - } - - // --------------------------------------------------------------------------- - // - // Main public interface method for posting reports - // - // --------------------------------------------------------------------------- - - /** - * Post this GATK report to the destination implied by the PhoneHomeOption type - * - * Guaranteed to never throw an exception (exception noted below) and to return - * with a reasonable (~10 seconds) time regardless of successful writing of the report. - * - * @throws IllegalArgumentException if type == null - * @param type the type of phoning home we want to do - * @return true if a report was successfully written, false otherwise - */ - public boolean postReport(final PhoneHomeOption type) { - if ( type == null ) throw new IllegalArgumentException("type cannot be null"); - - logger.debug("Posting report of type " + type); - switch (type) { - case NO_ET: // don't do anything - return false; - case AWS: - wentToAWS = true; - return postReportToAWSS3() != null; - case STDOUT: - return postReportToStream(System.out); - default: - exceptDuringRunReport("BUG: unexpected PhoneHomeOption "); - return false; - } - } - - // --------------------------------------------------------------------------- - // - // Code for sending reports to local files - // - // --------------------------------------------------------------------------- - - /** - * Write an XML representation of this report to the stream, throwing a GATKException if the marshalling - * fails for any reason. 
- * - * @param stream an output stream to write the report to - */ - @Requires("stream != null") - protected boolean postReportToStream(final OutputStream stream) { - final Serializer serializer = new Persister(); - try { - serializer.write(this, stream); - return true; - } catch (Exception e) { - return false; - } - } - - // --------------------------------------------------------------------------- - // - // Code for sending reports to s3 - // - // --------------------------------------------------------------------------- - - /** - * Get the name of the S3 bucket where we should upload this report - * - * @return the string name of the s3 bucket - */ - @Ensures("result != null") - protected String getS3ReportBucket() { - return s3ReportBucket; - } - - /** - * Decrypts encrypted AWS key from encryptedKeySource - * @param encryptedKeySource a file containing an encrypted AWS key - * @return a decrypted AWS key as a String - */ - @Ensures("result != null") - public static String decryptAWSKey(final File encryptedKeySource) throws FileNotFoundException { - if ( encryptedKeySource == null ) throw new IllegalArgumentException("encryptedKeySource cannot be null"); - return decryptAWSKey(new FileInputStream(encryptedKeySource)); - } - - /** - * @see #decryptAWSKey(java.io.File) but with input from an inputstream - */ - @Requires("encryptedKeySource != null") - @Ensures("result != null") - private static String decryptAWSKey(final InputStream encryptedKeySource) { - final PublicKey key = CryptUtils.loadGATKDistributedPublicKey(); - final byte[] fromDisk = IOUtils.readStreamIntoByteArray(encryptedKeySource); - final byte[] decrypted = CryptUtils.decryptData(fromDisk, key); - return new String(decrypted); - } - - /** - * Get the decrypted AWS key sorted in the resource directories of name - * @param name the name of the file containing the needed AWS key - * @return a non-null GATK - */ - @Requires("name != null") - @Ensures("result != null") - private static String 
getAWSKey(final String name) { - final Resource resource = new Resource(name, GATKRunReport.class); - return decryptAWSKey(resource.getResourceContentsAsStream()); - } - - /** - * Get the AWS access key for the GATK user - * @return a non-null AWS access key for the GATK user - */ - @Ensures("result != null") - protected static String getAWSUploadAccessKey() { - return getAWSKey("resources/GATK_AWS_access.key"); - } - - /** - * Get the AWS secret key for the GATK user - * @return a non-null AWS secret key for the GATK user - */ - @Ensures("result != null") - protected static String getAWSUploadSecretKey() { - return getAWSKey("resources/GATK_AWS_secret.key"); - } - - /** - * Check that the AWS keys can be decrypted and are what we expect them to be - * - * @throws ReviewedGATKException if anything goes wrong - */ - public static void checkAWSAreValid() { - try { - final String accessKeyMD5 = Utils.calcMD5(getAWSUploadAccessKey()); - final String secretKeyMD5 = Utils.calcMD5(getAWSUploadSecretKey()); - - if ( ! AWS_ACCESS_KEY_MD5.equals(accessKeyMD5) ) { - throw new ReviewedGATKException("Invalid AWS access key found, expected MD5 " + AWS_ACCESS_KEY_MD5 + " but got " + accessKeyMD5); - } - if ( ! 
AWS_SECRET_KEY_MD5.equals(secretKeyMD5) ) { - throw new ReviewedGATKException("Invalid AWS secret key found, expected MD5 " + AWS_SECRET_KEY_MD5 + " but got " + secretKeyMD5); - } - - } catch ( Exception e ) { - throw new ReviewedGATKException("Couldn't decrypt AWS keys, something is wrong with the GATK distribution"); - } - } - - /** - * Get an initialized S3Service for use in communicating with AWS/s3 - * - * @param awsAccessKey our AWS access key to use - * @param awsSecretKey our AWS secret key to use - * @return an initialized S3Service object that can be immediately used to interact with S3 - * @throws S3ServiceException - */ - @Requires({"awsAccessKey != null", "awsSecretKey != null"}) - @Ensures("result != null") - protected static S3Service initializeAWSService(final String awsAccessKey, final String awsSecretKey) throws S3ServiceException { - // To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP - // implementation based on HttpClient, as this is the most robust implementation provided with JetS3t. - final AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey); - return new RestS3Service(awsCredentials); - } - - /** - * A runnable that pushes this GATKReport up to s3. - * - * Should be run in a separate thread so we can time it out if something is taking too long - */ - private class S3PutRunnable implements Runnable { - /** Was the upload operation successful? 
*/ - public final AtomicBoolean isSuccess; - /** The name of this report */ - private final String filename; - /** The contents of this report */ - private final byte[] contents; - - /** The s3Object that we created to upload, or null if it failed */ - public S3Object s3Object = null; - - @Requires({"filename != null", "contents != null"}) - public S3PutRunnable(final String filename, final byte[] contents){ - this.isSuccess = new AtomicBoolean(); - this.filename = filename; - this.contents = contents; - } - - public void run() { - try { - switch ( awsMode ) { - case FAIL_WITH_EXCEPTION: - throw new IllegalStateException("We are throwing an exception for testing purposes"); - case TIMEOUT: - try { - Thread.sleep(s3PutTimeOutInMilliseconds * 100); - } catch ( InterruptedException e ) { - // supposed to be empty - } - break; - case NORMAL: - // IAM GATK user credentials -- only right is to PutObject into broad.gsa.gatk.run.reports bucket - final S3Service s3Service = initializeAWSService(getAWSUploadAccessKey(), getAWSUploadSecretKey()); - - // Create an S3Object based on a file, with Content-Length set automatically and - // Content-Type set based on the file's extension (using the Mimetypes utility class) - final S3Object fileObject = new S3Object(filename, contents); - //logger.info("Created S3Object" + fileObject); - //logger.info("Uploading " + localFile + " to AWS bucket"); - s3Object = s3Service.putObject(getS3ReportBucket(), fileObject); - isSuccess.set(true); - break; - default: - throw new IllegalStateException("Unexpected AWS exception"); - } - } catch ( S3ServiceException e ) { - exceptDuringRunReport("S3 exception occurred", e); - } catch ( NoSuchAlgorithmException e ) { - exceptDuringRunReport("Couldn't calculate MD5", e); - } catch ( IOException e ) { - exceptDuringRunReport("Couldn't read report file", e); - } catch ( Exception e ) { - exceptDuringRunReport("An unexpected exception occurred during posting", e); - } - } - } - - /** - * Post this GATK 
report to the AWS s3 GATK_Run_Report log - * - * @return the s3Object pointing to our pushed report, or null if we failed to push - */ - protected S3Object postReportToAWSS3() { - // modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html - this.hostName = Utils.resolveHostname(); // we want to fill in the host name - final String key = getReportFileName(); - logger.debug("Generating GATK report to AWS S3 with key " + key); - - try { - // create an byte output stream so we can capture the output as a byte[] - final ByteArrayOutputStream byteStream = new ByteArrayOutputStream(8096); - final OutputStream outputStream = new GZIPOutputStream(byteStream); - postReportToStream(outputStream); - outputStream.close(); - final byte[] report = byteStream.toByteArray(); - - // stop us from printing the annoying, and meaningless, mime types warning - final Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class); - mimeTypeLogger.setLevel(Level.FATAL); - - // Set the S3 upload on its own thread with timeout: - final S3PutRunnable s3run = new S3PutRunnable(key,report); - final Thread s3thread = new Thread(s3run); - s3thread.setDaemon(true); - s3thread.setName("S3Put-Thread"); - s3thread.start(); - - s3thread.join(s3PutTimeOutInMilliseconds); - - if(s3thread.isAlive()){ - s3thread.interrupt(); - exceptDuringRunReport("Run statistics report upload to AWS S3 timed-out"); - } else if(s3run.isSuccess.get()) { - logger.info("Uploaded run statistics report to AWS S3"); - logger.debug("Uploaded to AWS: " + s3run.s3Object); - return s3run.s3Object; - } else { - // an exception occurred, the thread should have already invoked the exceptDuringRunReport function - } - } catch ( IOException e ) { - exceptDuringRunReport("Couldn't read report file", e); - } catch ( InterruptedException e) { - exceptDuringRunReport("Run statistics report upload interrupted", e); - } - - return null; - } - - // 
--------------------------------------------------------------------------- - // - // Error handling code - // - // --------------------------------------------------------------------------- - - /** - * Note that an exception occurred during creating or writing this report - * @param msg the message to print - * @param e the exception that occurred - */ - @Ensures("exceptionOccurredDuringPost()") - private void exceptDuringRunReport(final String msg, final Throwable e) { - this.errorMessage = msg; - this.errorThrown = e; - logger.debug("A problem occurred during GATK run reporting [*** everything is fine, but no report could be generated; please do not post this to the support forum ***]. Message is: " + msg + ". Error message is: " + e.getMessage()); - } - - /** - * Note that an exception occurred during creating or writing this report - * @param msg the message to print - */ - @Ensures("exceptionOccurredDuringPost()") - private void exceptDuringRunReport(final String msg) { - this.errorMessage = msg; - logger.debug("A problem occurred during GATK run reporting [*** everything is fine, but no report could be generated; please do not post this to the support forum ***]. Message is " + msg); - } - - /** - * Did an error occur during the posting of this run report? - * @return true if so, false if not - */ - public boolean exceptionOccurredDuringPost() { - return getErrorMessage() != null; - } - - /** - * If an error occurred during posting of this report, retrieve the message of the error that occurred, or null if - * no error occurred - * @return a string describing the error that occurred, or null if none did - */ - public String getErrorMessage() { - return errorMessage; - } - - /** - * Get the throwable that caused the exception during posting of this message, or null if none was available - * - * Note that getting a null valuable from this function doesn't not imply that no error occurred. Some - * errors that occurred many not have generated a throwable. 
- * - * @return the Throwable that caused the error, or null if no error occurred or was not caused by a throwable - */ - public Throwable getErrorThrown() { - return errorThrown; - } - - /** - * Helper method to format the exception that occurred during posting, or a string saying none occurred - * @return a non-null string - */ - @Ensures("result != null") - protected String formatError() { - return exceptionOccurredDuringPost() - ? String.format("Exception message=%s with cause=%s", getErrorMessage(), getErrorThrown()) - : "No exception occurred"; - } - - // --------------------------------------------------------------------------- - // - // Equals and hashcode -- purely for comparing reports for testing - // - // --------------------------------------------------------------------------- - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - GATKRunReport that = (GATKRunReport) o; - - if (maxMemory != that.maxMemory) return false; - if (nIterations != that.nIterations) return false; - if (numThreads != that.numThreads) return false; - if (runTime != that.runTime) return false; - if (totalMemory != that.totalMemory) return false; - if (endTime != null ? !endTime.equals(that.endTime) : that.endTime != null) return false; - if (hostName != null ? !hostName.equals(that.hostName) : that.hostName != null) return false; - if (id != null ? !id.equals(that.id) : that.id != null) return false; - if (javaVersion != null ? !javaVersion.equals(that.javaVersion) : that.javaVersion != null) return false; - if (mException != null ? !mException.equals(that.mException) : that.mException != null) return false; - if (machine != null ? !machine.equals(that.machine) : that.machine != null) return false; - if (percentTimeBlocking != null ? !percentTimeBlocking.equals(that.percentTimeBlocking) : that.percentTimeBlocking != null) - return false; - if (percentTimeRunning != null ? 
!percentTimeRunning.equals(that.percentTimeRunning) : that.percentTimeRunning != null) - return false; - if (percentTimeWaiting != null ? !percentTimeWaiting.equals(that.percentTimeWaiting) : that.percentTimeWaiting != null) - return false; - if (percentTimeWaitingForIO != null ? !percentTimeWaitingForIO.equals(that.percentTimeWaitingForIO) : that.percentTimeWaitingForIO != null) - return false; - if (startTime != null ? !startTime.equals(that.startTime) : that.startTime != null) return false; - if (svnVersion != null ? !svnVersion.equals(that.svnVersion) : that.svnVersion != null) return false; - if (tag != null ? !tag.equals(that.tag) : that.tag != null) return false; - if (userName != null ? !userName.equals(that.userName) : that.userName != null) return false; - if (walkerName != null ? !walkerName.equals(that.walkerName) : that.walkerName != null) return false; - - return true; - } - - @Override - public int hashCode() { - int result = id != null ? id.hashCode() : 0; - result = 31 * result + (mException != null ? mException.hashCode() : 0); - result = 31 * result + (startTime != null ? startTime.hashCode() : 0); - result = 31 * result + (endTime != null ? endTime.hashCode() : 0); - result = 31 * result + (int) (runTime ^ (runTime >>> 32)); - result = 31 * result + (walkerName != null ? walkerName.hashCode() : 0); - result = 31 * result + (svnVersion != null ? svnVersion.hashCode() : 0); - result = 31 * result + (int) (totalMemory ^ (totalMemory >>> 32)); - result = 31 * result + (int) (maxMemory ^ (maxMemory >>> 32)); - result = 31 * result + (userName != null ? userName.hashCode() : 0); - result = 31 * result + (hostName != null ? hostName.hashCode() : 0); - result = 31 * result + (javaVersion != null ? javaVersion.hashCode() : 0); - result = 31 * result + (machine != null ? machine.hashCode() : 0); - result = 31 * result + (int) (nIterations ^ (nIterations >>> 32)); - result = 31 * result + (tag != null ? 
tag.hashCode() : 0); - result = 31 * result + numThreads; - result = 31 * result + (percentTimeRunning != null ? percentTimeRunning.hashCode() : 0); - result = 31 * result + (percentTimeWaiting != null ? percentTimeWaiting.hashCode() : 0); - result = 31 * result + (percentTimeBlocking != null ? percentTimeBlocking.hashCode() : 0); - result = 31 * result + (percentTimeWaitingForIO != null ? percentTimeWaitingForIO.hashCode() : 0); - return result; - } - - // --------------------------------------------------------------------------- - // - // Code specifically for testing the GATKRunReport - // - // --------------------------------------------------------------------------- - - /** - * Enum specifying how the S3 uploader should behave. Must be normal by default. Purely for testing purposes - */ - protected enum AWSMode { - NORMAL, // write normally to AWS - FAIL_WITH_EXCEPTION, // artificially fail during writing - TIMEOUT // sleep, so we time out - } - /** Our AWS mode */ - private AWSMode awsMode = AWSMode.NORMAL; - /** The bucket were we send the GATK report on AWS/s3 */ - private String s3ReportBucket = REPORT_BUCKET_NAME; - /** Did we send the report to AWS? */ - private boolean wentToAWS = false; - - /** - * Send the report to the AWS test bucket -- for testing only - */ - protected void sendAWSToTestBucket() { - s3ReportBucket = TEST_REPORT_BUCKET_NAME; - } - - /** - * Has the report been written to AWS? - * - * Does not imply anything about the success of the send, just that it was attempted - * - * @return true if the report has been sent to AWS, false otherwise - */ - protected boolean wentToAWS() { - return wentToAWS; - } - - /** - * Purely for testing purposes. 
Tells the AWS uploader whether to actually upload or simulate errors - * @param mode what we want to do - */ - @Requires("mode != null") - protected void setAwsMode(final AWSMode mode) { - this.awsMode = mode; - } -} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java deleted file mode 100644 index fcf3afa6a..000000000 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/crypt/GATKKeyIntegrationTest.java +++ /dev/null @@ -1,157 +0,0 @@ -/* -* Copyright 2012-2016 Broad Institute, Inc. -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -package org.broadinstitute.gatk.engine.crypt; - -import org.broadinstitute.gatk.engine.walkers.WalkerTest; -import org.broadinstitute.gatk.engine.phonehome.GATKRunReport; -import org.broadinstitute.gatk.utils.exceptions.UserException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.Arrays; - -public class GATKKeyIntegrationTest extends WalkerTest { - - public static final String BASE_COMMAND = String.format("-T TestPrintReadsWalker -R %s -I %s -o %%s", - publicTestDir + "exampleFASTA.fasta", - publicTestDir + "exampleBAM.bam"); - public static final String MD5_UPON_SUCCESSFUL_RUN = "462656ec9632f8c21ee534d35093c3f8"; - - - private void runGATKKeyTest ( String testName, String etArg, String keyArg, Class expectedException, String md5 ) { - String command = BASE_COMMAND + String.format(" %s %s", etArg, keyArg); - - WalkerTestSpec spec = expectedException != null ? - new WalkerTestSpec(command, 1, expectedException) : - new WalkerTestSpec(command, 1, Arrays.asList(md5)); - - spec.disableImplicitArgs(); // Turn off automatic inclusion of -et/-K args by WalkerTest - executeTest(testName, spec); - } - - @Test - public void testValidKeyNoET() { - runGATKKeyTest("testValidKeyNoET", - "-et " + GATKRunReport.PhoneHomeOption.NO_ET, - "-K " + keysDataLocation + "valid.key", - null, - MD5_UPON_SUCCESSFUL_RUN); - } - - @Test - public void testValidKeyETStdout() { - runGATKKeyTest("testValidKeyETStdout", - "-et " + GATKRunReport.PhoneHomeOption.STDOUT, - "-K " + keysDataLocation + "valid.key", - null, - MD5_UPON_SUCCESSFUL_RUN); - } - - @Test - public void testValidKeyETStandard() { - runGATKKeyTest("testValidKeyETStandard", - "", - "-K " + keysDataLocation + "valid.key", - null, - MD5_UPON_SUCCESSFUL_RUN); - } - - @Test - public void testNoKeyNoET() { - runGATKKeyTest("testNoKeyNoET", - "-et " + GATKRunReport.PhoneHomeOption.NO_ET, - "", - UserException.class, - null); - } - - @Test - public void testNoKeyETStdout() { 
- runGATKKeyTest("testNoKeyETStdout", - "-et " + GATKRunReport.PhoneHomeOption.STDOUT, - "", - UserException.class, - null); - } - - @Test - public void testNoKeyETStandard() { - runGATKKeyTest("testNoKeyETStandard", - "", - "", - null, - MD5_UPON_SUCCESSFUL_RUN); - } - - @Test - public void testRevokedKey() { - runGATKKeyTest("testRevokedKey", - "-et " + GATKRunReport.PhoneHomeOption.NO_ET, - "-K " + keysDataLocation + "revoked.key", - UserException.KeySignatureVerificationException.class, - null); - } - - @DataProvider(name = "CorruptKeyTestData") - public Object[][] corruptKeyDataProvider() { - return new Object[][] { - { "corrupt_empty.key", UserException.UnreadableKeyException.class }, - { "corrupt_single_byte_file.key", UserException.UnreadableKeyException.class }, - { "corrupt_random_contents.key", UserException.UnreadableKeyException.class }, - { "corrupt_single_byte_deletion.key", UserException.UnreadableKeyException.class }, - { "corrupt_single_byte_insertion.key", UserException.UnreadableKeyException.class }, - { "corrupt_single_byte_change.key", UserException.UnreadableKeyException.class }, - { "corrupt_multi_byte_deletion.key", UserException.UnreadableKeyException.class }, - { "corrupt_multi_byte_insertion.key", UserException.UnreadableKeyException.class }, - { "corrupt_multi_byte_change.key", UserException.UnreadableKeyException.class }, - { "corrupt_bad_isize_field.key", UserException.UnreadableKeyException.class }, - { "corrupt_bad_crc.key", UserException.UnreadableKeyException.class }, - { "corrupt_no_email_address.key", UserException.UnreadableKeyException.class }, - { "corrupt_no_sectional_delimiter.key", UserException.UnreadableKeyException.class }, - { "corrupt_no_signature.key", UserException.UnreadableKeyException.class }, - { "corrupt_bad_signature.key", UserException.KeySignatureVerificationException.class }, - { "corrupt_non_gzipped_valid_key.key", UserException.UnreadableKeyException.class } - }; - } - - @Test(dataProvider = 
"CorruptKeyTestData") - public void testCorruptKey ( String corruptKeyName, Class expectedException ) { - runGATKKeyTest(String.format("testCorruptKey (%s)", corruptKeyName), - "-et " + GATKRunReport.PhoneHomeOption.NO_ET, - "-K " + keysDataLocation + corruptKeyName, - expectedException, - null); - } - - @Test - public void testCorruptButNonRequiredKey() { - runGATKKeyTest("testCorruptButNonRequiredKey", - "", - "-K " + keysDataLocation + "corrupt_random_contents.key", - null, - MD5_UPON_SUCCESSFUL_RUN); - } -} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/phonehome/GATKRunReportUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/phonehome/GATKRunReportUnitTest.java deleted file mode 100644 index 7a151bbec..000000000 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/phonehome/GATKRunReportUnitTest.java +++ /dev/null @@ -1,358 +0,0 @@ -/* -* Copyright 2012-2016 Broad Institute, Inc. -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.gatk.engine.phonehome; - -import org.broadinstitute.gatk.engine.walkers.*; -import org.broadinstitute.gatk.utils.BaseTest; -import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; -import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; -import org.broadinstitute.gatk.utils.contexts.AlignmentContext; -import org.broadinstitute.gatk.utils.contexts.ReferenceContext; -import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; -import org.broadinstitute.gatk.utils.Utils; -import org.broadinstitute.gatk.utils.activeregion.ActiveRegion; -import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState; -import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import org.broadinstitute.gatk.utils.exceptions.UserException; -import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.jets3t.service.S3Service; -import org.jets3t.service.S3ServiceException; -import org.jets3t.service.ServiceException; -import org.jets3t.service.model.S3Object; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.FileInputStream; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Properties; - -public class GATKRunReportUnitTest extends BaseTest { - private final static boolean DEBUG = false; - private static final long S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING = 30 * 1000; - private static final String AWS_DOWNLOADER_CREDENTIALS_PROPERTIES_FILE = privateTestDir + 
"phonehome/awsDownloaderCredentials.properties"; - - private Walker walker; - private Exception exception; - private GenomeAnalysisEngine engine; - private String downloaderAccessKey; - private String downloaderSecretKey; - - @BeforeClass - public void setup() throws Exception { - walker = new RunReportDummyReadWalker(); - exception = new IllegalArgumentException("javaException"); - engine = new GenomeAnalysisEngine(); - engine.setArguments(new GATKArgumentCollection()); - - Properties awsProperties = new Properties(); - awsProperties.load(new FileInputStream(AWS_DOWNLOADER_CREDENTIALS_PROPERTIES_FILE)); - downloaderAccessKey = awsProperties.getProperty("accessKey"); - downloaderSecretKey = awsProperties.getProperty("secretKey"); - } - - @Test(enabled = ! DEBUG) - public void testAWSKeysAreValid() { - // throws an exception if they aren't - GATKRunReport.checkAWSAreValid(); - } - - @Test(enabled = ! DEBUG) - public void testAccessKey() throws Exception { - testAWSKey(GATKRunReport.getAWSUploadAccessKey(), GATKRunReport.AWS_ACCESS_KEY_MD5); - } - - @Test(enabled = ! 
DEBUG) - public void testSecretKey() throws Exception { - testAWSKey(GATKRunReport.getAWSUploadSecretKey(), GATKRunReport.AWS_SECRET_KEY_MD5); - } - - private void testAWSKey(final String accessKey, final String expectedMD5) throws Exception { - Assert.assertNotNull(accessKey, "AccessKey should not be null"); - final String actualmd5 = Utils.calcMD5(accessKey); - Assert.assertEquals(actualmd5, expectedMD5); - } - - @DataProvider(name = "GATKReportCreationTest") - public Object[][] makeGATKReportCreationTest() { - List tests = new ArrayList(); - - final Walker readWalker = new RunReportDummyReadWalker(); - final Walker lociWalker = new RunReportDummyLocusWalker(); - final Walker rodWalker = new RunReportDummyRodWalker(); - final Walker artWalker = new RunReportDummyActiveRegionWalker(); - - final Exception noException = null; - final Exception javaException = new IllegalArgumentException("javaException"); - final Exception stingException = new ReviewedGATKException("GATKException"); - final Exception userException = new UserException("userException"); - - final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); - engine.setArguments(new GATKArgumentCollection()); - - for ( final Walker walker : Arrays.asList(readWalker, lociWalker, rodWalker, artWalker) ) { - for ( final Exception exception : Arrays.asList(noException, javaException, stingException, userException) ) { - tests.add(new Object[]{walker, exception, engine}); - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = !DEBUG, dataProvider = "GATKReportCreationTest") - public void testGATKReportCreationReadingAndWriting(final Walker walker, final Exception exception, final GenomeAnalysisEngine engine) throws Exception { - final GATKRunReport report = new GATKRunReport(walker, exception, engine, GATKRunReport.PhoneHomeOption.STDOUT); - final ByteArrayOutputStream captureStream = new ByteArrayOutputStream(); - final boolean succeeded = report.postReportToStream(captureStream); - 
Assert.assertTrue(succeeded, "Failed to write report to stream"); - Assert.assertFalse(report.exceptionOccurredDuringPost(), "Post succeeded but report says it failed"); - Assert.assertNull(report.getErrorMessage(), "Post succeeded but there was an error message"); - Assert.assertNull(report.getErrorThrown(), "Post succeeded but there was an error message"); - final InputStream readStream = new ByteArrayInputStream(captureStream.toByteArray()); - - GATKRunReport deserialized = null; - try { - deserialized = GATKRunReport.deserializeReport(readStream); - } catch ( Exception e ) { - final String reportString = new String(captureStream.toByteArray()); - Assert.fail("Failed to deserialize GATK report " + reportString + " with exception " + e); - } - - if ( deserialized != null ) - Assert.assertEquals(report, deserialized); - } - - @DataProvider(name = "GATKAWSReportMode") - public Object[][] makeGATKAWSReportMode() { - List tests = new ArrayList(); - - for ( final GATKRunReport.AWSMode mode : GATKRunReport.AWSMode.values() ) { - tests.add(new Object[]{mode}); - } - - return tests.toArray(new Object[][]{}); - } - - // Will fail with timeout if AWS time out isn't working - // Will fail with exception if AWS doesn't protect itself from errors - @Test(enabled = ! DEBUG, dataProvider = "GATKAWSReportMode", timeOut = S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING * 2) - public void testAWS(final GATKRunReport.AWSMode awsMode) { - logger.warn("Starting testAWS mode=" + awsMode); - - // Use a shorter timeout than usual when we're testing GATKRunReport.AWSMode.TIMEOUT - final long thisTestS3Timeout = awsMode == GATKRunReport.AWSMode.TIMEOUT ? 
30 * 1000 : S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING; - final GATKRunReport report = new GATKRunReport(walker, exception, engine, GATKRunReport.PhoneHomeOption.AWS, thisTestS3Timeout); - report.sendAWSToTestBucket(); - report.setAwsMode(awsMode); - final S3Object s3Object = report.postReportToAWSS3(); - - if ( awsMode == GATKRunReport.AWSMode.NORMAL ) { - Assert.assertNotNull(s3Object, "Upload to AWS failed, s3Object was null. error was " + report.formatError()); - Assert.assertFalse(report.exceptionOccurredDuringPost(), "The upload should have succeeded but the report says it didn't. Error was " + report.formatError()); - Assert.assertNull(report.getErrorMessage(), "Report succeeded but an error message was found"); - Assert.assertNull(report.getErrorThrown(), "Report succeeded but an thrown error was found"); - try { - final GATKRunReport deserialized = GATKRunReport.deserializeReport(downloaderAccessKey, downloaderSecretKey, report.getS3ReportBucket(), s3Object); - Assert.assertEquals(report, deserialized); - deleteFromS3(report); - } catch ( Exception e ) { - Assert.fail("Failed to read, deserialize, or delete GATK report " + s3Object.getName() + " with exception " + e); - } - } else { - Assert.assertNull(s3Object, "AWS upload should have failed for mode " + awsMode + " but got non-null s3 object back " + s3Object + " error was " + report.formatError()); - Assert.assertTrue(report.exceptionOccurredDuringPost(), "S3 object was null but the report says that the upload succeeded"); - Assert.assertNotNull(report.getErrorMessage(), "Report succeeded but an error message wasn't found"); - if ( awsMode == GATKRunReport.AWSMode.FAIL_WITH_EXCEPTION ) - Assert.assertNotNull(report.getErrorThrown()); - } - } - - private void deleteFromS3(final GATKRunReport report) throws Exception { - final S3Service s3Service = GATKRunReport.initializeAWSService(downloaderAccessKey, downloaderSecretKey); - // Retrieve the whole data object we created previously - 
s3Service.deleteObject(report.getS3ReportBucket(), report.getReportFileName()); - } - - @DataProvider(name = "PostReportByType") - public Object[][] makePostReportByType() { - List tests = new ArrayList(); - - for ( final GATKRunReport.PhoneHomeOption et : GATKRunReport.PhoneHomeOption.values() ) { - tests.add(new Object[]{et}); - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = ! DEBUG, dataProvider = "PostReportByType", timeOut = S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING * 2) - public void testPostReportByType(final GATKRunReport.PhoneHomeOption type) { - final GATKRunReport report = new GATKRunReport(walker, exception, engine, GATKRunReport.PhoneHomeOption.AWS, S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING); - Assert.assertFalse(report.exceptionOccurredDuringPost(), "An exception occurred during posting the report"); - final boolean succeeded = report.postReport(type); - - if ( type == GATKRunReport.PhoneHomeOption.NO_ET ) - Assert.assertFalse(succeeded, "NO_ET option shouldn't write a report"); - else { - Assert.assertTrue(succeeded, "Any non NO_ET option should succeed in writing a report"); - - if ( type == GATKRunReport.PhoneHomeOption.STDOUT ) { - // nothing to do - } else { - // must have gone to AWS - try { - Assert.assertTrue(report.wentToAWS(), "The report should have gone to AWS but the report says it wasn't"); - deleteFromS3(report); - } catch ( Exception e ) { - Assert.fail("Failed delete GATK report " + report.getReportFileName() + " with exception " + e); - } - } - } - } - - public interface S3Op { - public void apply() throws ServiceException; - } - - // Will fail with timeout if AWS time out isn't working - // Will fail with exception if AWS doesn't protect itself from errors - @Test(timeOut = S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING * 2) - public void testAWSPublicKeyHasAccessControls() throws Exception { - final GATKRunReport report = new GATKRunReport(walker, exception, engine, GATKRunReport.PhoneHomeOption.AWS, 
S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING); - report.sendAWSToTestBucket(); - final S3Object s3Object = report.postReportToAWSS3(); - Assert.assertNotNull(s3Object, "Upload to AWS failed, s3Object was null. error was " + report.formatError()); - - // create a service with the public key, and make sure it cannot list or delete - final S3Service s3Service = GATKRunReport.initializeAWSService(GATKRunReport.getAWSUploadAccessKey(), GATKRunReport.getAWSUploadSecretKey()); - assertOperationNotAllowed("listAllBuckets", new S3Op() { - @Override - public void apply() throws S3ServiceException { - s3Service.listAllBuckets(); - } - }); - assertOperationNotAllowed("listBucket", new S3Op() { - @Override - public void apply() throws S3ServiceException { s3Service.listObjects(report.getS3ReportBucket()); } - }); - assertOperationNotAllowed("createBucket", new S3Op() { - @Override - public void apply() throws S3ServiceException { s3Service.createBucket("ShouldNotCreate"); } - }); - assertOperationNotAllowed("deleteObject", new S3Op() { - @Override - public void apply() throws ServiceException { s3Service.deleteObject(report.getS3ReportBucket(), report.getReportFileName()); } - }); - } - - private void assertOperationNotAllowed(final String name, final S3Op op) { - try { - op.apply(); - // only gets here if the operation was successful - Assert.fail("Operation " + name + " ran successfully but we expected to it fail"); - } catch ( ServiceException e ) { - Assert.assertEquals(e.getErrorCode(), "AccessDenied"); - } - } - - class RunReportDummyReadWalker extends ReadWalker { - @Override - public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { - return 0; - } - - @Override - public Integer reduceInit() { - return 0; - } - - @Override - public Integer reduce(Integer value, Integer sum) { - return 0; - } - } - - class RunReportDummyLocusWalker extends LocusWalker { - @Override - public Integer map(RefMetaDataTracker tracker, ReferenceContext 
ref, AlignmentContext context) { - return 0; - } - - @Override - public Integer reduceInit() { - return 0; - } - - @Override - public Integer reduce(Integer value, Integer sum) { - return 0; - } - } - - class RunReportDummyRodWalker extends RodWalker { - @Override - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return 0; - } - - @Override - public Integer reduceInit() { - return 0; - } - - @Override - public Integer reduce(Integer value, Integer sum) { - return 0; - } - } - - class RunReportDummyActiveRegionWalker extends ActiveRegionWalker { - @Override - public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return new ActivityProfileState(ref.getLocus(), 0.0); - } - - @Override - public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { - return 0; - } - - @Override - public Integer reduceInit() { - return 0; - } - - @Override - public Integer reduce(Integer value, Integer sum) { - return 0; - } - } -} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/walkers/WalkerTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/walkers/WalkerTest.java index b56618ce2..aad15de7c 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/walkers/WalkerTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/walkers/WalkerTest.java @@ -34,7 +34,6 @@ import org.apache.commons.lang.StringUtils; import org.broadinstitute.gatk.engine.CommandLineExecutable; import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.crypt.CryptUtils; -import org.broadinstitute.gatk.engine.phonehome.GATKRunReport; import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.MD5DB; import org.broadinstitute.gatk.utils.MD5Mismatch; @@ -225,9 +224,6 @@ public class WalkerTest extends BaseTest { public String 
getArgsWithImplicitArgs() { String args = this.args; if ( includeImplicitArgs ) { - args = args + (ENABLE_PHONE_HOME_FOR_TESTS ? - String.format(" -et %s ", GATKRunReport.PhoneHomeOption.AWS) : - String.format(" -et %s -K %s ", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile)); if ( includeShadowBCF && GENERATE_SHADOW_BCF ) args = args + " --generateShadowBCF "; if ( ! ENABLE_AUTO_INDEX_CREATION_AND_LOCKING_FOR_TESTS ) From d87345cd1da0f7adc583b4d7024f32ae33c02a4b Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Fri, 27 May 2016 19:47:32 -0400 Subject: [PATCH 79/82] GATKDocs overhaul - Fixed displaying of default values - Removed code cruft - Reorganized tooldoc categories and improved names - Reorganized tools within categories where applicable - Touched up various tool docs - Switched default gatkdocs output to html - Added parameter in agrregator pom to control output type - Set gatkdocs publishing script to output php - Deprecated GenotypeAndValidate walker - Added back PhoneHome arguments with @Deprecated annotations --- pom.xml | 3 +- .../tools/walkers/annotator/ExcessHet.java | 2 +- .../tools/walkers/bqsr/AnalyzeCovariates.java | 4 +- .../tools/walkers/bqsr/BaseRecalibrator.java | 2 +- .../walkers/cancer/AssignSomaticStatus.java | 4 + .../AnnotatePopulationAFWalker.java | 4 + .../walkers/cancer/contamination/ContEst.java | 2 +- ...AssemblyBasedCallerArgumentCollection.java | 59 +- .../walkers/phasing/PhaseByTransmission.java | 2 +- .../walkers/phasing/ReadBackedPhasing.java | 14 +- .../SimulateReadsForVariants.java | 2 +- .../validation/GenotypeAndValidate.java | 551 ------------------ .../ValidationSiteSelector.java | 7 +- .../ApplyRecalibration.java | 21 +- .../VariantDataManager.java | 2 +- ...VariantRecalibratorArgumentCollection.java | 3 +- .../CalculateGenotypePosteriors.java | 2 +- .../variantutils/RegenotypeVariants.java | 2 +- .../gatk/engine/CommandLineGATK.java | 2 +- .../arguments/GATKArgumentCollection.java | 126 ++-- 
.../filters/ReassignMappingQualityFilter.java | 2 +- .../ReassignOneMappingQualityFilter.java | 2 +- .../annotator/FractionInformativeReads.java | 4 +- .../walkers/coverage/DepthOfCoverage.java | 2 +- .../walkers/examples/GATKPaperGenotyper.java | 8 +- .../walkers/filters/VariantFiltration.java | 2 +- .../tools/walkers/indels/IndelRealigner.java | 2 +- .../gatk/tools/walkers/qc/QCRef.java | 2 +- .../tools/walkers/rnaseq/ASEReadCounter.java | 17 +- .../walkers/varianteval/VariantEval.java | 2 +- .../variantutils/GenotypeConcordance.java | 2 +- .../variantutils/ValidateVariants.java | 2 +- .../gatk/utils/DeprecatedToolChecks.java | 1 + .../codecs/sampileup/SAMPileupCodec.java | 2 +- .../utils/codecs/samread/SAMReadCodec.java | 2 +- .../utils/help/DocumentedGATKFeature.java | 2 - .../help/DocumentedGATKFeatureObject.java | 10 +- .../gatk/utils/help/GATKDocUtils.java | 12 +- .../gatk/utils/help/GATKDoclet.java | 20 +- .../help/GenericDocumentationHandler.java | 21 +- .../gatk/utils/help/HelpConstants.java | 43 +- .../gatk/utils/help/HelpFormatter.java | 1 + settings/helpTemplates/common.html | 35 +- .../helpTemplates/generic.index.template.html | 31 +- settings/helpTemplates/generic.template.html | 55 +- 45 files changed, 220 insertions(+), 876 deletions(-) delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java diff --git a/pom.xml b/pom.xml index 728c772db..3da58fc29 100644 --- a/pom.xml +++ b/pom.xml @@ -32,6 +32,7 @@ false -build-timestamp "${maven.build.timestamp}" + html ${gatk.basedir}/public/src/main/scripts/shell ${gatk.basedir}/public/src/main/assembly @@ -768,7 +769,7 @@ false true private - -build-timestamp "${maven.build.timestamp}" -absolute-version ${build.version} ${gatkdocs.include.hidden} -settings-dir ${gatk.basedir}/settings/helpTemplates -destination-dir ${project.build.directory}/gatkdocs + -build-timestamp "${maven.build.timestamp}" -absolute-version 
${build.version} ${gatkdocs.include.hidden} -settings-dir ${gatk.basedir}/settings/helpTemplates -destination-dir ${project.build.directory}/gatkdocs -output-file-extension ${gatkdocs.extension} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ExcessHet.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ExcessHet.java index 1f83f3ede..063e63560 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ExcessHet.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ExcessHet.java @@ -79,7 +79,7 @@ import java.util.*; /** * Phred-scaled p-value for exact test of excess heterozygosity * - * This annotation estimates excess heterozygosity in a population of samples. It is related to but distinct from InbreedingCoeff, which estimates evidence for inbreeding in a population. ExcessHet scales more reliably to large cohort sizes. + *

        This annotation estimates excess heterozygosity in a population of samples. It is related to but distinct from InbreedingCoeff, which estimates evidence for inbreeding in a population. ExcessHet scales more reliably to large cohort sizes.

        * *

        Statistical notes

        *

        This annotation is a one-sided phred-scaled p-value using an exact test of the Hardy-Weinberg Equilibrium. The null hypothesis is that the number of heterozygotes follows the Hardy-Weinberg Equilibrium. The p-value is the probability of getting the same or more heterozygotes as was observed, given the null hypothesis.

        diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/AnalyzeCovariates.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/AnalyzeCovariates.java index c4df9578a..45eb9e052 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/AnalyzeCovariates.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/AnalyzeCovariates.java @@ -78,8 +78,8 @@ import java.util.Map; /** * Create plots to visualize base recalibration results * - *

        - * This tool generates plots for visualizing the quality of a recalibration run (effected by BaseRecalibrator). + *

        + * This tool generates plots for visualizing the quality of a recalibration run (effected by BaseRecalibrator). *

        * *

        Input

        diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/BaseRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/BaseRecalibrator.java index 0c807a161..4205ed2d2 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/BaseRecalibrator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/BaseRecalibrator.java @@ -86,7 +86,7 @@ import java.util.Arrays; import java.util.List; /** - * Generate base recalibration table to compensate for systematic errors in basecalling confidences + * Detect systematic errors in base quality scores * *

        * Variant calling algorithms rely heavily on the quality scores assigned to the individual base calls in each sequence diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/AssignSomaticStatus.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/AssignSomaticStatus.java index 8068e6c3a..3714ba5b4 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/AssignSomaticStatus.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/AssignSomaticStatus.java @@ -54,6 +54,7 @@ package org.broadinstitute.gatk.tools.walkers.cancer; import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.ArgumentCollection; import org.broadinstitute.gatk.utils.commandline.Output; +import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext; @@ -66,6 +67,8 @@ import org.broadinstitute.gatk.engine.SampleUtils; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; +import org.broadinstitute.gatk.utils.help.HelpConstants; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -78,6 +81,7 @@ import java.util.*; /** * Assigns somatic status to a set of calls */ +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class}, enable = false ) public class AssignSomaticStatus extends RodWalker implements 
TreeReducible { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/AnnotatePopulationAFWalker.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/AnnotatePopulationAFWalker.java index 2674a72ec..604cba9d3 100755 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/AnnotatePopulationAFWalker.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/AnnotatePopulationAFWalker.java @@ -52,6 +52,9 @@ package org.broadinstitute.gatk.tools.walkers.cancer.contamination; +import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; +import org.broadinstitute.gatk.utils.help.HelpConstants; +import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.Input; import org.broadinstitute.gatk.utils.commandline.Output; @@ -78,6 +81,7 @@ import java.util.*; /** * Given a input VCF representing a collection of populations, split the input into each population, and annotate each record with population allele frequencies */ +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class}, enable = false ) // @Requires(DataSource.SAMPLE) <- require the sample data when this works public class AnnotatePopulationAFWalker extends RodWalker implements TreeReducible { // control the output diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/ContEst.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/ContEst.java index b9160d71f..a0639181f 
100755 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/ContEst.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/ContEst.java @@ -80,7 +80,7 @@ import java.util.*; /** * Estimate cross-sample contamination * - * This tool determine the percent contamination of an input bam by sample, by lane, or in aggregate across all the input reads. + *

        This tool determines the percent contamination of an input bam by sample, by lane, or in aggregate across all the input reads.

        * *

        Usage examples

        *

        These are example commands that show how to run ContEst for typical use cases. Square brackets ("[ ]") diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java index da2f684f1..bba75dbfe 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java @@ -74,10 +74,12 @@ public class AssemblyBasedCallerArgumentCollection extends StandardCallerArgumen public boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false; /** - * The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference. + * The reference confidence mode makes it possible to emit variant calls in GVCF format, which includes either a per-base + * pair (BP_RESOLUTION) or a summarized (GVCF) confidence estimate for each position being strictly homozygous-reference. * See http://www.broadinstitute.org/gatk/guide/article?id=2940 for more details of how this works. - * Note that if you set -ERC GVCF, you also need to set -variant_index_type LINEAR and -variant_index_parameter 128000 (with those exact values!). - * This requirement is a temporary workaround for an issue with index compression. + * Note that if you use -ERC to emit a GVCF or BP_RESOLUTION output, you either + * need to give the output file the extension .g.vcf or set the parameters -variant_index_type LINEAR + * and -variant_index_parameter 128000 (with those exact values!). This has to do with index compression. 
*/ @Advanced @Argument(fullName="emitRefConfidence", shortName="ERC", doc="Mode for emitting reference confidence scores", required = false) @@ -89,41 +91,39 @@ public class AssemblyBasedCallerArgumentCollection extends StandardCallerArgumen } /** - * The assembled haplotypes and locally realigned reads will be written as BAM to this file if requested. Really - * for debugging purposes only. Note that not every input read is emitted to the bam. To include trimmed, downsampled, - * filtered and uninformative reads add the "--emitDroppedReads" argument. + * The assembled haplotypes and locally realigned reads will be written as BAM to this file if requested. This is + * intended to be used only for troubleshooting purposes, in specific areas where you want to better understand + * why the caller is making specific calls. Turning on this mode may result in serious performance cost for the + * caller, so we do NOT recommend using this argument systematically as it will significantly increase runtime. * - * Turning on this mode may result in serious performance cost for the caller. It's really only appropriate to - * use in specific areas where you want to better understand why the caller is making specific calls. + * The candidate haplotypes (called or all, depending on mode) are emitted as single reads covering the entire + * active region, coming from sample "HC" and a special read group called "ArtificialHaplotype". This will increase + * the pileup depth compared to what would be expected from the reads only, especially in complex regions. * * The reads are written out containing an "HC" tag (integer) that encodes which haplotype each read best matches * according to the haplotype caller's likelihood calculation. The use of this tag is primarily intended * to allow good coloring of reads in IGV. Simply go to "Color Alignments By > Tag" and enter "HC" to more - * easily see which reads go with these haplotype. + * easily see which reads go with these haplotype. 
You can also tell IGV to group reads by sample, which will + * separate the potential haplotypes from the reads. These features are illustrated in + * this screenshot. * - * Note that the haplotypes (called or all, depending on mode) are emitted as single reads covering the entire - * active region, coming from sample "HC" and a special read group called "ArtificialHaplotype". This will increase the - * pileup depth compared to what would be expected from the reads only, especially in complex regions. + * Note that only reads that are actually informative about the haplotypes are emitted with the HC tag. + * By informative we mean that there's a meaningful difference in the likelihood of the read coming from one + * haplotype compared to the next best haplotype. When coloring reads by HC tag in IGV, uninformative reads will + * remain grey. * - * Note also that only reads that are actually informative about the haplotypes are emitted. By informative we mean - * that there's a meaningful difference in the likelihood of the read coming from one haplotype compared to - * its next best haplotype. - * - * If multiple BAMs are passed as input to the tool (as is common for M2), then they will be combined in the bamout - * output and tagged with the appropriate sample names. - * - * The best way to visualize the output of this mode is with IGV. Tell IGV to color the alignments by tag, - * and give it the "HC" tag, so you can see which reads support each haplotype. Finally, you can tell IGV - * to group by sample, which will separate the potential haplotypes from the reads. All of this can be seen in - * this screenshot + * Note also that not every input read is emitted to the bam in this mode. To include all trimmed, downsampled, + * filtered and uninformative reads, add the --emitDroppedReads argument. 
* + * If multiple BAMs are passed as input to the tool (as is common for MuTect2), then they will be combined in the + * -bamout output and tagged with the appropriate sample names. */ @Advanced @Output(fullName="bamOutput", shortName="bamout", doc="File to which assembled haplotypes should be written", required = false, defaultToStdout = false) public GATKSAMFileWriter bamWriter = null; /** - * The type of BAM output we want to see. This determines whether HC will write out all of the haplotypes it + * The type of -bamout output we want to see. This determines whether HC will write out all of the haplotypes it * considered (top 128 max) or just the ones that were selected as alleles and assigned to samples. */ @Advanced @@ -131,8 +131,8 @@ public class AssemblyBasedCallerArgumentCollection extends StandardCallerArgumen public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES; /** - * Determines whether dropped reads will be tracked and emitted when a bamout is specified. Use this in combination - * with a specific interval of interest to avoid accumulating a large number of reads int eh bamout. + * Determines whether dropped reads will be tracked and emitted when -bamout is specified. Use this in combination + * with a specific interval of interest to avoid accumulating a large number of reads in the -bamout file. */ @Advanced @Argument(fullName="emitDroppedReads", shortName="edr", doc="Emit reads that are dropped for filtering, trimming, realignment failure", required = false) @@ -140,9 +140,10 @@ public class AssemblyBasedCallerArgumentCollection extends StandardCallerArgumen /** * If set, certain "early exit" optimizations in HaplotypeCaller, which aim to save compute and time by skipping - * calculations if an ActiveRegion is determined to contain no variants, will be disabled. 
This is most likely to be useful if - * you're using the -bamout argument to examine the placement of reads following reassembly and are interested in seeing the mapping of - * reads in regions with no variations. Setting the -forceActive and -dontTrimActiveRegions flags may also be necessary. + * calculations if an ActiveRegion is determined to contain no variants, will be disabled. This is most likely to be + * useful if you're using the -bamout argument to examine the placement of reads following reassembly + * and are interested in seeing the mapping of reads in regions with no variations. Setting the -forceActive + * and -dontTrimActiveRegions flags may also be helpful. */ @Advanced @Argument(fullName = "disableOptimizations", shortName="disableOptimizations", doc="Don't skip calculations in ActiveRegions with no variants", diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmission.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmission.java index 4da0b59cc..84fc325cb 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmission.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmission.java @@ -140,7 +140,7 @@ import java.util.*; * * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} ) public class PhaseByTransmission extends RodWalker, HashMap> { @ArgumentCollection diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java index 1bcd851c1..19928c3fd 100644 --- 
a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java @@ -112,8 +112,20 @@ import static org.broadinstitute.gatk.engine.GATKVCFUtils.getVCFHeadersFromRods; * *

        Output

        *

        - * Phased VCF file. + * Phased VCF file using HP tags to link alleles at (possibly non-consecutive) genotypes of the same sample. *

        + *

        Example

        + *
        + *     GT:GQ:HP    0/1:99:17690409-1,17690409-2
        + *     GT:GQ:HP    0/1:99:17690409-2,17690409-1:1258.14
        + * 
        + *

        The second site's alternate allele (1) is on the same physical haplotype as the first site's reference allele (0), + * and vice versa [second site's 0 goes with first site's 1]. This is based on the fact that the HP pairs line up in + * reverse order between these two genotypes.

        + *

        In an old notation that RBP used to output in much earlier versions, the genotypes would have been: 0/1 and 1|0, + * respectively. This was changed because depending on the case it caused ambiguity, incompleteness, and possible + * inconsistency with trio-based phasing. In contrast, the HP tag is much more explicit for linking alleles, especially + * if the genotypes are non-consecutive.

        * *

        Usage example

        *
        diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariants.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariants.java
        index 84aac5f50..2024bb70a 100644
        --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariants.java
        +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariants.java
        @@ -106,7 +106,7 @@ import java.util.*;
          * 
        * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class}, gotoDev = HelpConstants.EB) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class}) @Reference(window=@Window(start=-200,stop=200)) public class SimulateReadsForVariants extends RodWalker { private static Logger logger = Logger.getLogger(SimulateReadsForVariants.class); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java deleted file mode 100644 index ab2f38057..000000000 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java +++ /dev/null @@ -1,551 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE -* SOFTWARE LICENSE AGREEMENT -* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE"). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. PHONE-HOME FEATURE -* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation. -* -* 4. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012-2016 Broad Institute, Inc. -* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 5. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 6. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 7. 
ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 8. MISCELLANEOUS -* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.gatk.tools.walkers.validation; - -import htsjdk.variant.vcf.*; -import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; -import org.broadinstitute.gatk.engine.walkers.*; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; -import org.broadinstitute.gatk.utils.commandline.*; -import org.broadinstitute.gatk.engine.CommandLineGATK; -import org.broadinstitute.gatk.utils.contexts.AlignmentContext; -import org.broadinstitute.gatk.utils.contexts.ReferenceContext; -import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; -import org.broadinstitute.gatk.tools.walkers.genotyper.*; -import org.broadinstitute.gatk.engine.SampleUtils; -import org.broadinstitute.gatk.utils.help.HelpConstants; -import org.broadinstitute.gatk.engine.GATKVCFUtils; -import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; -import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines; -import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; -import 
org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; -import htsjdk.variant.variantcontext.writer.VariantContextWriter; - -import java.util.Map; -import java.util.Set; - -import static org.broadinstitute.gatk.utils.IndelUtils.isInsideExtendedIndel; - -/** - * Genotype and validate a dataset and the calls of another dataset using the Unified Genotyper - * - *

        Note that this is an old tool that makes use of the UnifiedGenotyper, which has since been - * deprecated in favor of the HaplotypeCaller.

        - *

        - * Genotype and Validate is a tool to evaluate the quality of a dataset for calling SNPs - * and Indels given a secondary (validation) data source. The data sources are BAM or VCF - * files. You can use them interchangeably (i.e. a BAM to validate calls in a VCF or a VCF - * to validate calls on a BAM). - *

        - * - *

        - * The simplest scenario is when you have a VCF of hand annotated SNPs and Indels, and you - * want to know how well a particular technology performs calling these snps. With a - * dataset (BAM file) generated by the technology in test, and the hand annotated VCF, you - * can run GenotypeAndValidate to asses the accuracy of the calls with the new technology's - * dataset. - *

        - * - *

        - * Another option is to validate the calls on a VCF file, using a deep coverage BAM file - * that you trust the calls on. The GenotypeAndValidate walker will make calls using the - * reads in the BAM file and take them as truth, then compare to the calls in the VCF file - * and produce a truth table. - *

        - * - * - *

        Input

        - *

        - * A BAM file to make calls on and a VCF file to use as truth validation dataset. - * - * You also have the option to invert the roles of the files using the command line options listed below. - *

        - * - *

        Output

        - *

        - * GenotypeAndValidate has two outputs. The truth table and the optional VCF file. The truth table is a - * 2x2 table correlating what was called in the dataset with the truth of the call (whether it's a true - * positive or a false positive). The table should look like this: - *

        - *
        - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
        ALTREFPredictive Value
        called altTrue Positive (TP)False Positive (FP)Positive PV
        called refFalse Negative (FN)True Negative (TN)Negative PV
        - *
        - * - *

        - * The positive predictive value (PPV) is the proportion of subjects with positive test results - * who are correctly diagnosed. - *

        - *

        - * The negative predictive value (NPV) is the proportion of subjects with a negative test result - * who are correctly diagnosed. - *

        - *

        - * The VCF file will contain only the variants that were called or not called, excluding the ones that - * were uncovered or didn't pass the filters. This file is useful if you are trying to compare - * the PPV and NPV of two different technologies on the exact same sites (so you can compare apples to - * apples). - *

        - * - *

        - * Here is an example of an annotated VCF file (info field clipped for clarity) - * - *

        - * #CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  NA12878
        - * 1   20568807    .   C   T   0    HapMapHet        AC=1;AF=0.50;AN=2;DP=0;GV=T  GT  0/1
        - * 1   22359922    .   T   C   282  WG-CG-HiSeq      AC=2;AF=0.50;GV=T;AN=4;DP=42 GT:AD:DP:GL:GQ  1/0 ./. 0/1:20,22:39:-72.79,-11.75,-67.94:99    ./.
        - * 13  102391461   .   G   A   341  Indel;SnpCluster AC=1;GV=F;AF=0.50;AN=2;DP=45 GT:AD:DP:GL:GQ  ./. ./. 0/1:32,13:45:-50.99,-13.56,-112.17:99   ./.
        - * 1   175516757   .   C   G   655  SnpCluster,WG    AC=1;AF=0.50;AN=2;GV=F;DP=74 GT:AD:DP:GL:GQ  ./. ./. 0/1:52,22:67:-89.02,-20.20,-191.27:99   ./.
        - * 
        - * - *

        - * - *

        Additional Details

        - *
          - *
        • - * You should always use -L on your VCF track, so that the GATK only looks at the sites on the VCF file. - * This speeds up the process a lot. - *
        • - *
        • - * The total number of visited bases may be greater than the number of variants in the original - * VCF file because of extended indels, as they trigger one call per new insertion or deletion. - * (i.e. ACTG/- will count as 4 genotyper calls, but it's only one line in the VCF). - *
        • - *
        - * - *

        Usage examples

        - *

        Genotypes BAM file from new technology using the VCF as a truth dataset

        - *
        - *  java
        - *      -jar GenomeAnalysisTK.jar \
        - *      -T  GenotypeAndValidate \
        - *      -R reference.fasta \
        - *      -I myNewTechReads.bam \
        - *      -alleles handAnnotatedVCF.vcf \
        - *      -L handAnnotatedVCF.vcf \
        - *      -o output.vcf
        - * 
        - * - *

        Genotypes BAM file from new technology a BAM file as the truth dataset

        - *
        - *  java
        - *      -jar GenomeAnalysisTK.jar \
        - *      -T  GenotypeAndValidate \
        - *      -R reference.fasta \
        - *      -I myTruthDataset.bam \
        - *      -alleles callsToValidate.vcf \
        - *      -L callsToValidate.vcf \
        - *      -bt \
        - *      -o output.vcf
        - * 
        - * - */ - -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} ) -@Requires(value={DataSource.READS, DataSource.REFERENCE}) -@Allows(value={DataSource.READS, DataSource.REFERENCE}) -@By(DataSource.REFERENCE) -@Reference(window=@Window(start=-200,stop=200)) -public class GenotypeAndValidate extends RodWalker implements TreeReducible { - - /** - * The optional output file that will have all the variants used in the Genotype and Validation essay. - * The new annotation `callStatus` will carry the value called in the validation VCF or BAM file." - */ - @Output(doc="Output VCF file with annotated variants", required=false) - protected VariantContextWriter vcfWriter = null; - - /** - * The callset to be used as truth (default) or validated (if BAM file is set to truth). - */ - @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype", required=true) - public RodBinding alleles; - - /** - * Makes the Unified Genotyper calls to the BAM file the truth dataset and validates the alleles ROD binding callset. - */ - @Argument(fullName ="set_bam_truth", shortName ="bt", doc="Use the calls on the reads (bam file) as the truth dataset and validate the calls on the VCF", required=false) - private boolean bamIsTruth = false; - - /** - * The minimum base quality score necessary for a base to be considered when calling a genotype. This argument is passed to the Unified Genotyper. - */ - @Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false) - private int mbq = -1; - - /** - * The maximum deletion fraction allowed in a site for calling a genotype. This argument is passed to the Unified Genotyper. 
- */ - @Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false) - private double deletions = -1; - - /** - * the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. This argument is passed to the Unified Genotyper. - */ - @Argument(fullName="standard_min_confidence_threshold_for_calling", shortName="stand_call_conf", doc="the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls", required=false) - private double callConf = -1; - - /** - * the minimum phred-scaled Qscore threshold to emit low confidence calls. This argument is passed to the Unified Genotyper. - */ - @Argument(fullName="standard_min_confidence_threshold_for_emitting", shortName="stand_emit_conf", doc="the minimum phred-scaled Qscore threshold to emit low confidence calls", required=false) - private double emitConf = -1; - - /** - * Only validate sites that have at least a given depth - */ - @Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false) - private int minDepth = -1; - - /** - * Print out discordance sites to standard out. 
- */ - @Hidden - @Argument(fullName ="print_interesting_sites", shortName ="print_interesting", doc="Print out interesting sites to standard out", required=false) - private boolean printInterestingSites = false; - - private UnifiedGenotypingEngine snpEngine; - private UnifiedGenotypingEngine indelEngine; - private Set samples; - - private enum GVstatus { - T, F, NONE - } - - public static class CountedData { - private long nAltCalledAlt = 0L; - private long nAltCalledRef = 0L; - private long nAltNotCalled = 0L; - private long nRefCalledAlt = 0L; - private long nRefCalledRef = 0L; - private long nRefNotCalled = 0L; - private long nNoStatusCalledAlt = 0L; - private long nNoStatusCalledRef = 0L; - private long nNoStatusNotCalled = 0L; - private long nNotConfidentCalls = 0L; - private long nUncovered = 0L; - - /** - * Adds the values of other to this, returning this - * @param other the other object - */ - public void add(CountedData other) { - nAltCalledAlt += other.nAltCalledAlt; - nAltCalledRef += other.nAltCalledRef; - nAltNotCalled += other.nAltNotCalled; - nRefCalledAlt += other.nRefCalledAlt; - nRefCalledRef += other.nRefCalledRef; - nRefNotCalled += other.nRefNotCalled; - nNoStatusCalledAlt += other.nNoStatusCalledAlt; - nNoStatusCalledRef += other.nNoStatusCalledRef; - nNoStatusNotCalled += other.nNoStatusNotCalled; - nUncovered += other.nUncovered; - nNotConfidentCalls += other.nNotConfidentCalls; - } - } - - - - //--------------------------------------------------------------------------------------------------------------- - // - // initialize - // - //--------------------------------------------------------------------------------------------------------------- - - public void initialize() { - - // Initialize VCF header - if (vcfWriter != null) { - Map header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName()); - samples = SampleUtils.getSampleList(header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); - Set headerLines = 
VCFUtils.smartMergeHeaders(header.values(), true); - headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate")); - headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY)); - vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); - } - - // Filling in SNP calling arguments for UG - UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); - uac.outputMode = OutputMode.EMIT_ALL_SITES; - uac.alleles = alleles; - - // TODO -- if we change this tool to actually validate against the called allele, then this if statement is needed; - // TODO -- for now, though, we need to be able to validate the right allele (because we only test isVariant below) [EB] - //if (!bamIsTruth) - uac.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES; - - if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; - if (deletions >= 0) - uac.MAX_DELETION_FRACTION = deletions; - else - uac.MAX_DELETION_FRACTION = 1.0; - if (emitConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf; - if (callConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = callConf; - - final GenomeAnalysisEngine toolkit = getToolkit(); - uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; - snpEngine = new UnifiedGenotypingEngine(uac, - FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit); - - - // Adding the INDEL calling arguments for UG - UnifiedArgumentCollection uac_indel = uac.clone(); - uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL; - indelEngine = new UnifiedGenotypingEngine(uac_indel, - FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit); - - // make sure we have callConf set to the threshold set by the UAC so we can use it later. 
- callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // map - // - //--------------------------------------------------------------------------------------------------------------- - - public CountedData map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { - - final CountedData counter = new CountedData(); - - // For some reason RodWalkers get map calls with null trackers - if( tracker == null ) - return counter; - - VariantContext vcComp = tracker.getFirstValue(alleles); - if( vcComp == null ) - return counter; - - //todo - not sure I want this, may be misleading to filter extended indel events. - if (isInsideExtendedIndel(vcComp, ref)) - return counter; - - // Do not operate on variants that are not covered to the optional minimum depth - if (!context.hasReads() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) { - counter.nUncovered = 1L; - final GVstatus status = getGVstatus(vcComp); - if ( status == GVstatus.T ) - counter.nAltNotCalled = 1L; - else if ( status == GVstatus.F ) - counter.nRefNotCalled = 1L; - else - counter.nNoStatusNotCalled = 1L; - - return counter; - } - - VariantCallContext call; - if ( vcComp.isSNP() ) { - call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0); - } else if ( vcComp.isIndel() ) { - call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0); - } else if ( bamIsTruth ) { - // assume it's a SNP if no variation is present; this is necessary so that we can test supposed monomorphic sites against the truth bam - call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0); - } else { - logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles()); - return counter; - } - - - boolean writeVariant = true; - - if (bamIsTruth) { - if 
(call.confidentlyCalled) { - // If truth is a confident REF call - if (call.isVariant()) { - if (vcComp.isVariant()) - counter.nAltCalledAlt = 1L; - else { - counter.nAltCalledRef = 1L; - if ( printInterestingSites ) - System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart()); - } - } - // If truth is a confident ALT call - else { - if (vcComp.isVariant()) { - counter.nRefCalledAlt = 1L; - if ( printInterestingSites ) - System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart()); - } else - counter.nRefCalledRef = 1L; - } - } - else { - counter.nNotConfidentCalls = 1L; - if ( printInterestingSites ) - System.out.println("Truth is not confident at " + call.getChr() + ":" + call.getStart()); - writeVariant = false; - } - } - else { -// if (!vcComp.hasExtendedAttribute("GV")) -// throw new UserException.BadInput("Variant has no GV annotation in the INFO field. " + vcComp.getChr() + ":" + vcComp.getStart()); - - final GVstatus status = getGVstatus(vcComp); - if (call.isCalledAlt(callConf)) { - if ( status == GVstatus.T ) - counter.nAltCalledAlt = 1L; - else if ( status == GVstatus.F ) { - counter.nRefCalledAlt = 1L; - if ( printInterestingSites ) - System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart()); - } - else - counter.nNoStatusCalledAlt = 1L; - } - else if (call.isCalledRef(callConf)) { - if ( status == GVstatus.T ) { - counter.nAltCalledRef = 1L; - if ( printInterestingSites ) - System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart()); - } - else if ( status == GVstatus.F ) - counter.nRefCalledRef = 1L; - - else - counter.nNoStatusCalledRef = 1L; - } - else { - counter.nNotConfidentCalls = 1L; - if ( status == GVstatus.T ) - counter.nAltNotCalled = 1L; - else if ( status == GVstatus.F ) - counter.nRefNotCalled = 1L; - else - counter.nNoStatusNotCalled = 1L; - - if ( printInterestingSites ) - System.out.println("Truth is not confident at " + call.getChr() 
+ ":" + call.getStart()); - writeVariant = false; - } - } - - if (vcfWriter != null && writeVariant) { - if (!vcComp.hasAttribute(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY)) { - vcfWriter.add(new VariantContextBuilder(vcComp).attribute(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY, call.isCalledAlt(callConf) ? "ALT" : "REF").make()); - } - else - vcfWriter.add(vcComp); - } - return counter; - } - - private GVstatus getGVstatus(final VariantContext vc) { - return ( !vc.hasAttribute("GV") ) ? GVstatus.NONE : (vc.getAttribute("GV").equals("T") ? GVstatus.T : GVstatus.F); - } - - //--------------------------------------------------------------------------------------------------------------- - // - // reduce - // - //--------------------------------------------------------------------------------------------------------------- - - public CountedData reduceInit() { - return new CountedData(); - } - - public CountedData treeReduce( final CountedData sum1, final CountedData sum2) { - sum2.add(sum1); - return sum2; - } - - public CountedData reduce( final CountedData mapValue, final CountedData reduceSum ) { - reduceSum.add(mapValue); - return reduceSum; - } - - public void onTraversalDone( CountedData reduceSum ) { - double ppv = 100 * ((double) reduceSum.nAltCalledAlt /( reduceSum.nAltCalledAlt + reduceSum.nRefCalledAlt)); - double npv = 100 * ((double) reduceSum.nRefCalledRef /( reduceSum.nRefCalledRef + reduceSum.nAltCalledRef)); - double sensitivity = 100 * ((double) reduceSum.nAltCalledAlt /( reduceSum.nAltCalledAlt + reduceSum.nAltCalledRef)); - double specificity = (reduceSum.nRefCalledRef + reduceSum.nRefCalledAlt > 0) ? 
100 * ((double) reduceSum.nRefCalledRef /( reduceSum.nRefCalledRef + reduceSum.nRefCalledAlt)) : 100; - logger.info(String.format("Resulting Truth Table Output\n\n" + - "------------------------------------------------------------------\n" + - "\t\t|\tALT\t|\tREF\t|\tNo Status\n" + - "------------------------------------------------------------------\n" + - "called alt\t|\t%d\t|\t%d\t|\t%d\n" + - "called ref\t|\t%d\t|\t%d\t|\t%d\n" + - "not called\t|\t%d\t|\t%d\t|\t%d\n" + - "------------------------------------------------------------------\n" + - "positive predictive value: %f%%\n" + - "negative predictive value: %f%%\n" + - "------------------------------------------------------------------\n" + - "sensitivity: %f%%\n" + - "specificity: %f%%\n" + - "------------------------------------------------------------------\n" + - "not confident: %d\n" + - "not covered: %d\n" + - "------------------------------------------------------------------\n", reduceSum.nAltCalledAlt, reduceSum.nRefCalledAlt, reduceSum.nNoStatusCalledAlt, reduceSum.nAltCalledRef, reduceSum.nRefCalledRef, reduceSum.nNoStatusCalledRef, reduceSum.nAltNotCalled, reduceSum.nRefNotCalled, reduceSum.nNoStatusNotCalled, ppv, npv, sensitivity, specificity, reduceSum.nNotConfidentCalls, reduceSum.nUncovered)); - } -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/validationsiteselector/ValidationSiteSelector.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/validationsiteselector/ValidationSiteSelector.java index 48a8ae837..35fe33d43 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/validationsiteselector/ValidationSiteSelector.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/validationsiteselector/ValidationSiteSelector.java @@ -52,6 +52,7 @@ package 
org.broadinstitute.gatk.tools.walkers.validation.validationsiteselector; import org.broadinstitute.gatk.utils.commandline.*; +import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext; @@ -60,11 +61,11 @@ import org.broadinstitute.gatk.engine.walkers.RodWalker; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.engine.SampleUtils; import org.broadinstitute.gatk.utils.help.HelpConstants; +import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; + import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; -import org.broadinstitute.gatk.engine.GATKVCFUtils; -import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.writer.VariantContextWriter; @@ -134,7 +135,7 @@ import java.util.*; * * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) public class ValidationSiteSelector extends RodWalker { public enum AF_COMPUTATION_MODE { diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.java index a330a4790..d350af102 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.java @@ -119,27 
+119,27 @@ import java.util.regex.Pattern; * *

        Usage example for filtering SNPs

        *
        - * java -Xmx3g -jar GenomeAnalysisTK.jar \
        + * java -jar GenomeAnalysisTK.jar \
          *   -T ApplyRecalibration \
          *   -R reference.fasta \
        - *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
        + *   -input raw_variants.vcf \
          *   --ts_filter_level 99.0 \
        - *   -tranchesFile path/to/output.tranches \
        - *   -recalFile path/to/output.recal \
        + *   -tranchesFile output.tranches \
        + *   -recalFile output.recal \
          *   -mode SNP \
          *   -o path/to/output.recalibrated.filtered.vcf
          * 
        * *

        Allele-specific usage

        *
        - * java -Xmx3g -jar GenomeAnalysisTK.jar \
        + * java -jar GenomeAnalysisTK.jar \
          *   -T ApplyRecalibration \
          *   -R reference.fasta \
        - *   -input rawVariants.withASannotations.vcf \
        + *   -input raw_variants.withASannotations.vcf \
          *   -AS \
          *   --ts_filter_level 99.0 \
        - *   -tranchesFile path/to/output.AS.tranches \
        - *   -recalFile path/to/output.AS.recal \
        + *   -tranchesFile output.AS.tranches \
        + *   -recalFile output.AS.recal \
          *   -mode SNP \
          *   -o path/to/output.recalibrated.ASfiltered.vcf
          * 
        @@ -153,12 +153,11 @@ import java.util.regex.Pattern; *

        Caveats

        * *
          - *
        • The tranche values used in the example above is only a general example. You should determine the level of sensitivity + *
        • The tranche values used in the example above are only meant to be a general example. You should determine the level of sensitivity * that is appropriate for your specific project. Remember that higher sensitivity (more power to detect variants, yay!) comes * at the cost of specificity (more false negatives, boo!). You have to choose at what point you want to set the tradeoff.
        • *
        • In order to create the tranche reporting plots (which are only generated for SNPs, not indels!) Rscript needs to be - * in your environment PATH (this is the scripting version of R, not the interactive version). - * See http://www.r-project.org for more info on how to download and install R.
        • + * in your environment PATH (this is the scripting version of R, not the interactive version). *
        */ diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java index e57c42c5f..b1b19433c 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java @@ -110,7 +110,7 @@ public class VariantDataManager { final double theSTD = standardDeviation(theMean, iii, true); logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) ); if( Double.isNaN(theMean) ) { - throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpConstants.forumPost("discussion/49/using-variant-annotator")); + throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. 
VariantAnnotator may be used to add these annotations."); } foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-5); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java index 98bc91cdd..83b5a3262 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java @@ -203,7 +203,8 @@ public class VariantRecalibratorArgumentCollection { ///////////////////////////// // Deprecated Arguments - // Keeping them here is meant to provide users with error messages that are more informative than "arg not defined" when they use an argument that has been put out of service + // Keeping them here is meant to provide users with error messages that are more informative than "arg not defined" + // when they use an argument that has been put out of service ///////////////////////////// @Hidden diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java index e887fe0bf..224caa97f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java @@ -186,7 +186,7 @@ import java.util.*; * * */ -@DocumentedGATKFeature( groupName = 
HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} ) public class CalculateGenotypePosteriors extends RodWalker { /** diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java index 21e3e30e3..755bd087f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java @@ -114,7 +114,7 @@ import java.util.Set; * * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} ) public class RegenotypeVariants extends RodWalker implements TreeReducible { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineGATK.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineGATK.java index 0092c30ca..83aab7006 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineGATK.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineGATK.java @@ -43,7 +43,7 @@ import java.net.InetAddress; import java.util.*; /** - * All command line parameters accepted by all tools in the GATK. + * Command line parameters accepted by most if not all tools in the GATK * *

        Info for end users

        * diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java index 1b4548d38..737a46ba1 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java @@ -65,8 +65,12 @@ public class GATKArgumentCollection { @Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (BAM or CRAM)", required = false) public List samFiles = new ArrayList<>(); + /** + * This emits a log entry (level INFO) containing the full list of sequence data files to be included in the analysis + * (including files inside .bam.list or .cram.list files). + */ @Advanced - @Argument(fullName = "showFullBamList",doc="Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list or .cram.list files).") + @Argument(fullName = "showFullBamList",doc="Emit list of input BAM/CRAM files to log") public Boolean showFullBamList = false; @Advanced @@ -404,12 +408,18 @@ public class GATKArgumentCollection { required = false) public boolean disableAutoIndexCreationAndLockingWhenReadingRods = false; + /** + * FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. + */ @Hidden - @Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. 
This option is required in order to pass integration tests.", + @Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't include the command line in the VCF header", required = false) public boolean disableCommandLineInVCF = false; - @Argument(fullName = "sites_only", shortName = "sites_only", doc = "Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", + /** + * This produces a VCF with only the first 8 columns of site-level information and without any sample-level info (genotypes etc). + */ + @Argument(fullName = "sites_only", shortName = "sites_only", doc = "Output sites-only VCF", required = false) public boolean sitesOnlyVCF = false; @@ -417,9 +427,9 @@ public class GATKArgumentCollection { *

        The VCF specification permits missing records to be dropped from the end of FORMAT fields, so long as GT is always output. * This option prevents GATK from performing that trimming.

        * - *

        For example, given a FORMAT of

        GT:AD:DP:PL
        , GATK will by default emit
        ./.
        for a variant with + *

        For example, given a FORMAT of GT:AD:DP:PL, GATK will by default emit ./. for a variant with * no reads present (ie, the AD, DP, and PL fields are trimmed). If you specify -writeFullFormat, this record - * would be emitted as

        ./.:.:.:.

        + * would be emitted as ./.:.:.:.

        */ @Argument(fullName = "never_trim_vcf_format_field", shortName = "writeFullFormat", doc = "Always output all the records in VCF FORMAT fields, even if some are missing", required = false) @@ -435,14 +445,19 @@ public class GATKArgumentCollection { minValue = 0, maxValue = 9, required = false) public Integer bamCompression = null; + /** + * If provided, output BAM/CRAM files will be simplified to include only key reads for downstream variation + * discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the + * kept reads except the read group identifier + */ @Advanced @Argument(fullName = "simplifyBAM", shortName = "simplifyBAM", - doc = "If provided, output BAM/CRAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier", + doc = "Strip down read content and tags", required = false) public boolean simplifyBAM = false; @Advanced - @Argument(fullName = "disable_bam_indexing", doc = "Turn off on-the-fly creation of indices for output BAM/CRAM files.", + @Argument(fullName = "disable_bam_indexing", doc = "Turn off on-the-fly creation of indices for output BAM/CRAM files", required = false) public boolean disableBAMIndexing = false; @@ -500,69 +515,16 @@ public class GATKArgumentCollection { // -------------------------------------------------------------------------------------------------------------- /** - *

        Reads PED file-formatted tabular text files describing meta-data about the samples being - * processed in the GATK.

        - * - * - * - *

        The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:

        - * - *
          - *
        • Family ID
        • - *
        • Individual ID
        • - *
        • Paternal ID
        • - *
        • Maternal ID
        • - *
        • Sex (1=male; 2=female; other=unknown)
        • - *
        • Phenotype
        • - *
        - * - *

        The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person. - * A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a - * quantitative trait or an affection status column: GATK will automatically detect which type - * (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).

        - * - *

        If an individual's sex is unknown, then any character other than 1 or 2 can be used.

        - * - *

        You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that - * line will be ignored. Do not start any family IDs with this character therefore.

        - * - *

        Affection status should be coded:

        - * - *
          - *
        • -9 missing
        • - *
        • 0 missing
        • - *
        • 1 unaffected
        • - *
        • 2 affected
        • - *
        - * - *

        If any value outside of -9,0,1,2 is detected than the samples are assumed - * to phenotype values are interpreted as string phenotype values. In this case -9 uniquely - * represents the missing value.

        - * - *

        Genotypes (column 7 onwards) cannot be specified to the GATK.

        - * - *

        For example, here are two individuals (one row = one person):

        - * - *
        -     *   FAM001  1  0 0  1  2
        -     *   FAM001  2  0 0  1  2
        -     * 
        - * - *

        Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to - * tell the GATK PED parser that the corresponding fields are missing from the ped file.

        - * - *

        Note that most GATK walkers do not use pedigree information. Walkers that require pedigree - * data should clearly indicate so in their arguments and will throw errors if required pedigree - * information is missing.

        + * Reads PED file-formatted tabular text files describing meta-data about the samples being + * processed in the GATK. See https://www.broadinstitute.org/gatk/guide/article?id=7696 for more information + * on format requirements. Note that most GATK tools do not use pedigree information; for those that do it + * is indicated in their documentation. */ @Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false) public List pedigreeFiles = Collections.emptyList(); /** - * Inline PED records (see -ped argument). Each -pedString STRING can contain one or more + * Inline PED records. Each -pedString STRING can contain one or more * valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString * as -ped supports */ @@ -572,7 +534,7 @@ public class GATKArgumentCollection { /** * How strict should we be in parsing the PED files? */ - @Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="Validation strictness for pedigree information",required=false) + @Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="Validation strictness for pedigree",required=false) public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT; // -------------------------------------------------------------------------------------------------------------- @@ -614,17 +576,18 @@ public class GATKArgumentCollection { * DYNAMIC_SEEK attempts to optimize for minimal seek time by choosing an appropriate strategy and parameter (user-supplied parameter is ignored) * DYNAMIC_SIZE attempts to optimize for minimal index size by choosing an appropriate strategy and parameter (user-supplied parameter is ignored) * - * This argument is deprecated, using the output file ".g.vcf" extension will automatically set the appropriate value + * This argument is no longer necessary when producing GVCF files. 
Using the output file ".g.vcf" extension will automatically set the appropriate value */ @Argument(fullName="variant_index_type",shortName = "variant_index_type",doc="Type of IndexCreator to use for VCF/BCF indices",required=false) @Advanced public GATKVCFIndexType variant_index_type = GATKVCFUtils.DEFAULT_INDEX_TYPE; /** - * This is either the bin width or the number of features per bin, depending on the indexing strategy + * This is either the bin width or the number of features per bin, depending on the indexing strategy. * - * This argument is deprecated, using the output file ".g.vcf" extension will automatically set the appropriate value + * This argument is no longer necessary when producing GVCF files. Using the output file ".g.vcf" extension will + * automatically set the appropriate value */ - @Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator",required=false) + @Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator", required=false) @Advanced public int variant_index_parameter = GATKVCFUtils.DEFAULT_INDEX_PARAMETER; @@ -639,5 +602,28 @@ public class GATKArgumentCollection { @Argument(fullName = "reference_window_stop", shortName = "ref_win_stop", doc = "Reference window stop", minValue = 0, required = false) @Advanced public int reference_window_stop = DEFAULT_REFERENCE_WINDOW_STOP; + + // -------------------------------------------------------------------------------------------------------------- + // + // Deprecated Arguments + // Keeping them here is meant to provide users with error messages that are more informative than "arg not defined" + // when they use an argument that has been put out of service + // + // ------------------------------------------------------------------------------------------------------------- + + @Hidden + @Deprecated // argument definition changed to string 
since the original enum has been removed + @Argument(fullName = "phone_home", shortName = "et", doc="Run reporting mode", required = false) + public String phoneHomeType = ""; + + @Hidden + @Deprecated + @Argument(fullName = "gatk_key", shortName = "K", doc="GATK key file required to run with -et NO_ET", required = false) + public File gatkKeyFile = null; + + @Hidden + @Deprecated + @Argument(fullName = "tag", shortName = "tag", doc="Tag to identify this GATK run as part of a group of runs", required = false) + public String tag = "NA"; } diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignMappingQualityFilter.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignMappingQualityFilter.java index e68a31bb0..c29935d32 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignMappingQualityFilter.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignMappingQualityFilter.java @@ -29,7 +29,7 @@ import htsjdk.samtools.SAMRecord; import org.broadinstitute.gatk.utils.commandline.Argument; /** - * Set the mapping quality of all reads to a given value. + * Set the mapping quality of all reads to a given value * *

        * If a BAM file contains erroneous or missing mapping qualities (MAPQ), this read transformer will set all your diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOneMappingQualityFilter.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOneMappingQualityFilter.java index 44d24de9a..9ebfefadb 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOneMappingQualityFilter.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOneMappingQualityFilter.java @@ -29,7 +29,7 @@ import htsjdk.samtools.SAMRecord; import org.broadinstitute.gatk.utils.commandline.Argument; /** - * Set the mapping quality of reads with a given value to another given value. + * Set the mapping quality of reads with a given value to another given value * *

        * This read transformer will change a certain read mapping quality to a different value without affecting reads that diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FractionInformativeReads.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FractionInformativeReads.java index d2413e3f5..711bc560d 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FractionInformativeReads.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FractionInformativeReads.java @@ -45,7 +45,7 @@ import java.util.List; import java.util.Map; /** - * The fraction of reads that were deemed "informative" by the HaplotypeCaller over the entire cohort + * The fraction of reads deemed informative over the entire cohort * *

        The FractionInformativeReads annotation produces a single fraction for each site: sum(AD)/sum(DP). The sum in the numerator * is over all the samples in the cohort and all the alleles in each sample. The sum in the denominator is over all the samples. @@ -53,7 +53,7 @@ import java.util.Map; * *

        Caveats

        *
          - *
        • This annotation is generated by HaplotypeCaller or GenotypeGVCFs (it will not work when called from VariantAnnotator).
        • + *
        • This annotation can be generated by HaplotypeCaller, MuTect2 or GenotypeGVCFs (it will not work when called from VariantAnnotator).
        • *
        * *

        Related annotations

        diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverage.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverage.java index 282791d9e..942fadcd6 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverage.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverage.java @@ -106,7 +106,7 @@ import java.util.*; // todo -- alter logarithmic scaling to spread out bins more // todo -- allow for user to set linear binning (default is logarithmic) // todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class}, gotoDev = HelpConstants.MC) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class}) @By(DataSource.REFERENCE) @PartitionBy(PartitionType.NONE) @Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE) diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java index 69d0d5f2a..29c9dc129 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java @@ -44,12 +44,14 @@ import java.util.Arrays; import java.util.Comparator; /** - * A simple Bayesian genotyper, that outputs a text based call format. Intended to be used only as an - * example in the GATK publication. + * Simple Bayesian genotyper used in the original GATK paper + * + *

        This is a very simple implementation of a Bayesian genotyper that outputs a text based call format. It was developed + * solely to serve as a toy example in the original GATK publication, and should NOT be used in actual analysis work.

        * * @author aaron */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_TOY, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} ) public class GATKPaperGenotyper extends LocusWalker implements TreeReducible { public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3; diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java index 1e950ece8..fb975c37b 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java @@ -94,7 +94,7 @@ import java.util.*; * separate commands.

        * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VAREVAL, extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-50,stop=50)) public class VariantFiltration extends RodWalker { diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java index afc710c9d..31e29fdcb 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java @@ -120,7 +120,7 @@ import java.util.*; * * @author ebanks */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class}) @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_OUTPUT) public class IndelRealigner extends ReadWalker { diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/qc/QCRef.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/qc/QCRef.java index 818489cca..49983241d 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/qc/QCRef.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/qc/QCRef.java @@ -63,7 +63,7 @@ import java.io.PrintStream; * * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_REFUTILS, extraDocs = {CommandLineGATK.class} ) public class QCRef extends RefWalker { @Output public PrintStream out; diff --git 
a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java index 634566b25..db0bfa087 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java @@ -110,30 +110,27 @@ public class ASEReadCounter extends LocusWalker { /** * If this argument is enabled, loci with total depth lower than this threshold after all filters have been applied - * will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold. + * will be skipped. This can be set to -1 by default to disable the evaluation and ignore this threshold. */ - @Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of bases that pass filters", required = false, minValue = 0, maxValue = Integer.MAX_VALUE) + @Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of bases that pass filters", required = false, minValue = -1, maxValue = Integer.MAX_VALUE) public int minDepthOfNonFilteredBases = -1; /** * If this argument is enabled, reads with mapping quality values lower than this threshold will not be counted. - * This is set to -1 by default to disable the evaluation and ignore this threshold. + * This can be set to -1 by default to disable the evaluation and ignore this threshold. 
*/ @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum read mapping quality", required = false, minValue = -1, maxValue = Integer.MAX_VALUE) public int minMappingQuality = 0; /** * If this argument is enabled, bases with quality scores lower than this threshold will not be counted. - * This is set to -1 by default to disable the evaluation and ignore this threshold. + * This can be set to -1 to disable the evaluation and ignore this threshold. */ - @Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum base quality", required = false, minValue = 0, maxValue = Byte.MAX_VALUE) + @Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum base quality", required = false, minValue = -1, maxValue = Byte.MAX_VALUE) public byte minBaseQuality = 0; /** - * These options modify how the tool deals with overlapping read pairs. - * COUNT_READS - Count all reads independently, even if they are from the same fragment. - * COUNT_FRAGMENTS - Count all fragments, even if the reads that compose the fragment are not consistent at that base. - * COUNT_FRAGMENTS_REQUIRE_SAME_BASE - Count all fragments, but only if the reads that compose the fragment are consistent at that base (default). + * These options modify how the tool deals with overlapping read pairs. The default value is COUNT_FRAGMENTS_REQUIRE_SAME_BASE. 
*/ @Argument(fullName = "countOverlapReadsType", shortName = "overlap", doc = "Handling of overlapping reads from the same fragment", required = false) public CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS_REQUIRE_SAME_BASE; diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/varianteval/VariantEval.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/varianteval/VariantEval.java index b1855ae4d..1e97c388a 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/varianteval/VariantEval.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/varianteval/VariantEval.java @@ -138,7 +138,7 @@ import java.util.*; * command line, to rapidly ascertain whether it will work or not.

        * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VAREVAL, extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-50, stop=50)) @PartitionBy(PartitionType.NONE) public class VariantEval extends RodWalker implements TreeReducible { diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeConcordance.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeConcordance.java index c495eae7a..ccac49b24 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeConcordance.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeConcordance.java @@ -204,7 +204,7 @@ import java.util.*; * * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VAREVAL, extraDocs = {CommandLineGATK.class} ) public class GenotypeConcordance extends RodWalker>,ConcordanceMetrics> { /** diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java index 6c9912f06..ce9e84c0f 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java @@ -123,7 +123,7 @@ import java.util.*; * * */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VAREVAL, extraDocs = 
{CommandLineGATK.class} ) @Reference(window=@Window(start=0,stop=100)) public class ValidateVariants extends RodWalker { diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/DeprecatedToolChecks.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/DeprecatedToolChecks.java index 9a951f994..00097db21 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/DeprecatedToolChecks.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/DeprecatedToolChecks.java @@ -58,6 +58,7 @@ public class DeprecatedToolChecks { deprecatedGATKWalkers.put("LiftOverVariants","3.5 (use Picard LiftoverVCF instead; see documentation for usage)"); deprecatedGATKWalkers.put("FilterLiftedVariants","3.5 (use Picard LiftoverVCF instead; see documentation for usage)"); deprecatedGATKWalkers.put("ListAnnotations","3.5 (this tool was impractical; see the online documentation instead)"); + deprecatedGATKWalkers.put("GenotypeAndValidate","3.6 (this tool was old and untested -- no direct replacement)"); } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/sampileup/SAMPileupCodec.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/sampileup/SAMPileupCodec.java index 993aa5fae..3571dde69 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/sampileup/SAMPileupCodec.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/sampileup/SAMPileupCodec.java @@ -37,7 +37,7 @@ import java.util.regex.Pattern; import static org.broadinstitute.gatk.utils.codecs.sampileup.SAMPileupFeature.VariantType; /** - * Decoder for SAM pileup data. + * Decoder for SAM pileup data * *

        * From the SAMTools project documentation: diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/samread/SAMReadCodec.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/samread/SAMReadCodec.java index 1078bf717..d83dee271 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/samread/SAMReadCodec.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/samread/SAMReadCodec.java @@ -34,7 +34,7 @@ import htsjdk.tribble.readers.LineIterator; import htsjdk.tribble.util.ParsingUtils; /** - * Decodes a simple SAM text string. + * Decodes a simple SAM text string * *

        * Reads in the SAM text version of a BAM file as a ROD. For testing only diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeature.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeature.java index c824407ff..6dd64fb3c 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeature.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeature.java @@ -45,6 +45,4 @@ public @interface DocumentedGATKFeature { public String summary() default ""; /** Are there links to other docs that we should include? CommandLineGATK.class for walkers, for example? */ public Class[] extraDocs() default {}; - /** Who is the go-to developer for operation/documentation issues? */ - public String gotoDev() default "NA"; } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeatureObject.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeatureObject.java index f0b659076..8106caa9a 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeatureObject.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeatureObject.java @@ -36,20 +36,19 @@ class DocumentedGATKFeatureObject { private final Class classToDoc; /** Are we enabled? 
*/ private final boolean enable; - private final String groupName, summary, gotoDev; + private final String groupName, summary; private final Class[] extraDocs; - public DocumentedGATKFeatureObject(Class classToDoc, final boolean enable, final String groupName, final String summary, final Class[] extraDocs, final String gotoDev) { + public DocumentedGATKFeatureObject(Class classToDoc, final boolean enable, final String groupName, final String summary, final Class[] extraDocs) { this.classToDoc = classToDoc; this.enable = enable; this.groupName = groupName; this.summary = summary; this.extraDocs = extraDocs; - this.gotoDev = gotoDev; } - public DocumentedGATKFeatureObject(Class classToDoc, final String groupName, final String summary, final String gotoDev) { - this(classToDoc, true, groupName, summary, new Class[]{}, gotoDev); + public DocumentedGATKFeatureObject(Class classToDoc, final String groupName, final String summary) { + this(classToDoc, true, groupName, summary, new Class[]{}); } public Class getClassToDoc() { return classToDoc; } @@ -57,5 +56,4 @@ class DocumentedGATKFeatureObject { public String groupName() { return groupName; } public String summary() { return summary; } public Class[] extraDocs() { return extraDocs; } - public String gotoDev() { return gotoDev; } } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDocUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDocUtils.java index 608a9803c..7512a0f99 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDocUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDocUtils.java @@ -30,14 +30,6 @@ public class GATKDocUtils { * The URL root for RELEASED GATKDOC units */ public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpConstants.GATK_DOCS_URL; - /** - * The URL root for STABLE GATKDOC units //TODO: do sthing with this or remove -- URL goes nowhere - */ - public 
final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; - /** - * The URL root for UNSTABLE GATKDOC units //TODO: do sthing with this or remove -- URL goes nowhere - */ - public final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; /** * Return the filename of the GATKDoc PHP that would be generated for Class. This @@ -59,7 +51,7 @@ public class GATKDocUtils { /** * Returns a full URL http://etc/ linking to the documentation for class (assuming it - * exists). Currently points to the RELEASE doc path only. //TODO: do sthing with other paths or remove ? + * exists). Currently points to the RELEASE doc path only. * * @param c * @return @@ -68,8 +60,6 @@ public class GATKDocUtils { String classPath = phpFilenameForClass(c); StringBuilder b = new StringBuilder(); b.append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath); - //b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); - //b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); return b.toString(); } } \ No newline at end of file diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDoclet.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDoclet.java index 6aaa249f9..e0bd7365e 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDoclet.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDoclet.java @@ -79,9 +79,9 @@ public abstract class GATKDoclet { final private static String FORUM_KEY_PATH = "/local/gsa-engineering/gatkdocs_publisher/forum.key"; - final private static String OUTPUT_FILE_EXTENSION = "php"; + final private static String OUTPUT_FILE_EXTENSION = "html"; - /** Controls the extension of the non-json output files, and also the HREFs to these files. 
Default: php */ + /** Controls the extension of the non-json output files, and also the HREFs to these files. Default: html */ final private static String OUTPUT_FILE_EXTENSION_OPTION = "-output-file-extension"; // ---------------------------------------------------------------------- // @@ -120,8 +120,7 @@ public abstract class GATKDoclet { static { STATIC_DOCS.add(new DocumentedGATKFeatureObject(FeatureCodec.class, HelpConstants.DOCS_CAT_RODCODECS, - "Tribble codecs for reading reference ordered data (ROD) files such as VCF or BED", - "NA")); + "Codecs for reading resource files in reference ordered data (ROD) files such as BED")); } /** @@ -264,7 +263,7 @@ public abstract class GATKDoclet { List old = ForumAPIUtils.getPostedTools(forumKey); for (String s : old) - System.out.println(s); + //System.out.println(s); System.out.printf("Forum has %d items%n", old.size()); System.out.printf("Docs have %d items%n", docWorkUnits.size()); @@ -354,11 +353,11 @@ public abstract class GATKDoclet { if (docClass.isAnnotationPresent(DocumentedGATKFeature.class)) { DocumentedGATKFeature f = docClass.getAnnotation(DocumentedGATKFeature.class); - return new DocumentedGATKFeatureObject(docClass, f.enable(), f.groupName(), f.summary(), f.extraDocs(), f.gotoDev()); + return new DocumentedGATKFeatureObject(docClass, f.enable(), f.groupName(), f.summary(), f.extraDocs()); } else { for (DocumentedGATKFeatureObject staticDocs : STATIC_DOCS) { if (staticDocs.getClassToDoc().isAssignableFrom(docClass)) { - return new DocumentedGATKFeatureObject(docClass, staticDocs.enable(), staticDocs.groupName(), staticDocs.summary(), staticDocs.extraDocs(), staticDocs.gotoDev()); + return new DocumentedGATKFeatureObject(docClass, staticDocs.enable(), staticDocs.groupName(), staticDocs.summary(), staticDocs.extraDocs()); } } return null; @@ -469,11 +468,12 @@ public abstract class GATKDoclet { if (annotation.groupName().endsWith(" Tools")) supercatValue = "tools"; else if 
(annotation.groupName().endsWith(" Utilities")) supercatValue = "utilities"; else if (annotation.groupName().startsWith("Engine ")) supercatValue = "engine"; - else if (annotation.groupName().endsWith(" (DevZone)")) supercatValue = "dev"; + else if (annotation.groupName().endsWith(" (Exclude)")) supercatValue = "exclude"; else supercatValue = "other"; - root.put("supercat", supercatValue); - + //if (!supercatValue.contentEquals("exclude")) { + root.put("supercat", supercatValue); + //} return root; } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GenericDocumentationHandler.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GenericDocumentationHandler.java index a06edb68b..cb959d0a2 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GenericDocumentationHandler.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GenericDocumentationHandler.java @@ -117,8 +117,6 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH for (Tag tag : toProcess.classDoc.tags()) { root.put(tag.name(), tag.text()); } - - root.put("gotoDev", toProcess.annotation.gotoDev()); } /** @@ -190,7 +188,6 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH argBindings.put("maxValue", "NA"); argBindings.put("minRecValue", "NA"); argBindings.put("maxRecValue", "NA"); - argBindings.put("defaultValue", "NA"); } // Finalize argument bindings args.get(kind).add(argBindings); @@ -276,9 +273,9 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH final Object instance = makeInstanceIfPossible(toProcess.clazz); if (instance != null) { final Object value = getFieldValue(instance, argumentSource.field.getName()); - if (value != null) + if (value != null) { return value; - + } if (argumentSource.createsTypeDefault()) { try { // handle the case where there's an implicit default return 
argumentSource.typeDefaultDocString(); @@ -391,7 +388,7 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH } /** - * Pretty prints value + * Pretty prints value TODO I think this is what I need to fix the value problem *

        * Assumes value != null * @@ -512,7 +509,7 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH if (field.isAnnotationPresent(ArgumentCollection.class)) { ClassDoc typeDoc = getRootDoc().classNamed(fieldDoc.type().qualifiedTypeName()); if (typeDoc == null) - throw new ReviewedGATKException("Tried to get javadocs for ArgumentCollection field " + fieldDoc + " but could't find the class in the RootDoc"); + throw new ReviewedGATKException("Tried to get javadocs for ArgumentCollection field " + fieldDoc + " but couldn't find the class in the RootDoc"); else { FieldDoc result = getFieldDoc(typeDoc, name, false); if (result != null) @@ -563,7 +560,7 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH /** * Returns a human readable string that describes the Type type of a GATK argument. *

        - * This will include parameterized types, so that Set{T} shows up as Set(T) and not + * This will include parametrized types, so that Set{T} shows up as Set(T) and not * just Set in the docs. * * @param type @@ -644,9 +641,7 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH FeatureManager manager = new FeatureManager(); List rodTypes = new ArrayList(); for (FeatureManager.FeatureDescriptor descriptor : manager.getByFeature(featureClass)) { - rodTypes.add(String.format("%s", - GATKDocUtils.phpFilenameForClass(descriptor.getCodecClass()), - descriptor.getName())); + rodTypes.add(descriptor.getName()); } root.put("rodTypes", Utils.join(", ", rodTypes)); @@ -658,6 +653,10 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH root.put("summary", def.doc != null ? def.doc : ""); root.put("fulltext", fieldDoc.commentText()); + // Does this argument interact with any others? + root.put("otherArgumentRequired", def.otherArgumentRequired != null ? def.otherArgumentRequired : "NA"); + root.put("exclusiveOf", def.otherArgumentRequired != null ? def.exclusiveOf : "NA"); + // What are our enum options? 
if (def.validOptions != null) { root.put("options", docForEnumArgument(source.field.getType())); diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java index 279af20ed..f9a23dd84 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java @@ -44,44 +44,21 @@ public class HelpConstants { * The names get parsed to make supercategories in the doc index, * so be careful when making big changes -- see GATKDoclet.java toMap() */ - public final static String DOCS_CAT_DATA = "Sequence Data Processing Tools"; - public final static String DOCS_CAT_QC = "Diagnostics and Quality Control Tools"; public final static String DOCS_CAT_ENGINE = "Engine Parameters (available to all tools)"; - public final static String DOCS_CAT_RF = "Read Filters"; - public final static String DOCS_CAT_REFUTILS = "Reference Utilities"; - public final static String DOCS_CAT_RODCODECS = "ROD Codecs"; - public final static String DOCS_CAT_USRERR = "User Exceptions (DevZone)"; - public final static String DOCS_CAT_VALIDATION = "Validation Utilities"; - public final static String DOCS_CAT_ANNOT = "Variant Annotations"; + public final static String DOCS_CAT_QC = "Diagnostics and Quality Control Tools"; + public final static String DOCS_CAT_DATA = "Sequence Data Processing Tools"; public final static String DOCS_CAT_VARDISC = "Variant Discovery Tools"; - public final static String DOCS_CAT_VARMANIP = "Variant Evaluation and Manipulation Tools"; - public final static String DOCS_CAT_TOY = "Toy Walkers (DevZone)"; - public final static String DOCS_CAT_HELPUTILS = "Help Utilities"; - - public static String forumPost(String post) { - return GATK_FORUM_URL + post; - } + public final static String DOCS_CAT_VAREVAL = "Variant Evaluation Tools"; + public 
final static String DOCS_CAT_VARMANIP = "Variant Manipulation Tools"; + public final static String DOCS_CAT_ANNOT = "Annotation Modules"; + public final static String DOCS_CAT_RF = "Read Filters"; + public final static String DOCS_CAT_RODCODECS = "Resource File Codecs"; + public final static String DOCS_CAT_REFUTILS = "Reference Utilities"; + public final static String DOCS_CAT_USRERR = "User Exceptions (Exclude)"; + public final static String DOCS_CAT_TOY = "Toy Examples (Exclude)"; public static String articlePost(Integer id) { return GATK_ARTICLE_URL + "?id=" + id.toString(); } - /** - * Go-to developer name codes for tracking and display purposes. Only current team members should be in this list. - * When someone leaves, their charges should be redistributed. The actual string should be closest to the dev's - * abbreviated name or two/three-letter nickname as possible. The code can be something else if necessary to - * disambiguate from other variable. - */ - public final static String MC = "MC"; // Mauricio Carneiro - public final static String EB = "EB"; // Eric Banks - public final static String RP = "RP"; // Ryan Poplin - public final static String GVDA = "GG"; // Geraldine Van der Auwera - public final static String VRR = "VRR"; // Valentin Ruano-Rubio - public final static String ALM = "ALM"; // Ami Levy-Moonshine - public final static String BH = "BH"; // Bertrand Haas - public final static String JoT = "JT"; // Joel Thibault - public final static String DR = "DR"; // David Roazen - public final static String KS = "KS"; // Khalid Shakir - - } \ No newline at end of file diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpFormatter.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpFormatter.java index f613b9431..e553ba02d 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpFormatter.java +++ 
b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpFormatter.java @@ -204,6 +204,7 @@ public class HelpFormatter { builder.append(Utils.join("|",argumentDefinition.validOptions)); builder.append(")"); } + return builder.toString(); } diff --git a/settings/helpTemplates/common.html b/settings/helpTemplates/common.html index 54001626d..20e144400 100644 --- a/settings/helpTemplates/common.html +++ b/settings/helpTemplates/common.html @@ -1,30 +1,7 @@ - - <#-- - This file contains all the theming neccesary to present GATKDocs on the GATK website - Included are the paths to our bootstrap assets as well as helper functions to generate relevant links + This file contains part of the theming necessary to present GATKDocs on the GATK website. Included are the + paths to our bootstrap assets as well as helper functions to generate relevant links. Styling is separated + out, so pages will be minimalistic html unless replacement styling is provided. --> @@ -37,8 +14,8 @@

        Return to top


        See also - Guide Index | - Tool Documentation Index | + GATK Documentation Index | + Tool Docs Index | Support Forum

        @@ -62,7 +39,7 @@ }