From 9e7bf75e8926283a77701169b38ef97faeddc02a Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Fri, 24 Jan 2014 13:27:51 -0500 Subject: [PATCH 1/2] Fix for the PairHMM transition probability miscalculation. Problem: matchToMatch transition calculation was wrong resulting in transition probabilites coming out of the Match state that added more than 1. Reports: https://www.pivotaltracker.com/s/projects/793457/stories/62471780 https://www.pivotaltracker.com/s/projects/793457/stories/61082450 Changes: The transition matrix update code has been moved to a common place in PairHMMModel to dry out its multiple copies. MatchToMatch transtion calculation has been fixed and implemented in PairHMMModel. Affected integration test md5 have been updated, there were no differences in GT fields and example differences always implied small changes in likelihoods that is what is expected. --- .../utils/pairhmm/ArrayLoglessPairHMM.java | 52 +-- .../utils/pairhmm/FastLoglessPairHMM.java | 2 + .../sting/utils/pairhmm/LoglessPairHMM.java | 24 +- .../VariantAnnotatorIntegrationTest.java | 2 +- ...perGeneralPloidySuite1IntegrationTest.java | 2 +- ...perGeneralPloidySuite2IntegrationTest.java | 2 +- ...dGenotyperIndelCallingIntegrationTest.java | 9 +- ...GenotyperNormalCallingIntegrationTest.java | 6 +- ...dGenotyperReducedReadsIntegrationTest.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 4 +- .../HaplotypeCallerIntegrationTest.java | 8 +- .../NanoSchedulerIntegrationTest.java | 2 +- .../utils/pairhmm/PairHMMModelUnitTest.java | 337 ++++++++++++++ .../sting/utils/QualityUtils.java | 16 +- .../sting/utils/pairhmm/Log10PairHMM.java | 22 +- .../sting/utils/pairhmm/N2MemoryPairHMM.java | 2 +- .../sting/utils/pairhmm/PairHMM.java | 6 +- .../sting/utils/pairhmm/PairHMMModel.java | 437 ++++++++++++++++++ .../org/broadinstitute/sting/BaseTest.java | 34 +- .../sting/utils/QualityUtilsUnitTest.java | 7 +- 20 files changed, 865 insertions(+), 111 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMModelUnitTest.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMModel.java diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/ArrayLoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/ArrayLoglessPairHMM.java index a693ec22d..e818c9899 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/ArrayLoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/ArrayLoglessPairHMM.java @@ -1,45 +1,45 @@ /* * By downloading the PROGRAM you agree to the following terms of use: -* +* * BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* +* * This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). -* +* * WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and * WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. * NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* +* * 1. DEFINITIONS * 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* +* * 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. * The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. * 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY * LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. * Copyright 2012 Broad Institute, Inc. * Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. * LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* +* * 4. INDEMNIFICATION * LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* +* * 5. NO REPRESENTATIONS OR WARRANTIES * THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. * IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* +* * 6. ASSIGNMENT * This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* +* * 7. MISCELLANEOUS * 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. * 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. * 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. * 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. * 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. */ @@ -52,6 +52,8 @@ import org.broadinstitute.sting.utils.QualityUtils; import java.util.Arrays; +import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*; + /** * Created with IntelliJ IDEA. * User: bradt @@ -64,13 +66,6 @@ public class ArrayLoglessPairHMM extends PairHMM { // we divide e by 3 because the observed base could have come from any of the non-observed alleles protected static final double TRISTATE_CORRECTION = 3.0; - private static final int matchToMatch = 0; - private static final int indelToMatch = 1; - private static final int matchToInsertion = 2; - private static final int insertionToInsertion = 3; - private static final int matchToDeletion = 4; - private static final int deletionToDeletion = 5; - protected double[][] transition = null; // The transition probabilities cache protected double[][] prior = null; // The prior probabilities cache @@ -106,7 +101,7 @@ public class ArrayLoglessPairHMM extends PairHMM { public void initialize(final int readMaxLength, final int haplotypeMaxLength ) { super.initialize(readMaxLength, haplotypeMaxLength); - transition = new double[paddedMaxReadLength][6]; + transition = PairHMMModel.createTransitionMatrix(maxReadLength); prior = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; // Initialize all arrays @@ -131,7 +126,6 @@ public class ArrayLoglessPairHMM extends PairHMM { nextMatchCacheArray = new double[paddedMaxReadLength]; nextDeleteCacheArray = new double[paddedMaxReadLength]; nextInsertCacheArray = new double [paddedMaxReadLength]; - } @@ -260,15 +254,7 @@ public class ArrayLoglessPairHMM extends PairHMM { }) @Ensures("constantsAreInitialized") protected static void initializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) { - for (int i = 0; i < insertionGOP.length; i++) { - final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE); - transition[i+1][matchToMatch] = QualityUtils.qualToProb((byte) qualIndexGOP); - transition[i+1][indelToMatch] = QualityUtils.qualToProb(overallGCP[i]); - transition[i+1][matchToInsertion] = QualityUtils.qualToErrorProb(insertionGOP[i]); - transition[i+1][insertionToInsertion] = QualityUtils.qualToErrorProb(overallGCP[i]); - transition[i+1][matchToDeletion] = QualityUtils.qualToErrorProb(deletionGOP[i]); - transition[i+1][deletionToDeletion] = QualityUtils.qualToErrorProb(overallGCP[i]); - } + PairHMMModel.qualToTransProbs(transition,insertionGOP,deletionGOP,overallGCP); } /** diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/FastLoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/FastLoglessPairHMM.java index fb9dda8b2..72d5c9472 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/FastLoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/FastLoglessPairHMM.java @@ -54,6 +54,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*; + /** * Fast partial PairHMM backed on the standard Logless PairHMM * diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java index 0725e24b4..ed35e6970 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java @@ -50,6 +50,9 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.QualityUtils; +import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*; + + /** * Created with IntelliJ IDEA. * User: rpoplin, carneiro @@ -62,12 +65,6 @@ public class LoglessPairHMM extends N2MemoryPairHMM { // we divide e by 3 because the observed base could have come from any of the non-observed alleles protected static final double TRISTATE_CORRECTION = 3.0; - protected static final int matchToMatch = 0; - protected static final int indelToMatch = 1; - protected static final int matchToInsertion = 2; - protected static final int insertionToInsertion = 3; - protected static final int matchToDeletion = 4; - protected static final int deletionToDeletion = 5; /** @@ -158,20 +155,7 @@ public class LoglessPairHMM extends N2MemoryPairHMM { }) @Ensures("constantsAreInitialized") protected static void initializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) { - for (int i = 0; i < insertionGOP.length; i++) { - final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE); - transition[i+1][matchToMatch] = QualityUtils.qualToProb((byte) qualIndexGOP); - transition[i+1][indelToMatch] = QualityUtils.qualToProb(overallGCP[i]); - transition[i+1][matchToInsertion] = QualityUtils.qualToErrorProb(insertionGOP[i]); - transition[i+1][insertionToInsertion] = QualityUtils.qualToErrorProb(overallGCP[i]); - transition[i+1][matchToDeletion] = QualityUtils.qualToErrorProb(deletionGOP[i]); - transition[i+1][deletionToDeletion] = QualityUtils.qualToErrorProb(overallGCP[i]); - //TODO it seems that it is not always the case that matchToMatch + matchToDeletion + matchToInsertion == 1. - //TODO We have detected cases of 1.00002 which can cause problems downstream. This are typically masked - //TODO by the fact that we always add a indelToMatch penalty to all PairHMM likelihoods (~ -0.1) - //TODO This is in fact not well justified and although it does not have any effect (since is equally added to all - //TODO haplotypes likelihoods) perhaps we should just remove it eventually and fix this != 1.0 issue here. - } + PairHMMModel.qualToTransProbs(transition,insertionGOP,deletionGOP,overallGCP); } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index cf22d941d..d4a909821 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0); final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth"; - final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("b171258ed3ef5ae0d6c21fe04e5940fc")); + final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("78b8b498fdc34e59208150caacb25b1c")); specAnn.disableShadowBCF(); final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 5a16837f1..d1f13143f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "2787064918c7b391071a6ad4e5b0aba8"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "d38b9223a3234af4cd3aec245c72fb53"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 4c8c12887..6b4b9e8e4 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","50ebb7f74e5618acdd014dd87f2363fc"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","49f27dae0a86351128db87923735cb10"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index deb0289c9..d2f838779 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -73,8 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("1ad3943ae27a0062c52a19abe1c0d32c")); - + Arrays.asList("8a4de9e1f59cffe80a4372cf02fe809e")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -101,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("8a0a751afdb2a8166432d9822e4d814c")); + Arrays.asList("2b92df91a9337b9d9f03db5699bb41f2")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -121,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("01fec03933816e8d82aabe6e5b276dd5")); + Arrays.asList("d3d56be9e804132a8d085b5d0acb49f1")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -136,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("e3c95f745ebf2d4f26759878966c5280")); + Arrays.asList("505a0dfa1ec335af6850654f926ec051")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 47ef49845..471c1af98 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("dd5ad3beaa75319bb2ef1434d2dd9f73")); + Arrays.asList("e7cb959912ea964bf9c897904aa5220b")); executeTest("test Multiple SNP alleles", spec); } @@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("02c7804c8013ba1ead8e02b956b5e454")); + Arrays.asList("bc5a143868e3ad3acc9bb7c09798cdf2")); executeTest("test reverse trim", spec); } @@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("8d91d98c4e79897690d3c6918b6ac761")); + Arrays.asList("f29b3fa9d5642297cfc4b10aa2137c68")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index 0a54acbe4..eae37f142 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -74,7 +74,7 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "d593628b2bc144e987a9e75e5eee0001"); + testReducedCalling("INDEL", "0281c3f46f7b1989c37b52ab7e337293"); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 2ea525108..8d67b3baf 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "b5cd204b9dd6a5210b49d91186cf2b1d"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "e966ca14532ae80fe5d8898a1a7b4e74"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -94,6 +94,6 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "ccc3864207d700c00238066ec5ae33c5"); + "f50e0b35e2240b19b1b8b6dfa0cf9796"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 74149e8cf..4a88bc406 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -104,7 +104,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerGraphBasedMultiSample() { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "8b75034f9aa8435962da98eb521c8f0b"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "6a89f40fbeec05481fa1f2bf16289d5d"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "fc271f21c0693e4fa6394e27d722fb52"); + "f62e874e2405689784764095b6abd1a7"); } @Test @@ -298,7 +298,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestAggressivePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("ee73759f4372df678e7aa97346d87a70")); + Arrays.asList("69bbadca5beb8202a77815daaa49e634")); executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec); } @@ -306,7 +306,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestConservativePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("a9fa660910bf5e35267475f3b2d75766")); + Arrays.asList("061a5a9bde0739fe58b314bf8bf8eee3")); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java index 337f23afe..489eff0bc 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -67,7 +67,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest { for ( final int nct : Arrays.asList(1, 2) ) { // tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct }); //// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct }); - tests.add(new Object[]{ "BOTH", "a80925b58735828158491f77ae64998b", nt, nct }); + tests.add(new Object[]{ "BOTH", "392dc99dc279082fc6e729b249adfa2b", nt, nct }); } return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMModelUnitTest.java new file mode 100644 index 000000000..8c54326db --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMModelUnitTest.java @@ -0,0 +1,337 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.sting.utils.pairhmm; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.QualityUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Iterator; + + +/** + * Unit tests for {@link PairHMMModel} + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class PairHMMModelUnitTest extends BaseTest { + + final double TOLERANCE = 1E-9; + + @Test(dataProvider="qualToProbsDataProvider") + public void testQualToProbs(final int insQual, final int delQual, final int gcp, final double[] expected) { + final double[] actual = PairHMMModel.qualToTransProbs((byte)insQual,(byte)delQual,(byte)gcp); + Assert.assertNotNull(actual); + Assert.assertEquals(actual.length, PairHMMModel.TRANS_PROB_ARRAY_LENGTH); + assertEqualsDoubleArray(actual,expected,TOLERANCE); + Assert.assertEquals(actual.length, PairHMMModel.TRANS_PROB_ARRAY_LENGTH); + } + + @Test(dataProvider="qualToProbsDataProvider") + public void testQualToProbsLog10(final int insQuals, final int delQual, final int gcp, final double[] expected) { + final double[] logExpected = new double[expected.length]; + for (int i = 0; i < logExpected.length; i++) + logExpected[i] = Math.log10(expected[i]); + final double[] actual = PairHMMModel.qualToTransProbsLog10((byte)insQuals,(byte)delQual,(byte)gcp); + Assert.assertNotNull(actual); + Assert.assertEquals(actual.length, PairHMMModel.TRANS_PROB_ARRAY_LENGTH); + assertEqualsDoubleArray(actual,logExpected,TOLERANCE); + } + + @Test(dataProvider="qualToProbsDataProvider") + public void testQualToProbsFill(final int insQual, final int delQual, final int gcp, final double[] expected) { + final double[] actual = new double[PairHMMModel.TRANS_PROB_ARRAY_LENGTH]; + PairHMMModel.qualToTransProbs(actual, (byte) insQual, (byte) delQual, (byte) gcp); + assertEqualsDoubleArray(actual,expected,TOLERANCE); + } + + @Test(dataProvider="qualToTransDataProvider") + public void testQualsToTransProbs(final byte[] insQuals, final byte[] delQuals, final byte[] gapQuals, final double[][] expected) { + final double[][] actual = PairHMMModel.qualToTransProbs(insQuals,delQuals,gapQuals); + Assert.assertNotNull(actual); + Assert.assertEquals(actual.length,expected.length); + Assert.assertNotNull(actual[0]); + Assert.assertEquals(actual[0].length,expected[0].length); + for (int i = 0; i < actual.length ; i++) + assertEqualsDoubleArray(actual[i],expected[i],TOLERANCE); + } + + @Test(dataProvider="qualToTransDataProvider") + public void testQualsToTransProbsLog10(final byte[] insQuals, final byte[] delQuals, final byte[] gapQuals, final double[][] expected) { + final double[][] actual = PairHMMModel.qualToTransProbsLog10(insQuals,delQuals,gapQuals); + final double[][] logExpected = new double[expected.length][expected[0].length]; + for (int i = 1; i < expected.length; i++) + for (int j = 0; j < expected[0].length; j++) + logExpected[i][j] = Math.log10(expected[i][j]); + Assert.assertNotNull(actual); + Assert.assertEquals(actual.length,logExpected.length); + Assert.assertNotNull(actual[0]); + Assert.assertEquals(actual[0].length,logExpected[0].length); + for (int i = 0; i < actual.length ; i++) + assertEqualsDoubleArray(actual[i],logExpected[i],TOLERANCE); + } + + @Test(dataProvider="qualToTransDataProvider") + public void testQualsToTransProbsLog10Fill(final byte[] insQuals, final byte[] delQuals, final byte[] gapQuals, final double[][] expected) { + final double[][] actual = PairHMMModel.createTransitionMatrix(insQuals.length); + PairHMMModel.qualToTransProbsLog10(actual,insQuals,delQuals,gapQuals); + final double[][] logExpected = new double[expected.length][expected[0].length]; + for (int i = 1; i < expected.length; i++) + for (int j = 0; j < expected[0].length; j++) + logExpected[i][j] = Math.log10(expected[i][j]); + Assert.assertNotNull(actual); + Assert.assertEquals(actual.length,logExpected.length); + Assert.assertNotNull(actual[0]); + Assert.assertEquals(actual[0].length,logExpected[0].length); + for (int i = 0; i < actual.length ; i++) + assertEqualsDoubleArray(actual[i],logExpected[i],TOLERANCE); + } + + @Test(dataProvider="qualToTransDataProvider") + public void testQualsToTransProbsFill(final byte[] insQuals, final byte[] delQuals, final byte[] gapQuals, final double[][] expected) { + final double[][] actual = PairHMMModel.createTransitionMatrix(insQuals.length); + PairHMMModel.qualToTransProbs(actual,insQuals,delQuals,gapQuals); + Assert.assertNotNull(actual); + Assert.assertEquals(actual.length,expected.length); + Assert.assertNotNull(actual[0]); + Assert.assertEquals(actual[0].length,expected[0].length); + for (int i = 0; i < actual.length ; i++) + assertEqualsDoubleArray(actual[i],expected[i],TOLERANCE); + } + @Test(dataProvider="qualToProbsDataProvider") + public void testQualToProbsLog10Fill(final int insQuals, final int delQual, final int gcp, final double[] expected) { + final double[] logExpected = new double[expected.length]; + for (int i = 0; i < logExpected.length; i++) + logExpected[i] = Math.log10(expected[i]); + final double[] actual = new double[PairHMMModel.TRANS_PROB_ARRAY_LENGTH]; + PairHMMModel.qualToTransProbsLog10(actual, (byte) insQuals, (byte) delQual, (byte) gcp); + assertEqualsDoubleArray(actual,logExpected,TOLERANCE); + } + + + @DataProvider(name="qualToTransDataProvider") + public Iterator qualToTransDataProvider() { + return new Iterator() { + + private final Iterator readLengthIterator = readLengthIterator(); + private Iterator qualsIterator = qualIterator(); + + @Override + public boolean hasNext() { + return readLengthIterator.hasNext(); + } + + @Override + public Object[] next() { + final int readLength = readLengthIterator.next(); + double[][] matrix = new double[readLength+1][PairHMMModel.TRANS_PROB_ARRAY_LENGTH]; + final byte[] insQuals = new byte[readLength]; + final byte[] delQuals = new byte[readLength]; + final byte[] gapQuals = new byte[readLength]; + for (int i = 0; i < readLength; i++) { + if (!qualsIterator.hasNext()) + qualsIterator = qualIterator(); + final int[] quals = qualsIterator.next(); + final int insQual = quals[0]; + final int delQual = quals[1]; + final int gapQual = quals[2]; + final double[] trans = qualsToProbs(insQual, delQual, gapQual); + matrix[i+1] = trans; + insQuals[i] = (byte)insQual; + delQuals[i] = (byte)delQual; + gapQuals[i] = (byte)gapQual; + } + + return new Object[] { insQuals, delQuals, gapQuals, matrix }; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + + @DataProvider(name="qualToProbsDataProvider") + public Iterator qualToProbsDataProvider() { + return new Iterator() { + private final Iterator qualsIterator = qualIterator(); + + @Override + public boolean hasNext() { + return qualsIterator.hasNext(); + } + + @Override + public Object[] next() { + final int[] quals = qualsIterator.next(); + final int insQual = quals[0]; + final int delQual = quals[1]; + final int gapQual = quals[2]; + + final double[] trans = qualsToProbs(insQual, delQual, gapQual); + + + return new Object[] { insQual, delQual, gapQual, trans }; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + private double[] qualsToProbs(final int insQual, final int delQual, final int gapQual) { + final double[] trans = new double[PairHMMModel.TRANS_PROB_ARRAY_LENGTH]; + final double matchToMatch = PairHMMModel.matchToMatchProb(insQual, delQual); + final double matchToInsert = QualityUtils.qualToErrorProb(insQual); + final double matchToDeletion = QualityUtils.qualToErrorProb(delQual); + final double indelToMatch = QualityUtils.qualToProb(gapQual); + final double indelToIndel = QualityUtils.qualToErrorProb(gapQual); + + trans[PairHMMModel.matchToMatch] = matchToMatch; + trans[PairHMMModel.matchToInsertion] = matchToInsert; + trans[PairHMMModel.matchToDeletion] = matchToDeletion; + trans[PairHMMModel.indelToMatch] = indelToMatch; + trans[PairHMMModel.deletionToDeletion] = trans[PairHMMModel.insertionToInsertion] = indelToIndel; + return trans; + } + + private Iterator readLengthIterator() { + return Arrays.asList(READ_LENGTHS).iterator(); + } + + private Iterator qualIterator() { + final int totalCount = INS_QUALS.length * DEL_QUALS.length * GAP_QUALS.length; + + return new Iterator() { + + private int i = 0; + + @Override + public boolean hasNext() { + return i < totalCount; + } + + @Override + public int[] next() { + final int gap = i % GAP_QUALS.length; + final int indelGroup = i / GAP_QUALS.length; + final int del = indelGroup % DEL_QUALS.length; + final int ins = indelGroup % DEL_QUALS.length; + i++; + return new int[] { INS_QUALS[ins], DEL_QUALS[del], GAP_QUALS[gap]}; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + + + @Test(dataProvider = "dualTestDataProvider") + public void testDoubleQualToProb(final int insQual, final int delQual, final double log10Expected, final double expected) { + Assert.assertEquals(PairHMMModel.matchToMatchProb(insQual, delQual),expected,TOLERANCE); + Assert.assertEquals(PairHMMModel.matchToMatchProbLog10(insQual, delQual),log10Expected,TOLERANCE); + Assert.assertEquals(PairHMMModel.matchToMatchProb((byte) insQual, (byte) delQual),expected,TOLERANCE); + Assert.assertEquals(PairHMMModel.matchToMatchProbLog10((byte) insQual, (byte) delQual),log10Expected,TOLERANCE); + } + + @DataProvider(name = "dualTestDataProvider") + private Iterator dualTestDataProvider() { + final int[] testQuals = new int[] { 0, 1, 2, 5, 10, 13, 17, 20, 23, 27, 30, 43, 57, 70, 100, 200, 254}; + + return new Iterator() { + private int i = 0; + private int j = 0; + + @Override + public Object[] next() { + + final int qual1 = testQuals[i]; + final int qual2 = testQuals[j]; + + final double errorProb1 = Math.pow(10,- 0.1 * qual1); + final double errorProb2 = Math.pow(10,- 0.1 * qual2); + final double expected = Math.max(0, (1 - (errorProb1 + errorProb2))); + final Object[] result = new Object[] { qual1, qual2,Math.log10(Math.min(1,expected)),Math.min(1, expected)}; + + if (++j >= testQuals.length) { + i++; + j = i; + } + return result; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasNext() { + return i < testQuals.length; + } + }; + } + + + private static int[] INS_QUALS = {30, 45, 20, 10, 5, 60, 123 }; + + private static int[] DEL_QUALS = {30, 45, 20, 10, 5, 60, 123 }; + + private static int[] GAP_QUALS = {10, 20, 5}; + + private static Integer[] READ_LENGTHS = { 0, 1, 5, 20, 100, 250}; +} diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index c0d1df09d..543923dd6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -57,15 +57,20 @@ public class QualityUtils { public final static byte MIN_USABLE_Q_SCORE = 6; public final static int MAPPING_QUALITY_UNAVAILABLE = 255; + /** + * Maximum sense quality value. + */ + public static final int MAX_QUAL = 254; + /** * Cached values for qual as byte calculations so they are very fast */ - private static double qualToErrorProbCache[] = new double[256]; - private static double qualToProbLog10Cache[] = new double[256]; + private static double qualToErrorProbCache[] = new double[MAX_QUAL + 1]; + private static double qualToProbLog10Cache[] = new double[MAX_QUAL + 1]; static { - for (int i = 0; i < 256; i++) { + for (int i = 0; i <= MAX_QUAL; i++) { qualToErrorProbCache[i] = qualToErrorProb((double) i); qualToProbLog10Cache[i] = Math.log10(1.0 - qualToErrorProbCache[i]); } @@ -386,4 +391,7 @@ public class QualityUtils { public static byte boundQual(final int qual, final byte maxQual) { return (byte) (Math.max(Math.min(qual, maxQual & 0xFF), 1) & 0xFF); } -} + + } + + diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java index b83a15d6d..0ee08e560 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.QualityUtils; import java.util.Arrays; import static java.lang.Math.log10; +import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*; /** * Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book. @@ -46,12 +47,6 @@ public class Log10PairHMM extends N2MemoryPairHMM { */ private final boolean doExactLog10; - protected static final int matchToMatch = 0; - protected static final int indelToMatch = 1; - protected static final int matchToInsertion = 2; - protected static final int insertionToInsertion = 3; - protected static final int matchToDeletion = 4; - protected static final int deletionToDeletion = 5; // we divide e by 3 because the observed base could have come from any of the non-observed alleles protected final static double log10_3 = log10(3.0); @@ -120,9 +115,7 @@ public class Log10PairHMM extends N2MemoryPairHMM { // final probability is the log10 sum of the last element in the Match and Insertion state arrays // this way we ignore all paths that ended in deletions! (huge) // but we have to sum all the paths ending in the M and I matrices, because they're no longer extended. - double finalSumProbabilities = finalLikelihoodCalculation(); - - return finalSumProbabilities; + return finalLikelihoodCalculation(); } protected void initializeMatrixValues(final byte[] haplotypeBases) { @@ -180,16 +173,7 @@ public class Log10PairHMM extends N2MemoryPairHMM { }) @Ensures("constantsAreInitialized") protected void initializeProbabilities(final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) { - for (int i = 0; i < insertionGOP.length; i++) { - final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE); - transition[i+1][matchToMatch] = QualityUtils.qualToProbLog10((byte) qualIndexGOP); - transition[i+1][indelToMatch] = QualityUtils.qualToProbLog10(overallGCP[i]); - transition[i+1][matchToInsertion] = QualityUtils.qualToErrorProbLog10(insertionGOP[i]); - transition[i+1][insertionToInsertion] = QualityUtils.qualToErrorProbLog10(overallGCP[i]); - transition[i+1][matchToDeletion] = QualityUtils.qualToErrorProbLog10(deletionGOP[i]); - transition[i+1][deletionToDeletion] = QualityUtils.qualToErrorProbLog10(overallGCP[i]); - } - + PairHMMModel.qualToTransProbsLog10(transition,insertionGOP,deletionGOP,overallGCP); // note that we initialized the constants constantsAreInitialized = true; } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java index 18cb9054b..057c67a55 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java @@ -62,7 +62,7 @@ abstract class N2MemoryPairHMM extends PairHMM { insertionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; deletionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; - transition = new double[paddedMaxReadLength][6]; + transition = PairHMMModel.createTransitionMatrix(maxReadLength); prior = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index ff883c5ae..5762b33ba 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -218,8 +218,10 @@ public abstract class PairHMM { double result = subComputeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, hapStartIndex, recacheReadValues, nextHapStartIndex); - if ( ! MathUtils.goodLog10Probability(result) ) - throw new IllegalStateException("PairHMM Log Probability cannot be greater than 0: " + String.format("haplotype: %s, read: %s, result: %f", Arrays.toString(haplotypeBases), Arrays.toString(readBases), result)); + if ( result > 0.0) + throw new IllegalStateException("PairHMM Log Probability cannot be greater than 0: " + String.format("haplotype: %s, read: %s, result: %f, PairHMM: %s", new String(haplotypeBases), new String(readBases), result, this.getClass().getSimpleName())); + else if (!MathUtils.goodLog10Probability(result)) + throw new IllegalStateException("Invalid Log Probability: " + result); // Warning: Careful if using the PairHMM in parallel! (this update has to be taken care of). // Warning: This assumes no downstream modification of the haplotype bases (saves us from copying the array). It is okay for the haplotype caller and the Unified Genotyper. diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMModel.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMModel.java new file mode 100644 index 000000000..0b79c044f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMModel.java @@ -0,0 +1,437 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils.pairhmm; + +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; + +/** + * Helper class that implement calculations required to implement the PairHMM Finite State Automation (FSA) model. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class PairHMMModel { + + + /** + * Prevents instantiation of this class + */ + private PairHMMModel() { + + } + + /** + * Length of the standard transition probability array. + */ + public static final int TRANS_PROB_ARRAY_LENGTH = 6; + + /** + * Position in the transition probability array for the Match-to-Match transition. + */ + public static final int matchToMatch = 0; + + /** + * Position in the transition probability array for the Indel-to-Match transition. + */ + public static final int indelToMatch = 1; + + /** + * Position in the transition probability array for the Match-to-Insertion transition. + */ + public static final int matchToInsertion = 2; + + /** + * Position in the transition probability array for the Insertion-to-Insertion transition. + */ + public static final int insertionToInsertion = 3; + + /** + * Position in the transition probability array for the Match-to-Deletion transition. + */ + public static final int matchToDeletion = 4; + + /** + * Position in the transition probability array for the Deletion-to-Deletion transition. + */ + public static final int deletionToDeletion = 5; + + /** + * Convenient ln10 constant. + */ + private static double LN10 = Math.log(10); + + /** + * Convenient (ln10)^-1 constant. + */ + private static double INV_LN10 = 1.0 / LN10; + + /** + * Holds pre-calculated the matchToMath probability values in linear scale. + * + *

+ * This is a triangular matrix stored in a unidimentional array like so: + *

+ * (0,0), (0,1), (1,1), (0,2), (1,2), (2,2), (0,3) ... ({@link QualityUtils#MAX_QUAL},{@link QualityUtils#MAX_QUAL}) + */ + private static double[] matchToMatchProb = new double[((QualityUtils.MAX_QUAL + 1) * (QualityUtils.MAX_QUAL + 2)) >> 1]; + + /** + * Holds pre-calculated the matchToMath probability values in log10 scale. + * + *

+ * This is a triangular matrix stored in a unidimentional array like so: + *

+ * (0,0), (0,1), (1,1), (0,2), (1,2), (2,2), (0,3) ... ({@link QualityUtils#MAX_QUAL},{@link QualityUtils#MAX_QUAL}) + */ + private static double[] matchToMatchLog10 = new double[((QualityUtils.MAX_QUAL + 1) * (QualityUtils.MAX_QUAL + 2)) >> 1]; + + /** + * Initialize matchToMatch cache tables {@link #matchToMatch} and {@link #matchToMatchLog10} + */ + static { + for (int i = 0, offset = 0; i <= QualityUtils.MAX_QUAL; offset += ++i) + for (int j = 0; j <= i; j++) { + final double log10Sum = MathUtils.approximateLog10SumLog10(-0.1 * i,-0.1 * j); + matchToMatchLog10[offset + j] = + Math.log1p( - Math.min(1,Math.pow(10,log10Sum))) * INV_LN10; + matchToMatchProb[offset + j] = Math.pow(10,matchToMatchLog10[offset + j]); + } + } + + /** + * Fills a transition probability array given the different quality scores affecting a read site + * + * @param insQual the insertion quality score as a byte. + * @param delQual the deletion quality score as a byte. + * @param gcp the gap-continuation-penalty score as a byte. + * + * @throws NullPointerException if {@code dest} is {@code null}. + * @throws ArrayIndexOutOfBoundsException if {@code dest} is not large enough. + * @throws IllegalArgumentException if {@code insQual}, {@code delQual} or {@code gcp} is less than negative. + */ + public static void qualToTransProbs(final double[] dest, final byte insQual, final byte delQual, final byte gcp) { + if (insQual < 0) throw new IllegalArgumentException("insert quality cannot less than 0: " + insQual); + if (delQual < 0) throw new IllegalArgumentException("deletion quality cannot be less than 0: " + delQual); + if (gcp < 0) throw new IllegalArgumentException("gcp cannot be less than 0: " + gcp); + dest[matchToMatch] = matchToMatchProb(insQual, delQual); + dest[matchToInsertion] = QualityUtils.qualToErrorProb(insQual); + dest[matchToDeletion] = QualityUtils.qualToErrorProb(delQual); + dest[indelToMatch] = QualityUtils.qualToProb(gcp); + dest[insertionToInsertion] = dest[deletionToDeletion] = QualityUtils.qualToErrorProb(gcp); + + if (dest[matchToInsertion] + dest[matchToDeletion] > 1) throw new IllegalStateException("insError and delError cannot be such that the add to more than 1, insQual: " + insQual + " delQual: " + delQual); + } + + /** + * Returns a transition probability array given the different quality scores affecting a read site. + * + * @param insQual the insertion quality score as a byte. + * @param delQual the deletion quality score as a byte. + * @param gcp the gap-continuation-penalty score as a byte. + * + * @throws NullPointerException if {@code dest} is {@code null}. + * @throws ArrayIndexOutOfBoundsException if {@code dest} is not large enough. + * @throws IllegalArgumentException if {@code insQual}, {@code delQual} or {@code gcp} is less than negative. + * + * @return never {@code null}. An array of length {@link #TRANS_PROB_ARRAY_LENGTH}. + */ + @SuppressWarnings("unused") + public static double[] qualToTransProbs(final byte insQual, final byte delQual, final byte gcp) { + final double[] dest = new double[TRANS_PROB_ARRAY_LENGTH]; + qualToTransProbs(dest,insQual,delQual,gcp); + return dest; + } + + /** + * Fills ax matrix with the transition probabilities for a number of bases. + * + *

+ * The first dimension of the matrix correspond to the different bases where the first one is stored in position 1. + * Thus the position 0 is left empty and the length of the resulting matrix is actually {@code insQual.length + 1}. + *

+ * Each entry is the transition probability array for that base with a length of {@link #TRANS_PROB_ARRAY_LENGTH}. + * + * @param dest the matrix to update + * @param insQuals insertion qualities. + * @param delQuals deletion qualities. + * @param gcps gap-continuation penalty qualities. + * + * @throws NullPointerException if any of the input arrays, matrices is {@code null} or any entry in {@code dest} is {@code null}. + * @throws IllegalArgumentException if {@code IllegalArgumentException} + * if the input array don't have the same length. + * @throws ArrayIndexOutOfBoundsException if {@code dest} or any of its elements is not large enough to contain the + * transition matrix. + */ + @SuppressWarnings("unused") + public static void qualToTransProbs(final double[][] dest, final byte[] insQuals, final byte[] delQuals, final byte[] gcps) { + final int readLength = insQuals.length; + if (delQuals.length != readLength) throw new IllegalArgumentException("deletion quality array length does not match insert quality array length: " + readLength + " != " + delQuals.length); + if (gcps.length != readLength) throw new IllegalArgumentException("deletion quality array length does not match insert quality array length: " + readLength + " != " + gcps.length); + + if (dest.length < readLength + 1) throw new IllegalArgumentException("destination length is not enough for the read length: " + dest.length + " < " + readLength + " + 1"); + + for (int i = 0; i < readLength; i++) + qualToTransProbs(dest[i + 1], insQuals[i], delQuals[i], gcps[i]); + } + + /** + * Returns a matrix with the transition probabilities for a number of bases. + * + *

+ * The first dimension of the matrix correspond to the different bases where the first one is stored in position 1. + * Thus the position 0 is left empty and the length of the resulting matrix is actually {@code insQual.length + 1}. + *

+ * Each entry is the transition probability array for that base with a length of {@link #TRANS_PROB_ARRAY_LENGTH}. + * + * @param insQuals insertion qualities. + * @param delQuals deletion qualities. + * @param gcps gap-continuation penalty qualities. + * + * @throws NullPointerException if any of the input arrays is {@code null}. + * @throws IllegalArgumentException if {@code IllegalArgumentException} + * if the input array don't have the same length. + * + * @return never {@code null}, an matrix of the dimensions explained above. + */ + @SuppressWarnings("unused") + public static double[][] qualToTransProbs(final byte[] insQuals, final byte[] delQuals, final byte[] gcps) { + final double[][] dest = createTransitionMatrix(insQuals.length); + qualToTransProbs(dest,insQuals,delQuals,gcps); + return dest; + } + + /** + * Fills a transition log10 probability array given the different quality scores affecting a read site. + * + * @param insQual the insertion quality score as a byte. + * @param delQual the deletion quality score as a byte. + * @param gcp the gap-continuation-penalty score as a byte. + * + * @throws NullPointerException if {@code dest} is {@code null}. + * @throws ArrayIndexOutOfBoundsException if {@code dest} is not large enough. + * @throws IllegalArgumentException if {@code insQual}, {@code delQual} or {@code gcp} is less than negative. + */ + public static void qualToTransProbsLog10(final double[] dest, final byte insQual, final byte delQual, final byte gcp) { + if (insQual < 0) throw new IllegalArgumentException("insert quality cannot less than 0: " + insQual); + if (delQual < 0) throw new IllegalArgumentException("deletion quality cannot be less than 0: " + delQual); + if (gcp < 0) throw new IllegalArgumentException("gcp cannot be less than 0: " + gcp); + dest[matchToMatch] = matchToMatchProbLog10(insQual, delQual); + dest[matchToInsertion] = QualityUtils.qualToErrorProbLog10(insQual); + dest[matchToDeletion] = QualityUtils.qualToErrorProbLog10(delQual); + dest[indelToMatch] = QualityUtils.qualToProbLog10(gcp); + dest[insertionToInsertion] = dest[deletionToDeletion] = QualityUtils.qualToErrorProbLog10(gcp); + } + + /** + * Returns a transition log10 probability array given the different quality scores affecting a read site. + * + * @param insQual the insertion quality score as a byte. + * @param delQual the deletion quality score as a byte. + * @param gcp the gap-continuation-penalty score as a byte. + * + * @throws NullPointerException if {@code dest} is {@code null}. + * @throws ArrayIndexOutOfBoundsException if {@code dest} is not large enough. + * @throws IllegalArgumentException if {@code insQual}, {@code delQual} or {@code gcp} is less than negative. + * + * @return never {@code null}. An array of length {@link #TRANS_PROB_ARRAY_LENGTH}. + */ + @SuppressWarnings("unused") + public static double[] qualToTransProbsLog10(final byte insQual, final byte delQual, final byte gcp) { + final double[] dest = new double[TRANS_PROB_ARRAY_LENGTH]; + qualToTransProbsLog10(dest,insQual,delQual,gcp); + return dest; + } + + /** + * Fills a matrix with the log10 transition probabilities for a number of bases. + * + *

+ * The first dimension of the matrix correspond to the different bases where the first one is stored in position 1. + * Thus the position 0 is left empty and the length of the resulting matrix is actually {@code insQual.length + 1}. + *

+ * Each entry is the transition probability array for that base with a length of {@link #TRANS_PROB_ARRAY_LENGTH}. + * + * @param insQuals insertion qualities. + * @param delQuals deletion qualities. + * @param gcps gap-continuation penalty qualities. + * + * @throws NullPointerException if any of the input arrays, matrices is {@code null} or any entry in {@code dest} is {@code null}. + * @throws IllegalArgumentException if {@code IllegalArgumentException} + * if the input array don't have the same length. + * @throws ArrayIndexOutOfBoundsException if {@code dest} or any of its elements is not large enough to contain the + * transition matrix. + */ + @SuppressWarnings("unused") + public static void qualToTransProbsLog10(final double[][] dest, final byte[] insQuals, final byte[] delQuals, final byte[] gcps) { + final int readLength = insQuals.length; + if (delQuals.length != readLength) throw new IllegalArgumentException("deletion quality array length does not match insert quality array length: " + readLength + " != " + delQuals.length); + if (gcps.length != readLength) throw new IllegalArgumentException("deletion quality array length does not match insert quality array length: " + readLength + " != " + gcps.length); + if (dest.length < readLength + 1) throw new IllegalArgumentException("destination length is not enough for the read length: " + dest.length + " < " + readLength + " + 1"); + + for (int i = 0; i < readLength; i++) + qualToTransProbsLog10(dest[i+1],insQuals[i],delQuals[i],gcps[i]); + } + + /** + * Returns a matrix with the log10 transition probabilities for a number of bases. + * + *

+ * The first dimension of the matrix correspond to the different bases where the first one is stored in position 1. + * Thus the position 0 is left empty and the length of the resulting matrix is actually {@code insQual.length + 1}. + *

+ * Each entry is the transition probability array for that base with a length of {@link #TRANS_PROB_ARRAY_LENGTH}. + * + * @param insQuals insertion qualities. + * @param delQuals deletion qualities. + * @param gcps gap-continuation penalty qualities. + * + * @throws NullPointerException if any of the input arrays is {@code null}. + * @throws IllegalArgumentException if {@code IllegalArgumentException} + * if the input array don't have the same length. + * + * @return never {@code null}, an matrix of the dimensions explained above. + */ + @SuppressWarnings("unused") + public static double[][] qualToTransProbsLog10(final byte[] insQuals, final byte[] delQuals, final byte[] gcps) { + final double[][] dest = createTransitionMatrix(insQuals.length); + qualToTransProbsLog10(dest,insQuals,delQuals,gcps); + return dest; + } + + /** + * Creates a transition probability matrix large enough to work with sequences of a particular length. + * + * @param maxReadLength the maximum read length for the transition matrix. + * + * @return never {@code null}. A matrix of {@code maxReadLength + 1} by {@link #TRANS_PROB_ARRAY_LENGTH} positions. + */ + public static double[][] createTransitionMatrix(final int maxReadLength) { + return new double[maxReadLength + 1][TRANS_PROB_ARRAY_LENGTH]; + } + + /** + * Returns the probability that neither of two event takes place. + *

+ * + * We assume that both event never occur together and that delQual is the conditional probability + * (qual. encoded) of the second event, given the first event didn't took place. So that the + * probability of no event is:
+ * + * We assume that both event never occur together so that the probability of no event is:
+ * + * 1 - ProbErr(insQual) - ProbErr(delQual)
+ * + * @param insQual PhRED scaled quality/probability of the first event. + * @param delQual PhRED scaled quality/probability of the second event. + * + * @return a value between 0 and 1. + */ + public static double matchToMatchProb(final byte insQual, final byte delQual) { + return matchToMatchProb((insQual & 0xFF), (delQual & 0xFF)); + } + + /** + * Returns the probability (log 10 scaled) that neither of two event, insertion and deletion, takes place. + *

+ * + * We assume that both event never occur together so that the probability of no event is:
+ * + * 1 - ProbErr(insQual) - ProbErr(delQual)
+ * + * @param insQual PhRED scaled quality/probability of an insertion. + * @param delQual PhRED scaled quality/probability of a deletion. + * + * @return a value between 0 and -Inf. + */ + public static double matchToMatchProbLog10(final byte insQual, final byte delQual) { + return matchToMatchProbLog10((insQual & 0xFF), (delQual & 0xFF)); + } + + /** + * Returns the probability that neither of two events, insertion and deletion, takes place. + *

+ * + * We assume that both event never occur together and that delQual is the conditional probability + * (qual. encoded) of the second event, given the first event didn't took place. So that the + * probability of no event is:
+ * + * We assume that both event never occur together so that the probability of no event is:
+ * + * 1 - ProbErr(insQual) - ProbErr(delQual)
+ * + * @param insQual PhRED scaled quality/probability of an insertion. + * @param delQual PhRED scaled quality/probability of a deletion. + * @return a value between 0 and 1. + */ + public static double matchToMatchProb(final int insQual, final int delQual) { + final int minQual; + final int maxQual; + if (insQual <= delQual) { + minQual = insQual; + maxQual = delQual; + } else { + minQual = delQual; + maxQual = insQual; + } + + if (minQual < 0) throw new IllegalArgumentException("quality cannot be negative: " + minQual + " and " + maxQual); + + return (QualityUtils.MAX_QUAL < maxQual) ? 1.0 - Math.pow(10, MathUtils.approximateLog10SumLog10(-0.1 * minQual, -0.1 * maxQual)) : + matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; + } + + /** + * Returns the probability (log 10 scaled) that neither of two event takes place. + *

+ * + * We assume that both event never occur together and that delQual is the conditional probability (qual. encoded) + * of the second event, given the first event didn't took place. So that the probability of no event is:
+ * + * We assume that both event never occur together so that the probability of no event is:
+ * + * 1 - ProbErr(insQual) - ProbErr(delQual)
+ * + * @param insQual PhRED scaled quality/probability of an insertion. + * @param delQual PhRED scaled quality/probability of a deletion. + * + * @return a value between 0 and -Inf. + */ + public static double matchToMatchProbLog10(final int insQual, final int delQual) { + final int minQual; + final int maxQual; + if (insQual <= delQual) { + minQual = insQual; + maxQual = delQual; + } else { + minQual = delQual; + maxQual = insQual; + } + return (QualityUtils.MAX_QUAL < maxQual) ? Math.log1p ( + - Math.min(1,Math.pow(10, + MathUtils.approximateLog10SumLog10(-.1 * minQual, -.1 * maxQual)))) * INV_LN10 : + matchToMatchLog10[((maxQual * (maxQual + 1)) >> 1) + minQual]; + } +} diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index c1e11e2ce..9ceb5904d 100644 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -109,16 +109,14 @@ public abstract class BaseTest { public static final String hapmapDataLocation = comparisonDataLocation + "Validated/HapMap/3.3/"; public static final String b37hapmapGenotypes = hapmapDataLocation + "genotypes_r27_nr.b37_fwd.vcf"; - public static final String b37hapmapSites = hapmapDataLocation + "sites_r27_nr.b37_fwd.vcf"; - public static final String intervalsLocation = GATKDataLocation; + public static final String intervalsLocation = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/"; public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list"; - public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list"; + public static final String hg19Chr20Intervals = GATKDataLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list"; public static final boolean REQUIRE_NETWORK_CONNECTION = false; private static final String networkTempDirRoot = "/broad/hptmp/"; private static final boolean networkTempDirRootExists = new File(networkTempDirRoot).exists(); - private static final String networkTempDir; private static final File networkTempDirFile; private static final String privateTestDirRelative = "private/testdata/"; @@ -157,9 +155,7 @@ public abstract class BaseTest { if (networkTempDirRootExists) { networkTempDirFile = IOUtils.tempDir("temp.", ".dir", new File(networkTempDirRoot + System.getProperty("user.name"))); networkTempDirFile.deleteOnExit(); - networkTempDir = networkTempDirFile.getAbsolutePath() + "/"; } else { - networkTempDir = null; networkTempDirFile = null; } @@ -190,26 +186,27 @@ public abstract class BaseTest { * 2: Create instances of your subclass. Return from it the call to getTests, providing * the class type of your test * - * @DataProvider(name = "summaries" + * + * {@literal @}DataProvider(name = "summaries") * public Object[][] createSummaries() { * new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2"); * new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1"); * return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class); * } + * * * This class magically tracks created objects of this */ public static class TestDataProvider { - private static final Map> tests = new HashMap>(); + private static final Map> tests = new HashMap<>(); protected String name; /** * Create a new TestDataProvider instance bound to the class variable C - * @param c */ public TestDataProvider(Class c, String name) { if ( ! tests.containsKey(c) ) - tests.put(c, new ArrayList()); + tests.put(c, new ArrayList<>()); tests.get(c).add(this); this.name = name; } @@ -510,4 +507,21 @@ public abstract class BaseTest { } else return false; } + + /** + * Checks whether two double array contain the same values or not. + * @param actual actual produced array. + * @param expected expected array. + * @param tolerance maximum difference between double value to be consider equivalent. + */ + protected static void assertEqualsDoubleArray(final double[] actual, final double[] expected, final double tolerance) { + if (expected == null) + Assert.assertNull(actual); + else { + Assert.assertNotNull(actual); + Assert.assertEquals(actual.length,expected.length,"array length"); + } + for (int i = 0; i < actual.length; i++) + Assert.assertEquals(actual[i],expected[i],tolerance,"array position " + i); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java index f5c7a14df..c8cbeeaf2 100644 --- a/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java @@ -37,7 +37,8 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.*; +import java.util.ArrayList; +import java.util.List; /** * Basic unit test for QualityUtils class @@ -51,7 +52,7 @@ public class QualityUtilsUnitTest extends BaseTest { @DataProvider(name = "QualTest") public Object[][] makeMyDataProvider() { - List tests = new ArrayList(); + final List tests = new ArrayList<>(); for ( int qual = 0; qual < 255; qual++ ) { tests.add(new Object[]{(byte)(qual & 0xFF), Math.pow(10.0, ((double)qual)/-10.0)}); @@ -151,7 +152,7 @@ public class QualityUtilsUnitTest extends BaseTest { @DataProvider(name = "PhredScaleDoubleOps") public Object[][] makePhredDoubleTest() { - List tests = new ArrayList(); + final List tests = new ArrayList<>(); tests.add(new Object[]{0.0, -10 * Math.log10(Double.MIN_VALUE)}); tests.add(new Object[]{1.0, 0.0}); From 748d2fdf92f5f92ee58c25729b58862128e43417 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Sun, 26 Jan 2014 23:27:03 -0500 Subject: [PATCH 2/2] Added Integration test to verify the bugs are not there anymore as reported in pivotracker --- .../PairHMMProbabilityBugIntegrationTest.java | 86 +++++++++++++++++++ .../sting/utils/pairhmm/PairHMMModel.java | 2 - 2 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMProbabilityBugIntegrationTest.java diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMProbabilityBugIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMProbabilityBugIntegrationTest.java new file mode 100644 index 000000000..69100bcdd --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMProbabilityBugIntegrationTest.java @@ -0,0 +1,86 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.sting.utils.pairhmm; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; + +/** + * Test for the Prob > 1 bug in PairHMM using callers. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class PairHMMProbabilityBugIntegrationTest extends WalkerTest { + + private static final File REFERENCE = new File("/humgen/gsa-hpprojects/GATK/bundle/current/hg19/ucsc.hg19.fasta").getAbsoluteFile(); + private static final File BAM = new File ("private/testdata", "pairhmm_prob_bug.bam").getAbsoluteFile(); + private static final File INTERVAL = new File ("private/testdata", "pairhmm_prob_bug.interval.bed").getAbsoluteFile(); + + private static final File UG_BAM = new File("private/testdata", "pairhmm_prob_bug.ug.bam").getAbsoluteFile(); + private static final File UG_INTERVAL = new File("private/testdata", "pairhmm_prob_bug.ug.intervals.bed").getAbsoluteFile(); + + + @Test + public void testHaplotypeCaller() { + final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s", + REFERENCE,BAM,INTERVAL); + final String name = getClass().getSimpleName() + ".testHaplotypeCaller"; + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("")); + executeTest(name, spec); + } + + @Test + public void testUnifiedGenotyper() { + final String commandLine = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -dcov 200 -glm INDEL", + REFERENCE,UG_BAM,UG_INTERVAL); + final String name = getClass().getSimpleName() + ".testUnifiedGenotyper"; + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("")); + executeTest(name, spec); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMModel.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMModel.java index 0b79c044f..551be676a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMModel.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMModel.java @@ -141,8 +141,6 @@ public class PairHMMModel { dest[matchToDeletion] = QualityUtils.qualToErrorProb(delQual); dest[indelToMatch] = QualityUtils.qualToProb(gcp); dest[insertionToInsertion] = dest[deletionToDeletion] = QualityUtils.qualToErrorProb(gcp); - - if (dest[matchToInsertion] + dest[matchToDeletion] > 1) throw new IllegalStateException("insError and delError cannot be such that the add to more than 1, insQual: " + insQual + " delQual: " + delQual); } /**