diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 5eaaba0dd..dc5fed340 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -80,7 +80,7 @@ public class LikelihoodCalculationEngine { pairHMM = new Log10PairHMM(false); break; case LOGLESS_CACHING: - pairHMM = new LoglessCachingPairHMM(); + pairHMM = new LoglessPairHMM(); break; default: throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, and LOGLESS_CACHING."); diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java similarity index 74% rename from protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java rename to protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java index 24d6e1220..93a7f63d0 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java @@ -55,25 +55,14 @@ import org.broadinstitute.sting.utils.QualityUtils; * User: rpoplin, carneiro * Date: 10/16/12 */ -public class LoglessCachingPairHMM extends PairHMM { +public class LoglessPairHMM extends PairHMM { protected static final double SCALE_FACTOR_LOG10 = 300.0; + protected static final double INITIAL_CONDITION = Math.pow(10, SCALE_FACTOR_LOG10); - double[][] constantMatrix = null; // The cache - double[][] distanceMatrix = null; // The cache + double[][] transition = null; // The cache + double[][] prior = null; // The cache boolean constantsAreInitialized = false; - /** - * Cached data structure that describes the first row's edge condition in the HMM - */ - protected static final double [] firstRowConstantMatrix = { - QualityUtils.qualToProb((byte) (DEFAULT_GOP + DEFAULT_GOP)), - QualityUtils.qualToProb(DEFAULT_GCP), - QualityUtils.qualToErrorProb(DEFAULT_GOP), - QualityUtils.qualToErrorProb(DEFAULT_GCP), - 1.0, - 1.0 - }; - /** * {@inheritDoc} */ @@ -81,8 +70,8 @@ public class LoglessCachingPairHMM extends PairHMM { public void initialize( final int haplotypeMaxLength, final int readMaxLength) { super.initialize(haplotypeMaxLength, readMaxLength); - constantMatrix = new double[X_METRIC_MAX_LENGTH][6]; - distanceMatrix = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH]; + transition = new double[paddedMaxReadLength][6]; + prior = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; } /** @@ -98,8 +87,8 @@ public class LoglessCachingPairHMM extends PairHMM { final int hapStartIndex, final boolean recacheReadValues ) { if ( ! constantsAreInitialized || recacheReadValues ) - initializeConstants( haplotypeBases.length, readBases.length, insertionGOP, deletionGOP, overallGCP ); - initializeDistanceMatrix( haplotypeBases, readBases, readQuals, hapStartIndex ); + initializeProbabilities(haplotypeBases.length, insertionGOP, deletionGOP, overallGCP); + initializePriors(haplotypeBases, readBases, readQuals, hapStartIndex); // NOTE NOTE NOTE -- because of caching we need to only operate over X and Y according to this // read and haplotype lengths, not the max lengths @@ -109,14 +98,19 @@ public class LoglessCachingPairHMM extends PairHMM { for (int i = 2; i < readXMetricLength; i++) { // +1 here is because hapStartIndex is 0-based, but our matrices are 1 based for (int j = hapStartIndex+1; j < hapYMetricLength; j++) { - updateCell(i, j, distanceMatrix[i][j], constantMatrix[i], matchMetricArray, XMetricArray, YMetricArray); + updateCell(i, j, prior[i][j], transition[i]); } } - // final probability is the log10 sum of the last element in all three state arrays + // final probability is the log10 sum of the last element in the Match and Insertion state arrays + // this way we ignore all paths that ended in deletions! (huge) + // but we have to sum all the paths ending in the M and I matrices, because they're no longer extended. final int endI = readXMetricLength - 1; - final int endJ = hapYMetricLength - 1; - return Math.log10( matchMetricArray[endI][endJ] + XMetricArray[endI][endJ] + YMetricArray[endI][endJ] ) - SCALE_FACTOR_LOG10; + double finalSumProbabilities = 0.0; + for (int j = 0; j < hapYMetricLength; j++) { + finalSumProbabilities += matchMatrix[endI][j] + insertionMatrix[endI][j]; + } + return Math.log10(finalSumProbabilities) - SCALE_FACTOR_LOG10; } /** @@ -128,10 +122,7 @@ public class LoglessCachingPairHMM extends PairHMM { * @param readQuals the base quality scores of the read * @param startIndex where to start updating the distanceMatrix (in case this read is similar to the previous read) */ - public void initializeDistanceMatrix( final byte[] haplotypeBases, - final byte[] readBases, - final byte[] readQuals, - final int startIndex ) { + public void initializePriors(final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals, final int startIndex) { // initialize the pBaseReadLog10 matrix for all combinations of read x haplotype bases // Abusing the fact that java initializes arrays with 0.0, so no need to fill in rows and columns below 2. @@ -141,7 +132,7 @@ public class LoglessCachingPairHMM extends PairHMM { final byte qual = readQuals[i]; for (int j = startIndex; j < haplotypeBases.length; j++) { final byte y = haplotypeBases[j]; - distanceMatrix[i+2][j+2] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? + prior[i+2][j+2] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? QualityUtils.qualToProb(qual) : QualityUtils.qualToErrorProb(qual) ); } } @@ -150,46 +141,36 @@ public class LoglessCachingPairHMM extends PairHMM { /** * Initializes the matrix that holds all the constants related to quality scores. * - * @param haplotypeSize the number of bases in the haplotype we are testing - * @param readSize the number of bases in the read we are testing * @param insertionGOP insertion quality scores of the read * @param deletionGOP deletion quality scores of the read * @param overallGCP overall gap continuation penalty */ @Requires({ - "haplotypeSize > 0", - "readSize > 0", - "insertionGOP != null && insertionGOP.length == readSize", - "deletionGOP != null && deletionGOP.length == readSize", - "overallGCP != null && overallGCP.length == readSize" + "insertionGOP != null", + "deletionGOP != null", + "overallGCP != null" }) @Ensures("constantsAreInitialized") - private void initializeConstants( final int haplotypeSize, - final int readSize, - final byte[] insertionGOP, - final byte[] deletionGOP, - final byte[] overallGCP ) { + private void initializeProbabilities(final int haplotypeLength, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) { // the initial condition -- must be here because it needs that actual read and haplotypes, not the maximum in init - matchMetricArray[1][1] = Math.pow(10.0, SCALE_FACTOR_LOG10) / getNPotentialXStarts(haplotypeSize, readSize); + final double initialValue = INITIAL_CONDITION / haplotypeLength; + matchMatrix[1][1] = initialValue; // fill in the first row - for( int jjj = 2; jjj < Y_METRIC_MAX_LENGTH; jjj++ ) { - updateCell(1, jjj, 1.0, firstRowConstantMatrix, matchMetricArray, XMetricArray, YMetricArray); + for( int jjj = 2; jjj < paddedMaxHaplotypeLength; jjj++ ) { + deletionMatrix[1][jjj] = initialValue; } final int l = insertionGOP.length; - constantMatrix[1] = firstRowConstantMatrix; for (int i = 0; i < l; i++) { final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE); - constantMatrix[i+2][0] = QualityUtils.qualToProb((byte) qualIndexGOP); - constantMatrix[i+2][1] = QualityUtils.qualToProb(overallGCP[i]); - constantMatrix[i+2][2] = QualityUtils.qualToErrorProb(insertionGOP[i]); - constantMatrix[i+2][3] = QualityUtils.qualToErrorProb(overallGCP[i]); - constantMatrix[i+2][4] = QualityUtils.qualToErrorProb(deletionGOP[i]); - constantMatrix[i+2][5] = QualityUtils.qualToErrorProb(overallGCP[i]); + transition[i+2][0] = QualityUtils.qualToProb((byte) qualIndexGOP); + transition[i+2][1] = QualityUtils.qualToProb(overallGCP[i]); + transition[i+2][2] = QualityUtils.qualToErrorProb(insertionGOP[i]); + transition[i+2][3] = QualityUtils.qualToErrorProb(overallGCP[i]); + transition[i+2][4] = QualityUtils.qualToErrorProb(deletionGOP[i]); + transition[i+2][5] = QualityUtils.qualToErrorProb(overallGCP[i]); } - constantMatrix[l+1][4] = 1.0; - constantMatrix[l+1][5] = 1.0; // note that we initialized the constants constantsAreInitialized = true; @@ -204,18 +185,14 @@ public class LoglessCachingPairHMM extends PairHMM { * @param indI row index in the matrices to update * @param indJ column index in the matrices to update * @param prior the likelihood editing distance matrix for the read x haplotype - * @param constants an array with the six constants relevant to this location - * @param matchMetricArray the matches likelihood matrix - * @param XMetricArray the insertions likelihood matrix - * @param YMetricArray the deletions likelihood matrix + * @param transitition an array with the six transitition relevant to this location */ - private void updateCell( final int indI, final int indJ, final double prior, final double[] constants, - final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) { + private void updateCell( final int indI, final int indJ, final double prior, final double[] transitition) { - matchMetricArray[indI][indJ] = prior * ( matchMetricArray[indI - 1][indJ - 1] * constants[0] + - XMetricArray[indI - 1][indJ - 1] * constants[1] + - YMetricArray[indI - 1][indJ - 1] * constants[1] ); - XMetricArray[indI][indJ] = matchMetricArray[indI - 1][indJ] * constants[2] + XMetricArray[indI - 1][indJ] * constants[3]; - YMetricArray[indI][indJ] = matchMetricArray[indI][indJ - 1] * constants[4] + YMetricArray[indI][indJ - 1] * constants[5]; + matchMatrix[indI][indJ] = prior * ( matchMatrix[indI - 1][indJ - 1] * transitition[0] + + insertionMatrix[indI - 1][indJ - 1] * transitition[1] + + deletionMatrix[indI - 1][indJ - 1] * transitition[1] ); + insertionMatrix[indI][indJ] = matchMatrix[indI - 1][indJ] * transitition[2] + insertionMatrix[indI - 1][indJ] * transitition[3]; + deletionMatrix[indI][indJ] = matchMatrix[indI][indJ - 1] * transitition[4] + deletionMatrix[indI][indJ - 1] * transitition[5]; } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index ef9f483ff..5cdc2c65f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; -import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.*; +import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.LSV_ALLELES; /** * Created by IntelliJ IDEA. @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "5812da66811887d834d0379a33e655c0"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "faadc0b77a91a716dbb1191fd579d025"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index dc9220b7e..4299b024b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -49,7 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; -import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.*; +import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.CEUTRIO_BAM; +import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.NA12891_CALLS; public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTest { @@ -57,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","3a321896c4b8b6457973c76c486da4d4"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","fe715b715526a7c1ebd575ff66bba716"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 8d0c1f04f..d0d77c8e0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -72,7 +72,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("1cb469b9cc8e6c70430021540bf1af8b")); + Arrays.asList("51e022d07ead45a4e154f949b6642e84")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -87,7 +87,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("c7e59f9ab718df4c604626a0f51af606")); + Arrays.asList("1d9c6fda344eeee76cbe4221251dc341")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -100,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("b6ad80cef63cab4f75fa4b1fb2517d1d")); + Arrays.asList("2ec7262f0a3d04534ce1fe15cc79f52e")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -110,7 +110,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("86880ec78755ae91cb5bb34a0631a32c")); + Arrays.asList("3131cd7c49b623983a106db5228754b3")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -120,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("2584d5e3ade1b548f1fe9cdcafbe1b28")); + Arrays.asList("273f5daa936e93da98efd6ceb37d7533")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("939da0bb73b706badd8a0def7446b384")); + Arrays.asList("00a003a0908281384e981294434a9f3e")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -175,7 +175,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("556c214366e82e4682e753ce93307a4e")); + Arrays.asList("87521a1bde124c7c5908ed067060fe45")); executeTest("test minIndelFraction 0.0", spec); } @@ -183,7 +183,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("1df02b805d9dfbd532fa3632875a989d")); + Arrays.asList("8a880b8b1662e31e0b5c65733eac6b74")); executeTest("test minIndelFraction 0.25", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 2512dd5c2..c10b3d6df 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("2f15ef1ead56d875a3f1d53772f52b3a")); + Arrays.asList("3f8ee598c9b85aa1d2b85746ad46c1af")); executeTest("test MultiSample Pilot1", spec); } @@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("39ec0b48cd51d797af7ed09cb9ba607e")); + Arrays.asList("31c0f0074b3306b54170056e93b69e11")); executeTest("test Multiple SNP alleles", spec); } @@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("eb9604b77a7d6baab60c81ac3db5e47b")); + Arrays.asList("753d6358b1634107de76900200116805")); executeTest("test reverse trim", spec); } @@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("6b77b8f1002ec577bf0482fbe03222a4")); + Arrays.asList("274eadae8a630a3fda9281d6d6253dea")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index 0620f15df..b63c591ce 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -74,7 +74,7 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "9a702e7a85465f6c42d6c1828aee6c38"); + testReducedCalling("INDEL", "c5939a7f5f85ea2fe994ce912732e180"); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 3aaffdeaa..9400b3dd2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -49,10 +49,11 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; -import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCallerIntegrationTest.*; - import java.util.Arrays; +import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCallerIntegrationTest.NA12878_CHR20_BAM; +import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCallerIntegrationTest.REF; + public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends WalkerTest { private void HCTestComplexVariants(String bam, String args, String md5) { @@ -63,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "f9fa4d3c88fd9c0f23c7a3ddd3d24a8c"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a898b551f78c71befee4d12070d3a788"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -87,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "e8ffbfae3c1af5be02631a31f386a431"); + "8a110549543412fa682419e9a8f0dd1d"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "c3a98b19efa7cb36fe5f5f2ab893ef56"); + "5429c234d471434adc09d9e60b87de24"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index c5614d405..c416938cd 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -69,12 +69,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "45856ad67bfe8d8bea45808d8258bcf1"); + HCTest(CEUTRIO_BAM, "", "008958c211a8a439a7213a96f3dd7f6c"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "b6c93325f851ac358ea49260fb11b75c"); + HCTest(NA12878_BAM, "", "3b60c6133eeadfea028dffea93b88478"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "4ca6b560d0569cdca400d3e50915e211"); + "70bd5d0805bf6f51e5f61b377526c979"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "5d06ec5502d3f157964bd7b275d6a0cb"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "4141b4c24a136a3fe4c0b0a4c231cdfa"); } @Test @@ -111,14 +111,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("53a50dae68f0175ca3088dea1d3bb881")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("35a8edeca7518835d67a10de21493eca")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("d3bc6adde8cd9514ae5c49cd366d5de4")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c81d7e69dd4116890f06a71b19870300")); executeTest("HCTestStructuralIndels: ", spec); } @@ -140,7 +140,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("4adb833ed8af20224b76bba61e2b0d93")); + Arrays.asList("f0a215faed194dc160f19e26293e85f8")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -148,7 +148,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("1704b0901c86f8f597d931222d5c8dd8")); + Arrays.asList("bea274584344fa6b4b0f98eee327bad8")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java index 555c02cde..96eaa109e 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -67,7 +67,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest { for ( final int nct : Arrays.asList(1, 2) ) { // tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct }); //// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct }); - tests.add(new Object[]{ "BOTH", "85fc5d6dfeb60ed89763470f4b4c981e", nt, nct }); + tests.add(new Object[]{ "BOTH", "9a1202d849653f0480932f450ec507b4", nt, nct }); } return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java index c94674c98..6dbcd0220 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java @@ -70,7 +70,7 @@ public class PairHMMUnitTest extends BaseTest { final static boolean EXTENSIVE_TESTING = true; final PairHMM exactHMM = new Log10PairHMM(true); // the log truth implementation final PairHMM originalHMM = new Log10PairHMM(false); // the reference implementation - final PairHMM loglessHMM = new LoglessCachingPairHMM(); + final PairHMM loglessHMM = new LoglessPairHMM(); private List getHMMs() { return Arrays.asList(exactHMM, originalHMM, loglessHMM); @@ -116,13 +116,12 @@ public class PairHMMUnitTest extends BaseTest { return String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual); } - public double expectedLogL(final PairHMM hmm) { - return (expectedQual / -10.0) + 0.03 + - hmm.getNPotentialXStartsLikelihoodPenaltyLog10(refBasesWithContext.length, readBasesWithContext.length); + public double expectedLogL() { + return (expectedQual / -10.0) + 0.03 + Math.log10(1.0/refBasesWithContext.length); } public double getTolerance(final PairHMM hmm) { - if ( hmm instanceof LoglessCachingPairHMM ) + if ( hmm instanceof LoglessPairHMM) return toleranceFromExact(); if ( hmm instanceof Log10PairHMM ) { return ((Log10PairHMM)hmm).isDoingExactLog10Calculations() ? toleranceFromExact() : toleranceFromReference(); @@ -150,7 +149,7 @@ public class PairHMMUnitTest extends BaseTest { qualAsBytes(gcp, false, anchorIndel), 0, true); } - private final byte[] asBytes(final String bases, final boolean left, final boolean right) { + private byte[] asBytes(final String bases, final boolean left, final boolean right) { return ( (left ? LEFT_FLANK : "") + CONTEXT + bases + CONTEXT + (right ? RIGHT_FLANK : "")).getBytes(); } @@ -163,7 +162,7 @@ public class PairHMMUnitTest extends BaseTest { // update just the bases corresponding to the provided micro read with the quality scores if( doGOP ) { - phredQuals[0 + CONTEXT.length()] = (byte)phredQual; + phredQuals[CONTEXT.length()] = (byte)phredQual; } else { for ( int i = 0; i < read.length(); i++) phredQuals[i + CONTEXT.length()] = (byte)phredQual; @@ -270,7 +269,7 @@ public class PairHMMUnitTest extends BaseTest { final double exactLogL = cfg.calcLogL( exactHMM, true ); for ( final PairHMM hmm : getHMMs() ) { double actualLogL = cfg.calcLogL( hmm, true ); - double expectedLogL = cfg.expectedLogL(hmm); + double expectedLogL = cfg.expectedLogL(); // compare to our theoretical expectation with appropriate tolerance Assert.assertEquals(actualLogL, expectedLogL, cfg.toleranceFromTheoretical(), "Failed with hmm " + hmm); @@ -322,8 +321,8 @@ public class PairHMMUnitTest extends BaseTest { System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1); - // - log10 is because of number of start positions - Assert.assertEquals(res1, -2.0 - Math.log10(originalHMM.getNPotentialXStarts(haplotype1.length, mread.length)), 1e-2); + final double expected = Math.log10(1.0/haplotype1.length * Math.pow(QualityUtils.qualToProb(90), mread.length-1) * QualityUtils.qualToErrorProb(20)); + Assert.assertEquals(res1, expected, 1e-2); } } @@ -354,8 +353,8 @@ public class PairHMMUnitTest extends BaseTest { System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1); - // - log10 is because of number of start positions - Assert.assertEquals(res1, -2.0 - Math.log10(originalHMM.getNPotentialXStarts(haplotype1.length, mread.length)), 1e-2); + final double expected = Math.log10(1.0/haplotype1.length * Math.pow(QualityUtils.qualToProb(90), mread.length-1) * QualityUtils.qualToErrorProb(20)); + Assert.assertEquals(res1, expected, 1e-2); } } @@ -406,8 +405,14 @@ public class PairHMMUnitTest extends BaseTest { Utils.dupBytes(insQual, readBases.length), Utils.dupBytes(delQual, readBases.length), Utils.dupBytes(gcp, readBases.length), 0, true); - final double expected = Math.log10(Math.pow(1.0 - QualityUtils.qualToErrorProb(baseQual), readBases.length)); - Assert.assertEquals(d, expected, 1e-3, "Likelihoods should sum to just the error prob of the read"); + double expected = 0; + final double initialCondition = ((double) Math.abs(refBases.length-readBases.length+1))/refBases.length; + if (readBases.length < refBases.length) { + expected = Math.log10(initialCondition * Math.pow(QualityUtils.qualToProb(baseQual), readBases.length)); + } else if (readBases.length > refBases.length) { + expected = Math.log10(initialCondition * Math.pow(QualityUtils.qualToProb(baseQual), refBases.length) * Math.pow(QualityUtils.qualToErrorProb(insQual), readBases.length - refBases.length)); + } + Assert.assertEquals(d, expected, 1e-3, "Likelihoods should sum to just the error prob of the read " + String.format("readSize=%d refSize=%d", readSize, refSize)); } @DataProvider(name = "HMMProviderWithBigReads") @@ -472,7 +477,7 @@ public class PairHMMUnitTest extends BaseTest { Utils.dupBytes(insQual, readBases.length), Utils.dupBytes(delQual, readBases.length), Utils.dupBytes(gcp, readBases.length), 0, true); - loglessHMM.dumpMatrices(); +// loglessHMM.dumpMatrices(); } @DataProvider(name = "JustHMMProvider") @@ -610,7 +615,7 @@ public class PairHMMUnitTest extends BaseTest { public Object[][] makeUninitializedHMMs() { List tests = new ArrayList(); - tests.add(new Object[]{new LoglessCachingPairHMM()}); + tests.add(new Object[]{new LoglessPairHMM()}); tests.add(new Object[]{new Log10PairHMM(true)}); return tests.toArray(new Object[][]{}); diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java index 62793bc54..d7c55e37c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java @@ -67,10 +67,10 @@ public class Log10PairHMM extends PairHMM { public void initialize( final int haplotypeMaxLength, final int readMaxLength) { super.initialize(haplotypeMaxLength, readMaxLength); - for( int iii=0; iii < X_METRIC_MAX_LENGTH; iii++ ) { - Arrays.fill(matchMetricArray[iii], Double.NEGATIVE_INFINITY); - Arrays.fill(XMetricArray[iii], Double.NEGATIVE_INFINITY); - Arrays.fill(YMetricArray[iii], Double.NEGATIVE_INFINITY); + for( int iii=0; iii < paddedMaxReadLength; iii++ ) { + Arrays.fill(matchMatrix[iii], Double.NEGATIVE_INFINITY); + Arrays.fill(insertionMatrix[iii], Double.NEGATIVE_INFINITY); + Arrays.fill(deletionMatrix[iii], Double.NEGATIVE_INFINITY); } } @@ -88,7 +88,8 @@ public class Log10PairHMM extends PairHMM { final boolean recacheReadValues ) { // the initial condition -- must be in subComputeReadLikelihoodGivenHaplotypeLog10 because it needs that actual // read and haplotypes, not the maximum - matchMetricArray[1][1] = getNPotentialXStartsLikelihoodPenaltyLog10(haplotypeBases.length, readBases.length); + final double initialValue = Math.log10((double) 1/haplotypeBases.length); + matchMatrix[1][1] = initialValue; // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment final int X_METRIC_LENGTH = readBases.length + 2; @@ -104,14 +105,17 @@ public class Log10PairHMM extends PairHMM { for( int jjj = hapStartIndex + 1; jjj < Y_METRIC_LENGTH; jjj++ ) { if( (iii == 1 && jjj == 1) ) { continue; } updateCell(iii, jjj, haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, - matchMetricArray, XMetricArray, YMetricArray); + matchMatrix, insertionMatrix, deletionMatrix); } } // final probability is the log10 sum of the last element in all three state arrays final int endI = X_METRIC_LENGTH - 1; - final int endJ = Y_METRIC_LENGTH - 1; - return myLog10SumLog10(new double[]{matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]}); + double result = myLog10SumLog10(new double[]{matchMatrix[endI][1], insertionMatrix[endI][1]}); + for (int j = 2; j < Y_METRIC_LENGTH; j++) + result = myLog10SumLog10(new double[]{result, matchMatrix[endI][j], insertionMatrix[endI][j]}); + + return result; } /** @@ -134,7 +138,7 @@ public class Log10PairHMM extends PairHMM { private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP, - final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) { + final double[][] matchMatrix, final double[][] insertionMatrix, final double[][] deletionMatrix ) { // the read and haplotype indices are offset by one because the state arrays have an extra column to hold the initial conditions final int im1 = indI - 1; @@ -151,18 +155,18 @@ public class Log10PairHMM extends PairHMM { final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) ); final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP); final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) ); - matchMetricArray[indI][indJ] = pBaseReadLog10 + myLog10SumLog10(new double[]{matchMetricArray[indI - 1][indJ - 1] + d0, XMetricArray[indI - 1][indJ - 1] + e0, YMetricArray[indI - 1][indJ - 1] + e0}); + matchMatrix[indI][indJ] = pBaseReadLog10 + myLog10SumLog10(new double[]{matchMatrix[indI - 1][indJ - 1] + d0, insertionMatrix[indI - 1][indJ - 1] + e0, deletionMatrix[indI - 1][indJ - 1] + e0}); // update the X (insertion) array final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) ); final double e1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GCP) : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) ); final double qBaseReadLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0 - XMetricArray[indI][indJ] = qBaseReadLog10 + myLog10SumLog10(new double[]{matchMetricArray[indI - 1][indJ] + d1, XMetricArray[indI - 1][indJ] + e1}); + insertionMatrix[indI][indJ] = qBaseReadLog10 + myLog10SumLog10(new double[]{matchMatrix[indI - 1][indJ] + d1, insertionMatrix[indI - 1][indJ] + e1}); // update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype - final double d2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) ); - final double e2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) ); + final double d2 = ( im1 == 0 ) ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]); + final double e2 = ( im1 == 0 ) ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]); final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0 - YMetricArray[indI][indJ] = qBaseRefLog10 + myLog10SumLog10(new double[]{matchMetricArray[indI][indJ - 1] + d2, YMetricArray[indI][indJ - 1] + e2}); + deletionMatrix[indI][indJ] = qBaseRefLog10 + myLog10SumLog10(new double[]{matchMatrix[indI][indJ - 1] + d2, deletionMatrix[indI][indJ - 1] + e2}); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index 0d2eb0c1c..bd3360370 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -25,10 +25,12 @@ package org.broadinstitute.sting.utils.pairhmm; -import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.Arrays; /** * Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book. @@ -52,11 +54,11 @@ public abstract class PairHMM { LOGLESS_CACHING } - protected double[][] matchMetricArray = null; - protected double[][] XMetricArray = null; - protected double[][] YMetricArray = null; + protected double[][] matchMatrix = null; + protected double[][] insertionMatrix = null; + protected double[][] deletionMatrix = null; protected int maxHaplotypeLength, maxReadLength; - protected int X_METRIC_MAX_LENGTH, Y_METRIC_MAX_LENGTH; + protected int paddedMaxReadLength, paddedMaxHaplotypeLength; private boolean initialized = false; /** @@ -72,12 +74,12 @@ public abstract class PairHMM { maxReadLength = readMaxLength; // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment - X_METRIC_MAX_LENGTH = readMaxLength + 2; - Y_METRIC_MAX_LENGTH = haplotypeMaxLength + 2; + paddedMaxReadLength = readMaxLength + 2; + paddedMaxHaplotypeLength = haplotypeMaxLength + 2; - matchMetricArray = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH]; - XMetricArray = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH]; - YMetricArray = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH]; + matchMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; + insertionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; + deletionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength]; initialized = true; } @@ -124,21 +126,17 @@ public abstract class PairHMM { double result = subComputeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, hapStartIndex, recacheReadValues); - // TODO -- remove max when PairHMM no longer returns likelihoods >= 0 - result = Math.min(result, 0.0); - if ( MathUtils.goodLog10Probability(result) ) return result; else - throw new IllegalStateException("Bad likelihoods detected: " + result); -// return result; + throw new ReviewedStingException("PairHMM Log Probability cannot be greater than 0: " + String.format("haplotype: %s, read: %s, result: %f", Arrays.toString(haplotypeBases), Arrays.toString(readBases), result)); } /** * To be overloaded by subclasses to actually do calculation for #computeReadLikelihoodGivenHaplotypeLog10 */ @Requires({"readBases.length == readQuals.length", "readBases.length == insertionGOP.length", "readBases.length == deletionGOP.length", - "readBases.length == overallGCP.length", "matchMetricArray!=null", "XMetricArray!=null", "YMetricArray!=null"}) + "readBases.length == overallGCP.length", "matchMatrix!=null", "insertionMatrix!=null", "deletionMatrix!=null"}) protected abstract double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals, @@ -148,41 +146,13 @@ public abstract class PairHMM { final int hapStartIndex, final boolean recacheReadValues ); - /** - * How many potential starting locations are a read with readSize bases against a haplotype with haplotypeSize bases? - * - * for example, a 3 bp read against a 5 bp haplotype could potentially start at 1, 2, 3 = 5 - 3 + 1 = 3 - * the max value is necessary in the case where the read is longer than the haplotype, in which case - * there's a single unique start site by assumption - * - * @param haplotypeSize the number of bases in the haplotype we are testing - * @param readSize the number of bases in the read we are testing - * @return a positive integer >= 1 - */ - @Ensures("result >= 1") - protected int getNPotentialXStarts(final int haplotypeSize, final int readSize) { - return Math.max(haplotypeSize - readSize + 1, 1); - } - - /** - * The the log10 probability penalty for the number of potential start sites of the read aginst the haplotype - * - * @param haplotypeSize the number of bases in the haplotype we are testing - * @param readSize the number of bases in the read we are testing - * @return a log10 probability - */ - @Ensures("MathUtils.goodLog10Probability(result)") - protected double getNPotentialXStartsLikelihoodPenaltyLog10(final int haplotypeSize, final int readSize) { - return - Math.log10(getNPotentialXStarts(haplotypeSize, readSize)); - } - /** * Print out the core hmm matrices for debugging */ protected void dumpMatrices() { - dumpMatrix("matchMetricArray", matchMetricArray); - dumpMatrix("XMetricArray", XMetricArray); - dumpMatrix("YMetricArray", YMetricArray); + dumpMatrix("matchMetricArray", matchMatrix); + dumpMatrix("insertionMatrix", insertionMatrix); + dumpMatrix("deletionMatrix", deletionMatrix); } /**