From 7519484a386d2defc0a0c143e76dceac299ff4c3 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 25 Feb 2013 12:24:35 -0500 Subject: [PATCH 1/2] Refactored PairHMM.initialize to first take haplotype max length and then the read max length so that it is consistent with other PairHMM methods. --- .../LikelihoodCalculationEngine.java | 2 +- .../indels/PairHMMIndelErrorModel.java | 2 +- .../utils/pairhmm/LoglessCachingPairHMM.java | 4 ++-- .../sting/utils/pairhmm/PairHMMUnitTest.java | 24 +++++++++---------- .../sting/utils/pairhmm/Log10PairHMM.java | 4 ++-- .../sting/utils/pairhmm/PairHMM.java | 4 ++-- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index c3e7276a6..aeeb95c87 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -109,7 +109,7 @@ public class LikelihoodCalculationEngine { Y_METRIC_LENGTH += 2; // initialize arrays to hold the probabilities of being in the match, insertion and deletion cases - pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH); + pairHMM.initialize(Y_METRIC_LENGTH, X_METRIC_LENGTH); // for each sample's reads for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index f5f4b9aeb..041089c62 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -385,7 +385,7 @@ public class PairHMMIndelErrorModel { if (previousHaplotypeSeen == null) { //no need to reallocate arrays for each new haplotype, as length won't change - pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH); + pairHMM.initialize(Y_METRIC_LENGTH, X_METRIC_LENGTH); } int startIndexInHaplotype = 0; diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java index 6f8bec94f..24d6e1220 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessCachingPairHMM.java @@ -78,8 +78,8 @@ public class LoglessCachingPairHMM extends PairHMM { * {@inheritDoc} */ @Override - public void initialize( final int readMaxLength, final int haplotypeMaxLength) { - super.initialize(readMaxLength, haplotypeMaxLength); + public void initialize( final int haplotypeMaxLength, final int readMaxLength) { + super.initialize(haplotypeMaxLength, readMaxLength); constantMatrix = new double[X_METRIC_MAX_LENGTH][6]; distanceMatrix = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH]; diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java index 9de562aa5..64819c245 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java @@ -136,7 +136,7 @@ public class PairHMMUnitTest extends BaseTest { } public double calcLogL( final PairHMM pairHMM, boolean anchorIndel ) { - pairHMM.initialize(readBasesWithContext.length, refBasesWithContext.length); + pairHMM.initialize(refBasesWithContext.length, readBasesWithContext.length); return pairHMM.computeReadLikelihoodGivenHaplotypeLog10( refBasesWithContext, readBasesWithContext, qualAsBytes(baseQual, false, anchorIndel), qualAsBytes(insQual, true, anchorIndel), qualAsBytes(delQual, true, anchorIndel), @@ -262,7 +262,7 @@ public class PairHMMUnitTest extends BaseTest { double expectedLogL = cfg.expectedLogL(hmm); // compare to our theoretical expectation with appropriate tolerance - Assert.assertEquals(actualLogL, expectedLogL, cfg.toleranceFromTheoretical(), "Failed with hmm " + hmm); + Assert.assertEquals(actualLogL, expectedLogL, cfg.toleranceFromTheoretical(), "Failed with hmm " + hmm + (hmm instanceof Log10PairHMM ? " (" + ((Log10PairHMM)hmm).isDoingExactLog10Calculations() + ")" : "")); // compare to the exact reference implementation with appropriate tolerance Assert.assertEquals(actualLogL, exactLogL, cfg.getTolerance(hmm), "Failed with hmm " + hmm); Assert.assertTrue(MathUtils.goodLog10Probability(actualLogL), "Bad log10 likelihood " + actualLogL); @@ -303,7 +303,7 @@ public class PairHMMUnitTest extends BaseTest { byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length-offset); // change single base at position k to C. If it's a C, change to T mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C'); - originalHMM.initialize(mread.length, haplotype1.length); + originalHMM.initialize(haplotype1.length, mread.length); double res1 = originalHMM.computeReadLikelihoodGivenHaplotypeLog10( haplotype1, mread, quals, gop, gop, @@ -335,7 +335,7 @@ public class PairHMMUnitTest extends BaseTest { byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length); // change single base at position k to C. If it's a C, change to T mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C'); - originalHMM.initialize(mread.length, haplotype1.length); + originalHMM.initialize(haplotype1.length, mread.length); double res1 = originalHMM.computeReadLikelihoodGivenHaplotypeLog10( haplotype1, mread, quals, gop, gop, @@ -372,7 +372,7 @@ public class PairHMMUnitTest extends BaseTest { byte insQual = 37; byte delQual = 37; byte gcp = 10; - hmm.initialize(readBases.length, refBases.length); + hmm.initialize(refBases.length, readBases.length); double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases, Utils.dupBytes(baseQual, readBases.length), Utils.dupBytes(insQual, readBases.length), @@ -389,7 +389,7 @@ public class PairHMMUnitTest extends BaseTest { byte insQual = 100; byte delQual = 100; byte gcp = 100; - hmm.initialize(readBases.length, refBases.length); + hmm.initialize(refBases.length, readBases.length); double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases, Utils.dupBytes(baseQual, readBases.length), Utils.dupBytes(insQual, readBases.length), @@ -429,7 +429,7 @@ public class PairHMMUnitTest extends BaseTest { byte insQual = 40; byte delQual = 40; byte gcp = 10; - hmm.initialize(readBases.length, refBases.length); + hmm.initialize(refBases.length, readBases.length); double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases, Utils.dupBytes(baseQual, readBases.length), Utils.dupBytes(insQual, readBases.length), @@ -447,7 +447,7 @@ public class PairHMMUnitTest extends BaseTest { byte delQual = 40; byte gcp = 10; - exactHMM.initialize(readBases.length, refBases.length); + exactHMM.initialize(refBases.length, readBases.length); double d = exactHMM.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases, Utils.dupBytes(baseQual, readBases.length), Utils.dupBytes(insQual, readBases.length), @@ -455,7 +455,7 @@ public class PairHMMUnitTest extends BaseTest { Utils.dupBytes(gcp, readBases.length), 0, true); //exactHMM.dumpMatrices(); - loglessHMM.initialize(readBases.length, refBases.length); + loglessHMM.initialize(refBases.length, readBases.length); double logless = loglessHMM.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases, Utils.dupBytes(baseQual, readBases.length), Utils.dupBytes(insQual, readBases.length), @@ -489,7 +489,7 @@ public class PairHMMUnitTest extends BaseTest { final int maxHaplotypeLength = refBases.length + nExtraMaxSize; final int maxReadLength = readBases.length + nExtraMaxSize; - hmm.initialize(maxReadLength, maxHaplotypeLength); + hmm.initialize(maxHaplotypeLength, maxReadLength); double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases, quals, insQual, @@ -535,7 +535,7 @@ public class PairHMMUnitTest extends BaseTest { final int maxHaplotypeLength = prefix.length() + root1.length(); // the initialization occurs once, at the start of the evalution of reads - hmm.initialize(maxReadLength, maxHaplotypeLength); + hmm.initialize(maxHaplotypeLength, maxReadLength); for ( int prefixStart = prefix.length(); prefixStart >= 0; prefixStart-- ) { final String myPrefix = prefix.substring(prefixStart, prefix.length()); @@ -633,7 +633,7 @@ public class PairHMMUnitTest extends BaseTest { byte[] refBases = "AAAT".getBytes(); byte[] baseQuals = Utils.dupBytes((byte)30, readBases.length); - hmm.initialize(2, 3); + hmm.initialize(3, 2); double d = hmm.computeReadLikelihoodGivenHaplotypeLog10( refBases, readBases, baseQuals, baseQuals, baseQuals, baseQuals, 0, true); } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java index c9d364aac..62793bc54 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java @@ -64,8 +64,8 @@ public class Log10PairHMM extends PairHMM { * {@inheritDoc} */ @Override - public void initialize( final int readMaxLength, final int haplotypeMaxLength) { - super.initialize(readMaxLength, haplotypeMaxLength); + public void initialize( final int haplotypeMaxLength, final int readMaxLength) { + super.initialize(haplotypeMaxLength, readMaxLength); for( int iii=0; iii < X_METRIC_MAX_LENGTH; iii++ ) { Arrays.fill(matchMetricArray[iii], Double.NEGATIVE_INFINITY); diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index f898faaf3..e590d1df8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -61,10 +61,10 @@ public abstract class PairHMM { /** * Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths - * @param readMaxLength the max length of reads we want to use with this PairHMM * @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM + * @param readMaxLength the max length of reads we want to use with this PairHMM */ - public void initialize( final int readMaxLength, final int haplotypeMaxLength ) { + public void initialize( final int haplotypeMaxLength, final int readMaxLength ) { if ( readMaxLength <= 0 ) throw new IllegalArgumentException("READ_MAX_LENGTH must be > 0 but got " + readMaxLength); if ( haplotypeMaxLength <= 0 ) throw new IllegalArgumentException("HAPLOTYPE_MAX_LENGTH must be > 0 but got " + haplotypeMaxLength); From 396b7e093307a21008b80645ee504f8b2d7d600b Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 25 Feb 2013 14:58:17 -0500 Subject: [PATCH 2/2] Fixed the intermittent PairHMM unit test failure. The issue here is that the OptimizedLikelihoodTestProvider uses the same basic underlying class as the BasicLikelihoodTestProvider and we were using the BasicTestProvider functionality to pull out tests of that class; so if the optimized tests were run first we were unintentionally running those same tests again with the basic ones (but expecting different results). --- .../sting/utils/pairhmm/PairHMMUnitTest.java | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java index 64819c245..c94674c98 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java @@ -82,11 +82,12 @@ public class PairHMMUnitTest extends BaseTest { // // -------------------------------------------------------------------------------- - private class BasicLikelihoodTestProvider extends TestDataProvider { + private class BasicLikelihoodTestProvider { final String ref, read; final byte[] refBasesWithContext, readBasesWithContext; final int baseQual, insQual, delQual, gcp; final int expectedQual; + final boolean left, right; final static String CONTEXT = "ACGTAATGACGATTGCA"; final static String LEFT_FLANK = "GATTTATCATCGAGTCTGC"; final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTTA"; @@ -96,7 +97,6 @@ public class PairHMMUnitTest extends BaseTest { } public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) { - super(BasicLikelihoodTestProvider.class, String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual)); this.baseQual = baseQual; this.delQual = delQual; this.insQual = insQual; @@ -104,11 +104,18 @@ public class PairHMMUnitTest extends BaseTest { this.read = read; this.ref = ref; this.expectedQual = expectedQual; + this.left = left; + this.right = right; refBasesWithContext = asBytes(ref, left, right); readBasesWithContext = asBytes(read, false, false); } + @Override + public String toString() { + return String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual); + } + public double expectedLogL(final PairHMM hmm) { return (expectedQual / -10.0) + 0.03 + hmm.getNPotentialXStartsLikelihoodPenaltyLog10(refBasesWithContext.length, readBasesWithContext.length); @@ -178,6 +185,8 @@ public class PairHMMUnitTest extends BaseTest { final List gcps = EXTENSIVE_TESTING ? Arrays.asList(8, 10, 20) : Arrays.asList(10); final List sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20,30,35) : Arrays.asList(2); + final List tests = new ArrayList(); + for ( final int baseQual : baseQuals ) { for ( final int indelQual : indelQuals ) { for ( final int gcp : gcps ) { @@ -188,7 +197,7 @@ public class PairHMMUnitTest extends BaseTest { final String ref = new String(new byte[]{refBase}); final String read = new String(new byte[]{readBase}); final int expected = refBase == readBase ? 0 : baseQual; - new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp); + tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp)}); } } @@ -204,10 +213,10 @@ public class PairHMMUnitTest extends BaseTest { final String ref = insertionP ? small : big; final String read = insertionP ? big : small; - new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp); - new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false); - new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true); - new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true); + tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp)}); + tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false)}); + tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true)}); + tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true)}); } } } @@ -215,7 +224,7 @@ public class PairHMMUnitTest extends BaseTest { } } - return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class); + return tests.toArray(new Object[][]{}); } @DataProvider(name = "OptimizedLikelihoodTestProvider") @@ -227,6 +236,8 @@ public class PairHMMUnitTest extends BaseTest { final List gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30) : Arrays.asList(10); final List sizes = EXTENSIVE_TESTING ? Arrays.asList(3, 20, 50, 90, 160) : Arrays.asList(2); + final List tests = new ArrayList(); + for ( final int baseQual : baseQuals ) { for ( final int indelQual : indelQuals ) { for ( final int gcp : gcps ) { @@ -243,14 +254,14 @@ public class PairHMMUnitTest extends BaseTest { for ( final boolean leftFlank : Arrays.asList(true, false) ) for ( final boolean rightFlank : Arrays.asList(true, false) ) - new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, leftFlank, rightFlank); + tests.add(new Object[]{new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, -0, gcp, leftFlank, rightFlank)}); } } } } } - return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class); + return tests.toArray(new Object[][]{}); } @Test(enabled = !DEBUG, dataProvider = "BasicLikelihoodTestProvider") @@ -262,7 +273,7 @@ public class PairHMMUnitTest extends BaseTest { double expectedLogL = cfg.expectedLogL(hmm); // compare to our theoretical expectation with appropriate tolerance - Assert.assertEquals(actualLogL, expectedLogL, cfg.toleranceFromTheoretical(), "Failed with hmm " + hmm + (hmm instanceof Log10PairHMM ? " (" + ((Log10PairHMM)hmm).isDoingExactLog10Calculations() + ")" : "")); + Assert.assertEquals(actualLogL, expectedLogL, cfg.toleranceFromTheoretical(), "Failed with hmm " + hmm); // compare to the exact reference implementation with appropriate tolerance Assert.assertEquals(actualLogL, exactLogL, cfg.getTolerance(hmm), "Failed with hmm " + hmm); Assert.assertTrue(MathUtils.goodLog10Probability(actualLogL), "Bad log10 likelihood " + actualLogL);