diff --git a/.gitignore b/.gitignore index ac3b931eb..819c120a6 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ out/ /atlassian-ide-plugin.xml maven-metadata-local.xml dependency-reduced-pom.xml +public/external-example/target/ diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngineInstance.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngineInstance.java index 66ea7be03..a5b1716c8 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngineInstance.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngineInstance.java @@ -412,28 +412,28 @@ public class GraphBasedLikelihoodCalculationEngineInstance { for (final GATKSAMRecord read : mayNeedAdjustment) { final Map existingLikelihoods = map.get(read); if (existingLikelihoods != null) { - Allele bestAllele = null; double worstRelativeLikelihood = 0; double bestRelativeLikelihood = Double.NEGATIVE_INFINITY; for (final Map.Entry entry : map.get(read).entrySet()) { final double candidateRelativeLikelihood = entry.getValue(); if (candidateRelativeLikelihood > bestRelativeLikelihood) { - bestAllele = entry.getKey(); bestRelativeLikelihood = candidateRelativeLikelihood; } if (!Double.isInfinite(candidateRelativeLikelihood) && worstRelativeLikelihood > candidateRelativeLikelihood) worstRelativeLikelihood = candidateRelativeLikelihood; } - - worstRelativeLikelihood += UNREPORTED_HAPLOTYPE_LIKELIHOOD_PENALTY; - if (bestAllele == null) - throw new IllegalStateException("No best allele for read " + read.getReadName()); + if (Double.isInfinite(bestRelativeLikelihood)) { + bestRelativeLikelihood = 0; + worstRelativeLikelihood = 0; + } else { + worstRelativeLikelihood += UNREPORTED_HAPLOTYPE_LIKELIHOOD_PENALTY; + } final double bestLikelihood = 0.0; // the best becomes zero. maxAlternative.put(read, bestLikelihood); for (final Map.Entry entry : alleleVersions.entrySet()) { final Allele a = entry.getValue(); final Double relativeLikelihoodO = existingLikelihoods.get(a); - final double relativeLikelihood = relativeLikelihoodO == null ? worstRelativeLikelihood : relativeLikelihoodO; + final double relativeLikelihood = relativeLikelihoodO == null || Double.isInfinite(relativeLikelihoodO) ? worstRelativeLikelihood : relativeLikelihoodO; final double likelihood = relativeLikelihood - bestRelativeLikelihood + bestLikelihood; if (likelihood > 0) throw new IllegalStateException("Likelihood larger than 1 with read " + read.getReadName()); @@ -628,7 +628,7 @@ public class GraphBasedLikelihoodCalculationEngineInstance { // Complete the read segment cost with the corresponding path bases for (final Route p : acrossBlockPaths) { - final ReadSegmentCost readSegmentCost = new ReadSegmentCost(read, p, Double.NaN); + final ReadSegmentCost readSegmentCost = new ReadSegmentCost(read, p, 0); pathBases[nextPath++] = readSegmentCost.bases = eventBlockPathBases(p, includePathEnds); pathSizes.add(readSegmentCost.bases.length); readSegmentCosts.add(readSegmentCost); @@ -652,8 +652,6 @@ public class GraphBasedLikelihoodCalculationEngineInstance { readSegmentCost.path = new Route<>(readSegmentCost.path,appendVertex); addReadSegmentCost(destination,readSegmentCost); } - - } /** diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index b6eddab51..871ef78b7 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -1151,7 +1151,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In for( final GATKSAMRecord read : reads ) { returnMap.get(read.getReadGroup().getSample()).add(read); } - return returnMap; } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadCost.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadCost.java index d5f62a6a3..6fc784799 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadCost.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadCost.java @@ -45,6 +45,7 @@ */ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Comparator; @@ -53,6 +54,10 @@ import java.util.Comparator; * A pair read-likelihood (cost). */ public class ReadCost { + + /** + * Reference to the read record this cost makes reference to. + */ public final GATKSAMRecord read; /** @@ -71,13 +76,12 @@ public class ReadCost { */ public ReadCost(final GATKSAMRecord r, final double initialCost) { if (r == null) throw new NullPointerException(); - if (Double.isNaN(initialCost) || Double.isInfinite(initialCost) || initialCost > 0) + if (!MathUtils.goodLog10Probability(initialCost)) throw new IllegalArgumentException("initial cost must be a finite 0 or negative value (" + initialCost + ")"); read = r; cost = initialCost; } - /** * Comparator used to sort ReadCosts */ @@ -90,15 +94,15 @@ public class ReadCost { } }; - /** * Add to the cost. * @param value value to add. */ public void addCost(final double value) { - if (cost + value > 0) - throw new IllegalArgumentException("value brings cost over 0. Current cost " + cost + " value " + value); + final double previousCost = cost; cost += value; + if (!MathUtils.goodLog10Probability(cost)) + throw new IllegalArgumentException("invalid log10 likelihood value (" + cost + ") after adding (" + value + ") to (" + previousCost + ")"); } /** @@ -108,5 +112,4 @@ public class ReadCost { public double getCost() { return cost; } - } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadSegmentCost.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadSegmentCost.java index b5544f1a2..b362dcb76 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadSegmentCost.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/ReadSegmentCost.java @@ -45,10 +45,10 @@ */ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; -import com.google.java.contract.Requires; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.MultiSampleEdge; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.Route; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.MultiDeBruijnVertex; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.concurrent.atomic.AtomicLong; @@ -63,7 +63,17 @@ import java.util.concurrent.atomic.AtomicLong; */ class ReadSegmentCost { + /** + * Reference to the path the cost makes reference to. Public and writable for convenience, notice that this is not + * a public class. + */ public Route path; + + + /** + * Reference to the read the cost makes reference to. Public and writable for convenience, notice that this is not + * a public class. + */ public GATKSAMRecord read; /** @@ -78,23 +88,38 @@ class ReadSegmentCost { /** * Construct a new path cost. + * * @param read the corresponding read. * @param path the corresponding path. * @param cost initial cost estimate. Might be updated later. */ - @Requires("route != null") public ReadSegmentCost(final GATKSAMRecord read, - final Route path, double cost) { + final Route path, final double cost) { + if (read == null) + throw new IllegalArgumentException("the read provided cannot be null"); + if (path == null) + throw new IllegalArgumentException("the path provided cannot be null"); this.read = read; this.path = path; setCost(cost); } + /** + * Returns the cost of a read vs a haplotype. + * + * @return a valid log10 likelihood + */ public double getCost() { return cost; } + /** + * Changes the cost of the current path vs read combination. + * @param value + */ public void setCost(final double value) { + if (!MathUtils.goodLog10Probability(value)) + throw new IllegalArgumentException("infinite cost are not allowed"); cost = value; } @@ -110,7 +135,7 @@ class ReadSegmentCost { /** * Returns the this path-cost unique identifier. - * @return + * @return never {@code unique}. */ public long uniqueId() { if (uniqueId == null) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/FastLoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/FastLoglessPairHMM.java index 72d5c9472..f6c746b32 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/FastLoglessPairHMM.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/FastLoglessPairHMM.java @@ -62,7 +62,6 @@ import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*; */ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM { - /** * Initial read length capacity. */ @@ -89,6 +88,10 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM { private int maxToCol; private int haplotypeLength; + private double[][] logTransition; + + private boolean indelToIndelIsConstant; + /** * Returns the currently loaded read base qualities. * @@ -140,7 +143,6 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM { return readGepQuals; } - /** * Creates a new pair-hmm calculator instance give the gap continuation penalty. * @@ -374,8 +376,7 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM { */ public void loadRead(final byte[] readBases, final byte[] readQuals, final byte[] readInsQuals, final byte[] readDelQuals, int mq) { // TODO This is a copy&paste from PairHMM*Engine read data preparation code. - // TODO It is simply to difficult to share the code without changing that class and I don't want - // TODO to do so for now. + // TODO It is simply to difficult to share the code without changing that class if (readBases.length != readQuals.length) throw new IllegalArgumentException("the read quality array length does not match the read base array length"); if (readBases.length != readInsQuals.length) throw new IllegalArgumentException("the read insert quality array length does not match the read base array length"); if (readBases.length != readDelQuals.length) throw new IllegalArgumentException("the read deletion quality length does not match the read base array length"); @@ -402,12 +403,28 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM { this.readInsQuals = readInsQuals; this.readDelQuals = readDelQuals; this.readGepQuals = overallGCP; + initializeProbabilities(transition,readInsQuals, readDelQuals, overallGCP); + PairHMMModel.qualToTransProbsLog10(logTransition,readInsQuals,readDelQuals,overallGCP); + + indelToIndelIsConstant = true; + for (final double d : overallGCP) + if (d != overallGCP[0]) { + indelToIndelIsConstant = false; + break; + } + if (haplotypeBases != null) fillPriorsTable(0); cachedResults.clear(); } + @Override + public void initialize( final int readMaxLength, final int haplotypeMaxLength ) { + super.initialize(readMaxLength,haplotypeMaxLength); + logTransition = PairHMMModel.createTransitionMatrix(readMaxLength); + } + @Override public void loadHaplotypeBases(final byte[] haplotypeBases) { if (readBases == null) @@ -587,30 +604,27 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM { * Fast likelihood when the pair-hmm represents a deletion in the read. */ private double calculateLocalLikelihoodDeletion(final int readStart, final int hapStart, final int hapEnd) { - double result = INITIAL_CONDITION; - if (readStart > 0) { // no penalty if at the beginning. - result *= transition[readStart][matchToDeletion]; - result *= - StrictMath.pow(transition[readStart][deletionToDeletion],hapEnd - hapStart - 1); - result *= transition[readStart][indelToMatch]; - } - return StrictMath.log10(result) - INITIAL_CONDITION_LOG10; + if (readStart == 0 || readStart >= readBases.length) // Deletion at the beginning or end not have a cost. + return 0; + return logTransition[readStart + 1][matchToDeletion] + + logTransition[readStart + 1][deletionToDeletion] * (hapEnd - hapStart - 1); } - /** * Fast likelihood when the pair-hmm represents a insertion in the read. */ private double calculateLocalLikelihoodInsertion(final int readStart, final int readEnd) { - double result = INITIAL_CONDITION; - result *= transition[readStart + 1][matchToInsertion]; - for (int i = readStart + 1; i < readEnd; i++) { - result *= transition[i + 1][insertionToInsertion]; - } - if (readEnd < readBases.length) { - result *= transition[readEnd + 1][indelToMatch]; - } - return StrictMath.log10(result) - INITIAL_CONDITION_LOG10; + double result = logTransition[readStart + 1][matchToInsertion]; + + if (indelToIndelIsConstant) + result += logTransition[readStart + 1][insertionToInsertion] * (readEnd - readStart - 1); + else + for (int i = readStart + 1; i < readEnd; i++) + result += logTransition[i + 1][insertionToInsertion]; + + if (readEnd < readBases.length) + result += logTransition[readEnd + 1][indelToMatch]; + return result; } /** diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 7271b2642..25df0703b 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -99,12 +99,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerGraphBasedSingleSample() { - HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "83fe0694621bc1e0240f6f79eb6d6999"); + HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "f87bb723bfb9b8f604db1d53624d24e3"); } @Test public void testHaplotypeCallerGraphBasedMultiSample() { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "d45b2b26434dd3bd48df5a43b3d2954a"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "891295d8795c0ac894a2aec873e2a1d4"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGSGraphBased() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("6f6213bfc62e9cd9db56a7277ebe04e0")); + Arrays.asList("d2bb1a904ad419c565b00c3ac98048d1")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java index 057c67a55..972c9fc9d 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java @@ -55,6 +55,7 @@ abstract class N2MemoryPairHMM extends PairHMM { * @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM * @param readMaxLength the max length of reads we want to use with this PairHMM */ + @Override public void initialize( final int readMaxLength, final int haplotypeMaxLength ) { super.initialize(readMaxLength, haplotypeMaxLength);