From 574d5b467fccaf48faedf8e1108afc5f930750f8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 9 Dec 2012 02:09:34 -0500 Subject: [PATCH] Bug fix for indel HMM: protect against situation where long reads (e.g. Sanger) in a pileup can lead to a read starting after the haplotype end for a given haplotype. --- .../afcalc/AFCalcPerformanceTest.java | 4 +- .../afcalc/AFCalcResultUnitTest.java | 6 +- .../genotyper/afcalc/AFCalcUnitTest.java | 2 +- ...dentAllelesDiploidExactAFCalcUnitTest.java | 2 +- .../sting/gatk/report/GATKReport.java | 2 +- .../genotyper/afcalc/AFCalcResult.java | 38 ++++++---- .../genotyper/afcalc/ExactCallLogger.java | 10 +-- .../IndependentAllelesDiploidExactAFCalc.java | 10 +-- .../afcalc/OriginalDiploidExactAFCalc.java | 6 +- .../genotyper/afcalc/StateTracker.java | 8 +-- .../indels/PairHMMIndelErrorModel.java | 7 +- .../traversals/TraverseActiveRegionsTest.java | 69 ++++++++++++++++--- 12 files changed, 113 insertions(+), 51 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index e9ed6b153..0a3512aa6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -240,8 +240,8 @@ public class AFCalcPerformanceTest { if ( a.isNonReference() ) { final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : ""; logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE); - final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : ""; - logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost); + final String warningmePost = call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) == 0 && result.getLog10PosteriorOfAFEq0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : ""; + logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFEq0ForAllele(a) + warningmePost); } } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java index 96e055e92..016926e12 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java @@ -83,8 +83,8 @@ public class AFCalcResultUnitTest extends BaseTest { List tests = new ArrayList(); final List pValues = new LinkedList(); - for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) ) - for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) ) + for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999, 1 - 1e-4, 1 - 1e-5, 1 - 1e-6) ) + for ( final double espilon : Arrays.asList(-1e-7, 0.0, 1e-7) ) pValues.add(p + espilon); for ( final double pNonRef : pValues ) { @@ -106,7 +106,7 @@ public class AFCalcResultUnitTest extends BaseTest { alleles, MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false), log10Even, - Collections.singletonMap(C, Math.log10(pNonRef))); + Collections.singletonMap(C, Math.log10(1 - pNonRef))); } @Test(enabled = true, dataProvider = "TestIsPolymorphic") diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java index 2d346e548..7ee909fe0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java @@ -681,7 +681,7 @@ public class AFCalcUnitTest extends BaseTest { // must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt)); - Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFGt0ForAllele(alt)); + Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt)); } } } \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java index 391c99990..663471106 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java @@ -148,7 +148,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { for ( int i = 0; i < log10LAlleles.size(); i++ ) { final double log10LAllele1 = log10LAlleles.get(i); final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true); - final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, 0.0)); + final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0)); originalPriors.add(result1); pNonRefN.add(log10pNonRef*(i+1)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 7ae2bb453..605a6680f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -343,7 +343,7 @@ public class GATKReport { GATKReportTable table = tables.firstEntry().getValue(); if ( table.getNumColumns() != values.length ) - throw new ReviewedStingException("The number of arguments in writeRow() " + values.length + " must match the number of columns in the table" + table.getNumColumns()); + throw new ReviewedStingException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" ); final int rowIndex = table.getNumRows(); for ( int i = 0; i < values.length; i++ ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index dbb0e8cdd..d6a5cb16d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -52,7 +51,7 @@ public class AFCalcResult { private final double[] log10PriorsOfAC; private final double[] log10PosteriorsOfAC; - private final Map log10pNonRefByAllele; + private final Map log10pRefByAllele; /** * The AC values for all ALT alleles at the MLE @@ -74,16 +73,16 @@ public class AFCalcResult { final List allelesUsedInGenotyping, final double[] log10LikelihoodsOfAC, final double[] log10PriorsOfAC, - final Map log10pNonRefByAllele) { + final Map log10pRefByAllele) { if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping); if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null"); if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size()); if ( nEvaluations < 0 ) throw new IllegalArgumentException("nEvaluations must be >= 0 but saw " + nEvaluations); if ( log10LikelihoodsOfAC.length != 2 ) throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2"); if ( log10PriorsOfAC.length != 2 ) throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2"); - if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null"); - if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); - if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); + if ( log10pRefByAllele == null ) throw new IllegalArgumentException("log10pRefByAllele cannot be null"); + if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); + if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC)); if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC)); @@ -94,7 +93,7 @@ public class AFCalcResult { this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES); this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES); this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC); - this.log10pNonRefByAllele = new HashMap(log10pNonRefByAllele); + this.log10pRefByAllele = new HashMap(log10pRefByAllele); } /** @@ -104,7 +103,7 @@ public class AFCalcResult { * @return */ public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) { - return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele); + return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele); } /** @@ -219,7 +218,7 @@ public class AFCalcResult { public String toString() { final List byAllele = new LinkedList(); for ( final Allele a : getAllelesUsedInGenotyping() ) - if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a))); + if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFEq0ForAllele(a))); return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele)); } @@ -237,7 +236,7 @@ public class AFCalcResult { */ @Requires("MathUtils.goodLog10Probability(log10minPNonRef)") public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) { - return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef; + return getLog10PosteriorOfAFEq0ForAllele(allele) < log10minPNonRef; } /** @@ -245,7 +244,7 @@ public class AFCalcResult { */ public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) { if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 "); - final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual)); + final double log10Threshold = minPNonRefPhredScaledQual / -10; return isPolymorphic(allele, log10Threshold); } @@ -263,7 +262,16 @@ public class AFCalcResult { } /** - * Returns the log10 probability that allele is segregating + * Returns the log10 probability that allele is not segregating + * + * Note that this function is p not segregating so that we can store + * internally the log10 value of AF == 0, which grows very quickly + * negative and yet has sufficient resolution for high confidence tests. + * For example, if log10pRef == -100, not an unreasonably high number, + * if we tried to store log10pNonRef we'd be looking at 1 - 10^-100, which + * quickly underflows to 1. So the logic here is backward from what + * you really want (the p of segregating) but we do that for numerical + * reasons * * Unlike the sites-level annotation, this calculation is specific to allele, and can be * used to separately determine how much evidence there is that allele is independently @@ -272,11 +280,11 @@ public class AFCalcResult { * evidence for one allele but not so much for any other allele * * @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping - * @return the log10 probability that allele is segregating at this site + * @return the log10 probability that allele is not segregating at this site */ @Ensures("MathUtils.goodLog10Probability(result)") - public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) { - final Double log10pNonRef = log10pNonRefByAllele.get(allele); + public double getLog10PosteriorOfAFEq0ForAllele(final Allele allele) { + final Double log10pNonRef = log10pRefByAllele.get(allele); if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele); return log10pNonRef; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java index f13fe4429..b138ddf70 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -77,7 +77,7 @@ public class ExactCallLogger implements Cloneable { for ( final Allele allele : result.getAllelesUsedInGenotyping() ) { if ( allele.isNonReference() ) { printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele)); - printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele)); + printCallElement(vc, "pRefByAllele", allele, result.getLog10PosteriorOfAFEq0ForAllele(allele)); } } @@ -123,7 +123,7 @@ public class ExactCallLogger implements Cloneable { final double[] posteriors = new double[2]; final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true); final List mle = new ArrayList(); - final Map log10pNonRefByAllele = new HashMap(); + final Map log10pRefByAllele = new HashMap(); long runtimeNano = -1; GenomeLoc currentLoc = null; @@ -148,7 +148,7 @@ public class ExactCallLogger implements Cloneable { builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{})); - final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); + final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele); calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; @@ -165,9 +165,9 @@ public class ExactCallLogger implements Cloneable { posteriors[1] = Double.valueOf(value); } else if (variable.equals("MLE")) { mle.add(Integer.valueOf(value)); - } else if (variable.equals("pNonRefByAllele")) { + } else if (variable.equals("pRefByAllele")) { final Allele a = Allele.create(key); - log10pNonRefByAllele.put(a, Double.valueOf(value)); + log10pRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index d0b801a20..937ef2ffc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -125,8 +125,8 @@ import java.util.*; */ final List supporting; - private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map log10pNonRefByAllele, List supporting) { - super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele); + private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map log10pRefByAllele, List supporting) { + super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele); this.supporting = supporting; } } @@ -323,7 +323,7 @@ import java.util.*; final int nAltAlleles = sortedResultsWithThetaNPriors.size(); final int[] alleleCountsOfMLE = new int[nAltAlleles]; final double[] log10PriorsOfAC = new double[2]; - final Map log10pNonRefByAllele = new HashMap(nAltAlleles); + final Map log10pRefByAllele = new HashMap(nAltAlleles); // the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs double log10PosteriorOfACEq0Sum = 0.0; @@ -348,7 +348,7 @@ import java.util.*; log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0(); // bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior - log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0()); + log10pRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0()); // trivial -- update the number of evaluations nEvaluations += sortedResultWithThetaNPriors.nEvaluations; @@ -384,6 +384,6 @@ import java.util.*; MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), // priors incorporate multiple alt alleles, must be normalized MathUtils.normalizeFromLog10(log10PriorsOfAC, true), - log10pNonRefByAllele, sortedResultsWithThetaNPriors); + log10pRefByAllele, sortedResultsWithThetaNPriors); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java index fc26111e0..67cc79646 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java @@ -30,13 +30,13 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { final double[] log10Priors = new double[]{log10AlleleFrequencyPriors[0], MathUtils.log10sumLog10(log10AlleleFrequencyPriors, 1)}; final double[] log10Posteriors = MathUtils.vectorSum(log10Likelihoods, log10Priors); - final double log10PNonRef = log10Posteriors[1] > log10Posteriors[0] ? 0.0 : MathUtils.LOG10_P_OF_ZERO; - final Map log10pNonRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PNonRef); + final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0; + final Map log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef); return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(), MathUtils.normalizeFromLog10(log10Likelihoods, true), MathUtils.normalizeFromLog10(log10Priors, true), - log10pNonRefByAllele); + log10pRefByAllele); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java index b82ec1d29..ad6361a3f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java @@ -165,14 +165,14 @@ final class StateTracker { final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true); final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true); - final Map log10pNonRefByAllele = new HashMap(allelesUsedInGenotyping.size()); + final Map log10pRefByAllele = new HashMap(allelesUsedInGenotyping.size()); for ( int i = 0; i < subACOfMLE.length; i++ ) { final Allele allele = allelesUsedInGenotyping.get(i+1); - final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was - log10pNonRefByAllele.put(allele, log10PNonRef); + final double log10PRef = alleleCountsOfMAP[i] > 0 ? -10000 : 0; // TODO -- a total hack but in effect what the old behavior was + log10pRefByAllele.put(allele, log10PRef); } - return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele); + return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele); } // -------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 7b797432d..848aaf8a3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -287,6 +287,9 @@ public class PairHMMIndelErrorModel { if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) { startLocationInRefForHaplotypes = ref.getWindow().getStart(); // read starts before haplotype: read will have to be cut numStartSoftClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes; } + else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) { + startLocationInRefForHaplotypes = ref.getWindow().getStop(); // read starts after haplotype: read will have to be clipped completely; + } if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) { stopLocationInRefForHaplotypes = ref.getWindow().getStop(); // check also if end of read will go beyond reference context @@ -338,6 +341,8 @@ public class PairHMMIndelErrorModel { if (startLocationInRefForHaplotypes < haplotype.getStartPosition()) startLocationInRefForHaplotypes = haplotype.getStartPosition(); + else if (startLocationInRefForHaplotypes > haplotype.getStopPosition()) + startLocationInRefForHaplotypes = haplotype.getStopPosition(); final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition(); final long indStop = stopLocationInRefForHaplotypes - haplotype.getStartPosition(); @@ -347,8 +352,6 @@ public class PairHMMIndelErrorModel { System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n", indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString()); - - final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), (int)indStart, (int)indStop); diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index a65b0cb45..69907d485 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -109,12 +109,16 @@ public class TraverseActiveRegionsTest extends BaseTest { dictionary = reference.getSequenceDictionary(); genomeLocParser = new GenomeLocParser(dictionary); + // TODO: test shard boundaries + intervals = new ArrayList(); intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20)); intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999)); intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999)); intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999)); intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000)); + intervals.add(genomeLocParser.createGenomeLoc("1", 249250600, 249250621)); + intervals.add(genomeLocParser.createGenomeLoc("2", 1, 100)); intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100)); intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList(); @@ -126,6 +130,7 @@ public class TraverseActiveRegionsTest extends BaseTest { reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008)); reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990)); reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000)); + reads.add(buildSAMRecord("end_of_chr1", "1", 249250600, 249250700)); reads.add(buildSAMRecord("simple20", "20", 10025, 10075)); // required by LocusIteratorByState, and I prefer to list them in test case order above @@ -139,8 +144,6 @@ public class TraverseActiveRegionsTest extends BaseTest { List activeIntervals = getIsActiveIntervals(walker, intervals); // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call verifyEqualIntervals(intervals, activeIntervals); - - // TODO: more tests and edge cases } private List getIsActiveIntervals(DummyActiveRegionWalker walker, List intervals) { @@ -171,8 +174,6 @@ public class TraverseActiveRegionsTest extends BaseTest { Collection activeRegions = getActiveRegions(walker, intervals).values(); verifyActiveRegionCoverage(intervals, activeRegions); - - // TODO: more tests and edge cases } private void verifyActiveRegionCoverage(List intervals, Collection activeRegions) { @@ -231,6 +232,7 @@ public class TraverseActiveRegionsTest extends BaseTest { // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none + // end_of_chr1: Primary in 1:249250600-249250621 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); @@ -245,6 +247,7 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "boundary_unequal"); verifyReadNotPlaced(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); @@ -256,6 +259,7 @@ public class TraverseActiveRegionsTest extends BaseTest { getRead(region, "boundary_unequal"); getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); @@ -267,6 +271,19 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "boundary_unequal"); verifyReadNotPlaced(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + getRead(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); @@ -278,9 +295,8 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "boundary_unequal"); verifyReadNotPlaced(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); getRead(region, "simple20"); - - // TODO: more tests and edge cases } @Test @@ -300,6 +316,7 @@ public class TraverseActiveRegionsTest extends BaseTest { // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none + // end_of_chr1: Primary in 1:249250600-249250621 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); @@ -314,6 +331,7 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "boundary_unequal"); getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); @@ -325,6 +343,7 @@ public class TraverseActiveRegionsTest extends BaseTest { getRead(region, "boundary_unequal"); getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); @@ -336,6 +355,19 @@ public class TraverseActiveRegionsTest extends BaseTest { getRead(region, "boundary_unequal"); verifyReadNotPlaced(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + getRead(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); @@ -347,9 +379,8 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "boundary_unequal"); verifyReadNotPlaced(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); getRead(region, "simple20"); - - // TODO: more tests and edge cases } @Test @@ -370,6 +401,7 @@ public class TraverseActiveRegionsTest extends BaseTest { // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none + // end_of_chr1: Primary in 1:249250600-249250621 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); @@ -384,6 +416,7 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "boundary_unequal"); getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); @@ -395,6 +428,7 @@ public class TraverseActiveRegionsTest extends BaseTest { getRead(region, "boundary_unequal"); getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); @@ -406,6 +440,19 @@ public class TraverseActiveRegionsTest extends BaseTest { getRead(region, "boundary_unequal"); getRead(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); + verifyReadNotPlaced(region, "simple20"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + getRead(region, "end_of_chr1"); verifyReadNotPlaced(region, "simple20"); region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); @@ -417,9 +464,13 @@ public class TraverseActiveRegionsTest extends BaseTest { verifyReadNotPlaced(region, "boundary_unequal"); verifyReadNotPlaced(region, "extended_and_np"); verifyReadNotPlaced(region, "outside_intervals"); + verifyReadNotPlaced(region, "end_of_chr1"); getRead(region, "simple20"); + } - // TODO: more tests and edge cases + @Test + public void testUnmappedReads() { + // TODO } private void verifyReadNotPlaced(ActiveRegion region, String readName) {