Merge pull request #580 from broadinstitute/vrr_graphbase_infinite_likelihoods_reprise
Fixed bug using GraphBased due to infinite likelihoods resulting from th...
This commit is contained in:
commit
45c192bb6d
|
|
@ -26,3 +26,4 @@ out/
|
|||
/atlassian-ide-plugin.xml
|
||||
maven-metadata-local.xml
|
||||
dependency-reduced-pom.xml
|
||||
public/external-example/target/
|
||||
|
|
|
|||
|
|
@ -412,28 +412,28 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
|
|||
for (final GATKSAMRecord read : mayNeedAdjustment) {
|
||||
final Map<Allele, Double> existingLikelihoods = map.get(read);
|
||||
if (existingLikelihoods != null) {
|
||||
Allele bestAllele = null;
|
||||
double worstRelativeLikelihood = 0;
|
||||
double bestRelativeLikelihood = Double.NEGATIVE_INFINITY;
|
||||
for (final Map.Entry<Allele, Double> entry : map.get(read).entrySet()) {
|
||||
final double candidateRelativeLikelihood = entry.getValue();
|
||||
if (candidateRelativeLikelihood > bestRelativeLikelihood) {
|
||||
bestAllele = entry.getKey();
|
||||
bestRelativeLikelihood = candidateRelativeLikelihood;
|
||||
}
|
||||
if (!Double.isInfinite(candidateRelativeLikelihood) && worstRelativeLikelihood > candidateRelativeLikelihood)
|
||||
worstRelativeLikelihood = candidateRelativeLikelihood;
|
||||
}
|
||||
|
||||
worstRelativeLikelihood += UNREPORTED_HAPLOTYPE_LIKELIHOOD_PENALTY;
|
||||
if (bestAllele == null)
|
||||
throw new IllegalStateException("No best allele for read " + read.getReadName());
|
||||
if (Double.isInfinite(bestRelativeLikelihood)) {
|
||||
bestRelativeLikelihood = 0;
|
||||
worstRelativeLikelihood = 0;
|
||||
} else {
|
||||
worstRelativeLikelihood += UNREPORTED_HAPLOTYPE_LIKELIHOOD_PENALTY;
|
||||
}
|
||||
final double bestLikelihood = 0.0; // the best becomes zero.
|
||||
maxAlternative.put(read, bestLikelihood);
|
||||
for (final Map.Entry<Haplotype, Allele> entry : alleleVersions.entrySet()) {
|
||||
final Allele a = entry.getValue();
|
||||
final Double relativeLikelihoodO = existingLikelihoods.get(a);
|
||||
final double relativeLikelihood = relativeLikelihoodO == null ? worstRelativeLikelihood : relativeLikelihoodO;
|
||||
final double relativeLikelihood = relativeLikelihoodO == null || Double.isInfinite(relativeLikelihoodO) ? worstRelativeLikelihood : relativeLikelihoodO;
|
||||
final double likelihood = relativeLikelihood - bestRelativeLikelihood + bestLikelihood;
|
||||
if (likelihood > 0)
|
||||
throw new IllegalStateException("Likelihood larger than 1 with read " + read.getReadName());
|
||||
|
|
@ -628,7 +628,7 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
|
|||
|
||||
// Complete the read segment cost with the corresponding path bases
|
||||
for (final Route<MultiDeBruijnVertex, MultiSampleEdge> p : acrossBlockPaths) {
|
||||
final ReadSegmentCost readSegmentCost = new ReadSegmentCost(read, p, Double.NaN);
|
||||
final ReadSegmentCost readSegmentCost = new ReadSegmentCost(read, p, 0);
|
||||
pathBases[nextPath++] = readSegmentCost.bases = eventBlockPathBases(p, includePathEnds);
|
||||
pathSizes.add(readSegmentCost.bases.length);
|
||||
readSegmentCosts.add(readSegmentCost);
|
||||
|
|
@ -652,8 +652,6 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
|
|||
readSegmentCost.path = new Route<>(readSegmentCost.path,appendVertex);
|
||||
addReadSegmentCost(destination,readSegmentCost);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1151,7 +1151,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
for( final GATKSAMRecord read : reads ) {
|
||||
returnMap.get(read.getReadGroup().getSample()).add(read);
|
||||
}
|
||||
|
||||
return returnMap;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
|
@ -53,6 +54,10 @@ import java.util.Comparator;
|
|||
* A pair read-likelihood (cost).
|
||||
*/
|
||||
public class ReadCost {
|
||||
|
||||
/**
|
||||
* Reference to the read record this cost makes reference to.
|
||||
*/
|
||||
public final GATKSAMRecord read;
|
||||
|
||||
/**
|
||||
|
|
@ -71,13 +76,12 @@ public class ReadCost {
|
|||
*/
|
||||
public ReadCost(final GATKSAMRecord r, final double initialCost) {
|
||||
if (r == null) throw new NullPointerException();
|
||||
if (Double.isNaN(initialCost) || Double.isInfinite(initialCost) || initialCost > 0)
|
||||
if (!MathUtils.goodLog10Probability(initialCost))
|
||||
throw new IllegalArgumentException("initial cost must be a finite 0 or negative value (" + initialCost + ")");
|
||||
read = r;
|
||||
cost = initialCost;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Comparator used to sort ReadCosts
|
||||
*/
|
||||
|
|
@ -90,15 +94,15 @@ public class ReadCost {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Add to the cost.
|
||||
* @param value value to add.
|
||||
*/
|
||||
public void addCost(final double value) {
|
||||
if (cost + value > 0)
|
||||
throw new IllegalArgumentException("value brings cost over 0. Current cost " + cost + " value " + value);
|
||||
final double previousCost = cost;
|
||||
cost += value;
|
||||
if (!MathUtils.goodLog10Probability(cost))
|
||||
throw new IllegalArgumentException("invalid log10 likelihood value (" + cost + ") after adding (" + value + ") to (" + previousCost + ")");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -108,5 +112,4 @@ public class ReadCost {
|
|||
public double getCost() {
|
||||
return cost;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,10 +45,10 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.MultiSampleEdge;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.Route;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.MultiDeBruijnVertex;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
|
@ -63,7 +63,17 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
*/
|
||||
class ReadSegmentCost {
|
||||
|
||||
/**
|
||||
* Reference to the path the cost makes reference to. Public and writable for convenience, notice that this is not
|
||||
* a public class.
|
||||
*/
|
||||
public Route<MultiDeBruijnVertex, MultiSampleEdge> path;
|
||||
|
||||
|
||||
/**
|
||||
* Reference to the read the cost makes reference to. Public and writable for convenience, notice that this is not
|
||||
* a public class.
|
||||
*/
|
||||
public GATKSAMRecord read;
|
||||
|
||||
/**
|
||||
|
|
@ -78,23 +88,38 @@ class ReadSegmentCost {
|
|||
|
||||
/**
|
||||
* Construct a new path cost.
|
||||
*
|
||||
* @param read the corresponding read.
|
||||
* @param path the corresponding path.
|
||||
* @param cost initial cost estimate. Might be updated later.
|
||||
*/
|
||||
@Requires("route != null")
|
||||
public ReadSegmentCost(final GATKSAMRecord read,
|
||||
final Route<MultiDeBruijnVertex, MultiSampleEdge> path, double cost) {
|
||||
final Route<MultiDeBruijnVertex, MultiSampleEdge> path, final double cost) {
|
||||
if (read == null)
|
||||
throw new IllegalArgumentException("the read provided cannot be null");
|
||||
if (path == null)
|
||||
throw new IllegalArgumentException("the path provided cannot be null");
|
||||
this.read = read;
|
||||
this.path = path;
|
||||
setCost(cost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cost of a read vs a haplotype.
|
||||
*
|
||||
* @return a valid log10 likelihood
|
||||
*/
|
||||
public double getCost() {
|
||||
return cost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes the cost of the current path vs read combination.
|
||||
* @param value
|
||||
*/
|
||||
public void setCost(final double value) {
|
||||
if (!MathUtils.goodLog10Probability(value))
|
||||
throw new IllegalArgumentException("infinite cost are not allowed");
|
||||
cost = value;
|
||||
}
|
||||
|
||||
|
|
@ -110,7 +135,7 @@ class ReadSegmentCost {
|
|||
|
||||
/**
|
||||
* Returns the this path-cost unique identifier.
|
||||
* @return
|
||||
* @return never {@code unique}.
|
||||
*/
|
||||
public long uniqueId() {
|
||||
if (uniqueId == null)
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*;
|
|||
*/
|
||||
public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM {
|
||||
|
||||
|
||||
/**
|
||||
* Initial read length capacity.
|
||||
*/
|
||||
|
|
@ -89,6 +88,10 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM {
|
|||
private int maxToCol;
|
||||
private int haplotypeLength;
|
||||
|
||||
private double[][] logTransition;
|
||||
|
||||
private boolean indelToIndelIsConstant;
|
||||
|
||||
/**
|
||||
* Returns the currently loaded read base qualities.
|
||||
*
|
||||
|
|
@ -140,7 +143,6 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM {
|
|||
return readGepQuals;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new pair-hmm calculator instance give the gap continuation penalty.
|
||||
*
|
||||
|
|
@ -374,8 +376,7 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM {
|
|||
*/
|
||||
public void loadRead(final byte[] readBases, final byte[] readQuals, final byte[] readInsQuals, final byte[] readDelQuals, int mq) {
|
||||
// TODO This is a copy&paste from PairHMM*Engine read data preparation code.
|
||||
// TODO It is simply to difficult to share the code without changing that class and I don't want
|
||||
// TODO to do so for now.
|
||||
// TODO It is simply to difficult to share the code without changing that class
|
||||
if (readBases.length != readQuals.length) throw new IllegalArgumentException("the read quality array length does not match the read base array length");
|
||||
if (readBases.length != readInsQuals.length) throw new IllegalArgumentException("the read insert quality array length does not match the read base array length");
|
||||
if (readBases.length != readDelQuals.length) throw new IllegalArgumentException("the read deletion quality length does not match the read base array length");
|
||||
|
|
@ -402,12 +403,28 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM {
|
|||
this.readInsQuals = readInsQuals;
|
||||
this.readDelQuals = readDelQuals;
|
||||
this.readGepQuals = overallGCP;
|
||||
|
||||
initializeProbabilities(transition,readInsQuals, readDelQuals, overallGCP);
|
||||
PairHMMModel.qualToTransProbsLog10(logTransition,readInsQuals,readDelQuals,overallGCP);
|
||||
|
||||
indelToIndelIsConstant = true;
|
||||
for (final double d : overallGCP)
|
||||
if (d != overallGCP[0]) {
|
||||
indelToIndelIsConstant = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (haplotypeBases != null)
|
||||
fillPriorsTable(0);
|
||||
cachedResults.clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize( final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
super.initialize(readMaxLength,haplotypeMaxLength);
|
||||
logTransition = PairHMMModel.createTransitionMatrix(readMaxLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void loadHaplotypeBases(final byte[] haplotypeBases) {
|
||||
if (readBases == null)
|
||||
|
|
@ -587,30 +604,27 @@ public class FastLoglessPairHMM extends LoglessPairHMM implements FlexibleHMM {
|
|||
* Fast likelihood when the pair-hmm represents a deletion in the read.
|
||||
*/
|
||||
private double calculateLocalLikelihoodDeletion(final int readStart, final int hapStart, final int hapEnd) {
|
||||
double result = INITIAL_CONDITION;
|
||||
if (readStart > 0) { // no penalty if at the beginning.
|
||||
result *= transition[readStart][matchToDeletion];
|
||||
result *=
|
||||
StrictMath.pow(transition[readStart][deletionToDeletion],hapEnd - hapStart - 1);
|
||||
result *= transition[readStart][indelToMatch];
|
||||
}
|
||||
return StrictMath.log10(result) - INITIAL_CONDITION_LOG10;
|
||||
if (readStart == 0 || readStart >= readBases.length) // Deletion at the beginning or end not have a cost.
|
||||
return 0;
|
||||
return logTransition[readStart + 1][matchToDeletion]
|
||||
+ logTransition[readStart + 1][deletionToDeletion] * (hapEnd - hapStart - 1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fast likelihood when the pair-hmm represents a insertion in the read.
|
||||
*/
|
||||
private double calculateLocalLikelihoodInsertion(final int readStart, final int readEnd) {
|
||||
double result = INITIAL_CONDITION;
|
||||
result *= transition[readStart + 1][matchToInsertion];
|
||||
for (int i = readStart + 1; i < readEnd; i++) {
|
||||
result *= transition[i + 1][insertionToInsertion];
|
||||
}
|
||||
if (readEnd < readBases.length) {
|
||||
result *= transition[readEnd + 1][indelToMatch];
|
||||
}
|
||||
return StrictMath.log10(result) - INITIAL_CONDITION_LOG10;
|
||||
double result = logTransition[readStart + 1][matchToInsertion];
|
||||
|
||||
if (indelToIndelIsConstant)
|
||||
result += logTransition[readStart + 1][insertionToInsertion] * (readEnd - readStart - 1);
|
||||
else
|
||||
for (int i = readStart + 1; i < readEnd; i++)
|
||||
result += logTransition[i + 1][insertionToInsertion];
|
||||
|
||||
if (readEnd < readBases.length)
|
||||
result += logTransition[readEnd + 1][indelToMatch];
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -99,12 +99,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedSingleSample() {
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "83fe0694621bc1e0240f6f79eb6d6999");
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "f87bb723bfb9b8f604db1d53624d24e3");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "d45b2b26434dd3bd48df5a43b3d2954a");
|
||||
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "891295d8795c0ac894a2aec873e2a1d4");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
|
|
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("6f6213bfc62e9cd9db56a7277ebe04e0"));
|
||||
Arrays.asList("d2bb1a904ad419c565b00c3ac98048d1"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ abstract class N2MemoryPairHMM extends PairHMM {
|
|||
* @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM
|
||||
* @param readMaxLength the max length of reads we want to use with this PairHMM
|
||||
*/
|
||||
@Override
|
||||
public void initialize( final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
super.initialize(readMaxLength, haplotypeMaxLength);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue