Merge pull request #134 from broadinstitute/mc_phmm_experiments
PairHMM rework
This commit is contained in:
commit
a58a3e7e1e
|
|
@ -80,7 +80,7 @@ public class LikelihoodCalculationEngine {
|
|||
pairHMM = new Log10PairHMM(false);
|
||||
break;
|
||||
case LOGLESS_CACHING:
|
||||
pairHMM = new LoglessCachingPairHMM();
|
||||
pairHMM = new LoglessPairHMM();
|
||||
break;
|
||||
default:
|
||||
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, and LOGLESS_CACHING.");
|
||||
|
|
|
|||
|
|
@ -55,25 +55,14 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
* User: rpoplin, carneiro
|
||||
* Date: 10/16/12
|
||||
*/
|
||||
public class LoglessCachingPairHMM extends PairHMM {
|
||||
public final class LoglessPairHMM extends PairHMM {
|
||||
protected static final double SCALE_FACTOR_LOG10 = 300.0;
|
||||
protected static final double INITIAL_CONDITION = Math.pow(10, SCALE_FACTOR_LOG10);
|
||||
|
||||
double[][] constantMatrix = null; // The cache
|
||||
double[][] distanceMatrix = null; // The cache
|
||||
double[][] transition = null; // The cache
|
||||
double[][] prior = null; // The cache
|
||||
boolean constantsAreInitialized = false;
|
||||
|
||||
/**
|
||||
* Cached data structure that describes the first row's edge condition in the HMM
|
||||
*/
|
||||
protected static final double [] firstRowConstantMatrix = {
|
||||
QualityUtils.qualToProb((byte) (DEFAULT_GOP + DEFAULT_GOP)),
|
||||
QualityUtils.qualToProb(DEFAULT_GCP),
|
||||
QualityUtils.qualToErrorProb(DEFAULT_GOP),
|
||||
QualityUtils.qualToErrorProb(DEFAULT_GCP),
|
||||
1.0,
|
||||
1.0
|
||||
};
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
|
|
@ -81,8 +70,8 @@ public class LoglessCachingPairHMM extends PairHMM {
|
|||
public void initialize( final int haplotypeMaxLength, final int readMaxLength) {
|
||||
super.initialize(haplotypeMaxLength, readMaxLength);
|
||||
|
||||
constantMatrix = new double[X_METRIC_MAX_LENGTH][6];
|
||||
distanceMatrix = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH];
|
||||
transition = new double[paddedMaxReadLength][6];
|
||||
prior = new double[paddedMaxReadLength][paddedMaxHaplotypeLength];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -98,8 +87,8 @@ public class LoglessCachingPairHMM extends PairHMM {
|
|||
final int hapStartIndex,
|
||||
final boolean recacheReadValues ) {
|
||||
if ( ! constantsAreInitialized || recacheReadValues )
|
||||
initializeConstants( haplotypeBases.length, readBases.length, insertionGOP, deletionGOP, overallGCP );
|
||||
initializeDistanceMatrix( haplotypeBases, readBases, readQuals, hapStartIndex );
|
||||
initializeProbabilities(haplotypeBases.length, insertionGOP, deletionGOP, overallGCP);
|
||||
initializePriors(haplotypeBases, readBases, readQuals, hapStartIndex);
|
||||
|
||||
// NOTE NOTE NOTE -- because of caching we need to only operate over X and Y according to this
|
||||
// read and haplotype lengths, not the max lengths
|
||||
|
|
@ -109,14 +98,19 @@ public class LoglessCachingPairHMM extends PairHMM {
|
|||
for (int i = 2; i < readXMetricLength; i++) {
|
||||
// +1 here is because hapStartIndex is 0-based, but our matrices are 1 based
|
||||
for (int j = hapStartIndex+1; j < hapYMetricLength; j++) {
|
||||
updateCell(i, j, distanceMatrix[i][j], constantMatrix[i], matchMetricArray, XMetricArray, YMetricArray);
|
||||
updateCell(i, j, prior[i][j], transition[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// final probability is the log10 sum of the last element in all three state arrays
|
||||
// final probability is the log10 sum of the last element in the Match and Insertion state arrays
|
||||
// this way we ignore all paths that ended in deletions! (huge)
|
||||
// but we have to sum all the paths ending in the M and I matrices, because they're no longer extended.
|
||||
final int endI = readXMetricLength - 1;
|
||||
final int endJ = hapYMetricLength - 1;
|
||||
return Math.log10( matchMetricArray[endI][endJ] + XMetricArray[endI][endJ] + YMetricArray[endI][endJ] ) - SCALE_FACTOR_LOG10;
|
||||
double finalSumProbabilities = 0.0;
|
||||
for (int j = 0; j < hapYMetricLength; j++) {
|
||||
finalSumProbabilities += matchMatrix[endI][j] + insertionMatrix[endI][j];
|
||||
}
|
||||
return Math.log10(finalSumProbabilities) - SCALE_FACTOR_LOG10;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -128,10 +122,7 @@ public class LoglessCachingPairHMM extends PairHMM {
|
|||
* @param readQuals the base quality scores of the read
|
||||
* @param startIndex where to start updating the distanceMatrix (in case this read is similar to the previous read)
|
||||
*/
|
||||
public void initializeDistanceMatrix( final byte[] haplotypeBases,
|
||||
final byte[] readBases,
|
||||
final byte[] readQuals,
|
||||
final int startIndex ) {
|
||||
public void initializePriors(final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals, final int startIndex) {
|
||||
|
||||
// initialize the pBaseReadLog10 matrix for all combinations of read x haplotype bases
|
||||
// Abusing the fact that java initializes arrays with 0.0, so no need to fill in rows and columns below 2.
|
||||
|
|
@ -141,7 +132,7 @@ public class LoglessCachingPairHMM extends PairHMM {
|
|||
final byte qual = readQuals[i];
|
||||
for (int j = startIndex; j < haplotypeBases.length; j++) {
|
||||
final byte y = haplotypeBases[j];
|
||||
distanceMatrix[i+2][j+2] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ?
|
||||
prior[i+2][j+2] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ?
|
||||
QualityUtils.qualToProb(qual) : QualityUtils.qualToErrorProb(qual) );
|
||||
}
|
||||
}
|
||||
|
|
@ -150,46 +141,36 @@ public class LoglessCachingPairHMM extends PairHMM {
|
|||
/**
|
||||
* Initializes the matrix that holds all the constants related to quality scores.
|
||||
*
|
||||
* @param haplotypeSize the number of bases in the haplotype we are testing
|
||||
* @param readSize the number of bases in the read we are testing
|
||||
* @param insertionGOP insertion quality scores of the read
|
||||
* @param deletionGOP deletion quality scores of the read
|
||||
* @param overallGCP overall gap continuation penalty
|
||||
*/
|
||||
@Requires({
|
||||
"haplotypeSize > 0",
|
||||
"readSize > 0",
|
||||
"insertionGOP != null && insertionGOP.length == readSize",
|
||||
"deletionGOP != null && deletionGOP.length == readSize",
|
||||
"overallGCP != null && overallGCP.length == readSize"
|
||||
"insertionGOP != null",
|
||||
"deletionGOP != null",
|
||||
"overallGCP != null"
|
||||
})
|
||||
@Ensures("constantsAreInitialized")
|
||||
private void initializeConstants( final int haplotypeSize,
|
||||
final int readSize,
|
||||
final byte[] insertionGOP,
|
||||
final byte[] deletionGOP,
|
||||
final byte[] overallGCP ) {
|
||||
private void initializeProbabilities(final int haplotypeLength, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) {
|
||||
// the initial condition -- must be here because it needs that actual read and haplotypes, not the maximum in init
|
||||
matchMetricArray[1][1] = Math.pow(10.0, SCALE_FACTOR_LOG10) / getNPotentialXStarts(haplotypeSize, readSize);
|
||||
final double initialValue = INITIAL_CONDITION / haplotypeLength;
|
||||
matchMatrix[1][1] = initialValue;
|
||||
|
||||
// fill in the first row
|
||||
for( int jjj = 2; jjj < Y_METRIC_MAX_LENGTH; jjj++ ) {
|
||||
updateCell(1, jjj, 1.0, firstRowConstantMatrix, matchMetricArray, XMetricArray, YMetricArray);
|
||||
for( int jjj = 2; jjj < paddedMaxHaplotypeLength; jjj++ ) {
|
||||
deletionMatrix[1][jjj] = initialValue;
|
||||
}
|
||||
|
||||
final int l = insertionGOP.length;
|
||||
constantMatrix[1] = firstRowConstantMatrix;
|
||||
for (int i = 0; i < l; i++) {
|
||||
final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE);
|
||||
constantMatrix[i+2][0] = QualityUtils.qualToProb((byte) qualIndexGOP);
|
||||
constantMatrix[i+2][1] = QualityUtils.qualToProb(overallGCP[i]);
|
||||
constantMatrix[i+2][2] = QualityUtils.qualToErrorProb(insertionGOP[i]);
|
||||
constantMatrix[i+2][3] = QualityUtils.qualToErrorProb(overallGCP[i]);
|
||||
constantMatrix[i+2][4] = QualityUtils.qualToErrorProb(deletionGOP[i]);
|
||||
constantMatrix[i+2][5] = QualityUtils.qualToErrorProb(overallGCP[i]);
|
||||
transition[i+2][0] = QualityUtils.qualToProb((byte) qualIndexGOP);
|
||||
transition[i+2][1] = QualityUtils.qualToProb(overallGCP[i]);
|
||||
transition[i+2][2] = QualityUtils.qualToErrorProb(insertionGOP[i]);
|
||||
transition[i+2][3] = QualityUtils.qualToErrorProb(overallGCP[i]);
|
||||
transition[i+2][4] = QualityUtils.qualToErrorProb(deletionGOP[i]);
|
||||
transition[i+2][5] = QualityUtils.qualToErrorProb(overallGCP[i]);
|
||||
}
|
||||
constantMatrix[l+1][4] = 1.0;
|
||||
constantMatrix[l+1][5] = 1.0;
|
||||
|
||||
// note that we initialized the constants
|
||||
constantsAreInitialized = true;
|
||||
|
|
@ -204,18 +185,14 @@ public class LoglessCachingPairHMM extends PairHMM {
|
|||
* @param indI row index in the matrices to update
|
||||
* @param indJ column index in the matrices to update
|
||||
* @param prior the likelihood editing distance matrix for the read x haplotype
|
||||
* @param constants an array with the six constants relevant to this location
|
||||
* @param matchMetricArray the matches likelihood matrix
|
||||
* @param XMetricArray the insertions likelihood matrix
|
||||
* @param YMetricArray the deletions likelihood matrix
|
||||
* @param transitition an array with the six transitition relevant to this location
|
||||
*/
|
||||
private void updateCell( final int indI, final int indJ, final double prior, final double[] constants,
|
||||
final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
|
||||
private void updateCell( final int indI, final int indJ, final double prior, final double[] transitition) {
|
||||
|
||||
matchMetricArray[indI][indJ] = prior * ( matchMetricArray[indI - 1][indJ - 1] * constants[0] +
|
||||
XMetricArray[indI - 1][indJ - 1] * constants[1] +
|
||||
YMetricArray[indI - 1][indJ - 1] * constants[1] );
|
||||
XMetricArray[indI][indJ] = matchMetricArray[indI - 1][indJ] * constants[2] + XMetricArray[indI - 1][indJ] * constants[3];
|
||||
YMetricArray[indI][indJ] = matchMetricArray[indI][indJ - 1] * constants[4] + YMetricArray[indI][indJ - 1] * constants[5];
|
||||
matchMatrix[indI][indJ] = prior * ( matchMatrix[indI - 1][indJ - 1] * transitition[0] +
|
||||
insertionMatrix[indI - 1][indJ - 1] * transitition[1] +
|
||||
deletionMatrix[indI - 1][indJ - 1] * transitition[1] );
|
||||
insertionMatrix[indI][indJ] = matchMatrix[indI - 1][indJ] * transitition[2] + insertionMatrix[indI - 1][indJ] * transitition[3];
|
||||
deletionMatrix[indI][indJ] = matchMatrix[indI][indJ - 1] * transitition[4] + deletionMatrix[indI][indJ - 1] * transitition[5];
|
||||
}
|
||||
}
|
||||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.*;
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.LSV_ALLELES;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "5812da66811887d834d0379a33e655c0");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "faadc0b77a91a716dbb1191fd579d025");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.*;
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.CEUTRIO_BAM;
|
||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperGeneralPloidyTestExecutor.NA12891_CALLS;
|
||||
|
||||
public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTest {
|
||||
|
||||
|
|
@ -57,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","3a321896c4b8b6457973c76c486da4d4");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","fe715b715526a7c1ebd575ff66bba716");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("1cb469b9cc8e6c70430021540bf1af8b"));
|
||||
Arrays.asList("51e022d07ead45a4e154f949b6642e84"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -87,7 +87,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("c7e59f9ab718df4c604626a0f51af606"));
|
||||
Arrays.asList("1d9c6fda344eeee76cbe4221251dc341"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -100,7 +100,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("b6ad80cef63cab4f75fa4b1fb2517d1d"));
|
||||
Arrays.asList("2ec7262f0a3d04534ce1fe15cc79f52e"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -110,7 +110,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("86880ec78755ae91cb5bb34a0631a32c"));
|
||||
Arrays.asList("3131cd7c49b623983a106db5228754b3"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("2584d5e3ade1b548f1fe9cdcafbe1b28"));
|
||||
Arrays.asList("273f5daa936e93da98efd6ceb37d7533"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
|
||||
Arrays.asList("939da0bb73b706badd8a0def7446b384"));
|
||||
Arrays.asList("00a003a0908281384e981294434a9f3e"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("556c214366e82e4682e753ce93307a4e"));
|
||||
Arrays.asList("87521a1bde124c7c5908ed067060fe45"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -183,7 +183,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("1df02b805d9dfbd532fa3632875a989d"));
|
||||
Arrays.asList("8a880b8b1662e31e0b5c65733eac6b74"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("2f15ef1ead56d875a3f1d53772f52b3a"));
|
||||
Arrays.asList("3f8ee598c9b85aa1d2b85746ad46c1af"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("39ec0b48cd51d797af7ed09cb9ba607e"));
|
||||
Arrays.asList("31c0f0074b3306b54170056e93b69e11"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("eb9604b77a7d6baab60c81ac3db5e47b"));
|
||||
Arrays.asList("753d6358b1634107de76900200116805"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("6b77b8f1002ec577bf0482fbe03222a4"));
|
||||
Arrays.asList("274eadae8a630a3fda9281d6d6253dea"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "9a702e7a85465f6c42d6c1828aee6c38");
|
||||
testReducedCalling("INDEL", "c5939a7f5f85ea2fe994ce912732e180");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -49,10 +49,11 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCallerIntegrationTest.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCallerIntegrationTest.NA12878_CHR20_BAM;
|
||||
import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCallerIntegrationTest.REF;
|
||||
|
||||
public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends WalkerTest {
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -63,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "f9fa4d3c88fd9c0f23c7a3ddd3d24a8c");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a898b551f78c71befee4d12070d3a788");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -87,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"e8ffbfae3c1af5be02631a31f386a431");
|
||||
"8a110549543412fa682419e9a8f0dd1d");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"c3a98b19efa7cb36fe5f5f2ab893ef56");
|
||||
"5429c234d471434adc09d9e60b87de24");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,12 +69,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "45856ad67bfe8d8bea45808d8258bcf1");
|
||||
HCTest(CEUTRIO_BAM, "", "008958c211a8a439a7213a96f3dd7f6c");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "b6c93325f851ac358ea49260fb11b75c");
|
||||
HCTest(NA12878_BAM, "", "3b60c6133eeadfea028dffea93b88478");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
|
|
@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"4ca6b560d0569cdca400d3e50915e211");
|
||||
"70bd5d0805bf6f51e5f61b377526c979");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
|
|
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "5d06ec5502d3f157964bd7b275d6a0cb");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "4141b4c24a136a3fe4c0b0a4c231cdfa");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -111,14 +111,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("53a50dae68f0175ca3088dea1d3bb881"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("35a8edeca7518835d67a10de21493eca"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("d3bc6adde8cd9514ae5c49cd366d5de4"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c81d7e69dd4116890f06a71b19870300"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -140,7 +140,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("4adb833ed8af20224b76bba61e2b0d93"));
|
||||
Arrays.asList("f0a215faed194dc160f19e26293e85f8"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -148,7 +148,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||
Arrays.asList("1704b0901c86f8f597d931222d5c8dd8"));
|
||||
Arrays.asList("bea274584344fa6b4b0f98eee327bad8"));
|
||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest {
|
|||
for ( final int nct : Arrays.asList(1, 2) ) {
|
||||
// tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct });
|
||||
//// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct });
|
||||
tests.add(new Object[]{ "BOTH", "85fc5d6dfeb60ed89763470f4b4c981e", nt, nct });
|
||||
tests.add(new Object[]{ "BOTH", "9a1202d849653f0480932f450ec507b4", nt, nct });
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
final static boolean EXTENSIVE_TESTING = true;
|
||||
final PairHMM exactHMM = new Log10PairHMM(true); // the log truth implementation
|
||||
final PairHMM originalHMM = new Log10PairHMM(false); // the reference implementation
|
||||
final PairHMM loglessHMM = new LoglessCachingPairHMM();
|
||||
final PairHMM loglessHMM = new LoglessPairHMM();
|
||||
|
||||
private List<PairHMM> getHMMs() {
|
||||
return Arrays.asList(exactHMM, originalHMM, loglessHMM);
|
||||
|
|
@ -116,13 +116,12 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
return String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual);
|
||||
}
|
||||
|
||||
public double expectedLogL(final PairHMM hmm) {
|
||||
return (expectedQual / -10.0) + 0.03 +
|
||||
hmm.getNPotentialXStartsLikelihoodPenaltyLog10(refBasesWithContext.length, readBasesWithContext.length);
|
||||
public double expectedLogL() {
|
||||
return (expectedQual / -10.0) + 0.03 + Math.log10(1.0/refBasesWithContext.length);
|
||||
}
|
||||
|
||||
public double getTolerance(final PairHMM hmm) {
|
||||
if ( hmm instanceof LoglessCachingPairHMM )
|
||||
if ( hmm instanceof LoglessPairHMM)
|
||||
return toleranceFromExact();
|
||||
if ( hmm instanceof Log10PairHMM ) {
|
||||
return ((Log10PairHMM)hmm).isDoingExactLog10Calculations() ? toleranceFromExact() : toleranceFromReference();
|
||||
|
|
@ -150,7 +149,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
qualAsBytes(gcp, false, anchorIndel), 0, true);
|
||||
}
|
||||
|
||||
private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
|
||||
private byte[] asBytes(final String bases, final boolean left, final boolean right) {
|
||||
return ( (left ? LEFT_FLANK : "") + CONTEXT + bases + CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
|
||||
}
|
||||
|
||||
|
|
@ -163,7 +162,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
|
||||
// update just the bases corresponding to the provided micro read with the quality scores
|
||||
if( doGOP ) {
|
||||
phredQuals[0 + CONTEXT.length()] = (byte)phredQual;
|
||||
phredQuals[CONTEXT.length()] = (byte)phredQual;
|
||||
} else {
|
||||
for ( int i = 0; i < read.length(); i++)
|
||||
phredQuals[i + CONTEXT.length()] = (byte)phredQual;
|
||||
|
|
@ -270,7 +269,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
final double exactLogL = cfg.calcLogL( exactHMM, true );
|
||||
for ( final PairHMM hmm : getHMMs() ) {
|
||||
double actualLogL = cfg.calcLogL( hmm, true );
|
||||
double expectedLogL = cfg.expectedLogL(hmm);
|
||||
double expectedLogL = cfg.expectedLogL();
|
||||
|
||||
// compare to our theoretical expectation with appropriate tolerance
|
||||
Assert.assertEquals(actualLogL, expectedLogL, cfg.toleranceFromTheoretical(), "Failed with hmm " + hmm);
|
||||
|
|
@ -322,8 +321,8 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
|
||||
System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
|
||||
|
||||
// - log10 is because of number of start positions
|
||||
Assert.assertEquals(res1, -2.0 - Math.log10(originalHMM.getNPotentialXStarts(haplotype1.length, mread.length)), 1e-2);
|
||||
final double expected = Math.log10(1.0/haplotype1.length * Math.pow(QualityUtils.qualToProb(90), mread.length-1) * QualityUtils.qualToErrorProb(20));
|
||||
Assert.assertEquals(res1, expected, 1e-2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -354,8 +353,8 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
|
||||
System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
|
||||
|
||||
// - log10 is because of number of start positions
|
||||
Assert.assertEquals(res1, -2.0 - Math.log10(originalHMM.getNPotentialXStarts(haplotype1.length, mread.length)), 1e-2);
|
||||
final double expected = Math.log10(1.0/haplotype1.length * Math.pow(QualityUtils.qualToProb(90), mread.length-1) * QualityUtils.qualToErrorProb(20));
|
||||
Assert.assertEquals(res1, expected, 1e-2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -406,8 +405,14 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
Utils.dupBytes(insQual, readBases.length),
|
||||
Utils.dupBytes(delQual, readBases.length),
|
||||
Utils.dupBytes(gcp, readBases.length), 0, true);
|
||||
final double expected = Math.log10(Math.pow(1.0 - QualityUtils.qualToErrorProb(baseQual), readBases.length));
|
||||
Assert.assertEquals(d, expected, 1e-3, "Likelihoods should sum to just the error prob of the read");
|
||||
double expected = 0;
|
||||
final double initialCondition = ((double) Math.abs(refBases.length-readBases.length+1))/refBases.length;
|
||||
if (readBases.length < refBases.length) {
|
||||
expected = Math.log10(initialCondition * Math.pow(QualityUtils.qualToProb(baseQual), readBases.length));
|
||||
} else if (readBases.length > refBases.length) {
|
||||
expected = Math.log10(initialCondition * Math.pow(QualityUtils.qualToProb(baseQual), refBases.length) * Math.pow(QualityUtils.qualToErrorProb(insQual), readBases.length - refBases.length));
|
||||
}
|
||||
Assert.assertEquals(d, expected, 1e-3, "Likelihoods should sum to just the error prob of the read " + String.format("readSize=%d refSize=%d", readSize, refSize));
|
||||
}
|
||||
|
||||
@DataProvider(name = "HMMProviderWithBigReads")
|
||||
|
|
@ -472,7 +477,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
Utils.dupBytes(insQual, readBases.length),
|
||||
Utils.dupBytes(delQual, readBases.length),
|
||||
Utils.dupBytes(gcp, readBases.length), 0, true);
|
||||
loglessHMM.dumpMatrices();
|
||||
// loglessHMM.dumpMatrices();
|
||||
}
|
||||
|
||||
@DataProvider(name = "JustHMMProvider")
|
||||
|
|
@ -610,7 +615,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
public Object[][] makeUninitializedHMMs() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{new LoglessCachingPairHMM()});
|
||||
tests.add(new Object[]{new LoglessPairHMM()});
|
||||
tests.add(new Object[]{new Log10PairHMM(true)});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
|
|
@ -67,10 +67,10 @@ public class Log10PairHMM extends PairHMM {
|
|||
public void initialize( final int haplotypeMaxLength, final int readMaxLength) {
|
||||
super.initialize(haplotypeMaxLength, readMaxLength);
|
||||
|
||||
for( int iii=0; iii < X_METRIC_MAX_LENGTH; iii++ ) {
|
||||
Arrays.fill(matchMetricArray[iii], Double.NEGATIVE_INFINITY);
|
||||
Arrays.fill(XMetricArray[iii], Double.NEGATIVE_INFINITY);
|
||||
Arrays.fill(YMetricArray[iii], Double.NEGATIVE_INFINITY);
|
||||
for( int iii=0; iii < paddedMaxReadLength; iii++ ) {
|
||||
Arrays.fill(matchMatrix[iii], Double.NEGATIVE_INFINITY);
|
||||
Arrays.fill(insertionMatrix[iii], Double.NEGATIVE_INFINITY);
|
||||
Arrays.fill(deletionMatrix[iii], Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -88,7 +88,8 @@ public class Log10PairHMM extends PairHMM {
|
|||
final boolean recacheReadValues ) {
|
||||
// the initial condition -- must be in subComputeReadLikelihoodGivenHaplotypeLog10 because it needs that actual
|
||||
// read and haplotypes, not the maximum
|
||||
matchMetricArray[1][1] = getNPotentialXStartsLikelihoodPenaltyLog10(haplotypeBases.length, readBases.length);
|
||||
final double initialValue = Math.log10((double) 1/haplotypeBases.length);
|
||||
matchMatrix[1][1] = initialValue;
|
||||
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||
final int X_METRIC_LENGTH = readBases.length + 2;
|
||||
|
|
@ -104,14 +105,17 @@ public class Log10PairHMM extends PairHMM {
|
|||
for( int jjj = hapStartIndex + 1; jjj < Y_METRIC_LENGTH; jjj++ ) {
|
||||
if( (iii == 1 && jjj == 1) ) { continue; }
|
||||
updateCell(iii, jjj, haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP,
|
||||
matchMetricArray, XMetricArray, YMetricArray);
|
||||
matchMatrix, insertionMatrix, deletionMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
// final probability is the log10 sum of the last element in all three state arrays
|
||||
final int endI = X_METRIC_LENGTH - 1;
|
||||
final int endJ = Y_METRIC_LENGTH - 1;
|
||||
return myLog10SumLog10(new double[]{matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]});
|
||||
double result = myLog10SumLog10(new double[]{matchMatrix[endI][1], insertionMatrix[endI][1]});
|
||||
for (int j = 2; j < Y_METRIC_LENGTH; j++)
|
||||
result = myLog10SumLog10(new double[]{result, matchMatrix[endI][j], insertionMatrix[endI][j]});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -134,7 +138,7 @@ public class Log10PairHMM extends PairHMM {
|
|||
|
||||
private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases,
|
||||
final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP,
|
||||
final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
|
||||
final double[][] matchMatrix, final double[][] insertionMatrix, final double[][] deletionMatrix ) {
|
||||
|
||||
// the read and haplotype indices are offset by one because the state arrays have an extra column to hold the initial conditions
|
||||
final int im1 = indI - 1;
|
||||
|
|
@ -151,18 +155,18 @@ public class Log10PairHMM extends PairHMM {
|
|||
final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) );
|
||||
final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP);
|
||||
final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) );
|
||||
matchMetricArray[indI][indJ] = pBaseReadLog10 + myLog10SumLog10(new double[]{matchMetricArray[indI - 1][indJ - 1] + d0, XMetricArray[indI - 1][indJ - 1] + e0, YMetricArray[indI - 1][indJ - 1] + e0});
|
||||
matchMatrix[indI][indJ] = pBaseReadLog10 + myLog10SumLog10(new double[]{matchMatrix[indI - 1][indJ - 1] + d0, insertionMatrix[indI - 1][indJ - 1] + e0, deletionMatrix[indI - 1][indJ - 1] + e0});
|
||||
|
||||
// update the X (insertion) array
|
||||
final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) );
|
||||
final double e1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GCP) : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
|
||||
final double qBaseReadLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
|
||||
XMetricArray[indI][indJ] = qBaseReadLog10 + myLog10SumLog10(new double[]{matchMetricArray[indI - 1][indJ] + d1, XMetricArray[indI - 1][indJ] + e1});
|
||||
insertionMatrix[indI][indJ] = qBaseReadLog10 + myLog10SumLog10(new double[]{matchMatrix[indI - 1][indJ] + d1, insertionMatrix[indI - 1][indJ] + e1});
|
||||
|
||||
// update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype
|
||||
final double d2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
|
||||
final double e2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
|
||||
final double d2 = ( im1 == 0 ) ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]);
|
||||
final double e2 = ( im1 == 0 ) ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]);
|
||||
final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
|
||||
YMetricArray[indI][indJ] = qBaseRefLog10 + myLog10SumLog10(new double[]{matchMetricArray[indI][indJ - 1] + d2, YMetricArray[indI][indJ - 1] + e2});
|
||||
deletionMatrix[indI][indJ] = qBaseRefLog10 + myLog10SumLog10(new double[]{matchMatrix[indI][indJ - 1] + d2, deletionMatrix[indI][indJ - 1] + e2});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,11 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.pairhmm;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book.
|
||||
*
|
||||
|
|
@ -52,11 +53,11 @@ public abstract class PairHMM {
|
|||
LOGLESS_CACHING
|
||||
}
|
||||
|
||||
protected double[][] matchMetricArray = null;
|
||||
protected double[][] XMetricArray = null;
|
||||
protected double[][] YMetricArray = null;
|
||||
protected double[][] matchMatrix = null;
|
||||
protected double[][] insertionMatrix = null;
|
||||
protected double[][] deletionMatrix = null;
|
||||
protected int maxHaplotypeLength, maxReadLength;
|
||||
protected int X_METRIC_MAX_LENGTH, Y_METRIC_MAX_LENGTH;
|
||||
protected int paddedMaxReadLength, paddedMaxHaplotypeLength;
|
||||
private boolean initialized = false;
|
||||
|
||||
/**
|
||||
|
|
@ -72,12 +73,12 @@ public abstract class PairHMM {
|
|||
maxReadLength = readMaxLength;
|
||||
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||
X_METRIC_MAX_LENGTH = readMaxLength + 2;
|
||||
Y_METRIC_MAX_LENGTH = haplotypeMaxLength + 2;
|
||||
paddedMaxReadLength = readMaxLength + 2;
|
||||
paddedMaxHaplotypeLength = haplotypeMaxLength + 2;
|
||||
|
||||
matchMetricArray = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH];
|
||||
XMetricArray = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH];
|
||||
YMetricArray = new double[X_METRIC_MAX_LENGTH][Y_METRIC_MAX_LENGTH];
|
||||
matchMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength];
|
||||
insertionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength];
|
||||
deletionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength];
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
|
|
@ -124,21 +125,17 @@ public abstract class PairHMM {
|
|||
|
||||
double result = subComputeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, hapStartIndex, recacheReadValues);
|
||||
|
||||
// TODO -- remove max when PairHMM no longer returns likelihoods >= 0
|
||||
result = Math.min(result, 0.0);
|
||||
|
||||
if ( MathUtils.goodLog10Probability(result) )
|
||||
return result;
|
||||
else
|
||||
throw new IllegalStateException("Bad likelihoods detected: " + result);
|
||||
// return result;
|
||||
throw new IllegalStateException("PairHMM Log Probability cannot be greater than 0: " + String.format("haplotype: %s, read: %s, result: %f", Arrays.toString(haplotypeBases), Arrays.toString(readBases), result));
|
||||
}
|
||||
|
||||
/**
|
||||
* To be overloaded by subclasses to actually do calculation for #computeReadLikelihoodGivenHaplotypeLog10
|
||||
*/
|
||||
@Requires({"readBases.length == readQuals.length", "readBases.length == insertionGOP.length", "readBases.length == deletionGOP.length",
|
||||
"readBases.length == overallGCP.length", "matchMetricArray!=null", "XMetricArray!=null", "YMetricArray!=null"})
|
||||
"readBases.length == overallGCP.length", "matchMatrix!=null", "insertionMatrix!=null", "deletionMatrix!=null"})
|
||||
protected abstract double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
|
||||
final byte[] readBases,
|
||||
final byte[] readQuals,
|
||||
|
|
@ -148,41 +145,13 @@ public abstract class PairHMM {
|
|||
final int hapStartIndex,
|
||||
final boolean recacheReadValues );
|
||||
|
||||
/**
|
||||
* How many potential starting locations are a read with readSize bases against a haplotype with haplotypeSize bases?
|
||||
*
|
||||
* for example, a 3 bp read against a 5 bp haplotype could potentially start at 1, 2, 3 = 5 - 3 + 1 = 3
|
||||
* the max value is necessary in the case where the read is longer than the haplotype, in which case
|
||||
* there's a single unique start site by assumption
|
||||
*
|
||||
* @param haplotypeSize the number of bases in the haplotype we are testing
|
||||
* @param readSize the number of bases in the read we are testing
|
||||
* @return a positive integer >= 1
|
||||
*/
|
||||
@Ensures("result >= 1")
|
||||
protected int getNPotentialXStarts(final int haplotypeSize, final int readSize) {
|
||||
return Math.max(haplotypeSize - readSize + 1, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* The the log10 probability penalty for the number of potential start sites of the read aginst the haplotype
|
||||
*
|
||||
* @param haplotypeSize the number of bases in the haplotype we are testing
|
||||
* @param readSize the number of bases in the read we are testing
|
||||
* @return a log10 probability
|
||||
*/
|
||||
@Ensures("MathUtils.goodLog10Probability(result)")
|
||||
protected double getNPotentialXStartsLikelihoodPenaltyLog10(final int haplotypeSize, final int readSize) {
|
||||
return - Math.log10(getNPotentialXStarts(haplotypeSize, readSize));
|
||||
}
|
||||
|
||||
/**
|
||||
* Print out the core hmm matrices for debugging
|
||||
*/
|
||||
protected void dumpMatrices() {
|
||||
dumpMatrix("matchMetricArray", matchMetricArray);
|
||||
dumpMatrix("XMetricArray", XMetricArray);
|
||||
dumpMatrix("YMetricArray", YMetricArray);
|
||||
dumpMatrix("matchMetricArray", matchMatrix);
|
||||
dumpMatrix("insertionMatrix", insertionMatrix);
|
||||
dumpMatrix("deletionMatrix", deletionMatrix);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -215,8 +184,8 @@ public abstract class PairHMM {
|
|||
* @return the index of the first position in haplotype1 and haplotype2 where the byte isn't the same
|
||||
*/
|
||||
public static int findFirstPositionWhereHaplotypesDiffer(final byte[] haplotype1, final byte[] haplotype2) {
|
||||
if ( haplotype1 == null || haplotype1.length == 0 ) throw new IllegalArgumentException("Haplotype1 is bad " + haplotype1);
|
||||
if ( haplotype2 == null || haplotype2.length == 0 ) throw new IllegalArgumentException("Haplotype2 is bad " + haplotype2);
|
||||
if ( haplotype1 == null || haplotype1.length == 0 ) throw new IllegalArgumentException("Haplotype1 is bad " + Arrays.toString(haplotype1));
|
||||
if ( haplotype2 == null || haplotype2.length == 0 ) throw new IllegalArgumentException("Haplotype2 is bad " + Arrays.toString(haplotype2));
|
||||
|
||||
for( int iii = 0; iii < haplotype1.length && iii < haplotype2.length; iii++ ) {
|
||||
if( haplotype1[iii] != haplotype2[iii] ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue