Refactoring of PairHMM to support reduced reads

This commit is contained in:
Mark DePristo 2011-09-26 13:28:56 -04:00
parent a6b65d6347
commit fa0efbc4ca
1 changed files with 151 additions and 144 deletions

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
@ -268,7 +269,7 @@ public class PairHMMIndelErrorModel {
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN ); dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
} }
*/ */
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit) { public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit) {
this(indelGOP, indelGCP, deb, doCDP); this(indelGOP, indelGCP, deb, doCDP);
this.doViterbi = dovit; this.doViterbi = dovit;
@ -711,8 +712,8 @@ public class PairHMMIndelErrorModel {
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){ HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
int numHaplotypes = haplotypeMap.size(); int numHaplotypes = haplotypeMap.size();
double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes]; final double readLikelihoods[][] = new double[pileup.size()][numHaplotypes];
double readLikelihoods[][] = new double[pileup.getReads().size()][numHaplotypes]; final int readCounts[] = new int[pileup.size()];
int readIdx=0; int readIdx=0;
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>(); LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
@ -751,6 +752,9 @@ public class PairHMMIndelErrorModel {
} }
} }
for (PileupElement p: pileup) { for (PileupElement p: pileup) {
// > 1 when the read is a consensus read representing multiple independent observations
final boolean isReduced = ReadUtils.isReducedRead(p.getRead());
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location) // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
if (indelLikelihoodMap.containsKey(p)) { if (indelLikelihoodMap.containsKey(p)) {
@ -762,10 +766,14 @@ public class PairHMMIndelErrorModel {
} }
else { else {
//System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName()); //System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); SAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
if (read == null) if (read == null)
continue; continue;
if ( isReduced ) {
read = ReadUtils.reducedReadWithReducedQuals(read);
}
if(ReadUtils.is454Read(read) && !getGapPenaltiesFromFile) { if(ReadUtils.is454Read(read) && !getGapPenaltiesFromFile) {
continue; continue;
} }
@ -971,7 +979,7 @@ public class PairHMMIndelErrorModel {
System.out.println(new String(haplotypeBases)); System.out.println(new String(haplotypeBases));
} }
Double readLikelihood = 0.0; double readLikelihood = 0.0;
if (useAffineGapModel) { if (useAffineGapModel) {
double[] currentContextGOP = null; double[] currentContextGOP = null;
@ -1004,7 +1012,7 @@ public class PairHMMIndelErrorModel {
if (DEBUG) { if (DEBUG) {
System.out.println("\nLikelihood summary"); System.out.println("\nLikelihood summary");
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) { for (readIdx=0; readIdx < pileup.size(); readIdx++) {
System.out.format("Read Index: %d ",readIdx); System.out.format("Read Index: %d ",readIdx);
for (int i=0; i < readLikelihoods[readIdx].length; i++) for (int i=0; i < readLikelihoods[readIdx].length; i++)
System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]); System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]);
@ -1012,36 +1020,35 @@ public class PairHMMIndelErrorModel {
} }
} }
return getHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
}
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix
for (int i=0; i < numHaplotypes; i++) { for (int i=0; i < numHaplotypes; i++) {
for (int j=i; j < numHaplotypes; j++){ for (int j=i; j < numHaplotypes; j++){
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j] // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2) // L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] ) //readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) { for (int readIdx = 0; readIdx < readLikelihoods.length; readIdx++) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup. // First term is approximated by Jacobian log with table lookup.
if (Double.isInfinite(readLikelihoods[readIdx][i]) && Double.isInfinite(readLikelihoods[readIdx][j])) if (Double.isInfinite(readLikelihoods[readIdx][i]) && Double.isInfinite(readLikelihoods[readIdx][j]))
continue; continue;
haplotypeLikehoodMatrix[i][j] += ( MathUtils.softMax(readLikelihoods[readIdx][i], final double li = readLikelihoods[readIdx][i];
readLikelihoods[readIdx][j]) + LOG_ONE_HALF); final double lj = readLikelihoods[readIdx][j];
final int readCount = readCounts[readIdx];
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
} }
} }
} }
return getHaplotypeLikelihoods(haplotypeLikehoodMatrix); final double[] genotypeLikelihoods = new double[numHaplotypes*(numHaplotypes+1)/2];
}
public static double[] getHaplotypeLikelihoods(double[][] haplotypeLikehoodMatrix) {
int hSize = haplotypeLikehoodMatrix.length;
double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
int k=0; int k=0;
for (int j=0; j < hSize; j++) { for (int j=0; j < numHaplotypes; j++) {
for (int i=0; i <= j; i++){ for (int i=0; i <= j; i++){
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j]; genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
} }
@ -1124,5 +1131,5 @@ public class PairHMMIndelErrorModel {
//return newQualityByte; //return newQualityByte;
} }
*/ */
} }