diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 2ad7892e6..e505b556a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -36,19 +36,12 @@ import org.broadinstitute.sting.utils.genotype.Haplotype; import java.util.Arrays; import java.util.List; -/** - * Created by IntelliJ IDEA. - * User: delangel - * Date: Aug 24, 2010 - * Time: 1:31:08 PM - * To change this template use File | Settings | File Templates. - */ public class HaplotypeIndelErrorModel { private final int maxReadDeletionLength; // maximum length of deletion on a read private final double noDeletionProbability; // alpha for geometric distribution for deletion length private final int haplotypeSize; - private final int maxReadLength; + private final int PATH_METRIC_TABLE_LENGTH; private final int RIGHT_ALIGN_INDEX; private final int LEFT_ALIGN_INDEX; @@ -57,7 +50,7 @@ public class HaplotypeIndelErrorModel { private double deletionErrorProbabilities[]; - private double pathMetricArray[]; + private double pathMetricArray[][]; private int bestStateIndexArray[][]; private final double logOneMinusInsertionStartProbability; @@ -68,17 +61,18 @@ public class HaplotypeIndelErrorModel { private boolean DEBUG = false; private boolean doSimpleCalculationModel = false; - public HaplotypeIndelErrorModel(int mrdl, double insStart, double insEnd, double alpha, int haplotypeSize, int maxReadLength, + private static final double QUAL_ONE_HALF = -10*Math.log10(0.5); + + public HaplotypeIndelErrorModel(int mrdl, double insStart, double insEnd, double alpha, int haplotypeSize, boolean dosimple, boolean deb) { - this(mrdl, insStart, insEnd, alpha, haplotypeSize, maxReadLength); + this(mrdl, insStart, insEnd, alpha, haplotypeSize); this.DEBUG = deb; this.doSimpleCalculationModel = dosimple; } - public HaplotypeIndelErrorModel(int mrdl, double insStart, double insEnd, double alpha, int haplotypeSize, int maxReadLength) { + public HaplotypeIndelErrorModel(int mrdl, double insStart, double insEnd, double alpha, int haplotypeSize) { this.maxReadDeletionLength = mrdl; this.noDeletionProbability = 1-alpha; this.haplotypeSize = haplotypeSize; - this.maxReadLength = maxReadLength; PATH_METRIC_TABLE_LENGTH = haplotypeSize+2; RIGHT_ALIGN_INDEX = PATH_METRIC_TABLE_LENGTH-1; @@ -90,13 +84,6 @@ public class HaplotypeIndelErrorModel { logOneMinusInsertionEndProbability = probToQual(1-insEnd); - // initialize path metric and traceback memories for Viterbi computation - pathMetricArray = new double[2*PATH_METRIC_TABLE_LENGTH]; - bestStateIndexArray = new int[maxReadLength][2*(PATH_METRIC_TABLE_LENGTH)]; - - for (int k=0; k < 2*PATH_METRIC_TABLE_LENGTH; k++) - pathMetricArray[k] = 0; - // fill in probability for read deletion of length k = C*exp(k-1) double prob = 1.0; double sumProb = 0.0; @@ -140,39 +127,22 @@ public class HaplotypeIndelErrorModel { byte[] readQuals = Arrays.copyOfRange(read.getBaseQualities(),(int)numStartClippedBases, (int)(read.getReadBases().length-numEndClippedBases)); - double[] previousPathMetricArray = new double[2*PATH_METRIC_TABLE_LENGTH]; - int readStartIdx, initialIndexInHaplotype; - if (readStartPosition < haplotypeStartPosition) { - readStartIdx = (int)(haplotypeStartPosition- readStartPosition); - initialIndexInHaplotype = 0; // implicit above - if ((readStartPosition+readBases.length-1) < haplotypeStartPosition) - // when ignoring read CIGAR alignment, this read actually didn't overlap with haplotype: - // just ignore read - return INFINITE; - } - else { - readStartIdx = 0; - initialIndexInHaplotype = (int)(readStartPosition-haplotypeStartPosition); - } + int readLength = readBases.length; - // special case 1: we're analyzing an insertion and the read starts right at the insertion. - if ((readStartPosition > vc.getStart()-eventLength) && vc.isInsertion() && !haplotype.isReference()) { - initialIndexInHaplotype += eventLength; - } - - if (DEBUG) { - System.out.println("READ: "+read.getReadName()); + // initialize path metric and traceback memories for Viterbi computation + pathMetricArray = new double[readLength+1][PATH_METRIC_TABLE_LENGTH]; + bestStateIndexArray = new int[readLength+1][PATH_METRIC_TABLE_LENGTH]; - System.out.println(haplotypeStartPosition); - System.out.println(readStartPosition); - System.out.println(readStartIdx); - System.out.println(initialIndexInHaplotype); - } + for (int k=1; k < PATH_METRIC_TABLE_LENGTH; k++) + pathMetricArray[0][k] = 0; + + /* + + if (doSimpleCalculationModel) { - double pRead = 0; - if (doSimpleCalculationModel) { // No Viterbi algorithm - assume no sequencing indel artifacts, + // so we can collapse computations and pr(read | haplotype) is just probability of observing overlap // of read with haplotype. int haplotypeIndex = initialIndexInHaplotype; @@ -231,7 +201,7 @@ public class HaplotypeIndelErrorModel { System.out.format("\nLikelihood:%f\n",pRead); } -/* + if (read.getReadName().contains("106880")) { System.out.println("aca"); @@ -248,87 +218,51 @@ public class HaplotypeIndelErrorModel { System.out.format("%c ", readBases[k]); } - } */ + } return pRead; } - // initialize path metric array to hard-code certainty that initial position is aligned - for (int k=0; k < 2*PATH_METRIC_TABLE_LENGTH; k++) - pathMetricArray[k] = INFINITE; - - pathMetricArray[initialIndexInHaplotype] = 0; + */ // Update path metric computations based on branch metric (Add/Compare/Select operations) // do forward direction first, ie from anchor to end of read // outer loop - for (int indR=readStartIdx; indR < readBases.length; indR++) { + for (int indR=0; indR < readLength; indR++) { byte readBase = readBases[indR]; byte readQual = readQuals[indR]; - System.arraycopy(pathMetricArray, 0, previousPathMetricArray, 0, 2*PATH_METRIC_TABLE_LENGTH); - - for (int indX=LEFT_ALIGN_INDEX; indX <= RIGHT_ALIGN_INDEX; indX++) { byte haplotypeBase; - if (indX > LEFT_ALIGN_INDEX && indX <= haplotypeSize) + if (indX > LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX) haplotypeBase = haplotype.getBasesAsBytes()[indX-1]; else haplotypeBase = readBase; - updatePathMetrics(haplotypeBase, indX, indR, readBase, readQual, previousPathMetricArray, true); + updatePathMetrics(haplotypeBase, indX, indR, readBase, readQual); } } - double[] forwardPathMetricArray = new double[2*PATH_METRIC_TABLE_LENGTH]; - System.arraycopy(pathMetricArray, 0, forwardPathMetricArray, 0, 2*PATH_METRIC_TABLE_LENGTH); - double forwardMetric = MathUtils.arrayMin(pathMetricArray); - - // reinitialize path metric array to known position at start of read - // initialize path metric array to hard-code certainty that initial position is aligned - for (int k=0; k < 2*PATH_METRIC_TABLE_LENGTH; k++) - pathMetricArray[k] = INFINITE; - - pathMetricArray[initialIndexInHaplotype] = 0; - - // do now backward direction (from anchor to start of read) - // outer loop - for (int indR=readStartIdx-1; indR >= 0; indR--) { - byte readBase = readBases[indR]; - byte readQual = readQuals[indR]; - - System.arraycopy(pathMetricArray, 0, previousPathMetricArray, 0, 2*PATH_METRIC_TABLE_LENGTH); - - for (int indX=LEFT_ALIGN_INDEX; indX <= RIGHT_ALIGN_INDEX; indX++) { - byte haplotypeBase; - if (indX > 0 && indX <= haplotypeSize) - haplotypeBase = haplotype.getBasesAsBytes()[indX-1]; - else - haplotypeBase = readBase; - - updatePathMetrics(haplotypeBase, indX, indR, readBase, readQual, previousPathMetricArray, false); - - } - } // for debugging only: compute backtracking to find optimal route through trellis. Since I'm only interested // in log-likelihood of best state, this isn't really necessary. + int[] bestIndexArray = new int[readLength]; + double bestMetric = MathUtils.arrayMin(pathMetricArray[readLength]); + + // start from last position of read, go backwards to find optimal alignment + int bestIndex = MathUtils.minElementIndex(pathMetricArray[readLength]); + bestIndexArray[readLength-1] = bestIndex; + + for (int k=readLength-2; k>=0; k--) { + bestIndex = bestStateIndexArray[k][bestIndex]; + bestIndexArray[k] = bestIndex; + } if (DEBUG) { - int[] bestIndexArray = new int[readBases.length+1]; - - - int bestIndex = MathUtils.minElementIndex(forwardPathMetricArray); - bestIndexArray[readBases.length] = bestIndex; - - for (int k=readBases.length-1; k>=0; k--) { - bestIndex = bestStateIndexArray[k][bestIndex]; - bestIndexArray[k] = bestIndex; - } System.out.println(read.getReadName()); @@ -352,155 +286,91 @@ public class HaplotypeIndelErrorModel { System.out.println(); } // now just take optimum along all path metrics: that's the log likelihood of best alignment - double backwardMetric = MathUtils.arrayMin(pathMetricArray); if (DEBUG) - System.out.format("Likelihood: %5.4f\n", forwardMetric + backwardMetric); - return forwardMetric + backwardMetric; + System.out.format("Likelihood: %5.4f\n", bestMetric); + return bestMetric; } - private void updatePathMetrics(byte haplotypeBase, int indX, int indR, byte readBase, byte readQual, - double[] previousPathMetricArray, boolean isForward) { + private void updatePathMetrics(byte haplotypeBase, int indX, int indR, byte readBase, byte readQual) { double bmetric; - // Pr(Rb', Xb',Ib'| Xb,Ib=0) is populated starting on Xb , = - // Pr(Rb'|Xb',Ib') Pr(Xb',Ib'|Xb,Ib) = - // Pr(Rb'|Xb',Ib') Pr(Xb'|Ib',Xb,Ib) Pr(Ib'|Ib) - - // start with case Ib=0, Ib'=0: no insertion double bestMetric = INFINITE; - int bestMetricIndex = 0; - double pBaseRead = getProbabilityOfReadBaseGivenXandI(haplotypeBase, readBase, readQual, indX, 0); + int bestMetricIndex = -1; - if (isForward) { - if (indX==LEFT_ALIGN_INDEX){ - // there can't be a transition into Xb=0 - // special case: only one incoming path - // record best path metric -// pathMetricArray[indX] = INFINITE; -// bestStateIndexArray[indR][indX] = 0; -// bestMetric = INFINITE; -// bestMetricIndex = 0; - } - else { - for (int indXold = indX-1; indXold >= indX-this.maxReadDeletionLength; indXold--){ + // compute metric for match/mismatch + double pBaseRead; - if (indXold < 0) - break; + // workaround for reads whose bases quality = 0, + if (readQual < 1) + readQual = 1; + + double baseProb = QualityUtils.qualToProb(readQual); - // fetch path metric and add branch metric - bmetric = previousPathMetricArray[indXold] + deletionErrorProbabilities[indX-indXold] + - logOneMinusInsertionStartProbability + pBaseRead; + + double pBaseMatch = probToQual(baseProb); + double pBaseMismatch = (double)(readQual); - if (bmetric < bestMetric) { - bestMetric = bmetric; - bestMetricIndex = indXold; - } - } - if (indX == RIGHT_ALIGN_INDEX) { - // special treatment for R->R case (read aligns to the right of haplotype) when Xold = X = R - bmetric = previousPathMetricArray[indX] + pBaseRead; - - if (bmetric < bestMetric) { - bestMetric = bmetric; - bestMetricIndex = indX; - } + if (haplotypeBase == readBase) + pBaseRead = pBaseMatch; + else + pBaseRead = pBaseMismatch; - } - } + + + + if (indX == LEFT_ALIGN_INDEX) { + // special treatment for L->L case (read aligns to the right/left of haplotype) when Xold = X = R + bestMetric = pathMetricArray[indR][indX] + QUAL_ONE_HALF; //1/2 in log scale + bestMetricIndex = indX; } else { - for (int indXold = indX+1; indXold <=this.maxReadDeletionLength + indX; indXold++){ - if (indXold > this.haplotypeSize+1) + for (int indXold = indX-1; indXold >= indX-this.maxReadDeletionLength; indXold--) { + + if (indXold < 0) break; // fetch path metric and add branch metric - bmetric = previousPathMetricArray[indXold] + deletionErrorProbabilities[indXold-indX] + - logOneMinusInsertionStartProbability + pBaseRead; + bmetric = pathMetricArray[indR][indXold] + deletionErrorProbabilities[indX-indXold] + pBaseRead; + + if (indXold == indX-1) { + // special case for exact walk down diagonal: need to consider that an insertion may have ended + // bmetric += logInsertionEndProbability; + } else { + bmetric += logOneMinusInsertionStartProbability; + } if (bmetric < bestMetric) { bestMetric = bmetric; bestMetricIndex = indXold; } - if (indX == LEFT_ALIGN_INDEX) { - // special treatment for R->R case (read aligns to the right of haplotype) when Xold = X = R - bmetric = previousPathMetricArray[indX] + pBaseRead; - - if (bmetric < bestMetric) { - bestMetric = bmetric; - bestMetricIndex = indX; - } - - - } } - - } - // now Pr(Xb',Ib'=0| Xb,Ib=1): an insertion ended. Only case if Xb'=Xb+1 (walk diag down in path array). - - if (indX > LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX) { - if (isForward) - bmetric = previousPathMetricArray[indX-1+PATH_METRIC_TABLE_LENGTH] + logInsertionEndProbability + pBaseRead; - else - bmetric = previousPathMetricArray[indX+1+PATH_METRIC_TABLE_LENGTH] + logInsertionEndProbability + pBaseRead; + // additional case: a walk right (ie indXold = indX). Can be because of an insertion in the middle of reads, + // or we're aligned to right of read + bmetric = pathMetricArray[indR][indX]+pBaseRead; + if (indX < RIGHT_ALIGN_INDEX) { + bmetric += logInsertionStartProbability + logOneMinusInsertionEndProbability; + } + else { + // anything extra to do for R->R case? + bmetric = pathMetricArray[indR][indX] + QUAL_ONE_HALF; + } if (bmetric < bestMetric) { bestMetric = bmetric; - if (isForward) - bestMetricIndex = indX-1 + PATH_METRIC_TABLE_LENGTH; - else - bestMetricIndex = indX+1 + PATH_METRIC_TABLE_LENGTH; - - } - } - // record best path metric - pathMetricArray[indX] = bestMetric; - bestStateIndexArray[indR][indX] = bestMetricIndex; - - // now Pr(Xb', Ib'=1 | Xb, Ib=0) : an insertion started: (walk right in path array) - bestMetric = INFINITE; - pBaseRead = getProbabilityOfReadBaseGivenXandI(haplotypeBase, readBase, readQual, indX, 1); - - bmetric = previousPathMetricArray[indX] + logInsertionStartProbability + pBaseRead; - if (bmetric < bestMetric) { //if superfluous, could clean up - bestMetric = bmetric; - bestMetricIndex = indX; - } - - // final case: Pr(Xb', Ib'=1 | Xb, Ib=1): insertion continues, also walk right in path array - if (indX > LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX) { - bmetric = previousPathMetricArray[indX+PATH_METRIC_TABLE_LENGTH] + logOneMinusInsertionEndProbability + pBaseRead; - if (bmetric < bestMetric) { //if superfluous, could clean up - bestMetric = bmetric; - bestMetricIndex = indX+PATH_METRIC_TABLE_LENGTH; + bestMetricIndex = indX; } - // record best path metric - pathMetricArray[indX+PATH_METRIC_TABLE_LENGTH] = bestMetric; - bestStateIndexArray[indR][indX+PATH_METRIC_TABLE_LENGTH] = bestMetricIndex; + } + + // record best path metric + pathMetricArray[indR+1][indX] = bestMetric; + bestStateIndexArray[indR+1][indX] = bestMetricIndex; + } - private double getProbabilityOfReadBaseGivenXandI(byte haplotypeBase, byte readBase, byte readQual, int indX, int indI) { - - - if (indX == RIGHT_ALIGN_INDEX || indX == LEFT_ALIGN_INDEX) { - // X = L or R - double baseProb = QualityUtils.qualToProb(readQual); - return probToQual(baseProb); - } - - if (haplotypeBase == readBase) { - double baseProb = QualityUtils.qualToProb(readQual); - return probToQual(baseProb); - } - else { - return (double)(readQual); - } - // TODO - optimize for speed by avoiding so many log conversions, can use a LUT - - } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java index d0d7c64c2..a4ee07319 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java @@ -38,7 +38,9 @@ import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.collections.CircularArray; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; @@ -60,25 +62,22 @@ public class SimpleIndelGenotyperWalker extends RefWalker { int maxReadDeletionLength = 3; @Argument(fullName="insertionStartProbability",shortName="insertionStartProbability",doc="Assess only sites with coverage at or below the specified value.",required=false) - double insertionStartProbability = 0.01; + double insertionStartProbability = 0.001; @Argument(fullName="insertionEndProbability",shortName="insertionEndProbability",doc="Assess only sites with coverage at or below the specified value.",required=false) double insertionEndProbability = 0.5; @Argument(fullName="alphaDeletionProbability",shortName="alphaDeletionProbability",doc="Assess only sites with coverage at or below the specified value.",required=false) - double alphaDeletionProbability = 0.01; + double alphaDeletionProbability = 0.001; - @Argument(fullName="haplotypeSize",shortName="hsize",doc="Size of haplotypes to evaluate calls.",required=true) - int HAPLOTYPE_SIZE = 40; + @Argument(fullName="haplotypeSize",shortName="hsize",doc="Size of haplotypes to evaluate calls.",required=false) + int HAPLOTYPE_SIZE = 80; @Argument(fullName="indelHeterozygozity",shortName="indhet",doc="Indel Heterozygosity (assumed 1/6500 from empirical human data)",required=false) double indelHeterozygosity = (double)1.0/6500.0; - @Argument(fullName="sampleName",shortName="sample",doc="Sample name to evaluate genotypes in",required=true) - String sampleName; - @Argument(fullName="doSimpleCalculationModel",shortName="simple",doc="Use Simple Calculation Model for Pr(Reads | Haplotype)",required=false) boolean doSimple = false; @@ -91,6 +90,14 @@ public class SimpleIndelGenotyperWalker extends RefWalker { @Argument(fullName="useFlatPriors",shortName="flat",doc="If present, use flat priors on haplotypes",required=false) boolean useFlatPriors = false; + @Argument(fullName="useDynamicHaplotypeSize",shortName="dhsize",doc="If present, use dynamic haplotype size",required=false) + boolean useDynamicHaplotypeSize = false; + + + @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false) + public double STANDARD_CONFIDENCE_FOR_CALLING = 10.0; + + @Override public boolean generateExtendedEvents() { return true; } @@ -100,15 +107,16 @@ public class SimpleIndelGenotyperWalker extends RefWalker { private HaplotypeIndelErrorModel model; - private static final int MAX_READ_LENGTH = 1000; // TODO- make this dynamic - + Set sampleNames; @Override public void initialize() { model = new HaplotypeIndelErrorModel(maxReadDeletionLength, insertionStartProbability, - insertionEndProbability, alphaDeletionProbability, HAPLOTYPE_SIZE, MAX_READ_LENGTH, doSimple, DEBUG); + insertionEndProbability, alphaDeletionProbability, HAPLOTYPE_SIZE, doSimple, DEBUG); + + sampleNames = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); } @@ -121,6 +129,7 @@ public class SimpleIndelGenotyperWalker extends RefWalker { if (!context.hasBasePileup()) return 0; + VariantContext vc = tracker.getVariantContext(ref, "indels", null, context.getLocation(), true); // ignore places where we don't have a variant if ( vc == null ) @@ -130,8 +139,50 @@ public class SimpleIndelGenotyperWalker extends RefWalker { if (!vc.isIndel()) return 0; + int eventLength = vc.getReference().getBaseString().length() - vc.getAlternateAllele(0).getBaseString().length(); // assume only one alt allele for now + if (eventLength<0) + eventLength = - eventLength; - List haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, HAPLOTYPE_SIZE); + int currentHaplotypeSize = HAPLOTYPE_SIZE; + List haplotypesInVC = new ArrayList(); + + int minHaplotypeSize = Haplotype.LEFT_WINDOW_SIZE + eventLength + 2; // to be safe + + + if (useDynamicHaplotypeSize) { + if (currentHaplotypeSize < minHaplotypeSize) + currentHaplotypeSize = minHaplotypeSize; + + boolean doneHaplotype = false; + while (!doneHaplotype) { + + haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, currentHaplotypeSize); + String[] haplotypeStrings = new String[haplotypesInVC.size()]; + for (int k = 0; k < haplotypeStrings.length; k++) + haplotypeStrings[k] = new String(haplotypesInVC.get(k).getBasesAsBytes()); + + // check if all strings are the same. + for (int k = 1; k < haplotypeStrings.length; k++) { + if (!haplotypeStrings[k].equals(haplotypeStrings[0])) + doneHaplotype = true; + + } + + if (doneHaplotype) + break; + else + currentHaplotypeSize = 2*currentHaplotypeSize; + + } + + } + else { + if (currentHaplotypeSize < minHaplotypeSize) + throw new UserException.BadArgumentValue("Insufficient haplotype length to represent all indels:", + String.valueOf(currentHaplotypeSize)); + + haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, currentHaplotypeSize); + } // combine likelihoods for all possible haplotype pair (n*(n-1)/2 combinations) @@ -140,6 +191,10 @@ public class SimpleIndelGenotyperWalker extends RefWalker { // for given allele haplotype, compute likelihood of read pileup given haplotype ReadBackedPileup pileup = context.getBasePileup().getPileupWithoutMappingQualityZeroReads(); + if (pileup.getSamples().size() > 1) { + throw new UserException.BadInput("Currently only a single-sample BAM is supported for Indel genotyper"); + } + // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. // In general, we'll assume: even spread of indels throughout genome (not true, but simplifying assumption), @@ -160,9 +215,6 @@ public class SimpleIndelGenotyperWalker extends RefWalker { // and -10*log10(above) = -10*log10(K)+ 18.9*log10(abs(L)) + 0.15*abs(L) - 10*log10(theta*L), and we ignore terms that would be // added to ref hypothesis. // Equation above is prior model. - int eventLength = vc.getReference().getBaseString().length() - vc.getAlternateAllele(0).getBaseString().length(); // assume only one alt allele for now - if (eventLength<0) - eventLength = - eventLength; if (!useFlatPriors) { @@ -177,10 +229,12 @@ public class SimpleIndelGenotyperWalker extends RefWalker { int bestIndexI =-1, bestIndexJ=-1; double callConfidence = 0.0; + if (pileup.getReads().size() > 0) { double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()]; int i=0; for (SAMRecord read : pileup.getReads()) { + // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi)) // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent for (int j=0; j < haplotypesInVC.size(); j++) { @@ -216,7 +270,8 @@ public class SimpleIndelGenotyperWalker extends RefWalker { if (haplotypeLikehoodMatrix[i][j] < bestLikelihood) { bestIndexI = i; bestIndexJ = j; - callConfidence = bestLikelihood - haplotypeLikehoodMatrix[i][j]; + if (i > 0 || j > 0) + callConfidence = bestLikelihood - haplotypeLikehoodMatrix[i][j]; bestLikelihood = haplotypeLikehoodMatrix[i][j]; } } @@ -259,26 +314,36 @@ public class SimpleIndelGenotyperWalker extends RefWalker { else newG = "OTHER"; + + if (callConfidence < STANDARD_CONFIDENCE_FOR_CALLING) { + newG = "NOCALL"; + } out.format("NewG %s OldG %s DadG %s MomG %s\n", newG, oldG, dadG, momG); } else { - Genotype originalGenotype = vc.getGenotype(sampleName); - String oldG, newG; - oldG = getGenotypeString(originalGenotype); - int x = bestIndexI+bestIndexJ; - if (x == 0) - newG = "HOMREF"; - else if (x == 1) - newG = "HET"; - else if (x == 2) - newG = "HOMVAR"; - else if (x < 0) - newG = "NOCALL"; - else - newG = "OTHER"; - out.format("NewG %s OldG %s\n", newG, oldG); + for (String sample: sampleNames) { + String oldG, newG; + Genotype originalGenotype = vc.getGenotype(sample); + oldG = getGenotypeString(originalGenotype); + int x = bestIndexI+bestIndexJ; + if (x == 0) + newG = "HOMREF"; + else if (x == 1) + newG = "HET"; + else if (x == 2) + newG = "HOMVAR"; + else if (x < 0) + newG = "NOCALL"; + else + newG = "OTHER"; + if (callConfidence < STANDARD_CONFIDENCE_FOR_CALLING) { + newG = "NOCALL"; + } + + out.format("NewG %s OldG %s\n", newG, oldG); + } } /* diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java index 884a26466..53a5b9bf5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java @@ -40,6 +40,7 @@ public class Haplotype { protected double[] quals = null; private GenomeLoc genomeLocation = null; private boolean isReference = false; + public static final int LEFT_WINDOW_SIZE = 20; /** * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual @@ -111,7 +112,7 @@ public class Haplotype { byte[] refBases = ref.getBases(); - int numPrefBases = 20; + int numPrefBases = LEFT_WINDOW_SIZE; int startIdxInReference = (int)(1+vc.getStart()-numPrefBases-ref.getWindow().getStart()); //int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event