diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 0798e751d..d13696aa1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,17 +1,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.vcf.VCFCompoundHeaderLine; import org.broad.tribble.vcf.VCFFormatHeaderLine; import org.broad.tribble.vcf.VCFHeaderLineType; -import org.broad.tribble.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.gatk.contexts.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.pileup.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation { @@ -45,18 +51,19 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot counts[0] = alleleCounts.get(vc.getReference().getBases()[0]); for (int i = 0; i < vc.getAlternateAlleles().size(); i++) counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]); - + Map map = new HashMap(); map.put(getKeyNames().get(0), counts); return map; } private Map annotateIndel(StratifiedAlignmentContext stratifiedContext, VariantContext vc) { - ReadBackedExtendedEventPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup(); - if ( pileup == null ) +// ReadBackedExtendedEventPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup(); + //ReadBackedPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(); + //if ( pileup == null ) return null; - Integer[] counts = new Integer[2]; + //Integer[] counts = new Integer[2]; // TODO -- fix me /* @@ -69,9 +76,9 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot } */ - Map map = new HashMap(); - map.put(getKeyNames().get(0), counts); - return map; + //Map map = new HashMap(); + //map.put(getKeyNames().get(0), counts); + //return map; } public List getKeyNames() { return Arrays.asList("AD"); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 0441ec819..7382a5bbd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -1,21 +1,22 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFConstants; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; -import org.broad.tribble.vcf.VCFConstants; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotation { @@ -26,10 +27,18 @@ public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotati int mq0 = 0; for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - ReadBackedPileup pileup = sample.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(); - for (PileupElement p : pileup ) { - if ( p.getMappingQual() == 0 ) - mq0++; + AlignmentContext context = sample.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + ReadBackedPileup pileup = null; + if (context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + + if (pileup != null) { + for (PileupElement p : pileup ) { + if ( p.getMappingQual() == 0 ) + mq0++; + } } } Map map = new HashMap(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 0f03a2576..22e52dbd8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,17 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFConstants; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; -import org.broad.tribble.vcf.VCFConstants; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.*; @@ -24,9 +25,17 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio ArrayList qualities = new ArrayList(); for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - ReadBackedPileup pileup = sample.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(); - for (PileupElement p : pileup ) - qualities.add(p.getRead().getMappingQuality()); + AlignmentContext context = sample.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + ReadBackedPileup pileup = null; + if (context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + + if (pileup != null) { + for (PileupElement p : pileup ) + qualities.add(p.getRead().getMappingQuality()); + } } int[] quals = new int[qualities.size()]; int index = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 8351d718e..387e32188 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -10,10 +11,10 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.util.Map; +import java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.Arrays; +import java.util.Map; public class SpanningDeletions implements InfoFieldAnnotation, StandardAnnotation { @@ -25,9 +26,17 @@ public class SpanningDeletions implements InfoFieldAnnotation, StandardAnnotatio int deletions = 0; int depth = 0; for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - ReadBackedPileup pileup = sample.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(); - deletions += pileup.getNumberOfDeletions(); - depth += pileup.size(); + AlignmentContext context = sample.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + ReadBackedPileup pileup = null; + if (context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + + if (pileup != null) { + deletions += pileup.getNumberOfDeletions(); + depth += pileup.size(); + } } Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%.2f", depth == 0 ? 0.0 : (double)deletions/(double)depth)); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index e505b556a..1af6a3f89 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.Haplotype; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.Arrays; import java.util.List; @@ -203,7 +204,7 @@ public class HaplotypeIndelErrorModel { } if (read.getReadName().contains("106880")) { - + System.out.println("aca"); System.out.println("Haplotype:"); @@ -305,10 +306,10 @@ public class HaplotypeIndelErrorModel { // workaround for reads whose bases quality = 0, if (readQual < 1) readQual = 1; - + double baseProb = QualityUtils.qualToProb(readQual); - + double pBaseMatch = probToQual(baseProb); double pBaseMismatch = (double)(readQual); @@ -372,5 +373,66 @@ public class HaplotypeIndelErrorModel { } + public double[][] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, List haplotypesInVC, + VariantContext vc, int eventLength){ + double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()]; + double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()]; + int i=0; + for (SAMRecord read : pileup.getReads()) { + + // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi)) + // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent + for (int j=0; j < haplotypesInVC.size(); j++) { + readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(haplotypesInVC.get(j), read, vc, eventLength); + if (DEBUG) { + System.out.print(read.getReadName()+" "); + + System.out.format("%d %d S:%d US:%d E:%d UE:%d C:%s %3.4f\n",i, j, read.getAlignmentStart(), + read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(), + read.getCigarString(), readLikelihoods[i][j]); + } + + } + i++; + } + + for (i=0; i < haplotypesInVC.size(); i++) { + for (int j=i; j < haplotypesInVC.size(); j++){ + // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j] + // L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2) + //readLikelihoods[k][j] has log10(Pr(R_k) | H[j] ) + double[] readLikelihood = new double[2]; // diploid sample + for (int readIdx=0; readIdx < pileup.getReads().size(); readIdx++) { + readLikelihood[0] = -readLikelihoods[readIdx][i]/10; + readLikelihood[1] = -readLikelihoods[readIdx][j]/10; + + double probRGivenHPair = MathUtils.sumLog10(readLikelihood)/2; + haplotypeLikehoodMatrix[i][j] += HaplotypeIndelErrorModel.probToQual(probRGivenHPair); + + } + + + } + } + + return haplotypeLikehoodMatrix; + + } + + public static double[] getPosteriorProbabilitesFromHaplotypeLikelihoods(double[][] haplotypeLikehoodMatrix) { + int hSize = haplotypeLikehoodMatrix.length; + double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2]; + + int k=0; + for (int i=0; i < hSize; i++) { + for (int j=i; j < hSize; j++){ + genotypeLikelihoods[k++] = -haplotypeLikehoodMatrix[i][j]/10; + } + } + // normalize likelihoods and pass to linear domain. + return MathUtils.normalizeFromLog10(genotypeLikelihoods); + + } + } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java deleted file mode 100755 index f6bd5de79..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimpleIndelGenotyperWalker.java +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.oneoffprojects.walkers; - -import net.sf.samtools.SAMRecord; -import org.broad.tribble.util.variantcontext.Genotype; -import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.collections.CircularArray; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.genotype.Haplotype; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.io.PrintStream; -import java.util.*; - -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.Window; - - -@Reference(window=@Window(start=-30,stop=200)) -@Allows({DataSource.READS, DataSource.REFERENCE}) -public class SimpleIndelGenotyperWalker extends RefWalker { - @Output - PrintStream out; - @Argument(fullName="maxReadDeletionLength",shortName="maxReadDeletionLength",doc="Max deletion length allowed when aligning reads to candidate haplotypes.",required=false) - int maxReadDeletionLength = 3; - - @Argument(fullName="insertionStartProbability",shortName="insertionStartProbability",doc="Assess only sites with coverage at or below the specified value.",required=false) - double insertionStartProbability = 0.001; - - @Argument(fullName="insertionEndProbability",shortName="insertionEndProbability",doc="Assess only sites with coverage at or below the specified value.",required=false) - double insertionEndProbability = 0.5; - - @Argument(fullName="alphaDeletionProbability",shortName="alphaDeletionProbability",doc="Assess only sites with coverage at or below the specified value.",required=false) - double alphaDeletionProbability = 0.001; - - - @Argument(fullName="haplotypeSize",shortName="hsize",doc="Size of haplotypes to evaluate calls.",required=false) - int HAPLOTYPE_SIZE = 80; - - - @Argument(fullName="indelHeterozygozity",shortName="indhet",doc="Indel Heterozygosity (assumed 1/6500 from empirical human data)",required=false) - double indelHeterozygosity = (double)1.0/6500.0; - - @Argument(fullName="doSimpleCalculationModel",shortName="simple",doc="Use Simple Calculation Model for Pr(Reads | Haplotype)",required=false) - boolean doSimple = false; - - @Argument(fullName="enableDebugOutput",shortName="debugout",doc="Output debug data",required=false) - boolean DEBUG = false; - - @Argument(fullName="doTrio",shortName="trio",doc="Output 1KG CEU trio genotype data (sampleName ignored)",required=false) - boolean doTrio = false; - - @Argument(fullName="useFlatPriors",shortName="flat",doc="If present, use flat priors on haplotypes",required=false) - boolean useFlatPriors = false; - - @Argument(fullName="useDynamicHaplotypeSize",shortName="dhsize",doc="If present, use dynamic haplotype size",required=false) - boolean useDynamicHaplotypeSize = false; - - - @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false) - public double STANDARD_CONFIDENCE_FOR_CALLING = 10.0; - - - @Override - public boolean generateExtendedEvents() { return true; } - - @Override - public boolean includeReadsWithDeletionAtLoci() { return true; } - - - private HaplotypeIndelErrorModel model; - - - Set sampleNames; - - - @Override - public void initialize() { - model = new HaplotypeIndelErrorModel(maxReadDeletionLength, insertionStartProbability, - insertionEndProbability, alphaDeletionProbability, HAPLOTYPE_SIZE, doSimple, DEBUG); - - sampleNames = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); - - } - - - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - if (!context.hasBasePileup()) - return 0; - - - VariantContext vc = tracker.getVariantContext(ref, "indels", null, context.getLocation(), true); - // ignore places where we don't have a variant - if ( vc == null ) - return 0; - - - if (!vc.isIndel()) - return 0; - - int eventLength = vc.getReference().getBaseString().length() - vc.getAlternateAllele(0).getBaseString().length(); // assume only one alt allele for now - if (eventLength<0) - eventLength = - eventLength; - - int currentHaplotypeSize = HAPLOTYPE_SIZE; - List haplotypesInVC = new ArrayList(); - - int minHaplotypeSize = Haplotype.LEFT_WINDOW_SIZE + eventLength + 2; // to be safe - - - if (useDynamicHaplotypeSize) { - if (currentHaplotypeSize < minHaplotypeSize) - currentHaplotypeSize = minHaplotypeSize; - - boolean doneHaplotype = false; - while (!doneHaplotype) { - - haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, currentHaplotypeSize); - String[] haplotypeStrings = new String[haplotypesInVC.size()]; - for (int k = 0; k < haplotypeStrings.length; k++) - haplotypeStrings[k] = new String(haplotypesInVC.get(k).getBasesAsBytes()); - - // check if all strings are the same. - for (int k = 1; k < haplotypeStrings.length; k++) { - if (!haplotypeStrings[k].equals(haplotypeStrings[0])) - doneHaplotype = true; - - } - - if (doneHaplotype) - break; - else - currentHaplotypeSize = 2*currentHaplotypeSize; - - } - - } - else { - if (currentHaplotypeSize < minHaplotypeSize) - throw new UserException.BadArgumentValue("Insufficient haplotype length to represent all indels:", - String.valueOf(currentHaplotypeSize)); - - haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, currentHaplotypeSize); - } - - // combine likelihoods for all possible haplotype pair (n*(n-1)/2 combinations) - - double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()]; - double bestLikelihood = 1e13; - // for given allele haplotype, compute likelihood of read pileup given haplotype - ReadBackedPileup pileup = context.getBasePileup().getPileupWithoutMappingQualityZeroReads(); - - if (pileup.getSamples().size() > 1) { - throw new UserException.BadInput("Currently only a single-sample BAM is supported for Indel genotyper"); - } - - - // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. - // In general, we'll assume: even spread of indels throughout genome (not true, but simplifying assumption), - // and memoryless spread (i.e. probability that an indel lies in an interval A is independent of probability of - // another indel lying in interval B iff A and B don't overlap), then we can approximate inter-indel distances - // by an exponential distribution of mean 1/theta (theta = heterozygozity), and the number of indels on an interval - // of size L is Poisson-distributed with parameter lambda = theta*L. - - // Since typically, for small haplotype sizes and human heterozygozity, lambda will be <<1, we'll further approximate it - // by assuming that only one indel can happen in a particular interval, with Pr(indel present) = lambda*exp(-lambda), and - // pr(no indel) = 1-lambda*exp(-lambda) ~= exp(-lambda) for small lambda. - - // We also assume that a deletion is equally likely as an insertion (empirical observation, see e.g. Mills et al, Genome Research 2006) - // and we assume the following frequency spectrum for indel sizes Pr(event Length = L)= K*abs(L)^(-1.89)*10^(-0.015*abs(L)), - // taking positive L = insertions, negative L = deletions. K turns out to be about 1.5716 for probabilities to sum to one. - // so -10*log10(Pr event Length = L) =-10*log10(K)+ 18.9*log10(abs(L)) + 0.15*abs(L). - // Hence, Pr(observe event size = L in interval) ~ Pr(observe event L | event present) Pr (event present in interval) - // and -10*log10(above) = -10*log10(K)+ 18.9*log10(abs(L)) + 0.15*abs(L) - 10*log10(theta*L), and we ignore terms that would be - // added to ref hypothesis. - // Equation above is prior model. - - if (!useFlatPriors) { - - double lambda = (double)HAPLOTYPE_SIZE * indelHeterozygosity; - double altPrior = HaplotypeIndelErrorModel.probToQual(lambda)-HaplotypeIndelErrorModel.probToQual(eventLength)*1.89 + 0.15*eventLength - + HaplotypeIndelErrorModel.probToQual(1.5716)+ HaplotypeIndelErrorModel.probToQual(0.5); - - haplotypeLikehoodMatrix[0][1] = altPrior; - haplotypeLikehoodMatrix[1][1] = 2*altPrior; - } - - int bestIndexI =-1, bestIndexJ=-1; - double callConfidence = bestLikelihood; - - - if (pileup.getReads().size() > 0) { - double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()]; - int i=0; - for (SAMRecord read : pileup.getReads()) { - - // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi)) - // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent - for (int j=0; j < haplotypesInVC.size(); j++) { - readLikelihoods[i][j]= model.computeReadLikelihoodGivenHaplotype(haplotypesInVC.get(j), read, vc, eventLength); - if (DEBUG) { - System.out.print(read.getReadName()+" "); - - System.out.format("%d %d S:%d US:%d E:%d UE:%d C:%s %3.4f\n",i, j, read.getAlignmentStart(), - read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(), - read.getCigarString(), readLikelihoods[i][j]); - } - - } - i++; - } - - for (i=0; i < haplotypesInVC.size(); i++) { - for (int j=i; j < haplotypesInVC.size(); j++){ - // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j] - // L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2) - //readLikelihoods[k][j] has log10(Pr(R_k) | H[j] ) - double[] readLikelihood = new double[2]; // diploid sample - for (int readIdx=0; readIdx < pileup.getReads().size(); readIdx++) { - readLikelihood[0] = -readLikelihoods[readIdx][i]/10; - readLikelihood[1] = -readLikelihoods[readIdx][j]/10; - - double probRGivenHPair = MathUtils.sumLog10(readLikelihood)/2; - haplotypeLikehoodMatrix[i][j] += HaplotypeIndelErrorModel.probToQual(probRGivenHPair); - - } - - - if (haplotypeLikehoodMatrix[i][j] < bestLikelihood) { - bestIndexI = i; - bestIndexJ = j; - - callConfidence = bestLikelihood - haplotypeLikehoodMatrix[i][j]; - bestLikelihood = haplotypeLikehoodMatrix[i][j]; - } - } - } - } - - //we say that most likely genotype at site is index (i,j) maximizing likelihood matrix - String type; - if (vc.isDeletion()) - type = "DEL"; - else if (vc.isInsertion()) - type = "INS"; - else - type = "OTH"; - - type += vc.getIndelLengths().toString(); - - - out.format("%s %d %s ",vc.getChr(), vc.getStart(), type); - - if (doTrio) { - Genotype originalGenotype = vc.getGenotype("NA12878"); - Genotype dadGenotype = vc.getGenotype("NA12891"); - Genotype momGenotype = vc.getGenotype("NA12892"); - - String dadG, momG, oldG, newG; - oldG = getGenotypeString(originalGenotype); - dadG = getGenotypeString(dadGenotype); - momG = getGenotypeString(momGenotype); - - int x = bestIndexI+bestIndexJ; - if (x == 0) - newG = "HOMREF"; - else if (x == 1) - newG = "HET"; - else if (x == 2) - newG = "HOMVAR"; - else if (x < 0) - newG = "NOCALL"; - else - newG = "OTHER"; - - - if (callConfidence < STANDARD_CONFIDENCE_FOR_CALLING) { - newG = "NOCALL"; - } - out.format("NewG %s OldG %s DadG %s MomG %s\n", newG, oldG, dadG, momG); - } - else { - - for (String sample: sampleNames) { - String oldG, newG; - Genotype originalGenotype = vc.getGenotype(sample); - oldG = getGenotypeString(originalGenotype); - int x = bestIndexI+bestIndexJ; - if (x == 0) - newG = "HOMREF"; - else if (x == 1) - newG = "HET"; - else if (x == 2) - newG = "HOMVAR"; - else if (x < 0) - newG = "NOCALL"; - else - newG = "OTHER"; - - if (callConfidence < STANDARD_CONFIDENCE_FOR_CALLING) { - newG = "NOCALL"; - } - - out.format("NewG %s OldG %s\n", newG, oldG); - } - } - -/* - if ( context.hasExtendedEventPileup() ) { - - // if we got indels at current position: - - ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup().getPileupWithoutMappingQualityZeroReads(); - if ( pileup.size() < MIN_COVERAGE ) return 0; - - if ( pileup.getNumberOfDeletions() + pileup.getNumberOfInsertions() > MAX_INDELS ) { - // we got too many indel events. Maybe it's even a true event, and what we are looking for are - // errors rather than true calls. Hence, we do not need these indels. We have to 1) discard - // all remaining indels from the buffer: if they are still in the buffer, they are too close - // to the current position; and 2) make sure that the next position at which we attempt to count again is - // sufficiently far *after* the current position. - // System.out.println("Non countable indel event at "+pileup.getLocation()); - countableIndelBuffer.clear(); - coverageBuffer.clear(); // we do not want to count observations (read bases) around non-countable indel as well - skipToLoc = GenomeLocParser.createGenomeLoc(pileup.getLocation().getContigIndex(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1); - // System.out.println("Skip to "+skipToLoc); - } else { - // pileup does not contain too many indels, we need to store them in the buffer and count them later, - // if a non-countable indel event(s) do not show up too soon: - countableIndelBuffer.add(pileup); - } - return 0; - } - */ - - - return 1; //To change body of implemented methods use File | Settings | File Templates. - } - - private String getGenotypeString(Genotype genotype) { - String oldG; - if (genotype.isHomRef()) - oldG = "HOMREF"; - else if (genotype.isHet()) - oldG = "HET"; - else if (genotype.isHomVar()) - oldG = "HOMVAR"; - else - oldG = "OTHER"; - - return oldG; - } - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ - public Integer reduceInit() { - return 0; //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return accumulator with result of the map taken into account. - */ - public Integer reduce(Integer value, Integer sum) { - return value+sum; //To change body of implemented methods use File | Settings | File Templates. - } - - @Override - public void onTraversalDone(Integer result) { - } - -} - diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index dbdb60b6d..d63dc5947 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -179,7 +179,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { AFMatrix.clear(); for ( BiallelicGenotypeLikelihoods GL : GLs.values() ) { - if ( isClearRefCall(GL) ) { + // todo - gdebug workaround for crash + if (false) { //isClearRefCall(GL) ) { refCalls.add(GL); } else { AFMatrix.setLikelihoods(GL.getPosteriors(), GL.getSample()); @@ -206,7 +207,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { } } - private enum GenotypeType { AA, AB, BB } + protected enum GenotypeType { AA, AB, BB } protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; protected static class AlleleFrequencyMatrix { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java index cb97e6940..5a73a54a2 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java @@ -26,7 +26,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; @@ -37,9 +42,23 @@ import org.broad.tribble.util.variantcontext.Allele; import java.util.*; public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { + private final int maxReadDeletionLength = 3; + private final double insertionStartProbability = 1e-3; + private final double insertionEndProbability = 0.5; + private final double alphaDeletionProbability = 1e-3; + private final int HAPLOTYPE_SIZE = 80; + + // todo - the following need to be exposed for command line argument control + private final double indelHeterozygosity = 1.0/8000; + boolean useFlatPriors = true; + boolean DEBUGOUT = false; + + private HaplotypeIndelErrorModel model; protected DindelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); + model = new HaplotypeIndelErrorModel(maxReadDeletionLength, insertionStartProbability, + insertionEndProbability, alphaDeletionProbability, HAPLOTYPE_SIZE, false, DEBUGOUT); } public Allele getLikelihoods(RefMetaDataTracker tracker, @@ -48,17 +67,71 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo StratifiedAlignmentContext.StratifiedContextType contextType, GenotypePriors priors, Map GLs) { - // TODO: check to make sure the priors instanceof a valid priors class - // TODO: create a single set of Alleles to be passed into each BiallelicGenotypeLikelihoods object to minimize memory consumption + if ( tracker == null ) + return null; + + + VariantContext vc = tracker.getVariantContext(ref, "indels", null, ref.getLocus(), true); + // ignore places where we don't have a variant + if ( vc == null ) + return null; + + + if (!vc.isIndel()) + return null; + + if ( !(priors instanceof DiploidIndelGenotypePriors) ) + throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model"); + + int eventLength = vc.getReference().getBaseString().length() - vc.getAlternateAllele(0).getBaseString().length(); + // assume only one alt allele for now + if (eventLength<0) + eventLength = - eventLength; + + int currentHaplotypeSize = HAPLOTYPE_SIZE; + List haplotypesInVC = new ArrayList(); + int minHaplotypeSize = Haplotype.LEFT_WINDOW_SIZE + eventLength + 2; // to be safe + + // int numSamples = getNSamples(contexts); + haplotypesInVC = Haplotype.makeHaplotypeListFromVariantContextAlleles( vc, ref, currentHaplotypeSize); + // For each sample, get genotype likelihoods based on pileup + // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. + // initialize the GenotypeLikelihoods + GLs.clear(); + + double[][] haplotypeLikehoodMatrix; + + if (useFlatPriors) { + priors = new DiploidIndelGenotypePriors(); + } + else + priors = new DiploidIndelGenotypePriors(indelHeterozygosity,eventLength,currentHaplotypeSize); + + //double[] priorLikelihoods = priors.getPriors(); for ( Map.Entry sample : contexts.entrySet() ) { - // TODO: fill me in + AlignmentContext context = sample.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); - //GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(), refAllele, altAllele, ...)); + ReadBackedPileup pileup = null; + if (context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + + if (pileup != null ) { + haplotypeLikehoodMatrix = model.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC, vc, eventLength); + + + double[] genotypeLikelihoods = HaplotypeIndelErrorModel.getPosteriorProbabilitesFromHaplotypeLikelihoods( haplotypeLikehoodMatrix); + + GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(),vc.getReference(), + vc.getAlternateAllele(0), + Math.log10(genotypeLikelihoods[0]),Math.log10(genotypeLikelihoods[1]), Math.log10(genotypeLikelihoods[2]))); + + } } - // TODO: return the reference Allele - return null; + return vc.getReference(); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java new file mode 100755 index 000000000..5a2b7985d --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java @@ -0,0 +1,125 @@ +package org.broadinstitute.sting.playground.gatk.walkers.genotyper; + +import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.genotype.DiploidGenotype; + +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: delangel + * Date: Sep 30, 2010 + * Time: 1:47:55 PM + * To change this template use File | Settings | File Templates. + */ +public class DiploidIndelGenotypePriors implements GenotypePriors { + // -------------------------------------------------------------------------------------------------------------- + // + // Constants and static information + // + // -------------------------------------------------------------------------------------------------------------- + public static final double INDEL_HETEROZYGOSITY = 1e-4; + + private final static double[] flatPriors = new double[DiploidGenotype.values().length]; + + // -------------------------------------------------------------------------------------------------------------- + // + // Diploid priors + // + // -------------------------------------------------------------------------------------------------------------- + private double[] priors = null; + + /** + * Create a new DiploidGenotypePriors object with flat priors for each diploid genotype + */ + public DiploidIndelGenotypePriors() { + priors = flatPriors.clone(); + } + + public DiploidIndelGenotypePriors(double indelHeterozygosity, int eventLength, int haplotypeSize) { + double varPrior = getHaplotypePriors(indelHeterozygosity, eventLength, haplotypeSize); + priors[2] = Math.log10(varPrior*varPrior); + priors[1] = Math.log10(2*varPrior*(1-varPrior)); + priors[0] = Math.log10((1-varPrior)*(1-varPrior)); + + } + + + /** + * Returns an array of priors for each genotype, indexed by DiploidGenotype.ordinal values(). + * + * @return log10 prior as a double array + */ + public double[] getPriors() { + return priors; + } + + /** + * Returns the prior associated with DiploidGenotype g + * @param g + * @return log10 prior as a double + */ + public double getPrior(DiploidGenotype g) { + return getPriors()[g.ordinal()]; + } + + public double getHeterozygosity() { return INDEL_HETEROZYGOSITY; } + + public boolean validate(boolean throwException) { + try { + + for ( DiploidGenotype g : DiploidGenotype.values() ) { + int i = g.ordinal(); + if ( ! MathUtils.wellFormedDouble(priors[i]) || ! MathUtils.isNegativeOrZero(priors[i]) ) { + String bad = String.format("Prior %f is badly formed %b", priors[i], MathUtils.isNegativeOrZero(priors[i])); + throw new IllegalStateException(String.format("At %s: %s", g.toString(), bad)); + } + } + } catch ( IllegalStateException e ) { + if ( throwException ) + throw new RuntimeException(e); + else + return false; + } + + return true; + } + + public double getHaplotypePriors(double indelHeterozygosity, int eventLength, int haplotypeSize) { + // compute prior likelihoods on haplotypes. + // In general, we'll assume: even spread of indels throughout genome (not true, but simplifying assumption), + // and memoryless spread (i.e. probability that an indel lies in an interval A is independent of probability of + // another indel lying in interval B iff A and B don't overlap), then we can approximate inter-indel distances + // by an exponential distribution of mean 1/theta (theta = heterozygozity), and the number of indels on an interval + // of size L is Poisson-distributed with parameter lambda = theta*L. + + // Since typically, for small haplotype sizes and human heterozygozity, lambda will be <<1, we'll further approximate it + // by assuming that only one indel can happen in a particular interval, with Pr(indel present) = lambda*exp(-lambda), and + // pr(no indel) = 1-lambda*exp(-lambda) ~= exp(-lambda) for small lambda. + + // We also assume that a deletion is equally likely as an insertion (empirical observation, see e.g. Mills et al, Genome Research 2006) + // and we assume the following frequency spectrum for indel sizes Pr(event Length = L)= K*abs(L)^(-1.89)*10^(-0.015*abs(L)), + // taking positive L = insertions, negative L = deletions. K turns out to be about 1.5716 for probabilities to sum to one. + // so -10*log10(Pr event Length = L) =-10*log10(K)+ 18.9*log10(abs(L)) + 0.15*abs(L). + // Hence, Pr(observe event size = L in interval) ~ Pr(observe event L | event present) Pr (event present in interval) + // and -10*log10(above) = -10*log10(K)+ 18.9*log10(abs(L)) + 0.15*abs(L) - 10*log10(theta*L), and we ignore terms that would be + // added to ref hypothesis. + // Equation above is prior model. + + double lambda = (double)haplotypeSize * indelHeterozygosity; + return HaplotypeIndelErrorModel.probToQual(lambda)-HaplotypeIndelErrorModel.probToQual(eventLength)*1.89 + 0.15*eventLength + + HaplotypeIndelErrorModel.probToQual(1.5716)+ HaplotypeIndelErrorModel.probToQual(0.5); + + + + } + + + static { + for ( DiploidGenotype g : DiploidGenotype.values() ) { + flatPriors[g.ordinal()] = Math.log10(1.0 / DiploidGenotype.values().length); + } + } +} + diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java index a347ab8a7..9122626bd 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -26,6 +26,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFConstants; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -35,10 +41,9 @@ import java.io.PrintStream; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { - private int maxNumIterations = 50; - private double tol = 1e-4; - private boolean DEBUGOUT = true; - + private boolean DEBUGOUT = false; + private boolean SIMPLE_GREEDY_GENOTYPER = false; + protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) { super(N, logger, verboseWriter); } @@ -50,30 +55,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { double[] log10AlleleFrequencyPosteriors) { // Math requires linear math to make efficient updates. - double[] alleleFrequencyPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPriors); + double[] alleleFrequencyPriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPriors); + double[] alleleFrequencyPosteriors; + - // now that we have genotype likelihoods for each sample, we can start refining allele frequency estimate + alleleFrequencyPosteriors = updateAFEstimate(GLs, alleleFrequencyPriors); double meanAF = computeMeanAF(alleleFrequencyPosteriors); + if (DEBUGOUT) - System.out.format("Initial Mean AF: %5.6f\n", meanAF); + System.out.format("Mean AF: %5.4f. PVariant: %5.5f\n", meanAF,1.0-alleleFrequencyPosteriors[0]); - for (int numIterations=0; numIterations < maxNumIterations; numIterations++) { - double oldMeanAF = meanAF; - alleleFrequencyPosteriors = updateAFEstimate(GLs, alleleFrequencyPosteriors); - meanAF = computeMeanAF(alleleFrequencyPosteriors); - if (DEBUGOUT) - System.out.format("Mean AF: %5.4f. PVariant: %5.5f\n", meanAF,1.0-alleleFrequencyPosteriors[0]); - - if (Math.abs(meanAF-oldMeanAF) < tol) - break; - - } - - for (int k=0; k < alleleFrequencyPosteriors.length; k++) + for (int k=0; k < alleleFrequencyPosteriors.length; k++) { log10AlleleFrequencyPosteriors[k] = Math.log10(alleleFrequencyPosteriors[k]); - + } } private double[] updateAFEstimate(Map GLs, double[] alleleFrequencyPriors) { @@ -93,11 +89,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { double den = 2.0*j*(2.0*j-1); for (int k=0; k <= 2*j; k++ ) { - double tmp = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[2]; + double tmp = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()]; if (k > 0) - tmp += (2.0*k)*(2.0*j-k)*YMatrix[j-1][k-1]*genotypeLikelihoods[1]; + tmp += (2.0*k)*(2.0*j-k)*YMatrix[j-1][k-1]*genotypeLikelihoods[GenotypeType.AB.ordinal()]; if (k > 1) - tmp += k*(k-1)*YMatrix[j-1][k-2]*genotypeLikelihoods[0]; + tmp += k*(k-1)*YMatrix[j-1][k-2]*genotypeLikelihoods[GenotypeType.BB.ordinal()]; YMatrix[j][k] = tmp/den; @@ -112,7 +108,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { double[] newAF = new double[alleleFrequencyPriors.length]; for (int k=0; k <= numChr; k++) { - double prod = YMatrix[j][numChr-k] * alleleFrequencyPriors[k]; + double prod = YMatrix[j][k] * alleleFrequencyPriors[k]; newAF[k] = prod; sum += prod; } @@ -135,4 +131,112 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } + protected Map generateCalls(Map contexts, + Map GLs, + int bestAFguess) { + // first experiment: refine do genotype assignment by Hardy Weinberg equilibrium assumption. + HashMap calls = new HashMap(); + + + double[][] pathMetricArray = new double[GLs.size()+1][bestAFguess+1]; + int[][] tracebackArray = new int[GLs.size()+1][bestAFguess+1]; + + ArrayList sampleIndices = new ArrayList(); + int sampleIdx = 0; + + if (SIMPLE_GREEDY_GENOTYPER) { + sampleIndices.addAll(GLs.keySet()); + sampleIdx = GLs.size(); + } + else { + + for (String sample: GLs.keySet()) { + sampleIndices.add(sample); + + double[] likelihoods = GLs.get(sample).getLikelihoods(); + + for (int k=0; k <= bestAFguess; k++) { + + double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0]; + int bestIndex = k; + + if (k>0) { + double m2 = pathMetricArray[sampleIdx][k-1] + likelihoods[1]; + if (m2 > bestMetric) { + bestMetric = m2; + bestIndex = k-1; + } + } + + if (k>1) { + double m2 = pathMetricArray[sampleIdx][k-2] + likelihoods[2]; + if (m2 > bestMetric) { + bestMetric = m2; + bestIndex = k-2; + } + } + + pathMetricArray[sampleIdx+1][k] = bestMetric; + tracebackArray[sampleIdx+1][k] = bestIndex; + } + sampleIdx++; + } + } + + int startIdx = bestAFguess; + for (int k = sampleIdx; k > 0; k--) { + int bestGTguess; + String sample = sampleIndices.get(k-1); + BiallelicGenotypeLikelihoods GL = GLs.get(sample); + Allele alleleA = GL.getAlleleA(); + Allele alleleB = GL.getAlleleB(); + + if (SIMPLE_GREEDY_GENOTYPER) + bestGTguess = Utils.findIndexOfMaxEntry(GLs.get(sample).getLikelihoods()); + else { + int newIdx = tracebackArray[k][startIdx]; + bestGTguess = startIdx - newIdx; + startIdx = newIdx; + } + + HashMap attributes = new HashMap(); + ArrayList myAlleles = new ArrayList(); + attributes.put(VCFConstants.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()); + double qual = 0.0; + double[] posteriors = GLs.get(sample).getPosteriors(); + + if (bestGTguess == 0) { + myAlleles.add(alleleA); + myAlleles.add(alleleA); + //qual = posteriors[0] - Math.max(posteriors[1],posteriors[2]); + } else if(bestGTguess == 1) { + myAlleles.add(alleleA); + myAlleles.add(alleleB); + //qual = posteriors[1] - Math.max(posteriors[0],posteriors[2]); + + } else { + myAlleles.add(alleleB); + myAlleles.add(alleleB); + // qual = posteriors[2] - Math.max(posteriors[1],posteriors[0]); + } +/* + if (qual <= 0.0) { + qual = 0.0; + myAlleles.clear(); + myAlleles.add(Allele.NO_CALL); +// myAlleles.add(Allele.NO_CALL); + } +*/ + attributes.put(VCFConstants.GENOTYPE_QUALITY_KEY,String.format("%4.2f", 10*qual)); + + GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods()); + attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString()); + calls.put(sample, new Genotype(sample, myAlleles, qual, null, attributes, false)); + + } + + + return calls; + } + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index f740a6433..c32fcd371 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -164,7 +164,9 @@ public class UnifiedGenotyperEngine { if ( bestAFguess != 0 ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; + // todo - verify this is OK + phredScaledConfidence = (double)QualityUtils.MAX_QUAL_SCORE; + //phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { @@ -224,11 +226,22 @@ public class UnifiedGenotyperEngine { } GenomeLoc loc = refContext.getLocus(); - VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes); + + // todo - temp fix until we can deal with extended events properly + //VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes); + VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), + loc.getStart()+refAllele.length(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes); + if ( annotationEngine != null ) { // first off, we want to use the *unfiltered* context for the annotations - stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE); + ReadBackedPileup pileup = null; + if (rawContext.hasExtendedEventPileup()) + pileup = rawContext.getExtendedEventPileup(); + else if (rawContext.hasBasePileup()) + pileup = rawContext.getBasePileup(); + + stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE); Collection variantContexts = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vc); vc = variantContexts.iterator().next(); //We know the collection will always have exactly 1 element. @@ -413,8 +426,8 @@ public class UnifiedGenotyperEngine { priors = new DiploidSNPGenotypePriors(); break; case DINDEL: - // TODO: create indel priors object - priors = null; + // create flat priors for Indels, actual priors will depend on event length to be genotyped + priors = new DiploidIndelGenotypePriors(); break; default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + UAC.GLmodel); }