diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index fb3bd8427..f37f42cd0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,13 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFConstants; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; import java.util.Map; import java.util.HashMap; @@ -15,7 +20,7 @@ import java.util.List; import java.util.Arrays; -public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { +public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) @@ -25,12 +30,40 @@ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation if ( genotypes == null || genotypes.size() == 0 ) return null; - //double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts); - int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts); - if ( qDepth == 0 ) + double qual = 0.0; + int depth = 0; + + for ( Map.Entry genotype : genotypes.entrySet() ) { + + // we care only about variant calls with likelihoods + if ( genotype.getValue().isHomRef() ) + continue; + + StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey()); + if ( context == null ) + continue; + + depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size(); + + if ( genotype.getValue().hasLikelihoods() ) { + GenotypeLikelihoods GLs = genotype.getValue().getLikelihoods(); + double[] likelihoods = GLs.getAsVector(); + if ( GLs.getKey() == VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY ) { + for (int i = 0; i < likelihoods.length; i++) + likelihoods[i] /= -10.0; + } + + qual += 10.0 * getQual(likelihoods); + } + } + + if ( depth == 0 ) return null; - double QbyD = 10.0 * vc.getNegLog10PError() / (double)qDepth; + if ( qual == 0.0 ) + qual = 10.0 * vc.getNegLog10PError(); + + double QbyD = qual / (double)depth; Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%.2f", QbyD)); return map; @@ -40,4 +73,17 @@ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); } + private double getQual(double[] GLs) { + + // normalize so that we don't have precision issues + double[] adjustedLikelihoods = new double[GLs.length]; + double maxValue = Utils.findMaxEntry(GLs); + for (int i = 0; i < GLs.length; i++) + adjustedLikelihoods[i] = GLs[i] - maxValue; + + // AB + BB (in real space) + double variantWeight = Math.pow(10, adjustedLikelihoods[1]) + Math.pow(10, adjustedLikelihoods[2]); + // (AB + BB) / AA (in log space) + return Math.log10(variantWeight) - adjustedLikelihoods[0]; + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 818346506..2835dea11 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -32,6 +32,9 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; +import java.util.HashSet; +import java.util.Set; + import static java.lang.Math.log10; import static java.lang.Math.pow; @@ -70,8 +73,8 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { protected final static int FIXED_PLOIDY = 2; protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1; protected final static double ploidyAdjustment = log10(FIXED_PLOIDY); + protected final static double log10_3 = log10(3.0); - protected boolean enableCacheFlag = true; protected boolean VERBOSE = false; // @@ -82,50 +85,23 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { protected DiploidSNPGenotypePriors priors = null; - protected FourBaseLikelihoods fourBaseLikelihoods = null; - /** * Create a new GenotypeLikelhoods object with flat priors for each diploid genotype * - * @param m base model */ - public DiploidSNPGenotypeLikelihoods(BaseMismatchModel m) { + public DiploidSNPGenotypeLikelihoods() { this.priors = new DiploidSNPGenotypePriors(); - initialize(m, null); - } - - /** - * Create a new GenotypeLikelhoods object with flat priors for each diploid genotype - * - * @param m base model - * @param pl default platform - */ - public DiploidSNPGenotypeLikelihoods(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) { - this.priors = new DiploidSNPGenotypePriors(); - initialize(m, pl); + setToZero(); } /** * Create a new GenotypeLikelhoods object with given priors for each diploid genotype * - * @param m base model * @param priors priors */ - public DiploidSNPGenotypeLikelihoods(BaseMismatchModel m, DiploidSNPGenotypePriors priors) { + public DiploidSNPGenotypeLikelihoods(DiploidSNPGenotypePriors priors) { this.priors = priors; - initialize(m, null); - } - - /** - * Create a new GenotypeLikelhoods object with given priors for each diploid genotype - * - * @param m base model - * @param priors priors - * @param pl default platform - */ - public DiploidSNPGenotypeLikelihoods(BaseMismatchModel m, DiploidSNPGenotypePriors priors, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) { - this.priors = priors; - initialize(m, pl); + setToZero(); } /** @@ -138,37 +114,14 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { c.priors = priors; c.log10Likelihoods = log10Likelihoods.clone(); c.log10Posteriors = log10Posteriors.clone(); - c.fourBaseLikelihoods = (FourBaseLikelihoods)fourBaseLikelihoods.clone(); return c; } - protected void initialize(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) { - fourBaseLikelihoods = FourBaseLikelihoodsFactory.makeFourBaseLikelihoods(m, pl); - setToZero(); - } - protected void setToZero() { - log10Likelihoods = zeros.clone(); // likelihoods are all zeros + log10Likelihoods = genotypeZeros.clone(); // likelihoods are all zeros log10Posteriors = priors.getPriors().clone(); // posteriors are all the priors } - public void setVerbose(boolean v) { - VERBOSE = v; - fourBaseLikelihoods.setVerbose(v); - } - - public boolean isVerbose() { - return VERBOSE; - } - - public int getMinQScoreToInclude() { - return fourBaseLikelihoods.getMinQScoreToInclude(); - } - - public void setMinQScoreToInclude(int minQScoreToInclude) { - fourBaseLikelihoods.setMinQScoreToInclude(minQScoreToInclude); - } - /** * Returns an array of log10 likelihoods for each genotype, indexed by DiploidGenotype.ordinal values() * @return likelihoods array @@ -257,7 +210,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { */ public void setPriors(DiploidSNPGenotypePriors priors) { this.priors = priors; - log10Posteriors = zeros.clone(); + log10Posteriors = genotypeZeros.clone(); for ( DiploidGenotype g : DiploidGenotype.values() ) { int i = g.ordinal(); log10Posteriors[i] = priors.getPriors()[i] + log10Likelihoods[i]; @@ -273,22 +226,6 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { return getPriors()[g.ordinal()]; } - /** - * Simple function to overload to control the caching of genotype likelihood outputs. - * By default the function trues true -- do enable caching. If you are experimenting with an - * complex calcluation of P(B | G) and caching might not work correctly for you, overload this - * function and return false, so the super() object won't try to cache your GL calculations. - * - * @return true if caching should be enabled, false otherwise - */ - public boolean cacheIsEnabled() { - return enableCacheFlag; - } - - public void setEnableCacheFlag(boolean enable) { - enableCacheFlag = enable; - } - public int add(ReadBackedPileup pileup) { return add(pileup, false, false); } @@ -306,28 +243,45 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) { int n = 0; + // todo: first validate that my changes were good by passing in fragments representing just a single read... + //Set fragments = new HashSet(); for ( PileupElement p : pileup ) { if ( usableBase(p, ignoreBadBases) ) { + + /**** + Set fragment = new HashSet(); + fragment.add(p); + fragments.add(new PerFragmentPileupElement(fragment)); + } + } + + // for each fragment, add to the likelihoods + for ( PerFragmentPileupElement fragment : fragments ) { + n += add(fragment, capBaseQualsAtMappingQual); + } + ****/ + byte qual = capBaseQualsAtMappingQual ? (byte)Math.min((int)p.getQual(), p.getMappingQual()) : p.getQual(); n += add(p.getBase(), qual, p.getRead(), p.getOffset()); } } - + return n; } - public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) { + // public int add(PerFragmentPileupElement fragment, boolean capBaseQualsAtMappingQual) { - // Handle caching if requested. Just look up the cached result if its available, or compute and store it + public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) { + if ( qualityScore == 0 ) { // zero quals are wrong + return 0; + } + + // Just look up the cached result if it's available, or compute and store it DiploidSNPGenotypeLikelihoods gl; - if ( cacheIsEnabled() ) { - if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read) ) { - gl = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset); - } else { - gl = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read); - } + if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read) ) { + gl = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset); } else { - gl = calculateGenotypeLikelihoods(observedBase, qualityScore, read, offset); + gl = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read); } // for bad bases, there are no likelihoods @@ -352,7 +306,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { return 1; } - static DiploidSNPGenotypeLikelihoods[][][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseMismatchModel.values().length][EmpiricalSubstitutionProbabilities.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY][2]; + static DiploidSNPGenotypeLikelihoods[][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY][2]; protected boolean inCache( byte observedBase, byte qualityScore, int ploidy, SAMRecord read) { return getCache(CACHE, observedBase, qualityScore, ploidy, read) != null; @@ -372,36 +326,29 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { return gl; } - protected void setCache( DiploidSNPGenotypeLikelihoods[][][][][][] cache, + protected void setCache( DiploidSNPGenotypeLikelihoods[][][][] cache, byte observedBase, byte qualityScore, int ploidy, SAMRecord read, DiploidSNPGenotypeLikelihoods val ) { - int m = FourBaseLikelihoodsFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal(); - int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read); int i = BaseUtils.simpleBaseToBaseIndex(observedBase); int j = qualityScore; int k = ploidy; int x = strandIndex(! read.getReadNegativeStrandFlag() ); - cache[m][a][i][j][k][x] = val; + cache[i][j][k][x] = val; } - protected DiploidSNPGenotypeLikelihoods getCache( DiploidSNPGenotypeLikelihoods[][][][][][] cache, + protected DiploidSNPGenotypeLikelihoods getCache( DiploidSNPGenotypeLikelihoods[][][][] cache, byte observedBase, byte qualityScore, int ploidy, SAMRecord read) { - int m = FourBaseLikelihoodsFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal(); - int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read); int i = BaseUtils.simpleBaseToBaseIndex(observedBase); int j = qualityScore; int k = ploidy; int x = strandIndex(! read.getReadNegativeStrandFlag() ); - return cache[m][a][i][j][k][x]; + return cache[i][j][k][x]; } protected DiploidSNPGenotypeLikelihoods calculateGenotypeLikelihoods(byte observedBase, byte qualityScore, SAMRecord read, int offset) { - FourBaseLikelihoods fbl = fourBaseLikelihoods.computeLog10Likelihoods(observedBase, qualityScore, read, offset); - if ( fbl == null ) - return null; + double[] log10FourBaseLikelihoods = computeLog10Likelihoods(observedBase, qualityScore, read); - double[] fbLikelihoods = fbl.getLog10Likelihoods(); try { DiploidSNPGenotypeLikelihoods gl = (DiploidSNPGenotypeLikelihoods)this.clone(); @@ -412,8 +359,8 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { // todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop double p_base = 0.0; - p_base += pow(10, fbLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment); - p_base += pow(10, fbLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment); + p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment); + p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment); double likelihood = log10(p_base); gl.log10Likelihoods[g.ordinal()] += likelihood; @@ -434,6 +381,57 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { } } + /** + * Updates likelihoods and posteriors to reflect an additional observation of observedBase with + * qualityScore. + * + * @param observedBase observed base + * @param qualityScore base quality + * @param read SAM read + * @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example) + */ + protected double[] computeLog10Likelihoods(byte observedBase, byte qualityScore, SAMRecord read) { + double[] log10FourBaseLikelihoods = baseZeros.clone(); + + for ( byte base : BaseUtils.BASES ) { + double likelihood = log10PofObservingBaseGivenChromosome(observedBase, base, qualityScore); + + if ( VERBOSE ) { + boolean fwdStrand = ! read.getReadNegativeStrandFlag(); + System.out.printf(" L(%c | b=%s, Q=%d, S=%s) = %f / %f%n", + observedBase, base, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood); + } + + log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(base)] = likelihood; + } + + return log10FourBaseLikelihoods; + } + + /** + * + * @param observedBase observed base + * @param chromBase target base + * @param qual base quality + * @return log10 likelihood + */ + protected double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual) { + + double logP; + + if ( observedBase == chromBase ) { + // the base is consistent with the chromosome -- it's 1 - e + //logP = oneMinusData[qual]; + double e = pow(10, (qual / -10.0)); + logP = log10(1.0 - e); + } else { + // the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error) + logP = qual / -10.0 + (-log10_3); + } + + //System.out.printf("%c %c %d => %f%n", observedBase, chromBase, qual, logP); + return logP; + } // ----------------------------------------------------------------------------------------------------------------- // @@ -535,11 +533,15 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { // // Constant static data // - final static double[] zeros = new double[DiploidGenotype.values().length]; + private final static double[] genotypeZeros = new double[DiploidGenotype.values().length]; + private final static double[] baseZeros = new double[BaseUtils.BASES.length]; static { for ( DiploidGenotype g : DiploidGenotype.values() ) { - zeros[g.ordinal()] = 0.0; + genotypeZeros[g.ordinal()] = 0.0; + } + for ( byte base : BaseUtils.BASES ) { + baseZeros[BaseUtils.simpleBaseToBaseIndex(base)] = 0.0; } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionProbabilities.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionProbabilities.java deleted file mode 100755 index 15e19d436..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EmpiricalSubstitutionProbabilities.java +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.broadinstitute.sting.utils.BaseUtils; - -import static java.lang.Math.log10; -import java.util.TreeMap; -import java.util.EnumMap; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; - -public class EmpiricalSubstitutionProbabilities extends FourBaseLikelihoods { - // -------------------------------------------------------------------------------------------------------------- - // - // Static methods to manipulate machine platforms - // - // -------------------------------------------------------------------------------------------------------------- - public enum SequencerPlatform { - SOLEXA, // Solexa / Illumina - ROCHE454, // 454 - SOLID, // SOLiD - CG, // Complete Genomics - UNKNOWN // No idea -- defaulting to 1/3 - } - - private static TreeMap PLFieldToSequencerPlatform = new TreeMap(); - private static void bind(String s, SequencerPlatform x) { - PLFieldToSequencerPlatform.put(s, x); - PLFieldToSequencerPlatform.put(s.toUpperCase(), x); - PLFieldToSequencerPlatform.put(s.toLowerCase(), x); - } - - // - // Static list of platforms supported by this system. - // - static { - bind("LS454", SequencerPlatform.ROCHE454); - bind("454", SequencerPlatform.ROCHE454); - bind("ILLUMINA", SequencerPlatform.SOLEXA); - bind("SOLEXA", SequencerPlatform.SOLEXA); - bind("SOLID", SequencerPlatform.SOLID); - bind("ABI_SOLID", SequencerPlatform.SOLID); - bind("CG", SequencerPlatform.CG); - } - - public static SequencerPlatform standardizeSequencerPlatform( final String sequencerString ) { - String lcSequencerString = sequencerString == null ? null : sequencerString.toLowerCase(); - if ( sequencerString != null && PLFieldToSequencerPlatform.containsKey(lcSequencerString) ) { - return PLFieldToSequencerPlatform.get(lcSequencerString); - } else { - return SequencerPlatform.UNKNOWN; - } - } - - private static ThreadLocal lastReadForPL = new ThreadLocal(); - private static ThreadLocal plOfLastRead = new ThreadLocal(); - public static SequencerPlatform getReadSequencerPlatform( SAMRecord read ) { - if ( lastReadForPL.get() != read ) { - lastReadForPL.set(read); - SAMReadGroupRecord readGroup = read.getReadGroup(); - final String platformName = readGroup == null ? null : readGroup.getPlatform(); - plOfLastRead.set(standardizeSequencerPlatform(platformName)); - } - - return plOfLastRead.get(); - } - - public int getReadSequencerPlatformIndex( SAMRecord read ) { - return getReadSequencerPlatform(read).ordinal(); - } - - // -------------------------------------------------------------------------------------------------------------- - // - // Static methods to get at the transition tables themselves - // - // -------------------------------------------------------------------------------------------------------------- - - /** - * A matrix of value i x j -> log10(p) where - * - * i - byte of the miscalled base (i.e., A) - * j - byte of the presumed true base (i.e., C) - * log10p - empirical probability p that A is actually C - * - * The table is available for each technology - */ - private final static EnumMap log10pTrueGivenMiscall = new EnumMap(SequencerPlatform.class); - - private static void addMisCall(final SequencerPlatform pl, byte miscalledBase, byte trueBase, double p) { - if ( ! log10pTrueGivenMiscall.containsKey(pl) ) - log10pTrueGivenMiscall.put(pl, new double[4][4]); - - double[][] misCallProbs = log10pTrueGivenMiscall.get(pl); - int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase); - int j = BaseUtils.simpleBaseToBaseIndex(trueBase); - misCallProbs[i][j] = log10(p); - } - - private static double getProbMiscallIsBase(SequencerPlatform pl, byte miscalledBase, byte trueBase) { - int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase); - int j = BaseUtils.simpleBaseToBaseIndex(trueBase); - - double logP = log10pTrueGivenMiscall.get(pl)[i][j]; - if ( logP == 0.0 ) - throw new RuntimeException(String.format("Bad miscall base request miscalled=%c true=%c", miscalledBase, trueBase)); - else - return logP; - } - - private static void addSolexa() { - SequencerPlatform pl = SequencerPlatform.SOLEXA; - addMisCall(pl, BaseUtils.A, BaseUtils.C, 57.7/100.0); - addMisCall(pl, BaseUtils.A, BaseUtils.G, 17.1/100.0); - addMisCall(pl, BaseUtils.A, BaseUtils.T, 25.2/100.0); - - addMisCall(pl, BaseUtils.C, BaseUtils.A, 34.9/100.0); - addMisCall(pl, BaseUtils.C, BaseUtils.G, 11.3/100.0); - addMisCall(pl, BaseUtils.C, BaseUtils.T, 53.9/100.0); - - addMisCall(pl, BaseUtils.G, BaseUtils.A, 31.9/100.0); - addMisCall(pl, BaseUtils.G, BaseUtils.C, 5.1/100.0); - addMisCall(pl, BaseUtils.G, BaseUtils.T, 63.0/100.0); - - addMisCall(pl, BaseUtils.T, BaseUtils.A, 45.8/100.0); - addMisCall(pl, BaseUtils.T, BaseUtils.C, 22.1/100.0); - addMisCall(pl, BaseUtils.T, BaseUtils.G, 32.0/100.0); - } - - private static void addSOLiD() { - SequencerPlatform pl = SequencerPlatform.SOLID; - addMisCall(pl, BaseUtils.A, BaseUtils.C, 18.7/100.0); - addMisCall(pl, BaseUtils.A, BaseUtils.G, 42.5/100.0); - addMisCall(pl, BaseUtils.A, BaseUtils.T, 38.7/100.0); - - addMisCall(pl, BaseUtils.C, BaseUtils.A, 27.0/100.0); - addMisCall(pl, BaseUtils.C, BaseUtils.G, 18.9/100.0); - addMisCall(pl, BaseUtils.C, BaseUtils.T, 54.1/100.0); - - addMisCall(pl, BaseUtils.G, BaseUtils.A, 61.0/100.0); - addMisCall(pl, BaseUtils.G, BaseUtils.C, 15.7/100.0); - addMisCall(pl, BaseUtils.G, BaseUtils.T, 23.2/100.0); - - addMisCall(pl, BaseUtils.T, BaseUtils.A, 40.5/100.0); - addMisCall(pl, BaseUtils.T, BaseUtils.C, 34.3/100.0); - addMisCall(pl, BaseUtils.T, BaseUtils.G, 25.2/100.0); - } - - private static void add454() { - SequencerPlatform pl = SequencerPlatform.ROCHE454; - addMisCall(pl, BaseUtils.A, BaseUtils.C, 23.2/100.0); - addMisCall(pl, BaseUtils.A, BaseUtils.G, 42.6/100.0); - addMisCall(pl, BaseUtils.A, BaseUtils.T, 34.3/100.0); - - addMisCall(pl, BaseUtils.C, BaseUtils.A, 19.7/100.0); - addMisCall(pl, BaseUtils.C, BaseUtils.G, 8.4/100.0); - addMisCall(pl, BaseUtils.C, BaseUtils.T, 71.9/100.0); - - addMisCall(pl, BaseUtils.G, BaseUtils.A, 71.5/100.0); - addMisCall(pl, BaseUtils.G, BaseUtils.C, 6.6/100.0); - addMisCall(pl, BaseUtils.G, BaseUtils.T, 21.9/100.0); - - addMisCall(pl, BaseUtils.T, BaseUtils.A, 43.8/100.0); - addMisCall(pl, BaseUtils.T, BaseUtils.C, 37.8/100.0); - addMisCall(pl, BaseUtils.T, BaseUtils.G, 18.5/100.0); - } - - private static void addCG() { - SequencerPlatform pl = SequencerPlatform.CG; - addMisCall(pl, BaseUtils.A, BaseUtils.C, 28.2/100.0); - addMisCall(pl, BaseUtils.A, BaseUtils.G, 28.7/100.0); - addMisCall(pl, BaseUtils.A, BaseUtils.T, 43.1/100.0); - - addMisCall(pl, BaseUtils.C, BaseUtils.A, 29.8/100.0); - addMisCall(pl, BaseUtils.C, BaseUtils.G, 18.6/100.0); - addMisCall(pl, BaseUtils.C, BaseUtils.T, 51.6/100.0); - - addMisCall(pl, BaseUtils.G, BaseUtils.A, 32.5/100.0); - addMisCall(pl, BaseUtils.G, BaseUtils.C, 21.4/100.0); - addMisCall(pl, BaseUtils.G, BaseUtils.T, 46.1/100.0); - - addMisCall(pl, BaseUtils.T, BaseUtils.A, 42.6/100.0); - addMisCall(pl, BaseUtils.T, BaseUtils.C, 34.1/100.0); - addMisCall(pl, BaseUtils.T, BaseUtils.G, 23.3/100.0); - } - - private static void addUnknown() { - SequencerPlatform pl = SequencerPlatform.UNKNOWN; - for ( byte b1 : BaseUtils.BASES ) { - for ( byte b2 : BaseUtils.BASES ) { - if ( b1 != b2 ) - addMisCall(pl, b1, b2, 1.0/3.0); - } - - } - } - - static { - addSolexa(); - add454(); - addSOLiD(); - addCG(); - addUnknown(); - } - - - // -------------------------------------------------------------------------------------------------------------- - // - // The actual objects themselves - // - // -------------------------------------------------------------------------------------------------------------- - private boolean raiseErrorOnUnknownPlatform = true; - private SequencerPlatform defaultPlatform = SequencerPlatform.UNKNOWN; - - // - // forwarding constructors -- don't do anything at all - // - public EmpiricalSubstitutionProbabilities() { super(); } - - public EmpiricalSubstitutionProbabilities(boolean raiseErrorOnUnknownPlatform) { - super(); - this.raiseErrorOnUnknownPlatform = raiseErrorOnUnknownPlatform; - } - - public EmpiricalSubstitutionProbabilities(SequencerPlatform assumeUnknownPlatformsAreThis) { - super(); - - if ( assumeUnknownPlatformsAreThis != null ) { - raiseErrorOnUnknownPlatform = false; - defaultPlatform = assumeUnknownPlatformsAreThis; - } - } - - /** - * Cloning of the object - * @return clone - * @throws CloneNotSupportedException - */ - protected Object clone() throws CloneNotSupportedException { - return super.clone(); - } - - // ----------------------------------------------------------------------------------------------------------------- - // - // - // calculation of p(B|GT) - // - // - // ----------------------------------------------------------------------------------------------------------------- - - protected double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset) { - boolean fwdStrand = ! read.getReadNegativeStrandFlag(); - SequencerPlatform pl = getReadSequencerPlatform(read); - - if ( pl == SequencerPlatform.UNKNOWN ) { - if ( raiseErrorOnUnknownPlatform ) - throw new RuntimeException("Unknown sequencer platform for read " + read.format() + "; your BAM file is either missing the PL tag for some read groups or an unsupported platform is being used."); - else { - pl = defaultPlatform; - //System.out.printf("Using default platform %s", pl); - } - } - - //System.out.printf("%s for %s%n", pl, read); - - double log10p; - if ( fwdStrand ) { - log10p = getProbMiscallIsBase(pl, observedBase, chromBase); - } else { - log10p = getProbMiscallIsBase(pl, BaseUtils.simpleComplement(observedBase), BaseUtils.simpleComplement(chromBase)); - } - - //System.out.printf("p = %f for %s %c %c fwd=%b %d at %s%n", pow(10,log10p), pl, observedBase, chromBase, fwdStrand, offset, read.getReadName() ); - - return log10p; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseLikelihoods.java deleted file mode 100755 index ece9aac99..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseLikelihoods.java +++ /dev/null @@ -1,373 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.*; - -import static java.lang.Math.log10; -import static java.lang.Math.pow; - -/** - * Stable, error checking version of strict 4 base likelihoods. Useful for calculating the likelihoods, priors, - * and posteriors given a pile of bases and quality scores (in conjuncion with GenotypeLikelihoods) - * - * Suppose we have bases b1, b2, ..., bN with qualities scores q1, q2, ..., qN. This object - * calculates: - * - * P(b | D) = P(b) * P(D | b) - * - * where - * - * P(D | b) = sum_i log10 P(bi | b) - * - * and - * - * P(bi | b) = 1 - P(error | q1) if bi = b - * = P(error | q1) / 3 if bi != b - * - * - */ -public abstract class FourBaseLikelihoods implements Cloneable { - - protected boolean enableCacheFlag = true; - - // - // The fundamental data array associated with 4-base likelihoods - // - protected double[] log10Likelihoods = null; - - - /** - * If true, lots of output will be generated about the Likelihoods at each site - */ - private boolean verbose = false; - - /** - * Bases with Q scores below this threshold aren't included in the Likelihood calculation - */ - private int minQScoreToInclude = 0; - - /** - * Create a new FourBaseLikelihoods object - */ - public FourBaseLikelihoods() { - log10Likelihoods = zeros.clone(); // Likelihoods are all zeros - } - - /** - * Cloning of the object - * @return clone - * @throws CloneNotSupportedException - */ - protected Object clone() throws CloneNotSupportedException { - FourBaseLikelihoods c = (FourBaseLikelihoods)super.clone(); - c.log10Likelihoods = log10Likelihoods.clone(); - return c; - } - - public void setVerbose(boolean v) { - verbose = v; - } - - public boolean isVerbose() { - return verbose; - } - - public int getMinQScoreToInclude() { - return minQScoreToInclude; - } - - public void setMinQScoreToInclude(int minQScoreToInclude) { - this.minQScoreToInclude = minQScoreToInclude; - } - - /** - * Returns an array of log10 likelihoods for each base, indexed by BaseUtils.BASES.ordinal values() - * @return probs - */ - public double[] getLog10Likelihoods() { - return log10Likelihoods; - } - - /** - * Returns the likelihood associated with a base - * @param base base - * @return log10 likelihood as a double - */ - public double getLog10Likelihood(byte base) { - return getLog10Likelihood(BaseUtils.simpleBaseToBaseIndex(base)); - } - - public double getLog10Likelihood(int baseIndex) { - return (baseIndex < 0 ? 0.0 : getLog10Likelihoods()[baseIndex]); - } - - /** - * Returns an array of likelihoods for each base, indexed by BaseUtils.BASES.ordinal values() - * @return probs - */ - public double[] getLikelihoods() { - double[] probs = new double[4]; - for (int i = 0; i < 4; i++) - probs[i] = Math.pow(10, log10Likelihoods[i]); - return probs; - } - - /** - * Returns the likelihoods associated with a base - * @param base base - * @return likelihoods as a double - */ - public double getLikelihood(byte base) { - return getLikelihood(BaseUtils.simpleBaseToBaseIndex(base)); - } - - public double getLikelihood(int baseIndex) { - return (baseIndex < 0 ? 0.0 : Math.pow(10, log10Likelihoods[baseIndex])); - } - - - // ----------------------------------------------------------------------------------------------------------------- - // - // - // add() -- the heart of - // - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * Updates likelihoods and posteriors to reflect an additional observation of observedBase with - * qualityScore. - * - * @param observedBase observed base - * @param qualityScore base quality - * @param read SAM read - * @param offset offset on read - * @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example) - */ - public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) { - FourBaseLikelihoods fbl = computeLog10Likelihoods(observedBase, qualityScore, read, offset); - if ( fbl == null ) - return 0; - - for ( byte base : BaseUtils.BASES ) { - double likelihood = fbl.getLikelihood(base); - log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] += likelihood; - } - - if ( verbose ) { - for ( byte base : BaseUtils.BASES ) { System.out.printf("%s\t", (char)base); } - System.out.println(); - for ( byte base : BaseUtils.BASES ) { System.out.printf("%.2f\t", log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)]); } - System.out.println(); - } - - return 1; - } - - /** - * Updates likelihoods and posteriors to reflect an additional observation of observedBase with - * qualityScore. - * - * @param observedBase observed base - * @param qualityScore base quality - * @param read SAM read - * @param offset offset on read - * @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example) - */ - public FourBaseLikelihoods computeLog10Likelihoods(byte observedBase, byte qualityScore, SAMRecord read, int offset) { - if ( badBase(observedBase) ) { - return null; - } - - try { - if ( qualityScore > getMinQScoreToInclude() ) { - - FourBaseLikelihoods fbl = (FourBaseLikelihoods)this.clone(); - fbl.log10Likelihoods = zeros.clone(); - - for ( byte base : BaseUtils.BASES ) { - double likelihood = log10PofObservingBaseGivenChromosome(observedBase, base, qualityScore, read, offset); - - if ( verbose ) { - boolean fwdStrand = ! read.getReadNegativeStrandFlag(); - System.out.printf(" L(%c | b=%s, Q=%d, S=%s) = %f / %f%n", - observedBase, base, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood); - } - - fbl.log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] = likelihood; - } - - return fbl; - } - } catch ( CloneNotSupportedException e ) { - throw new RuntimeException(e); - } - - return null; - } - - - // ----------------------------------------------------------------------------------------------------------------- - // - // - // helper routines - // - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base - * is considered bad if: - * - * Criterion 1: observed base isn't a A,C,T,G or lower case equivalent - * - * @param observedBase observed base - * @return true if the base is a bad base - */ - private boolean badBase(byte observedBase) { - return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1; - } - - /** - * Return a string representation of this object in a moderately usable form - * - * @return string representation - */ - public String toString() { - double sum = 0; - StringBuilder s = new StringBuilder(); - for ( byte base : BaseUtils.BASES ) { - int baseIndex = BaseUtils.simpleBaseToBaseIndex(base); - s.append(String.format("%s %.10f ", base, log10Likelihoods[baseIndex])); - sum += Math.pow(10, log10Likelihoods[baseIndex]); - } - s.append(String.format(" %f", sum)); - return s.toString(); - } - - // in general, we don't care about the platform index; EmpiricalSubstitutionProbabilities overloads this - public int getReadSequencerPlatformIndex( SAMRecord read ) { - return 0; - } - - // ----------------------------------------------------------------------------------------------------------------- - // - // - // Validation routines - // - // - // ----------------------------------------------------------------------------------------------------------------- - - public boolean validate() { - return validate(true); - } - - public boolean validate(boolean throwException) { - try { - - for ( byte base : BaseUtils.BASES ) { - - int i = BaseUtils.simpleBaseToBaseIndex(base); - if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) { - String bad = String.format("Likelihood %f is badly formed", log10Likelihoods[i]); - throw new IllegalStateException(String.format("At %s: %s", base, bad)); - } - } - } catch ( IllegalStateException e ) { - if ( throwException ) - throw new RuntimeException(e); - else - return false; - } - - return true; - } - - // ----------------------------------------------------------------------------------------------------------------- - // - // - // Hearty math calculations follow - // - // -- these should not be messed with unless you know what you are doing - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * - * @param observedBase observed base - * @param chromBase target base - * @param qual base quality - * @param read SAM read - * @param offset offset on read - * @return log10 likelihood - */ - - protected double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual, SAMRecord read, int offset) { - if (qual == 0) { // zero quals are wrong - throw new RuntimeException(String.format("Unexpected Q0 base discovered in log10PofObservingBaseGivenChromosome: %c %s %d at %d in %s", - observedBase, chromBase, qual, offset, read)); - } - - double logP; - - if ( observedBase == chromBase ) { - // the base is consistent with the chromosome -- it's 1 - e - //logP = oneMinusData[qual]; - double e = pow(10, (qual / -10.0)); - logP = log10(1.0 - e); - } else { - // the base is inconsistent with the chromosome -- it's e * P(chromBase | observedBase is an error) - logP = qual / -10.0 + log10PofTrueBaseGivenMiscall(observedBase, chromBase, read, offset); - } - - //System.out.printf("%c %c %d => %f%n", observedBase, chromBase, qual, logP); - return logP; - } - - /** - * Must be overridden by concrete subclasses - * - * @param observedBase observed base - * @param chromBase target base - * @param read SAM read - * @param offset offset on read - * @return log10 likelihood - */ - protected abstract double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset); - - // - // Constant static data - // - private final static double[] zeros = new double[BaseUtils.BASES.length]; - - static { - for ( byte base : BaseUtils.BASES ) { - zeros[BaseUtils.simpleBaseToBaseIndex(base)] = 0.0; - } - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseLikelihoodsFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseLikelihoodsFactory.java deleted file mode 100755 index a2cea355c..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/FourBaseLikelihoodsFactory.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import static org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel.*; - -public class FourBaseLikelihoodsFactory { - //private FourBaseProbabilitiesFactory() {} // cannot be instantiated - - public static BaseMismatchModel getBaseMismatchModel(final String name) { - return valueOf(name); - } - - public static BaseMismatchModel getBaseMismatchModel(final FourBaseLikelihoods m) { - if ( m instanceof ThreeStateErrorProbabilities) - return THREE_STATE; - else if ( m instanceof EmpiricalSubstitutionProbabilities) - return EMPIRICAL; - - throw new RuntimeException("Unexpected BaseMismatchModel " + m.getClass()); - - } - /** - * General and correct way to create FourBaseLikelihood objects for arbitrary base mismatching models - * - * @param m model - * @param pl default platform - * @return new 4-base model - */ - public static FourBaseLikelihoods makeFourBaseLikelihoods(BaseMismatchModel m, - EmpiricalSubstitutionProbabilities.SequencerPlatform pl ) { - switch ( m ) { - case THREE_STATE: return new ThreeStateErrorProbabilities(); - case EMPIRICAL: return new EmpiricalSubstitutionProbabilities(pl); - default: throw new RuntimeException("Unexpected BaseMismatchModel " + m); - } - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 3bdfff71d..596a6be4c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -87,7 +87,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC ReadBackedPileup pileup = sample.getValue().getContext(contextType).getBasePileup(); // create the GenotypeLikelihoods object - DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods(UAC.baseModel, (DiploidSNPGenotypePriors)priors, UAC.defaultPlatform); + DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors); int nGoodBases = GL.add(pileup, true, true); if ( nGoodBases == 0 ) continue; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorProbabilities.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorProbabilities.java deleted file mode 100755 index a39990340..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ThreeStateErrorProbabilities.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import static java.lang.Math.log10; - -import net.sf.samtools.SAMRecord; - -public class ThreeStateErrorProbabilities extends FourBaseLikelihoods { - // - // forwarding constructors -- don't do anything at all - // - public ThreeStateErrorProbabilities() { super(); } - - /** - * Cloning of the object - * @return clone - * @throws CloneNotSupportedException - */ - protected Object clone() throws CloneNotSupportedException { - return super.clone(); - } - - /** - * Simple log10(3) cached value - */ - protected static final double log103 = log10(3.0); - - /** - * - * @param observedBase observed base - * @param chromBase target base - * @param read SAM read - * @param offset offset on read - * @return log10 likelihood - */ - protected double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset) { - return -log103; // equivalent to e / 3 model - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 079290532..9f35eb157 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -38,9 +38,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false) public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; - @Argument(fullName = "base_model", shortName = "bm", doc = "Base substitution model to employ when using the SNP Genotype Likelihoods model -- EMPIRICAL is the recommended default, but it's possible to select the THREE_STATE model for comparison purposes", required = false) - public BaseMismatchModel baseModel = BaseMismatchModel.EMPIRICAL; - @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY; @@ -72,10 +69,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false) public String ASSUME_SINGLE_SAMPLE = null; - @Hidden - @Argument(fullName = "platform", shortName = "pl", doc = "Causes the genotyper to assume that reads without PL header TAG are this platform. Defaults to null, indicating that the system will throw a runtime exception when such reads are detected", required = false) - public EmpiricalSubstitutionProbabilities.SequencerPlatform defaultPlatform = null; - // control the various parameters to be used @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false) @@ -98,13 +91,11 @@ public class UnifiedArgumentCollection { UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); uac.GLmodel = GLmodel; - uac.baseModel = baseModel; uac.heterozygosity = heterozygosity; uac.GENOTYPE_MODE = GENOTYPE_MODE; uac.ALL_BASES_MODE = ALL_BASES_MODE; uac.NO_SLOD = NO_SLOD; uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE; - uac.defaultPlatform = defaultPlatform; uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING; uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING; uac.TRIGGER_CONFIDENCE_FOR_CALLING = TRIGGER_CONFIDENCE_FOR_CALLING; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java index 8a414969e..da0e23759 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java @@ -114,7 +114,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker>{ } //Calculate posterior probabilities - DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods(BaseMismatchModel.THREE_STATE); + DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods(); SAMRecord read; int offset; char base; byte qual; int mapquality; String readname; //Check for bad bases and ensure mapping quality myself. This works. diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 82fe5380a..442e950cd 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -25,7 +25,7 @@ public class public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("05359047d4d914c9740466c386a1addc")); + Arrays.asList("c4c75b08bb49fa404403c01198269133")); executeTest("testMultiSamplePilot1", spec); } @@ -33,7 +33,7 @@ public class public void testMultiSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1, - Arrays.asList("755f6abfca9db358a839b1ee2d35c5ca")); + Arrays.asList("17cea9547213f3fbdd28a8e8c5971564")); executeTest("testMultiSamplePilot2", spec); } @@ -41,7 +41,7 @@ public class public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("59d6bf8a5030119d1ef0a644a76b3212")); + Arrays.asList("43f7691454678e5ff529a1e21ce1c0e6")); executeTest("testSingleSamplePilot2", spec); } @@ -51,7 +51,7 @@ public class // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "6c4f8e5da47fc4346e8385c109ce731b"; + private final static String COMPRESSED_OUTPUT_MD5 = "bb2e7c10d69d526a298947b292d17620"; @Test public void testCompressedOutput() { @@ -78,7 +78,7 @@ public class @Test public void testParallelization() { - String md5 = "f0d9f168a484d48b23b924f388f088b3"; + String md5 = "9a4b3035ba16959d5b553ab70b791751"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -105,12 +105,12 @@ public class @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "b4ed6bd0129088a9ab5effadcbbe8469" ); - e.put( "-all_bases", "4bdfede52a766574ae9638ad3012bce5" ); - e.put( "--min_base_quality_score 26", "8123a3ca9a7b83a4afacabb33f157613" ); - e.put( "--min_mapping_quality_score 26", "6e79b6386624b537398e9971bdc5c6f4" ); - e.put( "--max_mismatches_in_40bp_window 5", "b5f9af45dfc8e168bba619d158dd56a4" ); - e.put( "--p_nonref_model GRID_SEARCH", "e57cfae0e2caf42172bc7c466d8ff004" ); + e.put( "-genotype", "01f3d6d0e18267bfb7dc0331d4cb798d" ); + e.put( "-all_bases", "a2ff7f6c232e9210607e0c918da4bc61" ); + e.put( "--min_base_quality_score 26", "3af28da44a4bcfa7385797795ea8fed1" ); + e.put( "--min_mapping_quality_score 26", "8a61e97afa50571ce21cd39e72aa840f" ); + e.put( "--max_mismatches_in_40bp_window 5", "29f53ab510c8d65b75436f1fc41a9321" ); + e.put( "--p_nonref_model GRID_SEARCH", "00593066022525ed9e3bad2c009a7d2a" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -124,12 +124,12 @@ public class public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("e57cfae0e2caf42172bc7c466d8ff004")); + Arrays.asList("00593066022525ed9e3bad2c009a7d2a")); executeTest("testConfidence1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("4440f676bb27c345953b8aaa1e57a1ed")); + Arrays.asList("d32899eb5817ad451a84a69a16edfe67")); executeTest("testConfidence2", spec2); } @@ -141,8 +141,8 @@ public class @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "f92075cf201ecfe7161b9646195348a5" ); - e.put( 1.0 / 1850, "a428671a373246b20eabd66911e64612" ); + e.put( 0.01, "76c75d43c19086a30f99098305d108eb" ); + e.put( 1.0 / 1850, "12aee2a5aab5c604b02eee07b9ad0c15" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -152,25 +152,6 @@ public class } } - // -------------------------------------------------------------------------------------------------------------- - // - // testing other base calling models - // - // -------------------------------------------------------------------------------------------------------------- - - @Test - public void testOtherBaseCallModel() { - HashMap e = new HashMap(); - e.put( "three_state", "e06dd3becb9bfd083c4706bc0230b8e9" ); - - for ( Map.Entry entry : e.entrySet() ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1, - Arrays.asList(entry.getValue())); - executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec); - } - } - // -------------------------------------------------------------------------------------------------------------- // // testing calls with SLX, 454, and SOLID data @@ -184,7 +165,7 @@ public class " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("878a76dfd83f1c12dd68dc5a8bdfa483")); + Arrays.asList("bde6ecc6ca31cbfc4d08ca65887ed842")); executeTest(String.format("testMultiTechnologies"), spec); }