3 big changes that all kill the integration tests: 1. Don't cap the PLs at 255 anymore. 2. Move over to the 3-state model as the only available base model for UG (no more base transition tables). 3. New QD implementation when GLs/PLs are available.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4846 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-12-15 16:24:28 +00:00
parent 46cd227613
commit 5c0b66cb7c
11 changed files with 172 additions and 950 deletions

View File

@ -1,13 +1,18 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFConstants;
import org.broad.tribble.vcf.VCFHeaderLineType;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import java.util.Map;
import java.util.HashMap;
@ -15,7 +20,7 @@ import java.util.List;
import java.util.Arrays;
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -25,12 +30,40 @@ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation
if ( genotypes == null || genotypes.size() == 0 )
return null;
//double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts);
int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
if ( qDepth == 0 )
double qual = 0.0;
int depth = 0;
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
// we care only about variant calls with likelihoods
if ( genotype.getValue().isHomRef() )
continue;
StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
if ( context == null )
continue;
depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
if ( genotype.getValue().hasLikelihoods() ) {
GenotypeLikelihoods GLs = genotype.getValue().getLikelihoods();
double[] likelihoods = GLs.getAsVector();
if ( GLs.getKey() == VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY ) {
for (int i = 0; i < likelihoods.length; i++)
likelihoods[i] /= -10.0;
}
qual += 10.0 * getQual(likelihoods);
}
}
if ( depth == 0 )
return null;
double QbyD = 10.0 * vc.getNegLog10PError() / (double)qDepth;
if ( qual == 0.0 )
qual = 10.0 * vc.getNegLog10PError();
double QbyD = qual / (double)depth;
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", QbyD));
return map;
@ -40,4 +73,17 @@ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
/**
 * Computes the log10 odds of a variant genotype vs. hom-ref for one sample
 * from its three genotype log10 likelihoods [AA, AB, BB].
 *
 * @param GLs log10 genotype likelihoods, ordered AA, AB, BB
 * @return log10( (L(AB) + L(BB)) / L(AA) )
 */
private double getQual(double[] GLs) {
    // Normalize by the largest likelihood so the pow(10, x) calls below
    // stay in a numerically safe range.
    double best = GLs[0];
    for (int i = 1; i < GLs.length; i++) {
        if (GLs[i] > best)
            best = GLs[i];
    }

    // AB + BB, summed in real space after normalization
    double variantWeight = Math.pow(10, GLs[1] - best) + Math.pow(10, GLs[2] - best);

    // (AB + BB) / AA, back in log space
    return Math.log10(variantWeight) - (GLs[0] - best);
}
}

View File

@ -32,6 +32,9 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import java.util.HashSet;
import java.util.Set;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
@ -70,8 +73,8 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
protected final static int FIXED_PLOIDY = 2;
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
protected final static double ploidyAdjustment = log10(FIXED_PLOIDY);
protected final static double log10_3 = log10(3.0);
protected boolean enableCacheFlag = true;
protected boolean VERBOSE = false;
//
@ -82,50 +85,23 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
protected DiploidSNPGenotypePriors priors = null;
protected FourBaseLikelihoods fourBaseLikelihoods = null;
/**
* Create a new GenotypeLikelihoods object with flat priors for each diploid genotype
*
* @param m base model
*/
public DiploidSNPGenotypeLikelihoods(BaseMismatchModel m) {
public DiploidSNPGenotypeLikelihoods() {
this.priors = new DiploidSNPGenotypePriors();
initialize(m, null);
}
/**
* Create a new GenotypeLikelihoods object with flat priors for each diploid genotype
*
* @param m base model
* @param pl default platform
*/
public DiploidSNPGenotypeLikelihoods(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
this.priors = new DiploidSNPGenotypePriors();
initialize(m, pl);
setToZero();
}
/**
* Create a new GenotypeLikelihoods object with given priors for each diploid genotype
*
* @param m base model
* @param priors priors
*/
public DiploidSNPGenotypeLikelihoods(BaseMismatchModel m, DiploidSNPGenotypePriors priors) {
public DiploidSNPGenotypeLikelihoods(DiploidSNPGenotypePriors priors) {
this.priors = priors;
initialize(m, null);
}
/**
* Create a new GenotypeLikelihoods object with given priors for each diploid genotype
*
* @param m base model
* @param priors priors
* @param pl default platform
*/
public DiploidSNPGenotypeLikelihoods(BaseMismatchModel m, DiploidSNPGenotypePriors priors, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
this.priors = priors;
initialize(m, pl);
setToZero();
}
/**
@ -138,37 +114,14 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
c.priors = priors;
c.log10Likelihoods = log10Likelihoods.clone();
c.log10Posteriors = log10Posteriors.clone();
c.fourBaseLikelihoods = (FourBaseLikelihoods)fourBaseLikelihoods.clone();
return c;
}
protected void initialize(BaseMismatchModel m, EmpiricalSubstitutionProbabilities.SequencerPlatform pl) {
fourBaseLikelihoods = FourBaseLikelihoodsFactory.makeFourBaseLikelihoods(m, pl);
setToZero();
}
protected void setToZero() {
log10Likelihoods = zeros.clone(); // likelihoods are all zeros
log10Likelihoods = genotypeZeros.clone(); // likelihoods are all zeros
log10Posteriors = priors.getPriors().clone(); // posteriors are all the priors
}
public void setVerbose(boolean v) {
VERBOSE = v;
fourBaseLikelihoods.setVerbose(v);
}
public boolean isVerbose() {
return VERBOSE;
}
public int getMinQScoreToInclude() {
return fourBaseLikelihoods.getMinQScoreToInclude();
}
public void setMinQScoreToInclude(int minQScoreToInclude) {
fourBaseLikelihoods.setMinQScoreToInclude(minQScoreToInclude);
}
/**
* Returns an array of log10 likelihoods for each genotype, indexed by DiploidGenotype.ordinal values()
* @return likelihoods array
@ -257,7 +210,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
*/
public void setPriors(DiploidSNPGenotypePriors priors) {
this.priors = priors;
log10Posteriors = zeros.clone();
log10Posteriors = genotypeZeros.clone();
for ( DiploidGenotype g : DiploidGenotype.values() ) {
int i = g.ordinal();
log10Posteriors[i] = priors.getPriors()[i] + log10Likelihoods[i];
@ -273,22 +226,6 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
return getPriors()[g.ordinal()];
}
/**
* Simple function to overload to control the caching of genotype likelihood outputs.
* By default the function returns true -- do enable caching. If you are experimenting with a
* complex calculation of P(B | G) and caching might not work correctly for you, overload this
* function and return false, so the super() object won't try to cache your GL calculations.
*
* @return true if caching should be enabled, false otherwise
*/
public boolean cacheIsEnabled() {
return enableCacheFlag;
}
public void setEnableCacheFlag(boolean enable) {
enableCacheFlag = enable;
}
public int add(ReadBackedPileup pileup) {
return add(pileup, false, false);
}
@ -306,28 +243,45 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
int n = 0;
// todo: first validate that my changes were good by passing in fragments representing just a single read...
//Set<PerFragmentPileupElement> fragments = new HashSet<PerFragmentPileupElement>();
for ( PileupElement p : pileup ) {
if ( usableBase(p, ignoreBadBases) ) {
/****
Set<PileupElement> fragment = new HashSet<PileupElement>();
fragment.add(p);
fragments.add(new PerFragmentPileupElement(fragment));
}
}
// for each fragment, add to the likelihoods
for ( PerFragmentPileupElement fragment : fragments ) {
n += add(fragment, capBaseQualsAtMappingQual);
}
****/
byte qual = capBaseQualsAtMappingQual ? (byte)Math.min((int)p.getQual(), p.getMappingQual()) : p.getQual();
n += add(p.getBase(), qual, p.getRead(), p.getOffset());
}
}
return n;
}
public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
// public int add(PerFragmentPileupElement fragment, boolean capBaseQualsAtMappingQual) {
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
if ( qualityScore == 0 ) { // zero quals are wrong
return 0;
}
// Just look up the cached result if it's available, or compute and store it
DiploidSNPGenotypeLikelihoods gl;
if ( cacheIsEnabled() ) {
if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read) ) {
gl = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset);
} else {
gl = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read);
}
if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read) ) {
gl = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset);
} else {
gl = calculateGenotypeLikelihoods(observedBase, qualityScore, read, offset);
gl = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read);
}
// for bad bases, there are no likelihoods
@ -352,7 +306,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
return 1;
}
static DiploidSNPGenotypeLikelihoods[][][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseMismatchModel.values().length][EmpiricalSubstitutionProbabilities.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY][2];
static DiploidSNPGenotypeLikelihoods[][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY][2];
protected boolean inCache( byte observedBase, byte qualityScore, int ploidy, SAMRecord read) {
return getCache(CACHE, observedBase, qualityScore, ploidy, read) != null;
@ -372,36 +326,29 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
return gl;
}
protected void setCache( DiploidSNPGenotypeLikelihoods[][][][][][] cache,
protected void setCache( DiploidSNPGenotypeLikelihoods[][][][] cache,
byte observedBase, byte qualityScore, int ploidy,
SAMRecord read, DiploidSNPGenotypeLikelihoods val ) {
int m = FourBaseLikelihoodsFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal();
int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read);
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
int j = qualityScore;
int k = ploidy;
int x = strandIndex(! read.getReadNegativeStrandFlag() );
cache[m][a][i][j][k][x] = val;
cache[i][j][k][x] = val;
}
protected DiploidSNPGenotypeLikelihoods getCache( DiploidSNPGenotypeLikelihoods[][][][][][] cache,
protected DiploidSNPGenotypeLikelihoods getCache( DiploidSNPGenotypeLikelihoods[][][][] cache,
byte observedBase, byte qualityScore, int ploidy, SAMRecord read) {
int m = FourBaseLikelihoodsFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal();
int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read);
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
int j = qualityScore;
int k = ploidy;
int x = strandIndex(! read.getReadNegativeStrandFlag() );
return cache[m][a][i][j][k][x];
return cache[i][j][k][x];
}
protected DiploidSNPGenotypeLikelihoods calculateGenotypeLikelihoods(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
FourBaseLikelihoods fbl = fourBaseLikelihoods.computeLog10Likelihoods(observedBase, qualityScore, read, offset);
if ( fbl == null )
return null;
double[] log10FourBaseLikelihoods = computeLog10Likelihoods(observedBase, qualityScore, read);
double[] fbLikelihoods = fbl.getLog10Likelihoods();
try {
DiploidSNPGenotypeLikelihoods gl = (DiploidSNPGenotypeLikelihoods)this.clone();
@ -412,8 +359,8 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
// todo assumes ploidy is 2 -- should be generalized. Obviously the below code can be turned into a loop
double p_base = 0.0;
p_base += pow(10, fbLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment);
p_base += pow(10, fbLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment);
p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base1)] - ploidyAdjustment);
p_base += pow(10, log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(g.base2)] - ploidyAdjustment);
double likelihood = log10(p_base);
gl.log10Likelihoods[g.ordinal()] += likelihood;
@ -434,6 +381,57 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
}
}
/**
 * Computes the log10 likelihood of each of the four reference bases given a
 * single observed base and its quality score, under the flat 3-state error
 * model (matching base: 1 - e; each mismatching base: e / 3).
 *
 * Note: unlike the older FourBaseLikelihoods path this replaces, this method
 * never returns null -- callers filter bad bases and Q0 before getting here.
 *
 * @param observedBase observed base
 * @param qualityScore base quality (Phred-scaled)
 * @param read SAM read, used only for strand info in verbose output
 * @return array of four log10 likelihoods, indexed by BaseUtils base index (A,C,G,T)
 */
protected double[] computeLog10Likelihoods(byte observedBase, byte qualityScore, SAMRecord read) {
double[] log10FourBaseLikelihoods = baseZeros.clone();
for ( byte base : BaseUtils.BASES ) {
double likelihood = log10PofObservingBaseGivenChromosome(observedBase, base, qualityScore);
if ( VERBOSE ) {
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
System.out.printf("  L(%c | b=%s, Q=%d, S=%s) = %f / %f%n",
observedBase, base, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood);
}
log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(base)] = likelihood;
}
return log10FourBaseLikelihoods;
}
/**
 * Returns log10 P(observed base | true chromosomal base) under the 3-state
 * error model: a correct call has probability 1 - e, and a miscall is spread
 * evenly over the three other bases (e / 3 each).
 *
 * @param observedBase observed base
 * @param chromBase    target base
 * @param qual         base quality (Phred-scaled; callers filter out Q0)
 * @return log10 likelihood
 */
protected double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual) {
    final double log10e = qual / -10.0;

    if ( observedBase != chromBase ) {
        // base inconsistent with the chromosome: e / 3, directly in log space
        return log10e - log10_3;
    }

    // base consistent with the chromosome: 1 - e has no closed log form,
    // so compute it through real space
    return log10(1.0 - pow(10, log10e));
}
// -----------------------------------------------------------------------------------------------------------------
//
@ -535,11 +533,15 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
//
// Constant static data
//
final static double[] zeros = new double[DiploidGenotype.values().length];
private final static double[] genotypeZeros = new double[DiploidGenotype.values().length];
private final static double[] baseZeros = new double[BaseUtils.BASES.length];
static {
for ( DiploidGenotype g : DiploidGenotype.values() ) {
zeros[g.ordinal()] = 0.0;
genotypeZeros[g.ordinal()] = 0.0;
}
for ( byte base : BaseUtils.BASES ) {
baseZeros[BaseUtils.simpleBaseToBaseIndex(base)] = 0.0;
}
}
}

View File

@ -1,301 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
import static java.lang.Math.log10;
import java.util.TreeMap;
import java.util.EnumMap;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
public class EmpiricalSubstitutionProbabilities extends FourBaseLikelihoods {
// --------------------------------------------------------------------------------------------------------------
//
// Static methods to manipulate machine platforms
//
// --------------------------------------------------------------------------------------------------------------
/**
 * Sequencing technologies with empirical substitution tables. UNKNOWN is the
 * fallback when a read group's PL field is missing or unrecognized.
 */
public enum SequencerPlatform {
SOLEXA,         // Solexa / Illumina
ROCHE454,       // 454
SOLID,          // SOLiD
CG,             // Complete Genomics
UNKNOWN         // No idea -- defaulting to 1/3
}
// Lookup table from PL-tag spellings to their platform constant.
private static TreeMap<String, SequencerPlatform> PLFieldToSequencerPlatform = new TreeMap<String, SequencerPlatform>();

/**
 * Registers a platform name under its exact, upper-case, and lower-case
 * spellings so PL-tag lookup tolerates any casing.
 */
private static void bind(String s, SequencerPlatform x) {
    for ( String spelling : new String[] { s, s.toUpperCase(), s.toLowerCase() } ) {
        PLFieldToSequencerPlatform.put(spelling, x);
    }
}
//
// Static list of platforms supported by this system.
//
static {
bind("LS454", SequencerPlatform.ROCHE454);
bind("454", SequencerPlatform.ROCHE454);
bind("ILLUMINA", SequencerPlatform.SOLEXA);
bind("SOLEXA", SequencerPlatform.SOLEXA);
bind("SOLID", SequencerPlatform.SOLID);
bind("ABI_SOLID", SequencerPlatform.SOLID);
bind("CG", SequencerPlatform.CG);
}
/**
 * Maps a free-form platform string (e.g. a read group's PL field) onto a
 * SequencerPlatform constant. Matching is case-insensitive; null or
 * unrecognized strings map to SequencerPlatform.UNKNOWN.
 *
 * @param sequencerString platform name, may be null
 * @return the matching platform, or UNKNOWN
 */
public static SequencerPlatform standardizeSequencerPlatform( final String sequencerString ) {
    if ( sequencerString == null )
        return SequencerPlatform.UNKNOWN;

    // Single map probe instead of the previous containsKey()/get() double
    // lookup; the table never stores null values, so null means "absent".
    final SequencerPlatform pl = PLFieldToSequencerPlatform.get(sequencerString.toLowerCase());
    return pl == null ? SequencerPlatform.UNKNOWN : pl;
}
// One-element, per-thread memo of the most recently queried read and its
// platform. Keyed by reference identity (==), not equals(): presumably the
// same SAMRecord object is queried repeatedly in a row (e.g. across pileup
// positions) -- TODO(review): confirm that assumption against callers.
private static ThreadLocal<SAMRecord> lastReadForPL = new ThreadLocal<SAMRecord>();
private static ThreadLocal<SequencerPlatform> plOfLastRead = new ThreadLocal<SequencerPlatform>();

/**
 * Returns the sequencer platform of the given read, derived from its read
 * group's platform (PL) field; UNKNOWN when the read group or PL is missing.
 * Thread-safe via the ThreadLocal memo above.
 */
public static SequencerPlatform getReadSequencerPlatform( SAMRecord read ) {
if ( lastReadForPL.get() != read ) {
lastReadForPL.set(read);
SAMReadGroupRecord readGroup = read.getReadGroup();
final String platformName = readGroup == null ? null : readGroup.getPlatform();
plOfLastRead.set(standardizeSequencerPlatform(platformName));
}
return plOfLastRead.get();
}
/** Convenience wrapper: the ordinal of the read's sequencer platform. */
public int getReadSequencerPlatformIndex( SAMRecord read ) {
    final SequencerPlatform platform = getReadSequencerPlatform(read);
    return platform.ordinal();
}
// --------------------------------------------------------------------------------------------------------------
//
// Static methods to get at the transition tables themselves
//
// --------------------------------------------------------------------------------------------------------------
/**
* A matrix of value i x j -> log10(p) where
*
* i - byte of the miscalled base (i.e., A)
* j - byte of the presumed true base (i.e., C)
* log10p - empirical probability p that A is actually C
*
* The table is available for each technology
*/
private final static EnumMap<SequencerPlatform, double[][]> log10pTrueGivenMiscall = new EnumMap<SequencerPlatform, double[][]>(SequencerPlatform.class);
/**
 * Records one empirical substitution entry -- log10 P(true base = trueBase |
 * sequencer miscalled it as miscalledBase) -- for the given platform,
 * lazily allocating that platform's 4x4 table on first use.
 */
private static void addMisCall(final SequencerPlatform pl, byte miscalledBase, byte trueBase, double p) {
    double[][] misCallProbs = log10pTrueGivenMiscall.get(pl);
    if ( misCallProbs == null ) {
        misCallProbs = new double[4][4];
        log10pTrueGivenMiscall.put(pl, misCallProbs);
    }

    final int row = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
    final int col = BaseUtils.simpleBaseToBaseIndex(trueBase);
    misCallProbs[row][col] = log10(p);
}
/**
 * Looks up log10 P(true base | miscalled base) for a platform. Entries that
 * were never bound (including the diagonal) are left at 0.0 by array
 * initialization, so 0.0 acts as a "missing entry" sentinel and triggers an
 * error.
 */
private static double getProbMiscallIsBase(SequencerPlatform pl, byte miscalledBase, byte trueBase) {
    final int row = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
    final int col = BaseUtils.simpleBaseToBaseIndex(trueBase);

    final double logP = log10pTrueGivenMiscall.get(pl)[row][col];
    if ( logP == 0.0 ) // sentinel: this (miscalled, true) pair was never filled in
        throw new RuntimeException(String.format("Bad miscall base request miscalled=%c true=%c", miscalledBase, trueBase));

    return logP;
}
// Empirically measured substitution spectra, one table per platform. Each
// addMisCall value is the percentage of miscalls of the first base that were
// truly the second base; the three entries for each miscalled base sum to
// 100%. These constants ARE the model -- do not edit without new data.

/** Illumina/Solexa empirical substitution table. */
private static void addSolexa() {
SequencerPlatform pl = SequencerPlatform.SOLEXA;
addMisCall(pl, BaseUtils.A, BaseUtils.C, 57.7/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.G, 17.1/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.T, 25.2/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.A, 34.9/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.G, 11.3/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.T, 53.9/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.A, 31.9/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.C, 5.1/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.T, 63.0/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.A, 45.8/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.C, 22.1/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.G, 32.0/100.0);
}

/** SOLiD empirical substitution table. */
private static void addSOLiD() {
SequencerPlatform pl = SequencerPlatform.SOLID;
addMisCall(pl, BaseUtils.A, BaseUtils.C, 18.7/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.G, 42.5/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.T, 38.7/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.A, 27.0/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.G, 18.9/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.T, 54.1/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.A, 61.0/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.C, 15.7/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.T, 23.2/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.A, 40.5/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.C, 34.3/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.G, 25.2/100.0);
}

/** Roche 454 empirical substitution table. */
private static void add454() {
SequencerPlatform pl = SequencerPlatform.ROCHE454;
addMisCall(pl, BaseUtils.A, BaseUtils.C, 23.2/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.G, 42.6/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.T, 34.3/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.A, 19.7/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.G, 8.4/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.T, 71.9/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.A, 71.5/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.C, 6.6/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.T, 21.9/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.A, 43.8/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.C, 37.8/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.G, 18.5/100.0);
}

/** Complete Genomics empirical substitution table. */
private static void addCG() {
SequencerPlatform pl = SequencerPlatform.CG;
addMisCall(pl, BaseUtils.A, BaseUtils.C, 28.2/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.G, 28.7/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.T, 43.1/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.A, 29.8/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.G, 18.6/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.T, 51.6/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.A, 32.5/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.C, 21.4/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.T, 46.1/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.A, 42.6/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.C, 34.1/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.G, 23.3/100.0);
}
/**
 * Fills the UNKNOWN-platform table with a flat model: a miscall is equally
 * likely to have been any of the three other bases (1/3 each).
 */
private static void addUnknown() {
    final SequencerPlatform pl = SequencerPlatform.UNKNOWN;
    for ( byte miscalled : BaseUtils.BASES ) {
        for ( byte truth : BaseUtils.BASES ) {
            if ( miscalled == truth )
                continue;
            addMisCall(pl, miscalled, truth, 1.0/3.0);
        }
    }
}
static {
addSolexa();
add454();
addSOLiD();
addCG();
addUnknown();
}
// --------------------------------------------------------------------------------------------------------------
//
// The actual objects themselves
//
// --------------------------------------------------------------------------------------------------------------
// When true (the default), an UNKNOWN platform is a hard error; when false,
// defaultPlatform is silently substituted.
private boolean raiseErrorOnUnknownPlatform = true;
private SequencerPlatform defaultPlatform = SequencerPlatform.UNKNOWN;

//
// forwarding constructors -- don't do anything at all
//

/** Default behavior: throw on reads whose platform cannot be determined. */
public EmpiricalSubstitutionProbabilities() { super(); }

/** @param raiseErrorOnUnknownPlatform whether an UNKNOWN platform is fatal */
public EmpiricalSubstitutionProbabilities(boolean raiseErrorOnUnknownPlatform) {
super();
this.raiseErrorOnUnknownPlatform = raiseErrorOnUnknownPlatform;
}

/**
 * @param assumeUnknownPlatformsAreThis platform to substitute for UNKNOWN
 *        reads; null keeps the default throw-on-unknown behavior
 */
public EmpiricalSubstitutionProbabilities(SequencerPlatform assumeUnknownPlatformsAreThis) {
super();
if ( assumeUnknownPlatformsAreThis != null ) {
raiseErrorOnUnknownPlatform = false;
defaultPlatform = assumeUnknownPlatformsAreThis;
}
}
/**
 * Cloning of the object. This class adds no mutable state beyond its
 * superclass, so delegating to super.clone() is sufficient.
 * @return clone
 * @throws CloneNotSupportedException
 */
protected Object clone() throws CloneNotSupportedException {
return super.clone();
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// calculation of p(B|GT)
//
//
// -----------------------------------------------------------------------------------------------------------------
/**
 * log10 P(true chromosomal base | the sequencer miscalled it as observedBase),
 * taken from the platform-specific empirical substitution tables. The tables
 * are expressed in read-strand space, so reverse-strand reads are looked up
 * with both bases complemented.
 *
 * @throws RuntimeException if the platform is UNKNOWN and
 *         raiseErrorOnUnknownPlatform is set
 */
protected double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset) {
    SequencerPlatform pl = getReadSequencerPlatform(read);
    if ( pl == SequencerPlatform.UNKNOWN ) {
        if ( raiseErrorOnUnknownPlatform )
            throw new RuntimeException("Unknown sequencer platform for read " + read.format() + "; your BAM file is either missing the PL tag for some read groups or an unsupported platform is being used.");
        pl = defaultPlatform;
    }

    final boolean fwdStrand = ! read.getReadNegativeStrandFlag();
    final byte lookupObserved = fwdStrand ? observedBase : BaseUtils.simpleComplement(observedBase);
    final byte lookupChrom    = fwdStrand ? chromBase    : BaseUtils.simpleComplement(chromBase);
    return getProbMiscallIsBase(pl, lookupObserved, lookupChrom);
}
}

View File

@ -1,373 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.*;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
/**
* Stable, error checking version of strict 4 base likelihoods. Useful for calculating the likelihoods, priors,
* and posteriors given a pile of bases and quality scores (in conjunction with GenotypeLikelihoods)
*
* Suppose we have bases b1, b2, ..., bN with qualities scores q1, q2, ..., qN. This object
* calculates:
*
* P(b | D) = P(b) * P(D | b)
*
* where
*
* P(D | b) = sum_i log10 P(bi | b)
*
* and
*
* P(bi | b) = 1 - P(error | q1) if bi = b
* = P(error | q1) / 3 if bi != b
*
*
*/
public abstract class FourBaseLikelihoods implements Cloneable {
protected boolean enableCacheFlag = true;
//
// The fundamental data array associated with 4-base likelihoods
//
protected double[] log10Likelihoods = null;
/**
* If true, lots of output will be generated about the Likelihoods at each site
*/
private boolean verbose = false;
/**
* Bases with Q scores below this threshold aren't included in the Likelihood calculation
*/
private int minQScoreToInclude = 0;
/**
 * Create a new FourBaseLikelihoods object with all per-base log10
 * likelihoods initialized to zero (i.e. likelihood 1 for every base).
 */
public FourBaseLikelihoods() {
log10Likelihoods = zeros.clone(); // Likelihoods are all zeros
}
/**
 * Cloning of the object. Deep-copies the likelihoods array so that a clone
 * can accumulate observations without mutating the original's state.
 * @return clone
 * @throws CloneNotSupportedException
 */
protected Object clone() throws CloneNotSupportedException {
FourBaseLikelihoods c = (FourBaseLikelihoods)super.clone();
c.log10Likelihoods = log10Likelihoods.clone();
return c;
}
/** Enables or disables per-site debugging output. */
public void setVerbose(boolean v) {
verbose = v;
}

/** @return true if per-site debugging output is enabled */
public boolean isVerbose() {
return verbose;
}

/** @return the quality threshold; bases at or below it are excluded from the likelihoods */
public int getMinQScoreToInclude() {
return minQScoreToInclude;
}

/** Sets the quality threshold for including bases in the likelihood calculation. */
public void setMinQScoreToInclude(int minQScoreToInclude) {
this.minQScoreToInclude = minQScoreToInclude;
}
/**
 * Returns the raw array of log10 likelihoods for each base, indexed by
 * BaseUtils base index (A,C,G,T). Note: the internal array is returned
 * directly, not a copy.
 * @return probs
 */
public double[] getLog10Likelihoods() {
    return log10Likelihoods;
}

/**
 * Returns the log10 likelihood associated with a base; 0.0 for bases
 * without a valid index (e.g. 'N').
 * @param base base
 * @return log10 likelihood as a double
 */
public double getLog10Likelihood(byte base) {
    final int index = BaseUtils.simpleBaseToBaseIndex(base);
    return getLog10Likelihood(index);
}

public double getLog10Likelihood(int baseIndex) {
    if (baseIndex < 0) {
        return 0.0; // unrecognized base
    }
    return getLog10Likelihoods()[baseIndex];
}
/**
 * Returns an array of real-space (non-log) likelihoods for each base,
 * indexed by BaseUtils base index.
 * @return probs
 */
public double[] getLikelihoods() {
    final double[] realSpace = new double[4];
    int index = 0;
    while (index < 4) {
        realSpace[index] = Math.pow(10, log10Likelihoods[index]);
        index++;
    }
    return realSpace;
}

/**
 * Returns the real-space likelihood associated with a base; 0.0 for bases
 * without a valid index.
 * @param base base
 * @return likelihoods as a double
 */
public double getLikelihood(byte base) {
    return getLikelihood(BaseUtils.simpleBaseToBaseIndex(base));
}

public double getLikelihood(int baseIndex) {
    if (baseIndex < 0) {
        return 0.0; // unrecognized base
    }
    return Math.pow(10, log10Likelihoods[baseIndex]);
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// add() -- the heart of the four-base likelihood calculation
//
//
// -----------------------------------------------------------------------------------------------------------------
/**
 * Updates likelihoods and posteriors to reflect an additional observation of
 * observedBase with qualityScore, accumulating into the running log10
 * likelihoods array.
 *
 * BUG FIX: the accumulation previously read fbl.getLikelihood(base) -- a
 * real-space probability -- and added it into the log10-space array,
 * corrupting the totals. It must accumulate the log10 likelihood.
 *
 * @param observedBase observed base
 * @param qualityScore base quality
 * @param read SAM read
 * @param offset offset on read
 * @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
 */
public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
    FourBaseLikelihoods fbl = computeLog10Likelihoods(observedBase, qualityScore, read, offset);
    if ( fbl == null )
        return 0; // base was bad or below the quality threshold

    for ( byte base : BaseUtils.BASES ) {
        // accumulate in log10 space to match the log10Likelihoods array
        double likelihood = fbl.getLog10Likelihood(base);
        log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] += likelihood;
    }

    if ( verbose ) {
        for ( byte base : BaseUtils.BASES ) { System.out.printf("%s\t", (char)base); }
        System.out.println();
        for ( byte base : BaseUtils.BASES ) { System.out.printf("%.2f\t", log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)]); }
        System.out.println();
    }

    return 1;
}
/**
 * Computes per-chromosome-base log10 likelihoods for a single observation of
 * observedBase with qualityScore, without modifying this object's accumulated state.
 *
 * @param observedBase observed base
 * @param qualityScore base quality
 * @param read SAM read
 * @param offset offset on read
 * @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example)
 */
public FourBaseLikelihoods computeLog10Likelihoods(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
    // reject observations of anything other than A/C/G/T (or lower-case equivalents)
    if ( badBase(observedBase) ) {
        return null;
    }
    try {
        // bases at or below the minimum quality threshold are ignored entirely
        if ( qualityScore > getMinQScoreToInclude() ) {
            // clone this object (preserving any subclass configuration) and give
            // the clone a fresh, all-zero accumulator for this single observation
            FourBaseLikelihoods fbl = (FourBaseLikelihoods)this.clone();
            fbl.log10Likelihoods = zeros.clone();
            for ( byte base : BaseUtils.BASES ) {
                // log10 P(observedBase | true chromosome base == base)
                double likelihood = log10PofObservingBaseGivenChromosome(observedBase, base, qualityScore, read, offset);
                if ( verbose ) {
                    boolean fwdStrand = ! read.getReadNegativeStrandFlag();
                    System.out.printf(" L(%c | b=%s, Q=%d, S=%s) = %f / %f%n",
                            observedBase, base, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood);
                }
                fbl.log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] = likelihood;
            }
            return fbl;
        }
    } catch ( CloneNotSupportedException e ) {
        // clone() is expected to be supported throughout this hierarchy;
        // surface any violation as a runtime error
        throw new RuntimeException(e);
    }
    return null;
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// helper routines
//
//
// -----------------------------------------------------------------------------------------------------------------
/**
 * Returns true when the observedBase is considered bad and shouldn't be processed
 * by this object: anything that isn't A, C, G, T (or a lower-case equivalent).
 *
 * @param observedBase observed base
 * @return true if the base is a bad base
 */
private boolean badBase(byte observedBase) {
    final int baseIndex = BaseUtils.simpleBaseToBaseIndex(observedBase);
    return baseIndex == -1;
}
/**
 * Return a string representation of this object in a moderately usable form:
 * each base letter with its accumulated log10 likelihood, followed by the sum
 * of the corresponding linear-space likelihoods.
 *
 * @return string representation
 */
public String toString() {
    double sum = 0;
    StringBuilder s = new StringBuilder();
    for ( byte base : BaseUtils.BASES ) {
        int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
        // BUG FIX: cast to char so the base prints as a letter (e.g. 'A'), not its
        // numeric byte value (e.g. 65) -- %s on a Byte renders the number. This
        // matches the (char)base rendering used by add()'s verbose output.
        s.append(String.format("%s %.10f ", (char)base, log10Likelihoods[baseIndex]));
        sum += Math.pow(10, log10Likelihoods[baseIndex]);
    }
    s.append(String.format(" %f", sum));
    return s.toString();
}
// In general we don't care about the platform, so the base implementation always
// returns index 0; EmpiricalSubstitutionProbabilities overloads this to select
// platform-specific behavior.
public int getReadSequencerPlatformIndex( SAMRecord read ) {
    return 0;
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// Validation routines
//
//
// -----------------------------------------------------------------------------------------------------------------
/** Validates the likelihood array, raising a runtime exception on malformed values. */
public boolean validate() {
    return validate(true);
}
/**
 * Validates that every per-base log10 likelihood is a well-formed, non-positive double.
 *
 * IMPROVEMENT: the previous version threw an IllegalStateException, caught it itself,
 * and rethrew it wrapped in a RuntimeException -- exceptions as control flow. This
 * version checks directly and throws IllegalStateException (a RuntimeException
 * subtype, so existing catch blocks still work) only when requested.
 *
 * @param throwException if true, a malformed likelihood raises an IllegalStateException;
 *                       otherwise the method returns false
 * @return true if all likelihoods are well-formed
 */
public boolean validate(boolean throwException) {
    for ( byte base : BaseUtils.BASES ) {
        final int i = BaseUtils.simpleBaseToBaseIndex(base);
        final double ll = log10Likelihoods[i];
        // a log10 probability must be a finite, non-positive number
        if ( ! MathUtils.wellFormedDouble(ll) || ! MathUtils.isNegativeOrZero(ll) ) {
            if ( throwException )
                throw new IllegalStateException(String.format("At %s: Likelihood %f is badly formed", base, ll));
            return false;
        }
    }
    return true;
}
// -----------------------------------------------------------------------------------------------------------------
//
//
// Hearty math calculations follow
//
// -- these should not be messed with unless you know what you are doing
//
// -----------------------------------------------------------------------------------------------------------------
/**
 * Computes log10 P(observedBase | chromBase) under the Phred error model:
 * a match has probability 1 - e, a mismatch has probability
 * e * P(chromBase | observedBase is an error), where e = 10^(-qual/10).
 *
 * @param observedBase observed base
 * @param chromBase target base
 * @param qual base quality
 * @param read SAM read
 * @param offset offset on read
 * @return log10 likelihood
 */
protected double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual, SAMRecord read, int offset) {
    // Q0 bases carry no information and should have been filtered upstream
    if ( qual == 0 ) {
        throw new RuntimeException(String.format("Unexpected Q0 base discovered in log10PofObservingBaseGivenChromosome: %c %s %d at %d in %s",
                observedBase, chromBase, qual, offset, read));
    }

    final double log10ErrorRate = qual / -10.0;     // Phred scale: e = 10^(-Q/10)

    if ( observedBase == chromBase ) {
        // base is consistent with the chromosome: P = 1 - e
        return log10(1.0 - pow(10, log10ErrorRate));
    }

    // base is inconsistent with the chromosome: P = e * P(chromBase | miscall)
    return log10ErrorRate + log10PofTrueBaseGivenMiscall(observedBase, chromBase, read, offset);
}
/**
 * Returns log10 P(true chromosome base == chromBase | observedBase was miscalled).
 * Must be overridden by concrete subclasses to supply the base-mismatch model.
 *
 * @param observedBase observed base
 * @param chromBase target base
 * @param read SAM read (may inform platform-specific models -- see getReadSequencerPlatformIndex)
 * @param offset offset on read
 * @return log10 likelihood
 */
protected abstract double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset);
//
// Constant static data
//
/** All-zero log10 likelihood template, cloned to reset a per-observation accumulator. */
private final static double[] zeros = new double[BaseUtils.BASES.length];
static {
    // Java zero-initializes double arrays; we fill explicitly to make the intent clear.
    for ( int i = 0; i < zeros.length; i++ )
        zeros[i] = 0.0;
}
}

View File

@ -1,61 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import static org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel.*;
public class FourBaseLikelihoodsFactory {
    //private FourBaseLikelihoodsFactory() {} // utility class; should not be instantiated

    /**
     * Looks up a BaseMismatchModel by its enum constant name.
     *
     * @param name model name
     * @return the matching model
     */
    public static BaseMismatchModel getBaseMismatchModel(final String name) {
        return valueOf(name);
    }

    /**
     * Maps a concrete FourBaseLikelihoods implementation back to its model enum.
     *
     * @param m likelihoods implementation
     * @return the corresponding model
     */
    public static BaseMismatchModel getBaseMismatchModel(final FourBaseLikelihoods m) {
        if ( m instanceof ThreeStateErrorProbabilities )
            return THREE_STATE;
        if ( m instanceof EmpiricalSubstitutionProbabilities )
            return EMPIRICAL;
        throw new RuntimeException("Unexpected BaseMismatchModel " + m.getClass());
    }

    /**
     * General and correct way to create FourBaseLikelihood objects for arbitrary base mismatching models
     *
     * @param m  model
     * @param pl default platform
     * @return new 4-base model
     */
    public static FourBaseLikelihoods makeFourBaseLikelihoods(BaseMismatchModel m,
                                                              EmpiricalSubstitutionProbabilities.SequencerPlatform pl ) {
        switch ( m ) {
            case THREE_STATE: return new ThreeStateErrorProbabilities();
            case EMPIRICAL:   return new EmpiricalSubstitutionProbabilities(pl);
            default: throw new RuntimeException("Unexpected BaseMismatchModel " + m);
        }
    }
}

View File

@ -87,7 +87,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
ReadBackedPileup pileup = sample.getValue().getContext(contextType).getBasePileup();
// create the GenotypeLikelihoods object
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods(UAC.baseModel, (DiploidSNPGenotypePriors)priors, UAC.defaultPlatform);
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors);
int nGoodBases = GL.add(pileup, true, true);
if ( nGoodBases == 0 )
continue;

View File

@ -1,63 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import static java.lang.Math.log10;
import net.sf.samtools.SAMRecord;
public class ThreeStateErrorProbabilities extends FourBaseLikelihoods {

    //
    // forwarding constructor -- does nothing beyond the superclass
    //
    public ThreeStateErrorProbabilities() { super(); }

    /**
     * Cloning of the object.
     *
     * @return clone
     * @throws CloneNotSupportedException
     */
    protected Object clone() throws CloneNotSupportedException {
        return super.clone();
    }

    /** Cached value of log10(3), the normalizer of the uniform three-way error model. */
    protected static final double log103 = log10(3.0);

    /**
     * Under the three-state model a miscall is equally likely to come from any of
     * the three other bases, i.e. the e/3 model.
     *
     * @param observedBase observed base
     * @param chromBase target base
     * @param read SAM read
     * @param offset offset on read
     * @return log10 likelihood (always -log10(3))
     */
    protected double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset) {
        return -log103; // equivalent to e / 3 model
    }
}

View File

@ -38,9 +38,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
@Argument(fullName = "base_model", shortName = "bm", doc = "Base substitution model to employ when using the SNP Genotype Likelihoods model -- EMPIRICAL is the recommended default, but it's possible to select the THREE_STATE model for comparison purposes", required = false)
public BaseMismatchModel baseModel = BaseMismatchModel.EMPIRICAL;
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
@ -72,10 +69,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
public String ASSUME_SINGLE_SAMPLE = null;
@Hidden
@Argument(fullName = "platform", shortName = "pl", doc = "Causes the genotyper to assume that reads without PL header TAG are this platform. Defaults to null, indicating that the system will throw a runtime exception when such reads are detected", required = false)
public EmpiricalSubstitutionProbabilities.SequencerPlatform defaultPlatform = null;
// control the various parameters to be used
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
@ -98,13 +91,11 @@ public class UnifiedArgumentCollection {
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.GLmodel = GLmodel;
uac.baseModel = baseModel;
uac.heterozygosity = heterozygosity;
uac.GENOTYPE_MODE = GENOTYPE_MODE;
uac.ALL_BASES_MODE = ALL_BASES_MODE;
uac.NO_SLOD = NO_SLOD;
uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
uac.defaultPlatform = defaultPlatform;
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
uac.TRIGGER_CONFIDENCE_FOR_CALLING = TRIGGER_CONFIDENCE_FOR_CALLING;

View File

@ -114,7 +114,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
//}
//Calculate posterior probabilities
DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods(BaseMismatchModel.THREE_STATE);
DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods();
SAMRecord read; int offset; char base; byte qual; int mapquality; String readname;
//Check for bad bases and ensure mapping quality

View File

@ -353,7 +353,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
}
//Calculate posterior probabilities
DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods(BaseMismatchModel.THREE_STATE);
DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods();
SAMRecord read; int offset; char base; byte qual; int mapquality; String readname;
//Check for bad bases and ensure mapping quality myself. This works.

View File

@ -25,7 +25,7 @@ public class
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("05359047d4d914c9740466c386a1addc"));
Arrays.asList("c4c75b08bb49fa404403c01198269133"));
executeTest("testMultiSamplePilot1", spec);
}
@ -33,7 +33,7 @@ public class
public void testMultiSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("755f6abfca9db358a839b1ee2d35c5ca"));
Arrays.asList("17cea9547213f3fbdd28a8e8c5971564"));
executeTest("testMultiSamplePilot2", spec);
}
@ -41,7 +41,7 @@ public class
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("59d6bf8a5030119d1ef0a644a76b3212"));
Arrays.asList("43f7691454678e5ff529a1e21ce1c0e6"));
executeTest("testSingleSamplePilot2", spec);
}
@ -51,7 +51,7 @@ public class
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "6c4f8e5da47fc4346e8385c109ce731b";
private final static String COMPRESSED_OUTPUT_MD5 = "bb2e7c10d69d526a298947b292d17620";
@Test
public void testCompressedOutput() {
@ -78,7 +78,7 @@ public class
@Test
public void testParallelization() {
String md5 = "f0d9f168a484d48b23b924f388f088b3";
String md5 = "9a4b3035ba16959d5b553ab70b791751";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -105,12 +105,12 @@ public class
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "b4ed6bd0129088a9ab5effadcbbe8469" );
e.put( "-all_bases", "4bdfede52a766574ae9638ad3012bce5" );
e.put( "--min_base_quality_score 26", "8123a3ca9a7b83a4afacabb33f157613" );
e.put( "--min_mapping_quality_score 26", "6e79b6386624b537398e9971bdc5c6f4" );
e.put( "--max_mismatches_in_40bp_window 5", "b5f9af45dfc8e168bba619d158dd56a4" );
e.put( "--p_nonref_model GRID_SEARCH", "e57cfae0e2caf42172bc7c466d8ff004" );
e.put( "-genotype", "01f3d6d0e18267bfb7dc0331d4cb798d" );
e.put( "-all_bases", "a2ff7f6c232e9210607e0c918da4bc61" );
e.put( "--min_base_quality_score 26", "3af28da44a4bcfa7385797795ea8fed1" );
e.put( "--min_mapping_quality_score 26", "8a61e97afa50571ce21cd39e72aa840f" );
e.put( "--max_mismatches_in_40bp_window 5", "29f53ab510c8d65b75436f1fc41a9321" );
e.put( "--p_nonref_model GRID_SEARCH", "00593066022525ed9e3bad2c009a7d2a" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -124,12 +124,12 @@ public class
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("e57cfae0e2caf42172bc7c466d8ff004"));
Arrays.asList("00593066022525ed9e3bad2c009a7d2a"));
executeTest("testConfidence1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("4440f676bb27c345953b8aaa1e57a1ed"));
Arrays.asList("d32899eb5817ad451a84a69a16edfe67"));
executeTest("testConfidence2", spec2);
}
@ -141,8 +141,8 @@ public class
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "f92075cf201ecfe7161b9646195348a5" );
e.put( 1.0 / 1850, "a428671a373246b20eabd66911e64612" );
e.put( 0.01, "76c75d43c19086a30f99098305d108eb" );
e.put( 1.0 / 1850, "12aee2a5aab5c604b02eee07b9ad0c15" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -152,25 +152,6 @@ public class
}
}
// --------------------------------------------------------------------------------------------------------------
//
// testing other base calling models
//
// --------------------------------------------------------------------------------------------------------------
@Test
public void testOtherBaseCallModel() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "three_state", "e06dd3becb9bfd083c4706bc0230b8e9" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec);
}
}
// --------------------------------------------------------------------------------------------------------------
//
// testing calls with SLX, 454, and SOLID data
@ -184,7 +165,7 @@ public class
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("878a76dfd83f1c12dd68dc5a8bdfa483"));
Arrays.asList("bde6ecc6ca31cbfc4d08ca65887ed842"));
executeTest(String.format("testMultiTechnologies"), spec);
}