Significant restructuring of the Exact model, as discussed within the dev group last week. There is no more marginalizing over alternate alleles, and we now keep track of the MLE and MAP. Important notes: 1) integration tests change because the previous marginalization wasn't done correctly (as pointed out by Guillermo) and our confidences were too high for many multi-allelic sites; 2) there is a major TO-DO item that needs to be discussed within the dev group (so they should expect a follow up email); 3) this code is still in flux as I am awaiting feedback from Ryan now on its performance with the Haplotype Caller (the good news, Ryan, is that we recover that site that we were losing previously).

This commit is contained in:
Eric Banks 2012-03-27 00:27:44 -05:00
parent e8bb8ade1a
commit c07a577ba3
9 changed files with 182 additions and 381 deletions

View File

@ -69,6 +69,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
* @return the alleles used for genotyping * @return the alleles used for genotyping
*/ */
protected abstract List<Allele> getLog10PNonRef(final VariantContext vc, protected abstract List<Allele> getLog10PNonRef(final VariantContext vc,
final double[][] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result); final AlleleFrequencyCalculationResult result);
} }

View File

@ -25,6 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.ArrayList;
import java.util.Arrays;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
* User: ebanks * User: ebanks
@ -34,23 +39,54 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
*/ */
public class AlleleFrequencyCalculationResult { public class AlleleFrequencyCalculationResult {
// IMPORTANT NOTE: // These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
// These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N). private double log10MLE;
// For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that private double log10MAP;
// the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may final private int[] alleleCountsOfMLE;
// be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0). final private int[] alleleCountsOfMAP;
// In the bi-allelic case (where there are no other alternate alleles over which to marginalize),
// the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED.
final double[][] log10AlleleFrequencyLikelihoods;
final double[][] log10AlleleFrequencyPosteriors;
// These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles) // The posteriors seen, not including that of AF=0
double log10LikelihoodOfAFzero = 0.0; // TODO -- better implementation needed here (see below)
double log10PosteriorOfAFzero = 0.0; private ArrayList<Double> log10PosteriorMatrixValues = new ArrayList<Double>(100000);
private Double log10PosteriorMatrixSum = null;
public AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) { // These variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1]; private double log10LikelihoodOfAFzero;
log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1]; private double log10PosteriorOfAFzero;
public AlleleFrequencyCalculationResult(final int maxAltAlleles) {
alleleCountsOfMLE = new int[maxAltAlleles];
alleleCountsOfMAP = new int[maxAltAlleles];
reset();
}
public double getLog10MLE() {
return log10MLE;
}
public double getLog10MAP() {
return log10MAP;
}
public double getLog10PosteriorMatrixSum() {
if ( log10PosteriorMatrixSum == null ) {
// TODO -- we absolutely need a better implementation here as we don't want to store all values from the matrix in memory;
// TODO -- will discuss with the team what the best option is
final double[] tmp = new double[log10PosteriorMatrixValues.size()];
for ( int i = 0; i < tmp.length; i++ )
tmp[i] = log10PosteriorMatrixValues.get(i);
log10PosteriorMatrixSum = MathUtils.log10sumLog10(tmp);
}
return log10PosteriorMatrixSum;
}
public int[] getAlleleCountsOfMLE() {
return alleleCountsOfMLE;
}
public int[] getAlleleCountsOfMAP() {
return alleleCountsOfMAP;
} }
public double getLog10LikelihoodOfAFzero() { public double getLog10LikelihoodOfAFzero() {
@ -60,4 +96,47 @@ public class AlleleFrequencyCalculationResult {
public double getLog10PosteriorOfAFzero() { public double getLog10PosteriorOfAFzero() {
return log10PosteriorOfAFzero; return log10PosteriorOfAFzero;
} }
public void reset() {
log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) {
alleleCountsOfMLE[i] = 0;
alleleCountsOfMAP[i] = 0;
}
log10PosteriorMatrixValues.clear();
log10PosteriorMatrixSum = null;
}
public void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
if ( log10LofK > log10MLE ) {
log10MLE = log10LofK;
for ( int i = 0; i < alleleCountsForK.length; i++ )
alleleCountsOfMLE[i] = alleleCountsForK[i];
}
}
public void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
log10PosteriorMatrixValues.add(log10LofK);
if ( log10LofK > log10MAP ) {
log10MAP = log10LofK;
for ( int i = 0; i < alleleCountsForK.length; i++ )
alleleCountsOfMAP[i] = alleleCountsForK[i];
}
}
public void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
if ( log10LikelihoodOfAFzero > log10MLE ) {
log10MLE = log10LikelihoodOfAFzero;
Arrays.fill(alleleCountsOfMLE, 0);
}
}
public void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
this.log10PosteriorOfAFzero = log10PosteriorOfAFzero;
if ( log10PosteriorOfAFzero > log10MAP ) {
log10MAP = log10PosteriorOfAFzero;
Arrays.fill(alleleCountsOfMAP, 0);
}
}
} }

View File

@ -43,7 +43,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
public List<Allele> getLog10PNonRef(final VariantContext vc, public List<Allele> getLog10PNonRef(final VariantContext vc,
final double[][] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) { final AlleleFrequencyCalculationResult result) {
GenotypesContext GLs = vc.getGenotypes(); GenotypesContext GLs = vc.getGenotypes();
@ -59,7 +59,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
GLs = UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false); GLs = UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
} }
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result); linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
return alleles; return alleles;
@ -207,20 +206,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
} }
// TODO -- remove me
public static void linearExactMultiAllelic(final GenotypesContext GLs, public static void linearExactMultiAllelic(final GenotypesContext GLs,
final int numAlternateAlleles, final int numAlternateAlleles,
final double[][] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result,
final boolean foo) {
linearExactMultiAllelic(GLs, numAlternateAlleles, log10AlleleFrequencyPriors, result);
}
public static void linearExactMultiAllelic(final GenotypesContext GLs,
final int numAlternateAlleles,
final double[][] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) { final AlleleFrequencyCalculationResult result) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs); final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
@ -272,7 +260,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final int numChr, final int numChr,
final LinkedList<ExactACset> ACqueue, final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset, final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) { final AlleleFrequencyCalculationResult result) {
//if ( DEBUG ) //if ( DEBUG )
@ -360,7 +348,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private static void computeLofK(final ExactACset set, private static void computeLofK(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods, final ArrayList<double[]> genotypeLikelihoods,
final double[][] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) { final AlleleFrequencyCalculationResult result) {
set.log10Likelihoods[0] = 0.0; // the zero case set.log10Likelihoods[0] = 0.0; // the zero case
@ -370,10 +358,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( totalK == 0 ) { if ( totalK == 0 ) {
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) for ( int j = 1; j < set.log10Likelihoods.length; j++ )
set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
final double log10Lof0 = set.log10Likelihoods[set.log10Likelihoods.length-1];
result.setLog10LikelihoodOfAFzero(log10Lof0);
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
return;
} }
// k > 0 for at least one k
else { // if we got here, then k > 0 for at least one k.
// the non-AA possible conformations were dealt with by pushes from dependent sets; // the non-AA possible conformations were already dealt with by pushes from dependent sets;
// now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value // now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
@ -386,31 +379,18 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator; set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator;
} }
}
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
// determine the power of theta to use // update the MLE if necessary
int nonRefAlleles = 0; result.updateMLEifNeeded(log10LofK, set.ACcounts.counts);
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
if ( set.ACcounts.getCounts()[i] > 0 )
nonRefAlleles++;
}
// for k=0, we don't want to put that value into the likelihoods/posteriors matrix, but instead want to set the value in the results object // apply the priors over each alternate allele
if ( nonRefAlleles == 0 ) { for ( final int ACcount : set.ACcounts.getCounts() ) {
result.log10LikelihoodOfAFzero = log10LofK; if ( ACcount > 0 )
result.log10PosteriorOfAFzero = log10LofK + log10AlleleFrequencyPriors[0][0]; log10LofK += log10AlleleFrequencyPriors[ACcount];
} else {
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
int AC = set.ACcounts.getCounts()[i];
result.log10AlleleFrequencyLikelihoods[i][AC] = MathUtils.approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
final double prior = log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
result.log10AlleleFrequencyPosteriors[i][AC] = MathUtils.approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
}
} }
result.updateMAPifNeeded(log10LofK, set.ACcounts.counts);
} }
private static void pushData(final ExactACset targetSet, private static void pushData(final ExactACset targetSet,

View File

@ -1,209 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.commons.lang.NotImplementedException;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.security.cert.CertificateNotYetValidException;
import java.util.*;
import org.broadinstitute.sting.utils.codecs.vcf.*;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 8/30/11
* Time: 10:08 AM
* To change this template use File | Settings | File Templates.
*/
public class UGBoundAF extends RodWalker<VariantContext,Integer> {
@Output(shortName="vcf",fullName="VCF",doc="file to write to",required=true)
VCFWriter writer;
@Input(shortName="V",fullName="Variants",doc="variant tracks to use in calculation",required=true)
List<RodBinding<VariantContext>> variants;
private static double EPS_LOWER_LIMIT = Math.pow(10,-6.0);
private HashMap<Integer,Pair<Double,Double>> epsilonPosteriorCache = new HashMap<Integer,Pair<Double,Double>>(8192);
private HashMap<Integer,Double> logAC0Cache = new HashMap<Integer, Double>(8192);
private int QUANTIZATION_FACTOR = 1000;
public void initialize() {
Set<VCFHeaderLine> allHeaderLines = new HashSet<VCFHeaderLine>(1024);
for ( RodBinding<VariantContext> v : variants ) {
String trackName = v.getName();
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>(vcfHeaders.get(trackName).getMetaData());
}
allHeaderLines.add(new VCFInfoHeaderLine("AFB",2,VCFHeaderLineType.Float,"The 95% bounds on the allele "+
"frequency. First value=95% probability AF>x. Second value=95% probability AF<x."));
writer.writeHeader(new VCFHeader(allHeaderLines));
}
public Integer reduceInit() {
return 0;
}
@Override
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext unused ) {
List<VariantContext> allVariants = tracker.getValues(variants);
if ( allVariants.size() == 0 ) {
return null;
}
List<Allele> alternateAlleles = getAllAlternateAlleles(allVariants);
VariantContextBuilder builder = new VariantContextBuilder(allVariants.get(0).subContextFromSamples(new TreeSet<String>()));
if ( alternateAlleles.size() > 1 ) {
logger.warn("Multiple Segregating Variants at position "+ref.getLocus().toString());
alternateAlleles.add(allVariants.get(0).getReference());
builder.alleles(alternateAlleles);
builder.filters(String.format("MULTIPLE_SEGREGATING[%s]", Utils.join(",",alternateAlleles)));
} else {
// get all the genotype likelihoods
GenotypesContext context = GenotypesContext.create();
int numNoCall = 0;
for ( VariantContext v : allVariants ) {
numNoCall += v.getNoCallCount();
context.addAll(v.getGenotypes());
}
builder.attribute("AFB",boundAlleleFrequency(getACPosteriors(context)));
}
return builder.make();
}
private List<Allele> getAllAlternateAlleles(List<VariantContext> variants) {
List<Allele> alleles = new ArrayList<Allele>(3); // some overhead
for ( VariantContext v : variants ) {
alleles.addAll(v.getAlternateAlleles());
}
return alleles;
}
@Override
public Integer reduce(VariantContext value, Integer sum) {
if ( value == null )
return sum;
writer.add(value);
return ++sum;
}
private int N_ITERATIONS = 1;
private double[] getACPosteriors(GenotypesContext gc) {
// note this uses uniform priors (!)
double[][] zeroPriors = new double[1][1+2*gc.size()];
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2,2*gc.size());
// todo -- allow multiple alleles here
for ( int i = 0; i < N_ITERATIONS; i ++ ) {
ExactAFCalculationModel.linearExactMultiAllelic(gc, 2, zeroPriors, result, false);
}
return result.log10AlleleFrequencyPosteriors[0];
}
private String boundAlleleFrequency(double[] ACLikelihoods) {
// note that no-calls are unnecessary: the ML likelihoods take nocalls into account as 0,0,0 GLs
// thus, for sites with K 100,40,0 likelihoods and M no-calls, the likelihoods will be
// agnostic between 2*K alleles through 2*(K+M) alleles - exactly what we want to marginalize over
// want to pick a lower limit x and upper limit y such that
// int_{f = x to y} sum_{c = 0 to 2*AN} P(AF=f | c, AN) df = 0.95
// int_{f=x to y} calculateAFPosterior(f) df = 0.95
// and that (y-x) is minimized
// this is done by quantizing [0,1] into small bins and, since the distribution is
// unimodal, greedily adding them until the probability is >= 0.95
throw new ReviewedStingException("This walker is unsupported, and is not fully implemented", new NotImplementedException("bound allele frequency not implemented"));
}
private double calculateAFPosterior(double[] likelihoods, double af) {
double[] probLiks = new double[likelihoods.length];
for ( int c = 0; c < likelihoods.length; c++) {
probLiks[c] = calculateAFPosterior(c,likelihoods.length,af);
}
return MathUtils.log10sumLog10(probLiks);
}
private double calculateAFPosterior(int ac, int n, double af) {
// evaluate the allele frequency posterior distribution at AF given AC observations of N chromosomes
switch ( ac ) {
case 0:
return logAC0Coef(n) + n*Math.log10(1 - af) - Math.log10(af);
case 1:
return Math.log10(n) + (n-1)*Math.log10(1-af) - n*Math.log10(1-EPS_LOWER_LIMIT);
case 2:
return Math.log10(n) + Math.log10(n-1) + Math.log10(af) + (n-2)*Math.log10(1-af) - Math.log10(1-(n-1)*EPS_LOWER_LIMIT) - (n-1)*Math.log10(EPS_LOWER_LIMIT);
default:
return (ac-1)*Math.log10(af)+ac*Math.log10( (double) n-ac)-(n-ac)*af*Math.log10(Math.E) - MathUtils.log10Gamma(ac);
}
}
private double logAC0Coef(int an) {
if ( ! logAC0Cache.containsKey(an) ) {
double coef = -Math.log10(EPS_LOWER_LIMIT);
for ( int k = 1; k <= an; k++ ) {
// note this should typically just be
// term = ( 1 - Math.pow(EPS_LOWER_LIMIT,k) ) * MathUtils.binomialCoefficient(an,k) / k
// but the 1-E term will just be 1, so we do the following to mitigate this problem
double binom = MathUtils.binomialCoefficient(an,k);
double eps_correction = EPS_LOWER_LIMIT*Math.pow(binom,1/k);
double term = binom/k - Math.pow(eps_correction,k);
if ( k % 2 == 0 ) {
coef += term;
} else {
coef -= term;
}
}
logAC0Cache.put(an,coef);
}
return logAC0Cache.get(an);
}
private double adaptiveSimpson(double[] likelihoods, double start, double stop, double err, int cap) {
double mid = (start + stop)/2;
double size = stop-start;
double fa = calculateAFPosterior(likelihoods,start);
double fb = calculateAFPosterior(likelihoods,mid);
double fc = calculateAFPosterior(likelihoods,stop);
double s = (size/6)*(fa + 4*fc + fb);
double h = simpAux(likelihoods,start,stop,err,s,fa,fb,fc,cap);
return h;
}
private double simpAux(double[] likelihoods, double a,double b,double eps,double s,double fa,double fb,double fc,double cap){
if ( s == 0 )
return -300.0;
double c = ( a + b )/2;
double h = b-a;
double d = (a + c)/2;
double e = (c + b)/2;
double fd = calculateAFPosterior(likelihoods, d);
double fe = calculateAFPosterior(likelihoods, e);
double s_l = (h/12)*(fa + 4*fd + fc);
double s_r = (h/12)*(fc + 4*fe + fb);
double s_2 = s_l + s_r;
if ( cap <= 0 || Math.abs(s_2 - s) <= 15*eps ){
return Math.log10(s_2 + (s_2 - s)/15.0);
}
return MathUtils.approximateLog10SumLog10(simpAux(likelihoods,a,c,eps/2,s_l,fa,fc,fd,cap-1),simpAux(likelihoods, c, b, eps / 2, s_r, fc, fb, fe, cap - 1));
}
}

View File

@ -137,6 +137,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@Output(doc="File to which variants should be written",required=true) @Output(doc="File to which variants should be written",required=true)
protected VCFWriter writer = null; protected VCFWriter writer = null;
@Hidden
@Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false) @Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
protected PrintStream verboseWriter = null; protected PrintStream verboseWriter = null;
@ -218,7 +219,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// initialize the verbose writer // initialize the verbose writer
if ( verboseWriter != null ) if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior"); verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);

View File

@ -81,8 +81,8 @@ public class UnifiedGenotyperEngine {
private ThreadLocal<double[]> posteriorsArray = new ThreadLocal<double[]>(); private ThreadLocal<double[]> posteriorsArray = new ThreadLocal<double[]>();
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
private final double[][] log10AlleleFrequencyPriorsSNPs; private final double[] log10AlleleFrequencyPriorsSNPs;
private final double[][] log10AlleleFrequencyPriorsIndels; private final double[] log10AlleleFrequencyPriorsIndels;
// the priors object // the priors object
private final GenotypePriors genotypePriorsSNPs; private final GenotypePriors genotypePriorsSNPs;
@ -128,8 +128,8 @@ public class UnifiedGenotyperEngine {
this.annotationEngine = engine; this.annotationEngine = engine;
N = 2 * this.samples.size(); N = 2 * this.samples.size();
log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1]; log10AlleleFrequencyPriorsSNPs = new double[N+1];
log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1]; log10AlleleFrequencyPriorsIndels = new double[N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY); computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP); genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
@ -265,8 +265,8 @@ public class UnifiedGenotyperEngine {
// initialize the data for this thread if that hasn't been done yet // initialize the data for this thread if that hasn't been done yet
if ( afcm.get() == null ) { if ( afcm.get() == null ) {
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N)); alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES));
posteriorsArray.set(new double[N + 2]); posteriorsArray.set(new double[2]);
} }
AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get(); AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
@ -279,9 +279,7 @@ public class UnifiedGenotyperEngine {
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
} }
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) AFresult.reset();
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
List<Allele> allelesUsedInGenotyping = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult); List<Allele> allelesUsedInGenotyping = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
// is the most likely frequency conformation AC=0 for all alternate alleles? // is the most likely frequency conformation AC=0 for all alternate alleles?
@ -297,11 +295,10 @@ public class UnifiedGenotyperEngine {
if ( indexOfAllele == -1 ) if ( indexOfAllele == -1 )
continue; continue;
int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1]); int indexOfBestAC = AFresult.getAlleleCountsOfMAP()[indexOfAllele-1];
// if the most likely AC is not 0, then this is a good alternate allele to use; // if the most likely AC is not 0, then this is a good alternate allele to use
// make sure to test against log10PosteriorOfAFzero since that no longer is an entry in the array if ( indexOfBestAC != 0 ) {
if ( indexOfBestAC != 0 && AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1][indexOfBestAC] > AFresult.log10PosteriorOfAFzero ) {
myAlleles.add(alternateAllele); myAlleles.add(alternateAllele);
bestGuessIsRef = false; bestGuessIsRef = false;
} }
@ -312,7 +309,6 @@ public class UnifiedGenotyperEngine {
} }
// calculate p(f>0): // calculate p(f>0):
// because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them
final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get()); final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get());
final double PofF = 1.0 - normalizedPosteriors[0]; final double PofF = 1.0 - normalizedPosteriors[0];
@ -320,18 +316,11 @@ public class UnifiedGenotyperEngine {
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) ) if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * AFresult.log10PosteriorOfAFzero; phredScaledConfidence = -10.0 * AFresult.getLog10PosteriorOfAFzero();
} else { } else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
if ( Double.isInfinite(phredScaledConfidence) ) { if ( Double.isInfinite(phredScaledConfidence) ) {
double sum = AFresult.log10AlleleFrequencyPosteriors[0][0]; final double sum = AFresult.getLog10PosteriorMatrixSum();
if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
sum = 0.0;
for (int i = 1; i <= N; i++) {
if ( AFresult.log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
break;
sum += AFresult.log10AlleleFrequencyPosteriors[0][i];
}
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
} }
} }
@ -360,7 +349,7 @@ public class UnifiedGenotyperEngine {
// print out stats if we have a writer // print out stats if we have a writer
if ( verboseWriter != null && !limitedContext ) if ( verboseWriter != null && !limitedContext )
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model); printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, model);
// *** note that calculating strand bias involves overwriting data structures, so we do that last // *** note that calculating strand bias involves overwriting data structures, so we do that last
final HashMap<String, Object> attributes = new HashMap<String, Object>(); final HashMap<String, Object> attributes = new HashMap<String, Object>();
@ -374,29 +363,27 @@ public class UnifiedGenotyperEngine {
// the overall lod // the overall lod
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0]; //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); double overallLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
List<Allele> alternateAllelesToUse = builder.make().getAlternateAlleles(); List<Allele> alternateAllelesToUse = builder.make().getAlternateAlleles();
// the forward lod // the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, alternateAllelesToUse, false, model); VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, alternateAllelesToUse, false, model);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); AFresult.reset();
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult); afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero; double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); double forwardLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod // the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, alternateAllelesToUse, false, model); VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, alternateAllelesToUse, false, model);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); AFresult.reset();
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult); afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero; double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); double reverseLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
@ -433,9 +420,9 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
} }
private double[] generateNormalizedPosteriors(AlleleFrequencyCalculationResult AFresult, double[] normalizedPosteriors) { public static double[] generateNormalizedPosteriors(final AlleleFrequencyCalculationResult AFresult, final double[] normalizedPosteriors) {
normalizedPosteriors[0] = AFresult.log10PosteriorOfAFzero; normalizedPosteriors[0] = AFresult.getLog10PosteriorOfAFzero();
System.arraycopy(AFresult.log10AlleleFrequencyPosteriors[0], 0, normalizedPosteriors, 1, normalizedPosteriors.length-1); normalizedPosteriors[1] = AFresult.getLog10PosteriorMatrixSum();
return MathUtils.normalizeFromLog10(normalizedPosteriors); return MathUtils.normalizeFromLog10(normalizedPosteriors);
} }
@ -495,14 +482,6 @@ public class UnifiedGenotyperEngine {
return stratifiedContexts; return stratifiedContexts;
} }
protected static void clearAFarray(double[][] AFs) {
for ( int i = 0; i < AFs.length; i++ ) {
for ( int j = 0; j < AFs[i].length; j++ ) {
AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
}
}
private final static double[] binomialProbabilityDepthCache = new double[10000]; private final static double[] binomialProbabilityDepthCache = new double[10000];
static { static {
for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) { for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
@ -547,7 +526,7 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false); return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
} }
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, double[] normalizedPosteriors, final GenotypeLikelihoodsCalculationModel.Model model) { protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, final GenotypeLikelihoodsCalculationModel.Model model) {
Allele refAllele = null, altAllele = null; Allele refAllele = null, altAllele = null;
for ( Allele allele : vc.getAlleles() ) { for ( Allele allele : vc.getAlleles() ) {
if ( allele.isReference() ) if ( allele.isReference() )
@ -570,11 +549,8 @@ public class UnifiedGenotyperEngine {
AFline.append(i + "/" + N + "\t"); AFline.append(i + "/" + N + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/N)); AFline.append(String.format("%.2f\t", ((float)i)/N));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i])); AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().getLog10MLE()));
AFline.append("0.00000000\t"); AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().getLog10MAP()));
else
AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]));
AFline.append(String.format("%.8f\t", normalizedPosteriors[i]));
verboseWriter.println(AFline.toString()); verboseWriter.println(AFline.toString());
} }
@ -638,25 +614,22 @@ public class UnifiedGenotyperEngine {
return null; return null;
} }
protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) { protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
// the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i
for (int alleles = 1; alleles <= priors.length; alleles++) {
double sum = 0.0; double sum = 0.0;
// for each i // for each i
for (int i = 1; i <= N; i++) { for (int i = 1; i <= N; i++) {
double value = Math.pow(theta, alleles) / (double)i; final double value = theta / (double)i;
priors[alleles-1][i] = Math.log10(value); priors[i] = Math.log10(value);
sum += value; sum += value;
} }
// null frequency for AF=0 is (1 - sum(all other frequencies)) // null frequency for AF=0 is (1 - sum(all other frequencies))
priors[alleles-1][0] = Math.log10(1.0 - sum); priors[0] = Math.log10(1.0 - sum);
}
} }
protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) { switch( model ) {
case SNP: case SNP:
return log10AlleleFrequencyPriorsSNPs; return log10AlleleFrequencyPriorsSNPs;

View File

@ -25,19 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult; import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeSet; import java.util.TreeSet;
public class GLBasedSampleSelector extends SampleSelector { public class GLBasedSampleSelector extends SampleSelector {
Map<Integer,double[][]> numAllelePriorMatrix = new HashMap<Integer,double[][]>(); double[] flatPriors = null;
double referenceLikelihood; double referenceLikelihood;
public GLBasedSampleSelector(TreeSet<String> sm, double refLik) { public GLBasedSampleSelector(TreeSet<String> sm, double refLik) {
super(sm); super(sm);
@ -53,9 +47,11 @@ public class GLBasedSampleSelector extends SampleSelector {
// now check to see (using EXACT model) whether this should be variant // now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec? // do we want to apply a prior? maybe user-spec?
double[][] flatPrior = createFlatPrior(vc.getAlleles()); if ( flatPriors == null ) {
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size(),2*samples.size()); flatPriors = new double[1+2*samples.size()];
ExactAFCalculationModel.linearExactMultiAllelic(subContext.getGenotypes(),vc.getAlternateAlleles().size(),flatPrior,result,true); }
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size());
ExactAFCalculationModel.linearExactMultiAllelic(subContext.getGenotypes(),vc.getAlternateAlleles().size(),flatPriors,result);
// do we want to let this qual go up or down? // do we want to let this qual go up or down?
if ( result.getLog10PosteriorOfAFzero() < referenceLikelihood ) { if ( result.getLog10PosteriorOfAFzero() < referenceLikelihood ) {
return true; return true;
@ -63,12 +59,4 @@ public class GLBasedSampleSelector extends SampleSelector {
return false; return false;
} }
private double[][] createFlatPrior(List<Allele> alleles) {
if ( ! numAllelePriorMatrix.containsKey(alleles.size()) ) {
numAllelePriorMatrix.put(alleles.size(), new double[alleles.size()][1+2*samples.size()]);
}
return numAllelePriorMatrix.get(alleles.size());
}
} }

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
@ -18,7 +17,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1; static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2; static double[] AA2, AB2, AC2, BB2, BC2, CC2;
static final int numSamples = 3; static final int numSamples = 3;
static double[][] priors = new double[2][2*numSamples+1]; // flat priors static double[] priors = new double[2*numSamples+1]; // flat priors
@BeforeSuite @BeforeSuite
public void before() { public void before() {
@ -83,26 +82,16 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
@Test(dataProvider = "getGLs") @Test(dataProvider = "getGLs")
public void testGLs(GetGLsTest cfg) { public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2, 2*numSamples); final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
for ( int i = 0; i < 2; i++ ) {
for ( int j = 0; j < 2*numSamples+1; j++ ) {
result.log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
result.log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
}
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result, false); ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
int nameIndex = 1; int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = MathUtils.maxElementIndex(result.log10AlleleFrequencyPosteriors[allele]); int calculatedAlleleCount = result.getAlleleCountsOfMAP()[allele];
if ( result.log10AlleleFrequencyPosteriors[0][0] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) {
Assert.assertTrue(calculatedAlleleCount == expectedAlleleCount || result.log10AlleleFrequencyPosteriors[0][calculatedAlleleCount] < result.log10PosteriorOfAFzero);
} else {
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
} }
} }
}
} }

View File

@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() { public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232")); Arrays.asList("ac3737b4212f634a03c640c83f670955"));
executeTest("test MultiSample Pilot1", spec); executeTest("test MultiSample Pilot1", spec);
} }
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() { public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1, "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
Arrays.asList("849ee8b21b4bbb02dfc7867a4f1bc14b")); Arrays.asList("6f70dfbaf3bb70c702f9e9dbacd67c17"));
executeTest("test Multiple SNP alleles", spec); executeTest("test Multiple SNP alleles", spec);
} }
@ -138,7 +138,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() { public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("6172d2f3d370132f4c57a26aa94c256e")); Arrays.asList("e9d23a08472e4e27b4f25e844f5bad57"));
executeTest("test SLOD", spec); executeTest("test SLOD", spec);
} }
@ -146,8 +146,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testOutputParameter() { public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>(); HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" ); e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "553f6b4cbf380885bec9dd634cf68742" ); e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "ecf92054c1e4bd9d6529b8002d385165" );
e.put( "--output_mode EMIT_ALL_SITES", "6d8624e45ad9dae5803ac705b39e4ffa" ); e.put( "--output_mode EMIT_ALL_SITES", "119c9fcefbc69e0fc10b1dc52f6438e3" );
for ( Map.Entry<String, String> entry : e.entrySet() ) { for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -300,13 +300,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels() { public void testMultiSampleIndels() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("52340d578a708fa709b69ce48987bc9d")); Arrays.asList("fbc48d7d9e622c9af7922f91bc858151"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("9566c7abef5ee5829a516d90445b347f")); Arrays.asList("94c52ef70e44709ccd947d32e9c27da9"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
} }