Cleanup Exact model, better unit tests

-- Added combinatorial unit tests for both Diploid and General (in diploid-case) for 2 and 3 alleles in all combinations of sample types (i.e., AA, AB, BB and equiv. for tri-allelic).  More assert statements to ensure quality of the result.
-- Added docs (DOCUMENT YOUR CODE!) to AlleleFrequencyCalculationResult, with proper input error handling and contracts.  Made mutation functions all protected
-- No longer need to call reset on your AlleleFrequencyCalculationResult -- it'd done for you in the calculation function.  reset is a protected method now, so it's all cleaner and nicer this way
-- TODO still -- need to add edge-case tests for non-informative samples (0,0,0), for the impact of priors, and I need to add some way to test the result of the pNonRef
This commit is contained in:
Mark DePristo 2012-09-30 20:21:18 -04:00
parent 3e01a76590
commit de941ddbbe
6 changed files with 232 additions and 89 deletions

View File

@ -36,7 +36,6 @@ import java.util.*;
public class GeneralPloidyExactAFCalculation extends ExactAFCalculation { public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
final protected UnifiedArgumentCollection UAC;
private final int ploidy; private final int ploidy;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
@ -45,8 +44,11 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
protected GeneralPloidyExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { protected GeneralPloidyExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter); super(UAC, N, logger, verboseWriter);
ploidy = UAC.samplePloidy; ploidy = UAC.samplePloidy;
this.UAC = UAC; }
public GeneralPloidyExactAFCalculation(final int nSamples, final int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, false, null, null, null);
this.ploidy = ploidy;
} }
@Override @Override
@ -63,7 +65,6 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
builder.alleles(alleles); builder.alleles(alleles);
builder.genotypes(subsetAlleles(vc, alleles, false, ploidy)); builder.genotypes(subsetAlleles(vc, alleles, false, ploidy));
return builder.make(); return builder.make();
} else { } else {
return vc; return vc;
} }

View File

@ -100,7 +100,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
* *
* Allocates a new results object. Useful for testing but slow in practice. * Allocates a new results object. Useful for testing but slow in practice.
*/ */
public AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc, public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) { final double[] log10AlleleFrequencyPriors) {
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(MAX_ALTERNATE_ALLELES_TO_GENOTYPE)); return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
} }
@ -113,15 +113,17 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
* @param result a pre-allocated (for efficiency) object to hold the result of the calculation * @param result a pre-allocated (for efficiency) object to hold the result of the calculation
* @return result (for programming convenience) * @return result (for programming convenience)
*/ */
public AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc, public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) { final AlleleFrequencyCalculationResult result) {
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null"); if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null"); if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
if ( result == null ) throw new IllegalArgumentException("Results object cannot be null"); if ( result == null ) throw new IllegalArgumentException("Results object cannot be null");
// reset the result, so we can store our new result there
result.reset();
final VariantContext vcWorking = reduceScope(vc); final VariantContext vcWorking = reduceScope(vc);
result.setAllelesUsedInGenotyping(vcWorking.getAlleles());
callTimer.start(); callTimer.start();
computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors, result); computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors, result);
@ -130,6 +132,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
if ( callReport != null ) if ( callReport != null )
printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result.getLog10PosteriorOfAFzero()); printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result.getLog10PosteriorOfAFzero());
result.setAllelesUsedInGenotyping(vcWorking.getAlleles());
return result; return result;
} }

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import com.google.java.contract.Ensures;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -39,7 +40,6 @@ import java.util.List;
* Useful helper class to communicate the results of the allele frequency calculation * Useful helper class to communicate the results of the allele frequency calculation
*/ */
public class AlleleFrequencyCalculationResult { public class AlleleFrequencyCalculationResult {
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles // These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
private double log10MLE; private double log10MLE;
private double log10MAP; private double log10MAP;
@ -56,22 +56,77 @@ public class AlleleFrequencyCalculationResult {
private double log10LikelihoodOfAFzero; private double log10LikelihoodOfAFzero;
private double log10PosteriorOfAFzero; private double log10PosteriorOfAFzero;
private List<Allele> allelesUsedInGenotyping; /**
* The list of alleles actually used in computing the AF
*/
private List<Allele> allelesUsedInGenotyping = null;
/**
* Create a results object capability of storing results for calls with up to maxAltAlleles
*
* @param maxAltAlleles an integer >= 1
*/
public AlleleFrequencyCalculationResult(final int maxAltAlleles) { public AlleleFrequencyCalculationResult(final int maxAltAlleles) {
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
alleleCountsOfMLE = new int[maxAltAlleles]; alleleCountsOfMLE = new int[maxAltAlleles];
alleleCountsOfMAP = new int[maxAltAlleles]; alleleCountsOfMAP = new int[maxAltAlleles];
reset(); reset();
} }
/**
* Get the log10 value of the probability mass at the MLE
*
* @return a log10 prob
*/
@Ensures("result < 0")
public double getLog10MLE() { public double getLog10MLE() {
return log10MLE; return log10MLE;
} }
/**
* Get the log10 value of the probability mass at the max. a posterior (MAP)
*
* @return a log10 prob
*/
@Ensures("result < 0")
public double getLog10MAP() { public double getLog10MAP() {
return log10MAP; return log10MAP;
} }
/**
* Returns a vector with maxAltAlleles values containing AC values at the MLE
*
* The values of the ACs for this call are stored in the getAllelesUsedInGenotyping order,
* starting from index 0 (i.e., the first alt allele is at 0). The vector is always
* maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values
* are meaningful.
*
* @return a vector with allele counts, not all of which may be meaningful
*/
@Ensures("result != null")
public int[] getAlleleCountsOfMLE() {
return alleleCountsOfMLE;
}
/**
* Returns a vector with maxAltAlleles values containing AC values at the MAP
*
* @see #getAlleleCountsOfMLE() for the encoding of results in this vector
*
* @return a non-null vector of ints
*/
@Ensures("result != null")
public int[] getAlleleCountsOfMAP() {
return alleleCountsOfMAP;
}
/**
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
*
* @return
*/
public double getLog10PosteriorsMatrixSumWithoutAFzero() { public double getLog10PosteriorsMatrixSumWithoutAFzero() {
if ( log10PosteriorMatrixSum == null ) { if ( log10PosteriorMatrixSum == null ) {
log10PosteriorMatrixSum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex); log10PosteriorMatrixSum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex);
@ -79,23 +134,53 @@ public class AlleleFrequencyCalculationResult {
return log10PosteriorMatrixSum; return log10PosteriorMatrixSum;
} }
public int[] getAlleleCountsOfMLE() { /**
return alleleCountsOfMLE; * TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
} *
* @return
public int[] getAlleleCountsOfMAP() { */
return alleleCountsOfMAP;
}
public double getLog10LikelihoodOfAFzero() { public double getLog10LikelihoodOfAFzero() {
return log10LikelihoodOfAFzero; return log10LikelihoodOfAFzero;
} }
/**
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
*
* @return
*/
public double getLog10PosteriorOfAFzero() { public double getLog10PosteriorOfAFzero() {
return log10PosteriorOfAFzero; return log10PosteriorOfAFzero;
} }
public void reset() { /**
* Get the list of alleles actually used in genotyping.
*
* Due to computational / implementation constraints this may be smaller than
* the actual list of alleles requested
*
* @return a non-empty list of alleles used during genotyping
*/
@Ensures({"result != null", "! result.isEmpty()"})
public List<Allele> getAllelesUsedInGenotyping() {
if ( allelesUsedInGenotyping == null )
throw new IllegalStateException("allelesUsedInGenotyping requested but not yet set");
return allelesUsedInGenotyping;
}
// --------------------------------------------------------------------------------
//
// Protected mutational methods only for use within the calculation models themselves
//
// --------------------------------------------------------------------------------
/**
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
*
* Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer
*/
protected void reset() {
log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AlleleFrequencyCalculation.VALUE_NOT_CALCULATED; log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AlleleFrequencyCalculation.VALUE_NOT_CALCULATED;
for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) { for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) {
alleleCountsOfMLE[i] = 0; alleleCountsOfMLE[i] = 0;
@ -106,7 +191,7 @@ public class AlleleFrequencyCalculationResult {
allelesUsedInGenotyping = null; allelesUsedInGenotyping = null;
} }
public void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) { protected void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
if ( log10LofK > log10MLE ) { if ( log10LofK > log10MLE ) {
log10MLE = log10LofK; log10MLE = log10LofK;
for ( int i = 0; i < alleleCountsForK.length; i++ ) for ( int i = 0; i < alleleCountsForK.length; i++ )
@ -114,7 +199,7 @@ public class AlleleFrequencyCalculationResult {
} }
} }
public void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) { protected void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
addToPosteriorsCache(log10LofK); addToPosteriorsCache(log10LofK);
if ( log10LofK > log10MAP ) { if ( log10LofK > log10MAP ) {
@ -136,7 +221,7 @@ public class AlleleFrequencyCalculationResult {
} }
} }
public void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) { protected void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero; this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
if ( log10LikelihoodOfAFzero > log10MLE ) { if ( log10LikelihoodOfAFzero > log10MLE ) {
log10MLE = log10LikelihoodOfAFzero; log10MLE = log10LikelihoodOfAFzero;
@ -144,7 +229,7 @@ public class AlleleFrequencyCalculationResult {
} }
} }
public void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) { protected void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
this.log10PosteriorOfAFzero = log10PosteriorOfAFzero; this.log10PosteriorOfAFzero = log10PosteriorOfAFzero;
if ( log10PosteriorOfAFzero > log10MAP ) { if ( log10PosteriorOfAFzero > log10MAP ) {
log10MAP = log10PosteriorOfAFzero; log10MAP = log10PosteriorOfAFzero;
@ -152,11 +237,10 @@ public class AlleleFrequencyCalculationResult {
} }
} }
public List<Allele> getAllelesUsedInGenotyping() { protected void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
return allelesUsedInGenotyping; if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.isEmpty() )
} throw new IllegalArgumentException("allelesUsedInGenotyping cannot be null or empty");
public void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
this.allelesUsedInGenotyping = allelesUsedInGenotyping; this.allelesUsedInGenotyping = allelesUsedInGenotyping;
} }
} }

View File

@ -41,6 +41,14 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
super(nSamples, maxAltAlleles, false, null, null, null); super(nSamples, maxAltAlleles, false, null, null, null);
} }
/**
* Dynamically found in UnifiedGenotyperEngine
*
* @param UAC
* @param N
* @param logger
* @param verboseWriter
*/
public DiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { public DiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter); super(UAC, N, logger, verboseWriter);
} }

View File

@ -370,7 +370,6 @@ public class UnifiedGenotyperEngine {
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
} }
AFresult.reset();
afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult); afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
// is the most likely frequency conformation AC=0 for all alternate alleles? // is the most likely frequency conformation AC=0 for all alternate alleles?
@ -477,7 +476,6 @@ public class UnifiedGenotyperEngine {
// the forward lod // the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
AFresult.reset();
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult); afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero(); double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
@ -486,7 +484,6 @@ public class UnifiedGenotyperEngine {
// the reverse lod // the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
AFresult.reset();
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult); afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero(); double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero();

View File

@ -1,46 +1,85 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.List; import java.util.List;
public class ExactAFCalculationModelUnitTest extends BaseTest { public class ExactAFCalculationModelUnitTest extends BaseTest {
static Allele A = Allele.create("A", true);
static Allele C = Allele.create("C");
static Allele G = Allele.create("G");
static Allele T = Allele.create("T");
static double[] AA1, AB1, BB1; static int sampleNameCounter = 0;
static double[] AA2, AB2, AC2, BB2, BC2, CC2; static Genotype AA1, AB1, BB1;
static final int numSamples = 3; static Genotype AA2, AB2, AC2, BB2, BC2, CC2;
static double[] priors = new double[2*numSamples+1]; // flat priors final double[] FLAT_3SAMPLE_PRIORS = new double[2*3+1]; // flat priors
@BeforeSuite @BeforeSuite
public void before() { public void before() {
AA1 = new double[]{0.0, -20.0, -20.0}; AA1 = makePL(Arrays.asList(A, A), 0, 20, 20);
AB1 = new double[]{-20.0, 0.0, -20.0}; AB1 = makePL(Arrays.asList(A, C), 20, 0, 20);
BB1 = new double[]{-20.0, -20.0, 0.0}; BB1 = makePL(Arrays.asList(C, C), 20, 20, 0);
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0}; AA2 = makePL(Arrays.asList(A, A), 0, 20, 20, 20, 20, 20);
AC2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0}; AB2 = makePL(Arrays.asList(A, C), 20, 0, 20, 20, 20, 20);
BB2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0}; BB2 = makePL(Arrays.asList(C, C), 20, 20, 0, 20, 20, 20);
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; AC2 = makePL(Arrays.asList(A, G), 20, 20, 20, 0, 20, 20);
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0}; BC2 = makePL(Arrays.asList(C, G), 20, 20, 20, 20, 0, 20);
CC2 = makePL(Arrays.asList(G, G), 20, 20, 20, 20, 20, 0);
}
private Genotype makePL(final List<Allele> expectedGT, int ... pls) {
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
gb.alleles(expectedGT);
gb.PL(pls);
return gb.make();
} }
private class GetGLsTest extends TestDataProvider { private class GetGLsTest extends TestDataProvider {
GenotypesContext GLs; GenotypesContext GLs;
int numAltAlleles; int numAltAlleles;
String name; final ExactAFCalculation calc;
final int[] expectedACs;
final double[] priors;
private GetGLsTest(String name, int numAltAlleles, Genotype... arg) { private GetGLsTest(final ExactAFCalculation calculation, int numAltAlleles, List<Genotype> arg, final double[] priors) {
super(GetGLsTest.class, name); super(GetGLsTest.class);
GLs = GenotypesContext.create(arg); GLs = GenotypesContext.create(new ArrayList<Genotype>(arg));
this.name = name;
this.numAltAlleles = numAltAlleles; this.numAltAlleles = numAltAlleles;
this.calc = calculation;
this.priors = priors;
expectedACs = new int[numAltAlleles+1];
for ( int alleleI = 0; alleleI < expectedACs.length; alleleI++ ) {
expectedACs[alleleI] = 0;
final Allele allele = getAlleles().get(alleleI);
for ( Genotype g : arg ) {
expectedACs[alleleI] += Collections.frequency(g.getAlleles(), allele);
}
}
}
public AlleleFrequencyCalculationResult execute() {
return getCalc().getLog10PNonRef(getVC(), getPriors());
}
public double[] getPriors() {
return priors;
}
public ExactAFCalculation getCalc() {
return calc;
} }
public VariantContext getVC() { public VariantContext getVC() {
@ -56,51 +95,66 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
Allele.create("T")).subList(0, numAltAlleles+1); Allele.create("T")).subList(0, numAltAlleles+1);
} }
public boolean isNonRef() {
return expectedACs[0] < getVC().getNSamples() * 2;
}
public int getExpectedAltAC(final int alleleI) {
return expectedACs[alleleI+1];
}
public String toString() { public String toString() {
return String.format("%s input=%s", super.toString(), GLs); return String.format("%s model=%s input=%s", super.toString(), calc.getClass().getSimpleName(), GLs);
} }
} }
private static Genotype createGenotype(String name, double[] gls) { @DataProvider(name = "wellFormedGLs")
return new GenotypeBuilder(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(gls).make(); public Object[][] createSimpleGLsData() {
} final List<Genotype> biAllelicSamples = Arrays.asList(AA1, AB1, BB1);
final List<Genotype> triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2);
@DataProvider(name = "getGLs") for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) {
public Object[][] createGLsData() { final DiploidExactAFCalculation diploidCalc = new DiploidExactAFCalculation(nSamples, 4);
final GeneralPloidyExactAFCalculation generalCalc = new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
final double[] priors = new double[2*nSamples+1]; // flat priors
// bi-allelic case for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc) ) {
new GetGLsTest("B0", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AA3", AA1)); // bi-allelic
new GetGLsTest("B1", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AB", AB1)); if ( nSamples <= biAllelicSamples.size() )
new GetGLsTest("B2", 1, createGenotype("AA1", AA1), createGenotype("BB", BB1), createGenotype("AA2", AA1)); for ( List<Genotype> genotypes : Utils.makePermutations(biAllelicSamples, nSamples, true) )
new GetGLsTest("B3a", 1, createGenotype("AB", AB1), createGenotype("AA", AA1), createGenotype("BB", BB1)); new GetGLsTest(model, 1, genotypes, priors);
new GetGLsTest("B3b", 1, createGenotype("AB1", AB1), createGenotype("AB2", AB1), createGenotype("AB3", AB1));
new GetGLsTest("B4", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("AA", AA1));
new GetGLsTest("B5", 1, createGenotype("BB1", BB1), createGenotype("AB", AB1), createGenotype("BB2", BB1));
new GetGLsTest("B6", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("BB3", BB1));
// tri-allelic case // tri-allelic
new GetGLsTest("B1C0", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AB", AB2)); for ( List<Genotype> genotypes : Utils.makePermutations(triAllelicSamples, nSamples, true) )
new GetGLsTest("B0C1", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AC", AC2)); new GetGLsTest(model, 2, genotypes, priors);
new GetGLsTest("B1C1a", 2, createGenotype("AA", AA2), createGenotype("AB", AB2), createGenotype("AC", AC2)); }
new GetGLsTest("B1C1b", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("BC", BC2)); }
new GetGLsTest("B2C1", 2, createGenotype("AB1", AB2), createGenotype("AB2", AB2), createGenotype("AC", AC2));
new GetGLsTest("B3C2a", 2, createGenotype("AB", AB2), createGenotype("BC1", BC2), createGenotype("BC2", BC2));
new GetGLsTest("B3C2b", 2, createGenotype("AB", AB2), createGenotype("BB", BB2), createGenotype("CC", CC2));
return GetGLsTest.getTests(GetGLsTest.class); return GetGLsTest.getTests(GetGLsTest.class);
} }
@Test(dataProvider = "getGLs") @Test(dataProvider = "wellFormedGLs")
public void testGLs(GetGLsTest cfg) { public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = cfg.execute();
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(cfg.getVC().getNSamples(), cfg.numAltAlleles); if ( cfg.isNonRef() ) {
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors); //logger.warn("pNonRef = " + result.getLog10PosteriorOfAFzero());
Assert.assertTrue(result.getLog10PosteriorOfAFzero() < -1, "Genotypes imply pNonRef > 0 but we had posterior AF = 0 of " + result.getLog10PosteriorOfAFzero());
int nameIndex = 1; // TODO -- why does this fail?
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { //Assert.assertTrue(result.getLog10PosteriorsMatrixSumWithoutAFzero() > -1, "Genotypes imply pNonRef > 0 but posterior sum over all non-AF0 fields was only " + result.getLog10PosteriorsMatrixSumWithoutAFzero());
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[allele]; // todo -- I'm not sure this is supposed to be true
//Assert.assertEquals(Math.pow(10, result.getLog10PosteriorOfAFzero()) + Math.pow(10, result.getLog10PosteriorsMatrixSumWithoutAFzero()), 1.0, 1e-3, "Total posterior prob didn't sum to 1");
}
Assert.assertNotNull(result.getAllelesUsedInGenotyping());
Assert.assertTrue(cfg.getAlleles().containsAll(result.getAllelesUsedInGenotyping()), "Result object has alleles not in our initial allele list");
for ( int altAlleleI = 0; altAlleleI < cfg.numAltAlleles; altAlleleI++ ) {
int expectedAlleleCount = cfg.getExpectedAltAC(altAlleleI);
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[altAlleleI];
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
} }
@ -108,12 +162,10 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
@Test @Test
public void testLargeGLs() { public void testLargeGLs() {
final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0);
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(1, 1), 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS);
final double[] BB = new double[]{-20000000.0, -20000000.0, 0.0}; final AlleleFrequencyCalculationResult result = cfg.execute();
GetGLsTest cfg = new GetGLsTest("B6", 1, createGenotype("1", BB), createGenotype("2", BB), createGenotype("3", BB));
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(1, 1);
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors);
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0]; int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
Assert.assertEquals(calculatedAlleleCount, 6); Assert.assertEquals(calculatedAlleleCount, 6);
@ -121,13 +173,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
@Test @Test
public void testMismatchedGLs() { public void testMismatchedGLs() {
final Genotype AB = makePL(Arrays.asList(A,C), 2000, 0, 2000, 2000, 2000, 2000);
final Genotype AC = makePL(Arrays.asList(A,G), 100, 100, 100, 0, 100, 100);
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(2, 2), 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS);
final double[] AB = new double[]{-2000.0, 0.0, -2000.0, -2000.0, -2000.0, -2000.0}; final AlleleFrequencyCalculationResult result = cfg.execute();
final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0};
GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB));
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(2, 2);
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors);
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1); Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1); Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);