Cleanup Exact model, better unit tests
-- Added combinatorial unit tests for both Diploid and General (in diploid-case) for 2 and 3 alleles in all combinations of sample types (i.e., AA, AB, BB and equiv. for tri-allelic). More assert statements to ensure quality of the result. -- Added docs (DOCUMENT YOUR CODE!) to AlleleFrequencyCalculationResult, with proper input error handling and contracts. Made mutation functions all protected -- No longer need to call reset on your AlleleFrequencyCalculationResult -- it'd done for you in the calculation function. reset is a protected method now, so it's all cleaner and nicer this way -- TODO still -- need to add edge-case tests for non-informative samples (0,0,0), for the impact of priors, and I need to add some way to test the result of the pNonRef
This commit is contained in:
parent
3e01a76590
commit
de941ddbbe
|
|
@ -36,7 +36,6 @@ import java.util.*;
|
|||
|
||||
public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
|
||||
final protected UnifiedArgumentCollection UAC;
|
||||
|
||||
private final int ploidy;
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
|
@ -45,8 +44,11 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
protected GeneralPloidyExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
ploidy = UAC.samplePloidy;
|
||||
this.UAC = UAC;
|
||||
}
|
||||
|
||||
public GeneralPloidyExactAFCalculation(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, false, null, null, null);
|
||||
this.ploidy = ploidy;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -63,7 +65,6 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
builder.alleles(alleles);
|
||||
builder.genotypes(subsetAlleles(vc, alleles, false, ploidy));
|
||||
return builder.make();
|
||||
|
||||
} else {
|
||||
return vc;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
*
|
||||
* Allocates a new results object. Useful for testing but slow in practice.
|
||||
*/
|
||||
public AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||
public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
|
||||
}
|
||||
|
|
@ -113,15 +113,17 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
* @param result a pre-allocated (for efficiency) object to hold the result of the calculation
|
||||
* @return result (for programming convenience)
|
||||
*/
|
||||
public AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
|
||||
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
|
||||
if ( result == null ) throw new IllegalArgumentException("Results object cannot be null");
|
||||
|
||||
// reset the result, so we can store our new result there
|
||||
result.reset();
|
||||
|
||||
final VariantContext vcWorking = reduceScope(vc);
|
||||
result.setAllelesUsedInGenotyping(vcWorking.getAlleles());
|
||||
|
||||
callTimer.start();
|
||||
computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors, result);
|
||||
|
|
@ -130,6 +132,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
if ( callReport != null )
|
||||
printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result.getLog10PosteriorOfAFzero());
|
||||
|
||||
result.setAllelesUsedInGenotyping(vcWorking.getAlleles());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
|
|
@ -39,7 +40,6 @@ import java.util.List;
|
|||
* Useful helper class to communicate the results of the allele frequency calculation
|
||||
*/
|
||||
public class AlleleFrequencyCalculationResult {
|
||||
|
||||
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
|
||||
private double log10MLE;
|
||||
private double log10MAP;
|
||||
|
|
@ -56,22 +56,77 @@ public class AlleleFrequencyCalculationResult {
|
|||
private double log10LikelihoodOfAFzero;
|
||||
private double log10PosteriorOfAFzero;
|
||||
|
||||
private List<Allele> allelesUsedInGenotyping;
|
||||
/**
|
||||
* The list of alleles actually used in computing the AF
|
||||
*/
|
||||
private List<Allele> allelesUsedInGenotyping = null;
|
||||
|
||||
/**
|
||||
* Create a results object capability of storing results for calls with up to maxAltAlleles
|
||||
*
|
||||
* @param maxAltAlleles an integer >= 1
|
||||
*/
|
||||
public AlleleFrequencyCalculationResult(final int maxAltAlleles) {
|
||||
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
|
||||
|
||||
alleleCountsOfMLE = new int[maxAltAlleles];
|
||||
alleleCountsOfMAP = new int[maxAltAlleles];
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the log10 value of the probability mass at the MLE
|
||||
*
|
||||
* @return a log10 prob
|
||||
*/
|
||||
@Ensures("result < 0")
|
||||
public double getLog10MLE() {
|
||||
return log10MLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the log10 value of the probability mass at the max. a posterior (MAP)
|
||||
*
|
||||
* @return a log10 prob
|
||||
*/
|
||||
@Ensures("result < 0")
|
||||
public double getLog10MAP() {
|
||||
return log10MAP;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a vector with maxAltAlleles values containing AC values at the MLE
|
||||
*
|
||||
* The values of the ACs for this call are stored in the getAllelesUsedInGenotyping order,
|
||||
* starting from index 0 (i.e., the first alt allele is at 0). The vector is always
|
||||
* maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values
|
||||
* are meaningful.
|
||||
*
|
||||
* @return a vector with allele counts, not all of which may be meaningful
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public int[] getAlleleCountsOfMLE() {
|
||||
return alleleCountsOfMLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a vector with maxAltAlleles values containing AC values at the MAP
|
||||
*
|
||||
* @see #getAlleleCountsOfMLE() for the encoding of results in this vector
|
||||
*
|
||||
* @return a non-null vector of ints
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public int[] getAlleleCountsOfMAP() {
|
||||
return alleleCountsOfMAP;
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double getLog10PosteriorsMatrixSumWithoutAFzero() {
|
||||
if ( log10PosteriorMatrixSum == null ) {
|
||||
log10PosteriorMatrixSum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex);
|
||||
|
|
@ -79,23 +134,53 @@ public class AlleleFrequencyCalculationResult {
|
|||
return log10PosteriorMatrixSum;
|
||||
}
|
||||
|
||||
public int[] getAlleleCountsOfMLE() {
|
||||
return alleleCountsOfMLE;
|
||||
}
|
||||
|
||||
public int[] getAlleleCountsOfMAP() {
|
||||
return alleleCountsOfMAP;
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double getLog10LikelihoodOfAFzero() {
|
||||
return log10LikelihoodOfAFzero;
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double getLog10PosteriorOfAFzero() {
|
||||
return log10PosteriorOfAFzero;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
/**
|
||||
* Get the list of alleles actually used in genotyping.
|
||||
*
|
||||
* Due to computational / implementation constraints this may be smaller than
|
||||
* the actual list of alleles requested
|
||||
*
|
||||
* @return a non-empty list of alleles used during genotyping
|
||||
*/
|
||||
@Ensures({"result != null", "! result.isEmpty()"})
|
||||
public List<Allele> getAllelesUsedInGenotyping() {
|
||||
if ( allelesUsedInGenotyping == null )
|
||||
throw new IllegalStateException("allelesUsedInGenotyping requested but not yet set");
|
||||
|
||||
return allelesUsedInGenotyping;
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Protected mutational methods only for use within the calculation models themselves
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
|
||||
*
|
||||
* Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer
|
||||
*/
|
||||
protected void reset() {
|
||||
log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AlleleFrequencyCalculation.VALUE_NOT_CALCULATED;
|
||||
for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) {
|
||||
alleleCountsOfMLE[i] = 0;
|
||||
|
|
@ -106,7 +191,7 @@ public class AlleleFrequencyCalculationResult {
|
|||
allelesUsedInGenotyping = null;
|
||||
}
|
||||
|
||||
public void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
protected void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
if ( log10LofK > log10MLE ) {
|
||||
log10MLE = log10LofK;
|
||||
for ( int i = 0; i < alleleCountsForK.length; i++ )
|
||||
|
|
@ -114,7 +199,7 @@ public class AlleleFrequencyCalculationResult {
|
|||
}
|
||||
}
|
||||
|
||||
public void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
protected void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
addToPosteriorsCache(log10LofK);
|
||||
|
||||
if ( log10LofK > log10MAP ) {
|
||||
|
|
@ -136,7 +221,7 @@ public class AlleleFrequencyCalculationResult {
|
|||
}
|
||||
}
|
||||
|
||||
public void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
|
||||
protected void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
|
||||
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
|
||||
if ( log10LikelihoodOfAFzero > log10MLE ) {
|
||||
log10MLE = log10LikelihoodOfAFzero;
|
||||
|
|
@ -144,7 +229,7 @@ public class AlleleFrequencyCalculationResult {
|
|||
}
|
||||
}
|
||||
|
||||
public void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
|
||||
protected void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) {
|
||||
this.log10PosteriorOfAFzero = log10PosteriorOfAFzero;
|
||||
if ( log10PosteriorOfAFzero > log10MAP ) {
|
||||
log10MAP = log10PosteriorOfAFzero;
|
||||
|
|
@ -152,11 +237,10 @@ public class AlleleFrequencyCalculationResult {
|
|||
}
|
||||
}
|
||||
|
||||
public List<Allele> getAllelesUsedInGenotyping() {
|
||||
return allelesUsedInGenotyping;
|
||||
}
|
||||
protected void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
|
||||
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.isEmpty() )
|
||||
throw new IllegalArgumentException("allelesUsedInGenotyping cannot be null or empty");
|
||||
|
||||
public void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
|
||||
this.allelesUsedInGenotyping = allelesUsedInGenotyping;
|
||||
}
|
||||
}
|
||||
|
|
@ -41,6 +41,14 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
super(nSamples, maxAltAlleles, false, null, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Dynamically found in UnifiedGenotyperEngine
|
||||
*
|
||||
* @param UAC
|
||||
* @param N
|
||||
* @param logger
|
||||
* @param verboseWriter
|
||||
*/
|
||||
public DiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -370,7 +370,6 @@ public class UnifiedGenotyperEngine {
|
|||
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
|
||||
}
|
||||
|
||||
AFresult.reset();
|
||||
afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
|
||||
|
||||
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
||||
|
|
@ -477,7 +476,6 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
AFresult.reset();
|
||||
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
|
||||
|
|
@ -486,7 +484,6 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
AFresult.reset();
|
||||
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
|
||||
|
|
|
|||
|
|
@ -1,46 +1,85 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||
static Allele A = Allele.create("A", true);
|
||||
static Allele C = Allele.create("C");
|
||||
static Allele G = Allele.create("G");
|
||||
static Allele T = Allele.create("T");
|
||||
|
||||
static double[] AA1, AB1, BB1;
|
||||
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
|
||||
static final int numSamples = 3;
|
||||
static double[] priors = new double[2*numSamples+1]; // flat priors
|
||||
static int sampleNameCounter = 0;
|
||||
static Genotype AA1, AB1, BB1;
|
||||
static Genotype AA2, AB2, AC2, BB2, BC2, CC2;
|
||||
final double[] FLAT_3SAMPLE_PRIORS = new double[2*3+1]; // flat priors
|
||||
|
||||
@BeforeSuite
|
||||
public void before() {
|
||||
AA1 = new double[]{0.0, -20.0, -20.0};
|
||||
AB1 = new double[]{-20.0, 0.0, -20.0};
|
||||
BB1 = new double[]{-20.0, -20.0, 0.0};
|
||||
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
|
||||
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0};
|
||||
AC2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
|
||||
BB2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
|
||||
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
|
||||
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0};
|
||||
AA1 = makePL(Arrays.asList(A, A), 0, 20, 20);
|
||||
AB1 = makePL(Arrays.asList(A, C), 20, 0, 20);
|
||||
BB1 = makePL(Arrays.asList(C, C), 20, 20, 0);
|
||||
|
||||
AA2 = makePL(Arrays.asList(A, A), 0, 20, 20, 20, 20, 20);
|
||||
AB2 = makePL(Arrays.asList(A, C), 20, 0, 20, 20, 20, 20);
|
||||
BB2 = makePL(Arrays.asList(C, C), 20, 20, 0, 20, 20, 20);
|
||||
AC2 = makePL(Arrays.asList(A, G), 20, 20, 20, 0, 20, 20);
|
||||
BC2 = makePL(Arrays.asList(C, G), 20, 20, 20, 20, 0, 20);
|
||||
CC2 = makePL(Arrays.asList(G, G), 20, 20, 20, 20, 20, 0);
|
||||
}
|
||||
|
||||
private Genotype makePL(final List<Allele> expectedGT, int ... pls) {
|
||||
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
|
||||
gb.alleles(expectedGT);
|
||||
gb.PL(pls);
|
||||
return gb.make();
|
||||
}
|
||||
|
||||
private class GetGLsTest extends TestDataProvider {
|
||||
GenotypesContext GLs;
|
||||
int numAltAlleles;
|
||||
String name;
|
||||
final ExactAFCalculation calc;
|
||||
final int[] expectedACs;
|
||||
final double[] priors;
|
||||
|
||||
private GetGLsTest(String name, int numAltAlleles, Genotype... arg) {
|
||||
super(GetGLsTest.class, name);
|
||||
GLs = GenotypesContext.create(arg);
|
||||
this.name = name;
|
||||
private GetGLsTest(final ExactAFCalculation calculation, int numAltAlleles, List<Genotype> arg, final double[] priors) {
|
||||
super(GetGLsTest.class);
|
||||
GLs = GenotypesContext.create(new ArrayList<Genotype>(arg));
|
||||
this.numAltAlleles = numAltAlleles;
|
||||
this.calc = calculation;
|
||||
this.priors = priors;
|
||||
|
||||
expectedACs = new int[numAltAlleles+1];
|
||||
for ( int alleleI = 0; alleleI < expectedACs.length; alleleI++ ) {
|
||||
expectedACs[alleleI] = 0;
|
||||
final Allele allele = getAlleles().get(alleleI);
|
||||
for ( Genotype g : arg ) {
|
||||
expectedACs[alleleI] += Collections.frequency(g.getAlleles(), allele);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public AlleleFrequencyCalculationResult execute() {
|
||||
return getCalc().getLog10PNonRef(getVC(), getPriors());
|
||||
}
|
||||
|
||||
public double[] getPriors() {
|
||||
return priors;
|
||||
}
|
||||
|
||||
public ExactAFCalculation getCalc() {
|
||||
return calc;
|
||||
}
|
||||
|
||||
public VariantContext getVC() {
|
||||
|
|
@ -56,51 +95,66 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
Allele.create("T")).subList(0, numAltAlleles+1);
|
||||
}
|
||||
|
||||
public boolean isNonRef() {
|
||||
return expectedACs[0] < getVC().getNSamples() * 2;
|
||||
}
|
||||
|
||||
public int getExpectedAltAC(final int alleleI) {
|
||||
return expectedACs[alleleI+1];
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s input=%s", super.toString(), GLs);
|
||||
return String.format("%s model=%s input=%s", super.toString(), calc.getClass().getSimpleName(), GLs);
|
||||
}
|
||||
}
|
||||
|
||||
private static Genotype createGenotype(String name, double[] gls) {
|
||||
return new GenotypeBuilder(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(gls).make();
|
||||
}
|
||||
@DataProvider(name = "wellFormedGLs")
|
||||
public Object[][] createSimpleGLsData() {
|
||||
final List<Genotype> biAllelicSamples = Arrays.asList(AA1, AB1, BB1);
|
||||
final List<Genotype> triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2);
|
||||
|
||||
@DataProvider(name = "getGLs")
|
||||
public Object[][] createGLsData() {
|
||||
for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) {
|
||||
final DiploidExactAFCalculation diploidCalc = new DiploidExactAFCalculation(nSamples, 4);
|
||||
final GeneralPloidyExactAFCalculation generalCalc = new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
||||
final double[] priors = new double[2*nSamples+1]; // flat priors
|
||||
|
||||
// bi-allelic case
|
||||
new GetGLsTest("B0", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AA3", AA1));
|
||||
new GetGLsTest("B1", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AB", AB1));
|
||||
new GetGLsTest("B2", 1, createGenotype("AA1", AA1), createGenotype("BB", BB1), createGenotype("AA2", AA1));
|
||||
new GetGLsTest("B3a", 1, createGenotype("AB", AB1), createGenotype("AA", AA1), createGenotype("BB", BB1));
|
||||
new GetGLsTest("B3b", 1, createGenotype("AB1", AB1), createGenotype("AB2", AB1), createGenotype("AB3", AB1));
|
||||
new GetGLsTest("B4", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("AA", AA1));
|
||||
new GetGLsTest("B5", 1, createGenotype("BB1", BB1), createGenotype("AB", AB1), createGenotype("BB2", BB1));
|
||||
new GetGLsTest("B6", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("BB3", BB1));
|
||||
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc) ) {
|
||||
// bi-allelic
|
||||
if ( nSamples <= biAllelicSamples.size() )
|
||||
for ( List<Genotype> genotypes : Utils.makePermutations(biAllelicSamples, nSamples, true) )
|
||||
new GetGLsTest(model, 1, genotypes, priors);
|
||||
|
||||
// tri-allelic case
|
||||
new GetGLsTest("B1C0", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AB", AB2));
|
||||
new GetGLsTest("B0C1", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AC", AC2));
|
||||
new GetGLsTest("B1C1a", 2, createGenotype("AA", AA2), createGenotype("AB", AB2), createGenotype("AC", AC2));
|
||||
new GetGLsTest("B1C1b", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("BC", BC2));
|
||||
new GetGLsTest("B2C1", 2, createGenotype("AB1", AB2), createGenotype("AB2", AB2), createGenotype("AC", AC2));
|
||||
new GetGLsTest("B3C2a", 2, createGenotype("AB", AB2), createGenotype("BC1", BC2), createGenotype("BC2", BC2));
|
||||
new GetGLsTest("B3C2b", 2, createGenotype("AB", AB2), createGenotype("BB", BB2), createGenotype("CC", CC2));
|
||||
// tri-allelic
|
||||
for ( List<Genotype> genotypes : Utils.makePermutations(triAllelicSamples, nSamples, true) )
|
||||
new GetGLsTest(model, 2, genotypes, priors);
|
||||
}
|
||||
}
|
||||
|
||||
return GetGLsTest.getTests(GetGLsTest.class);
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "getGLs")
|
||||
@Test(dataProvider = "wellFormedGLs")
|
||||
public void testGLs(GetGLsTest cfg) {
|
||||
final AlleleFrequencyCalculationResult result = cfg.execute();
|
||||
|
||||
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(cfg.getVC().getNSamples(), cfg.numAltAlleles);
|
||||
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors);
|
||||
if ( cfg.isNonRef() ) {
|
||||
//logger.warn("pNonRef = " + result.getLog10PosteriorOfAFzero());
|
||||
Assert.assertTrue(result.getLog10PosteriorOfAFzero() < -1, "Genotypes imply pNonRef > 0 but we had posterior AF = 0 of " + result.getLog10PosteriorOfAFzero());
|
||||
|
||||
int nameIndex = 1;
|
||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
||||
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[allele];
|
||||
// TODO -- why does this fail?
|
||||
//Assert.assertTrue(result.getLog10PosteriorsMatrixSumWithoutAFzero() > -1, "Genotypes imply pNonRef > 0 but posterior sum over all non-AF0 fields was only " + result.getLog10PosteriorsMatrixSumWithoutAFzero());
|
||||
|
||||
// todo -- I'm not sure this is supposed to be true
|
||||
//Assert.assertEquals(Math.pow(10, result.getLog10PosteriorOfAFzero()) + Math.pow(10, result.getLog10PosteriorsMatrixSumWithoutAFzero()), 1.0, 1e-3, "Total posterior prob didn't sum to 1");
|
||||
}
|
||||
|
||||
Assert.assertNotNull(result.getAllelesUsedInGenotyping());
|
||||
Assert.assertTrue(cfg.getAlleles().containsAll(result.getAllelesUsedInGenotyping()), "Result object has alleles not in our initial allele list");
|
||||
|
||||
for ( int altAlleleI = 0; altAlleleI < cfg.numAltAlleles; altAlleleI++ ) {
|
||||
int expectedAlleleCount = cfg.getExpectedAltAC(altAlleleI);
|
||||
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[altAlleleI];
|
||||
|
||||
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
|
||||
}
|
||||
|
|
@ -108,12 +162,10 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testLargeGLs() {
|
||||
final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0);
|
||||
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(1, 1), 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS);
|
||||
|
||||
final double[] BB = new double[]{-20000000.0, -20000000.0, 0.0};
|
||||
GetGLsTest cfg = new GetGLsTest("B6", 1, createGenotype("1", BB), createGenotype("2", BB), createGenotype("3", BB));
|
||||
|
||||
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(1, 1);
|
||||
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors);
|
||||
final AlleleFrequencyCalculationResult result = cfg.execute();
|
||||
|
||||
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
|
||||
Assert.assertEquals(calculatedAlleleCount, 6);
|
||||
|
|
@ -121,13 +173,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testMismatchedGLs() {
|
||||
final Genotype AB = makePL(Arrays.asList(A,C), 2000, 0, 2000, 2000, 2000, 2000);
|
||||
final Genotype AC = makePL(Arrays.asList(A,G), 100, 100, 100, 0, 100, 100);
|
||||
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(2, 2), 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS);
|
||||
|
||||
final double[] AB = new double[]{-2000.0, 0.0, -2000.0, -2000.0, -2000.0, -2000.0};
|
||||
final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0};
|
||||
GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB));
|
||||
|
||||
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(2, 2);
|
||||
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors);
|
||||
final AlleleFrequencyCalculationResult result = cfg.execute();
|
||||
|
||||
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
|
||||
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);
|
||||
|
|
|
|||
Loading…
Reference in New Issue