The old bi-allelic implementation of the Exact model has been completely deprecated - you can only use the multi-allelic implementation now.
This commit is contained in:
parent
044f211a30
commit
7c4b9338ad
|
|
@ -39,12 +39,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
|
|
||||||
private final boolean USE_MULTI_ALLELIC_CALCULATION;
|
|
||||||
|
|
||||||
|
|
||||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
USE_MULTI_ALLELIC_CALCULATION = UAC.MULTI_ALLELIC;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getLog10PNonRef(final GenotypesContext GLs,
|
public void getLog10PNonRef(final GenotypesContext GLs,
|
||||||
|
|
@ -54,10 +51,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
final double[][] log10AlleleFrequencyPosteriors) {
|
final double[][] log10AlleleFrequencyPosteriors) {
|
||||||
final int numAlleles = alleles.size();
|
final int numAlleles = alleles.size();
|
||||||
|
|
||||||
if ( USE_MULTI_ALLELIC_CALCULATION )
|
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||||
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
|
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
|
||||||
else
|
|
||||||
linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||||
|
|
@ -77,120 +72,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Linearized, ~O(N), implementation.
|
|
||||||
//
|
|
||||||
// -------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
|
|
||||||
* for the exact model calculation
|
|
||||||
*/
|
|
||||||
private final static class ExactACCache {
|
|
||||||
double[] kMinus2, kMinus1, kMinus0;
|
|
||||||
|
|
||||||
private final static double[] create(int n) {
|
|
||||||
return new double[n];
|
|
||||||
}
|
|
||||||
|
|
||||||
public ExactACCache(int n) {
|
|
||||||
kMinus2 = create(n);
|
|
||||||
kMinus1 = create(n);
|
|
||||||
kMinus0 = create(n);
|
|
||||||
}
|
|
||||||
|
|
||||||
final public void rotate() {
|
|
||||||
double[] tmp = kMinus2;
|
|
||||||
kMinus2 = kMinus1;
|
|
||||||
kMinus1 = kMinus0;
|
|
||||||
kMinus0 = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
final public double[] getkMinus2() {
|
|
||||||
return kMinus2;
|
|
||||||
}
|
|
||||||
|
|
||||||
final public double[] getkMinus1() {
|
|
||||||
return kMinus1;
|
|
||||||
}
|
|
||||||
|
|
||||||
final public double[] getkMinus0() {
|
|
||||||
return kMinus0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public int linearExact(GenotypesContext GLs,
|
|
||||||
double[] log10AlleleFrequencyPriors,
|
|
||||||
double[][] log10AlleleFrequencyLikelihoods,
|
|
||||||
double[][] log10AlleleFrequencyPosteriors) {
|
|
||||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
|
||||||
final int numSamples = genotypeLikelihoods.size()-1;
|
|
||||||
final int numChr = 2*numSamples;
|
|
||||||
|
|
||||||
final ExactACCache logY = new ExactACCache(numSamples+1);
|
|
||||||
logY.getkMinus0()[0] = 0.0; // the zero case
|
|
||||||
|
|
||||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
|
||||||
boolean done = false;
|
|
||||||
int lastK = -1;
|
|
||||||
|
|
||||||
for (int k=0; k <= numChr && ! done; k++ ) {
|
|
||||||
final double[] kMinus0 = logY.getkMinus0();
|
|
||||||
|
|
||||||
if ( k == 0 ) { // special case for k = 0
|
|
||||||
for ( int j=1; j <= numSamples; j++ ) {
|
|
||||||
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0];
|
|
||||||
}
|
|
||||||
} else { // k > 0
|
|
||||||
final double[] kMinus1 = logY.getkMinus1();
|
|
||||||
final double[] kMinus2 = logY.getkMinus2();
|
|
||||||
|
|
||||||
for ( int j=1; j <= numSamples; j++ ) {
|
|
||||||
final double[] gl = genotypeLikelihoods.get(j);
|
|
||||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
|
||||||
|
|
||||||
double aa = Double.NEGATIVE_INFINITY;
|
|
||||||
double ab = Double.NEGATIVE_INFINITY;
|
|
||||||
if (k < 2*j-1)
|
|
||||||
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
|
|
||||||
|
|
||||||
if (k < 2*j)
|
|
||||||
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
|
|
||||||
|
|
||||||
double log10Max;
|
|
||||||
if (k > 1) {
|
|
||||||
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
|
|
||||||
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
|
||||||
} else {
|
|
||||||
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
|
||||||
log10Max = approximateLog10SumLog10(aa, ab);
|
|
||||||
}
|
|
||||||
|
|
||||||
// finally, update the L(j,k) value
|
|
||||||
kMinus0[j] = log10Max - logDenominator;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// update the posteriors vector
|
|
||||||
final double log10LofK = kMinus0[numSamples];
|
|
||||||
log10AlleleFrequencyLikelihoods[0][k] = log10LofK;
|
|
||||||
log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k];
|
|
||||||
|
|
||||||
// can we abort early?
|
|
||||||
lastK = k;
|
|
||||||
maxLog10L = Math.max(maxLog10L, log10LofK);
|
|
||||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
|
||||||
if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
|
||||||
done = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
logY.rotate();
|
|
||||||
}
|
|
||||||
|
|
||||||
return lastK;
|
|
||||||
}
|
|
||||||
|
|
||||||
final static double approximateLog10SumLog10(double[] vals) {
|
final static double approximateLog10SumLog10(double[] vals) {
|
||||||
if ( vals.length < 2 )
|
if ( vals.length < 2 )
|
||||||
throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10");
|
throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10");
|
||||||
|
|
@ -201,10 +82,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return approx;
|
return approx;
|
||||||
}
|
}
|
||||||
|
|
||||||
final static double approximateLog10SumLog10(double a, double b, double c) {
|
|
||||||
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
|
|
||||||
}
|
|
||||||
|
|
||||||
final static double approximateLog10SumLog10(double small, double big) {
|
final static double approximateLog10SumLog10(double small, double big) {
|
||||||
// make sure small is really the smaller value
|
// make sure small is really the smaller value
|
||||||
if ( small > big ) {
|
if ( small > big ) {
|
||||||
|
|
@ -579,4 +456,126 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
return coeff;
|
return coeff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Deprecated bi-allelic ~O(N) implementation. Kept here for posterity.
|
||||||
|
//
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
|
||||||
|
* for the exact model calculation
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
private final static class ExactACCache {
|
||||||
|
double[] kMinus2, kMinus1, kMinus0;
|
||||||
|
|
||||||
|
private final static double[] create(int n) {
|
||||||
|
return new double[n];
|
||||||
|
}
|
||||||
|
|
||||||
|
public ExactACCache(int n) {
|
||||||
|
kMinus2 = create(n);
|
||||||
|
kMinus1 = create(n);
|
||||||
|
kMinus0 = create(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
final public void rotate() {
|
||||||
|
double[] tmp = kMinus2;
|
||||||
|
kMinus2 = kMinus1;
|
||||||
|
kMinus1 = kMinus0;
|
||||||
|
kMinus0 = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
final public double[] getkMinus2() {
|
||||||
|
return kMinus2;
|
||||||
|
}
|
||||||
|
|
||||||
|
final public double[] getkMinus1() {
|
||||||
|
return kMinus1;
|
||||||
|
}
|
||||||
|
|
||||||
|
final public double[] getkMinus0() {
|
||||||
|
return kMinus0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int linearExact(GenotypesContext GLs,
|
||||||
|
double[] log10AlleleFrequencyPriors,
|
||||||
|
double[][] log10AlleleFrequencyLikelihoods,
|
||||||
|
double[][] log10AlleleFrequencyPosteriors) {
|
||||||
|
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||||
|
final int numSamples = genotypeLikelihoods.size()-1;
|
||||||
|
final int numChr = 2*numSamples;
|
||||||
|
|
||||||
|
final ExactACCache logY = new ExactACCache(numSamples+1);
|
||||||
|
logY.getkMinus0()[0] = 0.0; // the zero case
|
||||||
|
|
||||||
|
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
|
boolean done = false;
|
||||||
|
int lastK = -1;
|
||||||
|
|
||||||
|
for (int k=0; k <= numChr && ! done; k++ ) {
|
||||||
|
final double[] kMinus0 = logY.getkMinus0();
|
||||||
|
|
||||||
|
if ( k == 0 ) { // special case for k = 0
|
||||||
|
for ( int j=1; j <= numSamples; j++ ) {
|
||||||
|
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0];
|
||||||
|
}
|
||||||
|
} else { // k > 0
|
||||||
|
final double[] kMinus1 = logY.getkMinus1();
|
||||||
|
final double[] kMinus2 = logY.getkMinus2();
|
||||||
|
|
||||||
|
for ( int j=1; j <= numSamples; j++ ) {
|
||||||
|
final double[] gl = genotypeLikelihoods.get(j);
|
||||||
|
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||||
|
|
||||||
|
double aa = Double.NEGATIVE_INFINITY;
|
||||||
|
double ab = Double.NEGATIVE_INFINITY;
|
||||||
|
if (k < 2*j-1)
|
||||||
|
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
|
||||||
|
|
||||||
|
if (k < 2*j)
|
||||||
|
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
|
||||||
|
|
||||||
|
double log10Max;
|
||||||
|
if (k > 1) {
|
||||||
|
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
|
||||||
|
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
||||||
|
} else {
|
||||||
|
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
||||||
|
log10Max = approximateLog10SumLog10(aa, ab);
|
||||||
|
}
|
||||||
|
|
||||||
|
// finally, update the L(j,k) value
|
||||||
|
kMinus0[j] = log10Max - logDenominator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// update the posteriors vector
|
||||||
|
final double log10LofK = kMinus0[numSamples];
|
||||||
|
log10AlleleFrequencyLikelihoods[0][k] = log10LofK;
|
||||||
|
log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k];
|
||||||
|
|
||||||
|
// can we abort early?
|
||||||
|
lastK = k;
|
||||||
|
maxLog10L = Math.max(maxLog10L, log10LofK);
|
||||||
|
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||||
|
if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
||||||
|
done = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
logY.rotate();
|
||||||
|
}
|
||||||
|
|
||||||
|
return lastK;
|
||||||
|
}
|
||||||
|
|
||||||
|
final static double approximateLog10SumLog10(double a, double b, double c) {
|
||||||
|
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,9 @@ import java.util.Map;
|
||||||
|
|
||||||
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
private final static String baseCommand = "-T UnifiedGenotyper --multiallelic -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
|
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
|
||||||
private final static String baseCommandIndels = "-T UnifiedGenotyper --multiallelic -R " + b36KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
|
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
|
||||||
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper --multiallelic -R " + b37KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
|
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue