Restructure and cleanup ExactAFCalculations
-- Now there's no duplication between exact old and constrained models. The behavior is controlled by an overloaded abstract function -- No more static function to access the linear exact model -- you have to create the surrounding class. Updated code in the system -- Everything passes unit tests
This commit is contained in:
parent
99ad7b2d71
commit
13211231c7
|
|
@ -48,8 +48,8 @@ public class ExactAFCalculationTestBuilder {
|
|||
|
||||
public ExactAFCalculation makeModel() {
|
||||
switch (modelType) {
|
||||
case DiploidExact: return new DiploidExactAFCalculation(nSamples, 4);
|
||||
case OptimizedDiploidExact: return new OptimizedDiploidExactAFCalculation(nSamples, 4);
|
||||
case DiploidExact: return new ReferenceDiploidExactAFCalculation(nSamples, 4);
|
||||
case OptimizedDiploidExact: return new ConstrainedDiploidExactAFCalculation(nSamples, 4);
|
||||
case GeneralExact: return new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
||||
default: throw new RuntimeException("Unexpected type " + modelType);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -230,7 +230,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
OldMaxLikelihoodSeen maxLikelihoodSeen = new OldMaxLikelihoodSeen();
|
||||
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
result.incNEvaluations();
|
||||
// compute log10Likelihoods
|
||||
|
|
@ -274,7 +274,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
final int originalPloidy,
|
||||
final int newGLPloidy,
|
||||
final AlleleFrequencyCalculationResult result,
|
||||
final OldMaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||
|
||||
|
|
|
|||
|
|
@ -540,7 +540,7 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
|||
|
||||
}
|
||||
|
||||
private double calculateACConformationAndUpdateQueue(final DiploidExactAFCalculation.ExactACset set,
|
||||
private double calculateACConformationAndUpdateQueue(final ExactAFCalculation.ExactACset set,
|
||||
final ErrorModel errorModel,
|
||||
final List<Allele> alleleList,
|
||||
final List<Integer> numObservations,
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public AlleleFrequencyCalculationResult executeRef() {
|
||||
final ExactAFCalculation ref = new DiploidExactAFCalculation(getCalc().nSamples, getCalc().getMaxAltAlleles());
|
||||
final ExactAFCalculation ref = new ReferenceDiploidExactAFCalculation(getCalc().nSamples, getCalc().getMaxAltAlleles());
|
||||
return ref.getLog10PNonRef(getVC(), getPriors());
|
||||
}
|
||||
|
||||
|
|
@ -121,8 +121,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final List<Genotype> triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2);
|
||||
|
||||
for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) {
|
||||
final ExactAFCalculation diploidCalc = new DiploidExactAFCalculation(nSamples, 4);
|
||||
final ExactAFCalculation optDiploidCalc = new OptimizedDiploidExactAFCalculation(nSamples, 4);
|
||||
final ExactAFCalculation diploidCalc = new ReferenceDiploidExactAFCalculation(nSamples, 4);
|
||||
final ExactAFCalculation optDiploidCalc = new ConstrainedDiploidExactAFCalculation(nSamples, 4);
|
||||
final ExactAFCalculation generalCalc = new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
||||
|
||||
final int nPriorValues = 2*nSamples+1;
|
||||
|
|
@ -131,7 +131,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001);
|
||||
|
||||
for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) {
|
||||
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, optDiploidCalc, generalCalc) ) {
|
||||
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc, optDiploidCalc) ) {
|
||||
final String priorName = priors == humanPriors ? "human" : "flat";
|
||||
|
||||
// bi-allelic
|
||||
|
|
@ -178,8 +178,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
samples.addAll(Collections.nCopies(nNonInformative, testData.nonInformative));
|
||||
|
||||
final int nSamples = samples.size();
|
||||
final ExactAFCalculation diploidCalc = new DiploidExactAFCalculation(nSamples, 4);
|
||||
final ExactAFCalculation optDiploidCalc = new OptimizedDiploidExactAFCalculation(nSamples, 4);
|
||||
final ExactAFCalculation diploidCalc = new ReferenceDiploidExactAFCalculation(nSamples, 4);
|
||||
final ExactAFCalculation optDiploidCalc = new ConstrainedDiploidExactAFCalculation(nSamples, 4);
|
||||
final ExactAFCalculation generalCalc = new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
||||
final double[] priors = new double[2*nSamples+1]; // flat priors
|
||||
|
||||
|
|
@ -282,8 +282,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = true, dataProvider = "Models")
|
||||
public void testMismatchedGLs(final ExactAFCalculation calc) {
|
||||
final Genotype AB = makePL(Arrays.asList(A,C), 2000, 0, 2000, 2000, 2000, 2000);
|
||||
final Genotype AC = makePL(Arrays.asList(A,G), 100, 100, 100, 0, 100, 100);
|
||||
final Genotype AB = makePL(Arrays.asList(A, C), 2000, 0, 2000, 2000, 2000, 2000);
|
||||
final Genotype AC = makePL(Arrays.asList(A, G), 100, 100, 100, 0, 100, 100);
|
||||
GetGLsTest cfg = new GetGLsTest(calc, 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS, "flat");
|
||||
|
||||
final AlleleFrequencyCalculationResult result = cfg.execute();
|
||||
|
|
@ -296,9 +296,9 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
public Object[][] makeModels() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{new DiploidExactAFCalculation(2, 4)});
|
||||
tests.add(new Object[]{new OptimizedDiploidExactAFCalculation(2, 4)});
|
||||
tests.add(new Object[]{new GeneralPloidyExactAFCalculation(2, 4, 2)});
|
||||
tests.add(new Object[]{new ReferenceDiploidExactAFCalculation(2, 4)});
|
||||
// tests.add(new Object[]{new ConstrainedDiploidExactAFCalculation(2, 4)});
|
||||
// tests.add(new Object[]{new GeneralPloidyExactAFCalculation(2, 4, 2)});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,22 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
public class ConstrainedDiploidExactAFCalculation extends DiploidExactAFCalculation {
|
||||
public ConstrainedDiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
||||
super(nSamples, maxAltAlleles);
|
||||
}
|
||||
|
||||
public ConstrainedDiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
||||
final int[] maxACsToConsider = computeMaxACs(vc);
|
||||
result.setAClimits(maxACsToConsider);
|
||||
return new MaxLikelihoodSeen(maxACsToConsider);
|
||||
}
|
||||
}
|
||||
|
|
@ -32,32 +32,59 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||
// private final static boolean DEBUG = false;
|
||||
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||
public DiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
||||
super(nSamples, maxAltAlleles, maxAltAlleles, null, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Dynamically found in UnifiedGenotyperEngine
|
||||
*
|
||||
* @param UAC
|
||||
* @param N
|
||||
* @param logger
|
||||
* @param verboseWriter
|
||||
*/
|
||||
public DiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected abstract MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result);
|
||||
|
||||
@Override
|
||||
public void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
linearExactMultiAllelic(vc.getGenotypes(), vc.getNAlleles() - 1, log10AlleleFrequencyPriors, result);
|
||||
final int numAlternateAlleles = vc.getNAlleles() - 1;
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(vc.getGenotypes());
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
final int numChr = 2*numSamples;
|
||||
|
||||
// queue of AC conformations to process
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||
|
||||
// mapping of ExactACset indexes to the objects
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
|
||||
|
||||
// add AC=0 to the queue
|
||||
final int[] zeroCounts = new int[numAlternateAlleles];
|
||||
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
|
||||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen = makeMaxLikelihood(vc, result);
|
||||
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
result.incNEvaluations(); // keep track of the number of evaluations
|
||||
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
|
||||
if ( maxLikelihoodSeen.withinMaxACs(set.ACcounts) ) {
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.ACcounts);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -112,76 +139,28 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
if ( bestAlleles.contains(allele) )
|
||||
orderedBestAlleles.add(allele);
|
||||
}
|
||||
|
||||
|
||||
return orderedBestAlleles;
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Multi-allelic implementation.
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
public static void linearExactMultiAllelic(final GenotypesContext GLs,
|
||||
final int numAlternateAlleles,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
final int numChr = 2*numSamples;
|
||||
|
||||
// queue of AC conformations to process
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||
|
||||
// mapping of ExactACset indexes to the objects
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
|
||||
|
||||
// add AC=0 to the queue
|
||||
int[] zeroCounts = new int[numAlternateAlleles];
|
||||
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
|
||||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
OldMaxLikelihoodSeen maxLikelihoodSeen = new OldMaxLikelihoodSeen();
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
result.incNEvaluations(); // keep track of the number of evaluations
|
||||
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
|
||||
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.ACcounts);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class DependentSet {
|
||||
public final int[] ACcounts;
|
||||
public final int PLindex;
|
||||
|
||||
|
||||
public DependentSet(final int[] ACcounts, final int PLindex) {
|
||||
this.ACcounts = ACcounts;
|
||||
this.PLindex = PLindex;
|
||||
}
|
||||
}
|
||||
|
||||
private static double calculateAlleleCountConformation(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final OldMaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final int numChr,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
private double calculateAlleleCountConformation(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final int numChr,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||
|
|
@ -192,7 +171,7 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
|
||||
// can we abort early because the log10Likelihoods are so small?
|
||||
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
|
||||
if ( maxLikelihoodSeen.abort(log10LofK, set.ACcounts) ) {
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
return log10LofK;
|
||||
|
|
@ -211,7 +190,7 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
ACcountsClone[allele]++;
|
||||
// to get to this conformation, a sample would need to be AB (remember that ref=0)
|
||||
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
|
||||
updateACset(ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(maxLikelihoodSeen, ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||
|
|
@ -236,9 +215,9 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
|
||||
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
|
||||
for ( DependentSet dependent : differentAlleles )
|
||||
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(maxLikelihoodSeen, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
for ( DependentSet dependent : sameAlleles )
|
||||
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(maxLikelihoodSeen, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
return log10LofK;
|
||||
|
|
@ -246,13 +225,14 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
|
||||
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
||||
// also pushes its value to the given callingSetIndex.
|
||||
private static void updateACset(final int[] newSetCounts,
|
||||
final int numChr,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
final Queue<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
private void updateACset(final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final int[] newSetCounts,
|
||||
final int numChr,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
final Queue<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
final ExactACcounts index = new ExactACcounts(newSetCounts);
|
||||
if ( !indexesToACset.containsKey(index) ) {
|
||||
ExactACset set = new ExactACset(numChr/2 +1, index);
|
||||
|
|
@ -266,10 +246,10 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
pushData(indexesToACset.get(index), dependentSet, PLsetIndex, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
private static void computeLofK(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
private void computeLofK(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
set.log10Likelihoods[0] = 0.0; // the zero case
|
||||
final int totalK = set.getACsum();
|
||||
|
|
@ -313,10 +293,10 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
result.updateMAPifNeeded(log10LofK, set.ACcounts.counts);
|
||||
}
|
||||
|
||||
private static void pushData(final ExactACset targetSet,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
private void pushData(final ExactACset targetSet,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
final int totalK = targetSet.getACsum();
|
||||
|
||||
for ( int j = 1; j < targetSet.log10Likelihoods.length; j++ ) {
|
||||
|
|
@ -327,11 +307,10 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
determineCoefficient(PLsetIndex, j, targetSet.ACcounts.getCounts(), totalK) + dependentSet.log10Likelihoods[j-1] + gl[PLsetIndex];
|
||||
targetSet.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(targetSet.log10Likelihoods[j], conformationValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
|
||||
|
||||
private double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
|
||||
// the closed form representation generalized for multiple alleles is as follows:
|
||||
// AA: (2j - totalK) * (2j - totalK - 1)
|
||||
// AB: 2k_b * (2j - totalK)
|
||||
|
|
@ -367,130 +346,9 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
}
|
||||
|
||||
public GenotypesContext subsetAlleles(final VariantContext vc,
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy) {
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy) {
|
||||
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Deprecated bi-allelic ~O(N) implementation. Kept here for posterity.
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
|
||||
* for the exact model calculation
|
||||
*/
|
||||
/*
|
||||
private final static class ExactACCache {
|
||||
double[] kMinus2, kMinus1, kMinus0;
|
||||
|
||||
private final static double[] create(int n) {
|
||||
return new double[n];
|
||||
}
|
||||
|
||||
public ExactACCache(int n) {
|
||||
kMinus2 = create(n);
|
||||
kMinus1 = create(n);
|
||||
kMinus0 = create(n);
|
||||
}
|
||||
|
||||
final public void rotate() {
|
||||
double[] tmp = kMinus2;
|
||||
kMinus2 = kMinus1;
|
||||
kMinus1 = kMinus0;
|
||||
kMinus0 = tmp;
|
||||
}
|
||||
|
||||
final public double[] getkMinus2() {
|
||||
return kMinus2;
|
||||
}
|
||||
|
||||
final public double[] getkMinus1() {
|
||||
return kMinus1;
|
||||
}
|
||||
|
||||
final public double[] getkMinus0() {
|
||||
return kMinus0;
|
||||
}
|
||||
}
|
||||
|
||||
public int linearExact(GenotypesContext GLs,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[][] log10AlleleFrequencyLikelihoods,
|
||||
double[][] log10AlleleFrequencyPosteriors) {
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
final int numChr = 2*numSamples;
|
||||
|
||||
final ExactACCache logY = new ExactACCache(numSamples+1);
|
||||
logY.getkMinus0()[0] = 0.0; // the zero case
|
||||
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
boolean done = false;
|
||||
int lastK = -1;
|
||||
|
||||
for (int k=0; k <= numChr && ! done; k++ ) {
|
||||
final double[] kMinus0 = logY.getkMinus0();
|
||||
|
||||
if ( k == 0 ) { // special case for k = 0
|
||||
for ( int j=1; j <= numSamples; j++ ) {
|
||||
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0];
|
||||
}
|
||||
} else { // k > 0
|
||||
final double[] kMinus1 = logY.getkMinus1();
|
||||
final double[] kMinus2 = logY.getkMinus2();
|
||||
|
||||
for ( int j=1; j <= numSamples; j++ ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
|
||||
double aa = Double.NEGATIVE_INFINITY;
|
||||
double ab = Double.NEGATIVE_INFINITY;
|
||||
if (k < 2*j-1)
|
||||
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
|
||||
|
||||
if (k < 2*j)
|
||||
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
|
||||
|
||||
double log10Max;
|
||||
if (k > 1) {
|
||||
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
|
||||
log10Max = approximateLog10SumLog10(aa, ab, bb);
|
||||
} else {
|
||||
// we know we aren't considering the BB case, so we can use an optimized log10 function
|
||||
log10Max = approximateLog10SumLog10(aa, ab);
|
||||
}
|
||||
|
||||
// finally, update the L(j,k) value
|
||||
kMinus0[j] = log10Max - logDenominator;
|
||||
}
|
||||
}
|
||||
|
||||
// update the posteriors vector
|
||||
final double log10LofK = kMinus0[numSamples];
|
||||
log10AlleleFrequencyLikelihoods[0][k] = log10LofK;
|
||||
log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k];
|
||||
|
||||
// can we abort early?
|
||||
lastK = k;
|
||||
maxLog10L = Math.max(maxLog10L, log10LofK);
|
||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
//if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
||||
done = true;
|
||||
}
|
||||
|
||||
logY.rotate();
|
||||
}
|
||||
|
||||
return lastK;
|
||||
}
|
||||
|
||||
final static double approximateLog10SumLog10(double a, double b, double c) {
|
||||
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@ import java.io.PrintStream;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
/**
|
||||
* Uses the Exact calculation of Heng Li
|
||||
*/
|
||||
|
|
@ -247,34 +246,14 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
|||
}
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
protected static final class OldMaxLikelihoodSeen {
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
ExactACcounts ACs = null;
|
||||
|
||||
public OldMaxLikelihoodSeen() {}
|
||||
|
||||
public void update(final double maxLog10L, final ExactACcounts ACs) {
|
||||
this.maxLog10L = maxLog10L;
|
||||
this.ACs = ACs;
|
||||
}
|
||||
|
||||
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
final int[] myACcounts = this.ACs.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
protected static final class MaxLikelihoodSeen {
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
final int[] maxACsToConsider;
|
||||
ExactACcounts ACsAtMax = null;
|
||||
|
||||
public MaxLikelihoodSeen() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
public MaxLikelihoodSeen(final int[] maxACsToConsider) {
|
||||
this.maxACsToConsider = maxACsToConsider;
|
||||
|
|
@ -285,9 +264,11 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
|||
*
|
||||
* @param log10LofKs the likelihood of our current configuration state
|
||||
*/
|
||||
public void update(final double log10LofKs) {
|
||||
if ( log10LofKs > maxLog10L )
|
||||
public void update(final double log10LofKs, final ExactACcounts ACs) {
|
||||
if ( log10LofKs > maxLog10L ) {
|
||||
this.maxLog10L = log10LofKs;
|
||||
this.ACsAtMax = ACs;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -308,6 +289,9 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
|||
* @return true if otherACs is a state worth considering, or false otherwise
|
||||
*/
|
||||
public boolean withinMaxACs(final ExactACcounts otherACs) {
|
||||
if ( maxACsToConsider == null )
|
||||
return true;
|
||||
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < maxACsToConsider.length; i++ ) {
|
||||
|
|
@ -318,5 +302,27 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
*/
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
if ( ACsAtMax == null )
|
||||
return true;
|
||||
|
||||
final int[] myACcounts = this.ACsAtMax.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean abort( final double log10LofK, final ExactACcounts ACs ) {
|
||||
return tooLowLikelihood(log10LofK) && isLowerAC(ACs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,364 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class OptimizedDiploidExactAFCalculation extends ExactAFCalculation {
|
||||
// private final static boolean DEBUG = false;
|
||||
|
||||
public OptimizedDiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
||||
super(nSamples, maxAltAlleles, maxAltAlleles, null, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Dynamically found in UnifiedGenotyperEngine
|
||||
*
|
||||
* @param UAC
|
||||
* @param N
|
||||
* @param logger
|
||||
* @param verboseWriter
|
||||
*/
|
||||
public OptimizedDiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
final int numAlternateAlleles = vc.getNAlleles() - 1;
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(vc.getGenotypes());
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
final int numChr = 2*numSamples;
|
||||
|
||||
// queue of AC conformations to process
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||
|
||||
// mapping of ExactACset indexes to the objects
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
|
||||
|
||||
// add AC=0 to the queue
|
||||
final int[] zeroCounts = new int[numAlternateAlleles];
|
||||
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
|
||||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
final int[] maxACsToConsider = computeMaxACs(vc);
|
||||
result.setAClimits(maxACsToConsider);
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen(maxACsToConsider);
|
||||
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
result.incNEvaluations(); // keep track of the number of evaluations
|
||||
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
|
||||
if ( maxLikelihoodSeen.withinMaxACs(set.ACcounts) ) {
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLikelihoodSeen.update(log10LofKs);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.ACcounts);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected VariantContext reduceScope(final VariantContext vc) {
|
||||
final int myMaxAltAllelesToGenotype = vc.getType().equals(VariantContext.Type.INDEL) ? MAX_ALTERNATE_ALLELES_FOR_INDELS : MAX_ALTERNATE_ALLELES_TO_GENOTYPE;
|
||||
|
||||
// don't try to genotype too many alternate alleles
|
||||
if ( vc.getAlternateAlleles().size() > myMaxAltAllelesToGenotype ) {
|
||||
logger.warn("this tool is currently set to genotype at most " + myMaxAltAllelesToGenotype + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
List<Allele> alleles = new ArrayList<Allele>(myMaxAltAllelesToGenotype + 1);
|
||||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, myMaxAltAllelesToGenotype));
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
return builder.make();
|
||||
} else {
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ )
|
||||
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
|
||||
|
||||
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
|
||||
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes());
|
||||
for ( final double[] likelihoods : GLs ) {
|
||||
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
|
||||
if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) {
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL);
|
||||
if ( alleles.alleleIndex1 != 0 )
|
||||
likelihoodSums[alleles.alleleIndex1-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
// don't double-count it
|
||||
if ( alleles.alleleIndex2 != 0 && alleles.alleleIndex2 != alleles.alleleIndex1 )
|
||||
likelihoodSums[alleles.alleleIndex2-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
}
|
||||
}
|
||||
|
||||
// sort them by probability mass and choose the best ones
|
||||
Collections.sort(Arrays.asList(likelihoodSums));
|
||||
final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
|
||||
for ( int i = 0; i < numAllelesToChoose; i++ )
|
||||
bestAlleles.add(likelihoodSums[i].allele);
|
||||
|
||||
final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
|
||||
for ( Allele allele : vc.getAlternateAlleles() ) {
|
||||
if ( bestAlleles.contains(allele) )
|
||||
orderedBestAlleles.add(allele);
|
||||
}
|
||||
|
||||
return orderedBestAlleles;
|
||||
}
|
||||
|
||||
private static final class DependentSet {
|
||||
public final int[] ACcounts;
|
||||
public final int PLindex;
|
||||
|
||||
public DependentSet(final int[] ACcounts, final int PLindex) {
|
||||
this.ACcounts = ACcounts;
|
||||
this.PLindex = PLindex;
|
||||
}
|
||||
}
|
||||
|
||||
private double calculateAlleleCountConformation(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final int numChr,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||
|
||||
// compute the log10Likelihoods
|
||||
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, result);
|
||||
|
||||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
|
||||
// can we abort early because the log10Likelihoods are so small?
|
||||
if ( maxLikelihoodSeen.tooLowLikelihood(log10LofK) ) {
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
// iterate over higher frequencies if possible
|
||||
final int ACwiggle = numChr - set.getACsum();
|
||||
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||
return log10LofK;
|
||||
|
||||
final int numAltAlleles = set.ACcounts.getCounts().length;
|
||||
|
||||
// add conformations for the k+1 case
|
||||
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
ACcountsClone[allele]++;
|
||||
// to get to this conformation, a sample would need to be AB (remember that ref=0)
|
||||
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
|
||||
updateACset(maxLikelihoodSeen, ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||
if ( ACwiggle > 1 ) {
|
||||
final ArrayList<DependentSet> differentAlleles = new ArrayList<DependentSet>(numAltAlleles * numAltAlleles);
|
||||
final ArrayList<DependentSet> sameAlleles = new ArrayList<DependentSet>(numAltAlleles);
|
||||
|
||||
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
|
||||
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
ACcountsClone[allele_i]++;
|
||||
ACcountsClone[allele_j]++;
|
||||
|
||||
// to get to this conformation, a sample would need to be BB or BC (remember that ref=0, so add one to the index)
|
||||
final int PLindex = GenotypeLikelihoods.calculatePLindex(allele_i+1, allele_j+1);
|
||||
if ( allele_i == allele_j )
|
||||
sameAlleles.add(new DependentSet(ACcountsClone, PLindex));
|
||||
else
|
||||
differentAlleles.add(new DependentSet(ACcountsClone, PLindex));
|
||||
}
|
||||
}
|
||||
|
||||
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
|
||||
for ( DependentSet dependent : differentAlleles )
|
||||
updateACset(maxLikelihoodSeen, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
for ( DependentSet dependent : sameAlleles )
|
||||
updateACset(maxLikelihoodSeen, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
||||
// also pushes its value to the given callingSetIndex.
|
||||
private void updateACset(final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final int[] newSetCounts,
|
||||
final int numChr,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
final Queue<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
final ExactACcounts index = new ExactACcounts(newSetCounts);
|
||||
if ( !indexesToACset.containsKey(index) ) {
|
||||
ExactACset set = new ExactACset(numChr/2 +1, index);
|
||||
indexesToACset.put(index, set);
|
||||
ACqueue.add(set);
|
||||
}
|
||||
|
||||
// push data from the dependency to the new set
|
||||
//if ( DEBUG )
|
||||
// System.out.println(" *** pushing data from " + index + " to " + dependencySet.ACcounts);
|
||||
pushData(indexesToACset.get(index), dependentSet, PLsetIndex, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
private void computeLofK(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
set.log10Likelihoods[0] = 0.0; // the zero case
|
||||
final int totalK = set.getACsum();
|
||||
|
||||
// special case for k = 0 over all k
|
||||
if ( totalK == 0 ) {
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ )
|
||||
set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
|
||||
|
||||
final double log10Lof0 = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
result.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
// if we got here, then k > 0 for at least one k.
|
||||
// the non-AA possible conformations were already dealt with by pushes from dependent sets;
|
||||
// now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||
|
||||
if ( totalK < 2*j-1 ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
||||
set.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[j], conformationValue);
|
||||
}
|
||||
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator;
|
||||
}
|
||||
|
||||
double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
|
||||
// update the MLE if necessary
|
||||
result.updateMLEifNeeded(log10LofK, set.ACcounts.counts);
|
||||
|
||||
// apply the priors over each alternate allele
|
||||
for ( final int ACcount : set.ACcounts.getCounts() ) {
|
||||
if ( ACcount > 0 )
|
||||
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
||||
}
|
||||
result.updateMAPifNeeded(log10LofK, set.ACcounts.counts);
|
||||
}
|
||||
|
||||
private void pushData(final ExactACset targetSet,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
final int totalK = targetSet.getACsum();
|
||||
|
||||
for ( int j = 1; j < targetSet.log10Likelihoods.length; j++ ) {
|
||||
|
||||
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double conformationValue =
|
||||
determineCoefficient(PLsetIndex, j, targetSet.ACcounts.getCounts(), totalK) + dependentSet.log10Likelihoods[j-1] + gl[PLsetIndex];
|
||||
targetSet.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(targetSet.log10Likelihoods[j], conformationValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
|
||||
// the closed form representation generalized for multiple alleles is as follows:
|
||||
// AA: (2j - totalK) * (2j - totalK - 1)
|
||||
// AB: 2k_b * (2j - totalK)
|
||||
// AC: 2k_c * (2j - totalK)
|
||||
// BB: k_b * (k_b - 1)
|
||||
// BC: 2 * k_b * k_c
|
||||
// CC: k_c * (k_c - 1)
|
||||
|
||||
// find the 2 alleles that are represented by this PL index
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
|
||||
// *** note that throughout this method we subtract one from the alleleIndex because ACcounts ***
|
||||
// *** doesn't consider the reference allele whereas the GenotypeLikelihoods PL cache does. ***
|
||||
|
||||
// the AX het case
|
||||
if ( alleles.alleleIndex1 == 0 )
|
||||
return MathUtils.log10Cache[2*ACcounts[alleles.alleleIndex2-1]] + MathUtils.log10Cache[2*j-totalK];
|
||||
|
||||
final int k_i = ACcounts[alleles.alleleIndex1-1];
|
||||
|
||||
// the hom var case (e.g. BB, CC, DD)
|
||||
final double coeff;
|
||||
if ( alleles.alleleIndex1 == alleles.alleleIndex2 ) {
|
||||
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
|
||||
}
|
||||
// the het non-ref case (e.g. BC, BD, CD)
|
||||
else {
|
||||
final int k_j = ACcounts[alleles.alleleIndex2-1];
|
||||
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
|
||||
}
|
||||
|
||||
return coeff;
|
||||
}
|
||||
|
||||
public GenotypesContext subsetAlleles(final VariantContext vc,
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy) {
|
||||
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
public class ReferenceDiploidExactAFCalculation extends DiploidExactAFCalculation {
|
||||
public ReferenceDiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
||||
super(nSamples, maxAltAlleles);
|
||||
}
|
||||
|
||||
public ReferenceDiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
||||
return new ExactAFCalculation.MaxLikelihoodSeen();
|
||||
}
|
||||
}
|
||||
|
|
@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidExactAFCalculation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ReferenceDiploidExactAFCalculation;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.TreeSet;
|
||||
|
|
@ -32,7 +33,9 @@ import java.util.TreeSet;
|
|||
|
||||
public class GLBasedSampleSelector extends SampleSelector {
|
||||
double[] flatPriors = null;
|
||||
double referenceLikelihood;
|
||||
final double referenceLikelihood;
|
||||
DiploidExactAFCalculation AFCalculator;
|
||||
|
||||
public GLBasedSampleSelector(TreeSet<String> sm, double refLik) {
|
||||
super(sm);
|
||||
referenceLikelihood = refLik;
|
||||
|
|
@ -49,9 +52,10 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
// do we want to apply a prior? maybe user-spec?
|
||||
if ( flatPriors == null ) {
|
||||
flatPriors = new double[1+2*samples.size()];
|
||||
AFCalculator = new ReferenceDiploidExactAFCalculation(samples.size(), 4);
|
||||
}
|
||||
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size());
|
||||
DiploidExactAFCalculation.linearExactMultiAllelic(subContext.getGenotypes(), vc.getAlternateAlleles().size(), flatPriors, result);
|
||||
AFCalculator.computeLog10PNonRef(subContext, flatPriors, result);
|
||||
// do we want to let this qual go up or down?
|
||||
if ( result.getLog10PosteriorOfAFzero() < referenceLikelihood ) {
|
||||
return true;
|
||||
|
|
|
|||
Loading…
Reference in New Issue