Reorganize and cleanup AFCalculations
-- Now contained in a package called afcalc -- Extracted standard alone classes from private static classes in ExactAF -- Most fields are now private, with accessors -- Overall cleaner organization now
This commit is contained in:
parent
13211231c7
commit
cf3f9d6ee8
|
|
@ -26,6 +26,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -123,7 +125,7 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
|||
*
|
||||
*
|
||||
*/
|
||||
protected static class SumIterator {
|
||||
public static class SumIterator {
|
||||
private int[] currentState;
|
||||
private final int[] finalState;
|
||||
private final int restrictSumTo;
|
||||
|
|
@ -491,32 +493,32 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
|||
// If neighbors fall below maximum - threshold, we don't queue up THEIR own neighbors
|
||||
// and we repeat until queue is empty
|
||||
// queue of AC conformations to process
|
||||
final LinkedList<ExactAFCalculation.ExactACset> ACqueue = new LinkedList<ExactAFCalculation.ExactACset>();
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||
// mapping of ExactACset indexes to the objects
|
||||
final HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset> indexesToACset = new HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset>(likelihoodDim);
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(likelihoodDim);
|
||||
// add AC=0 to the queue
|
||||
final int[] zeroCounts = new int[nAlleles];
|
||||
zeroCounts[0] = numChromosomes;
|
||||
|
||||
ExactAFCalculation.ExactACset zeroSet =
|
||||
new ExactAFCalculation.ExactACset(1, new ExactAFCalculation.ExactACcounts(zeroCounts));
|
||||
ExactACset zeroSet =
|
||||
new ExactACset(1, new ExactACcounts(zeroCounts));
|
||||
|
||||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
// compute log10Likelihoods
|
||||
final ExactAFCalculation.ExactACset ACset = ACqueue.remove();
|
||||
final ExactACset ACset = ACqueue.remove();
|
||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, errorModel, alleleList, numObservations, maxLog10L, ACqueue, indexesToACset, pileup);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||
// clean up memory
|
||||
indexesToACset.remove(ACset.ACcounts);
|
||||
indexesToACset.remove(ACset.getACcounts());
|
||||
if ( VERBOSE )
|
||||
System.out.printf(" *** removing used set=%s%n", ACset.ACcounts);
|
||||
System.out.printf(" *** removing used set=%s%n", ACset.getACcounts());
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -525,13 +527,13 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
|||
int plIdx = 0;
|
||||
SumIterator iterator = new SumIterator(nAlleles, numChromosomes);
|
||||
while (iterator.hasNext()) {
|
||||
ExactAFCalculation.ExactACset ACset =
|
||||
new ExactAFCalculation.ExactACset(1, new ExactAFCalculation.ExactACcounts(iterator.getCurrentVector()));
|
||||
ExactACset ACset =
|
||||
new ExactACset(1, new ExactACcounts(iterator.getCurrentVector()));
|
||||
// for observed base X, add Q(jX,k) to likelihood vector for all k in error model
|
||||
//likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k))
|
||||
getLikelihoodOfConformation(ACset, errorModel, alleleList, numObservations, pileup);
|
||||
|
||||
setLogPLs(plIdx++, ACset.log10Likelihoods[0]);
|
||||
setLogPLs(plIdx++, ACset.getLog10Likelihoods()[0]);
|
||||
iterator.next();
|
||||
}
|
||||
}
|
||||
|
|
@ -540,40 +542,40 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
|||
|
||||
}
|
||||
|
||||
private double calculateACConformationAndUpdateQueue(final ExactAFCalculation.ExactACset set,
|
||||
private double calculateACConformationAndUpdateQueue(final ExactACset set,
|
||||
final ErrorModel errorModel,
|
||||
final List<Allele> alleleList,
|
||||
final List<Integer> numObservations,
|
||||
final double maxLog10L,
|
||||
final LinkedList<ExactAFCalculation.ExactACset> ACqueue,
|
||||
final HashMap<ExactAFCalculation.ExactACcounts,
|
||||
ExactAFCalculation.ExactACset> indexesToACset,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts,
|
||||
ExactACset> indexesToACset,
|
||||
final ReadBackedPileup pileup) {
|
||||
// compute likelihood of set
|
||||
getLikelihoodOfConformation(set, errorModel, alleleList, numObservations, pileup);
|
||||
final double log10LofK = set.log10Likelihoods[0];
|
||||
final double log10LofK = set.getLog10Likelihoods()[0];
|
||||
|
||||
// log result in PL vector
|
||||
int idx = getLinearIndex(set.ACcounts.getCounts(), nAlleles, numChromosomes);
|
||||
int idx = getLinearIndex(set.getACcounts().getCounts(), nAlleles, numChromosomes);
|
||||
setLogPLs(idx, log10LofK);
|
||||
|
||||
// can we abort early because the log10Likelihoods are so small?
|
||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
if ( VERBOSE )
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.getACcounts(), log10LofK, maxLog10L);
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
// iterate over higher frequencies if possible
|
||||
// by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.
|
||||
final int ACwiggle = numChromosomes - set.getACsum() + set.ACcounts.counts[0];
|
||||
final int ACwiggle = numChromosomes - set.getACsum() + set.getACcounts().getCounts()[0];
|
||||
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||
return log10LofK;
|
||||
|
||||
|
||||
// add conformations for other cases
|
||||
for ( int allele = 1; allele < nAlleles; allele++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
final int[] ACcountsClone = set.getACcounts().getCounts().clone();
|
||||
ACcountsClone[allele]++;
|
||||
// is this a valid conformation?
|
||||
int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0];
|
||||
|
|
@ -597,7 +599,7 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
|||
* @param numObservations Number of observations for each allele
|
||||
* @param pileup Read backed pileup in case it's necessary
|
||||
*/
|
||||
public abstract void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
||||
public abstract void getLikelihoodOfConformation(final ExactACset ACset,
|
||||
final ErrorModel errorModel,
|
||||
final List<Allele> alleleList,
|
||||
final List<Integer> numObservations,
|
||||
|
|
@ -608,12 +610,12 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
|||
|
||||
// Static methods
|
||||
public static void updateACset(final int[] newSetCounts,
|
||||
final LinkedList<ExactAFCalculation.ExactACset> ACqueue,
|
||||
final HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset> indexesToACset) {
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||
|
||||
final ExactAFCalculation.ExactACcounts index = new ExactAFCalculation.ExactACcounts(newSetCounts);
|
||||
final ExactACcounts index = new ExactACcounts(newSetCounts);
|
||||
if ( !indexesToACset.containsKey(index) ) {
|
||||
ExactAFCalculation.ExactACset newSet = new ExactAFCalculation.ExactACset(1, index);
|
||||
ExactACset newSet = new ExactACset(1, index);
|
||||
indexesToACset.put(index, newSet);
|
||||
ACqueue.add(newSet);
|
||||
if (VERBOSE)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -188,12 +189,12 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
* @param alleleList List of alleles
|
||||
* @param numObservations Number of observations for each allele in alleleList
|
||||
*/
|
||||
public void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
||||
public void getLikelihoodOfConformation(final ExactACset ACset,
|
||||
final ErrorModel errorModel,
|
||||
final List<Allele> alleleList,
|
||||
final List<Integer> numObservations,
|
||||
final ReadBackedPileup pileup) {
|
||||
final int[] currentCnt = Arrays.copyOf(ACset.ACcounts.counts, alleleList.size());
|
||||
final int[] currentCnt = Arrays.copyOf(ACset.getACcounts().getCounts(), alleleList.size());
|
||||
double p1 = 0.0;
|
||||
|
||||
if (!hasReferenceSampleData) {
|
||||
|
|
@ -218,6 +219,6 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
}
|
||||
p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ, maxQ), acVec);
|
||||
}
|
||||
ACset.log10Likelihoods[0] = p1;
|
||||
ACset.getLog10Likelihoods()[0] = p1;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
|
@ -221,12 +222,12 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
|||
* @param alleleList List of alleles
|
||||
* @param numObservations Number of observations for each allele in alleleList
|
||||
*/
|
||||
public void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
||||
public void getLikelihoodOfConformation(final ExactACset ACset,
|
||||
final ErrorModel errorModel,
|
||||
final List<Allele> alleleList,
|
||||
final List<Integer> numObservations,
|
||||
final ReadBackedPileup pileup) {
|
||||
final int[] currentCnt = Arrays.copyOf(ACset.ACcounts.counts, BaseUtils.BASES.length);
|
||||
final int[] currentCnt = Arrays.copyOf(ACset.getACcounts().getCounts(), BaseUtils.BASES.length);
|
||||
final int[] ac = new int[BaseUtils.BASES.length];
|
||||
|
||||
for (int k=0; k < BaseUtils.BASES.length; k++ )
|
||||
|
|
@ -241,9 +242,9 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
|||
final byte qual = qualToUse(elt, true, true, mbq);
|
||||
if ( qual == 0 )
|
||||
continue;
|
||||
final double acc[] = new double[ACset.ACcounts.counts.length];
|
||||
final double acc[] = new double[ACset.getACcounts().getCounts().length];
|
||||
for (int k=0; k < acc.length; k++ )
|
||||
acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +MathUtils.log10Cache[ACset.ACcounts.counts[k]]
|
||||
acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +MathUtils.log10Cache[ACset.getACcounts().getCounts()[k]]
|
||||
- LOG10_PLOIDY;
|
||||
p1 += MathUtils.log10sumLog10(acc);
|
||||
}
|
||||
|
|
@ -267,7 +268,7 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
|||
|
||||
p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ), acVec);
|
||||
}
|
||||
ACset.log10Likelihoods[0] = p1;
|
||||
ACset.getLog10Likelihoods()[0] = p1;
|
||||
/* System.out.println(Arrays.toString(ACset.ACcounts.getCounts())+" "+String.valueOf(p1));
|
||||
System.out.println(Arrays.toString(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ)));
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.apache.log4j.ConsoleAppender;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -175,8 +175,8 @@ public class ExactAFCalculationPerformanceTest {
|
|||
final boolean USE_GENERAL = false;
|
||||
final List<ExactAFCalculationTestBuilder.ModelType> modelTypes = USE_GENERAL
|
||||
? Arrays.asList(ExactAFCalculationTestBuilder.ModelType.values())
|
||||
: Arrays.asList(ExactAFCalculationTestBuilder.ModelType.OptimizedDiploidExact);
|
||||
// : Arrays.asList(ExactAFCalculationTestBuilder.ModelType.DiploidExact, ExactAFCalculationTestBuilder.ModelType.OptimizedDiploidExact);
|
||||
: Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact);
|
||||
// : Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ReferenceDiploidExact, ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact);
|
||||
|
||||
final boolean ONLY_HUMAN_PRIORS = false;
|
||||
final List<ExactAFCalculationTestBuilder.PriorType> priorTypes = ONLY_HUMAN_PRIORS
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -32,8 +33,8 @@ public class ExactAFCalculationTestBuilder {
|
|||
}
|
||||
|
||||
public enum ModelType {
|
||||
DiploidExact,
|
||||
OptimizedDiploidExact,
|
||||
ReferenceDiploidExact,
|
||||
ConstrainedDiploidExact,
|
||||
GeneralExact
|
||||
}
|
||||
|
||||
|
|
@ -48,8 +49,8 @@ public class ExactAFCalculationTestBuilder {
|
|||
|
||||
public ExactAFCalculation makeModel() {
|
||||
switch (modelType) {
|
||||
case DiploidExact: return new ReferenceDiploidExactAFCalculation(nSamples, 4);
|
||||
case OptimizedDiploidExact: return new ConstrainedDiploidExactAFCalculation(nSamples, 4);
|
||||
case ReferenceDiploidExact: return new ReferenceDiploidExactAFCalculation(nSamples, 4);
|
||||
case ConstrainedDiploidExact: return new ConstrainedDiploidExactAFCalculation(nSamples, 4);
|
||||
case GeneralExact: return new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
||||
default: throw new RuntimeException("Unexpected type " + modelType);
|
||||
}
|
||||
|
|
@ -63,7 +64,7 @@ public class ExactAFCalculationTestBuilder {
|
|||
return MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||
case human:
|
||||
final double[] humanPriors = new double[nPriorValues];
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001);
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001);
|
||||
return humanPriors;
|
||||
default:
|
||||
throw new RuntimeException("Unexpected type " + priorType);
|
||||
|
|
@ -23,9 +23,12 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ProbabilityVector;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -100,8 +103,8 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
|
||||
public void add(ExactACset set) {
|
||||
alleleCountSetList.add(set);
|
||||
conformationMap.put(set.ACcounts, set);
|
||||
final double likelihood = set.log10Likelihoods[0];
|
||||
conformationMap.put(set.getACcounts(), set);
|
||||
final double likelihood = set.getLog10Likelihoods()[0];
|
||||
|
||||
if (likelihood > maxLikelihood )
|
||||
maxLikelihood = likelihood;
|
||||
|
|
@ -114,11 +117,11 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
}
|
||||
|
||||
public double getLikelihoodOfConformation(int[] ac) {
|
||||
return conformationMap.get(new ExactACcounts(ac)).log10Likelihoods[0];
|
||||
return conformationMap.get(new ExactACcounts(ac)).getLog10Likelihoods()[0];
|
||||
}
|
||||
|
||||
public double getGLOfACZero() {
|
||||
return alleleCountSetList.get(0).log10Likelihoods[0]; // AC 0 is always at beginning of list
|
||||
return alleleCountSetList.get(0).getLog10Likelihoods()[0]; // AC 0 is always at beginning of list
|
||||
}
|
||||
|
||||
public int getLength() {
|
||||
|
|
@ -196,7 +199,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
// first element: zero ploidy, e.g. trivial degenerate distribution
|
||||
final int[] zeroCounts = new int[numAlleles];
|
||||
final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts));
|
||||
set.log10Likelihoods[0] = 0.0;
|
||||
set.getLog10Likelihoods()[0] = 0.0;
|
||||
|
||||
combinedPoolLikelihoods.add(set);
|
||||
for (int p=1; p<genotypeLikelihoods.size(); p++) {
|
||||
|
|
@ -227,24 +230,24 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
ExactACset zeroSet = new ExactACset(1, new ExactACcounts(zeroCounts));
|
||||
|
||||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
|
||||
StateTracker stateTracker = new StateTracker();
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
result.incNEvaluations();
|
||||
// compute log10Likelihoods
|
||||
final ExactACset ACset = ACqueue.remove();
|
||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset);
|
||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, stateTracker, ACqueue, indexesToACset);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
|
||||
maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts);
|
||||
if ( log10LofKs > stateTracker.getMaxLog10L())
|
||||
stateTracker.update(log10LofKs, ACset.getACcounts());
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(ACset.ACcounts);
|
||||
indexesToACset.remove(ACset.getACcounts());
|
||||
if ( VERBOSE )
|
||||
System.out.printf(" *** removing used set=%s%n", ACset.ACcounts);
|
||||
System.out.printf(" *** removing used set=%s%n", ACset.getACcounts());
|
||||
|
||||
}
|
||||
return newPool;
|
||||
|
|
@ -261,7 +264,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
* @param originalPloidy Total ploidy of original combined pool
|
||||
* @param newGLPloidy Ploidy of GL vector
|
||||
* @param result AFResult object
|
||||
* @param maxLikelihoodSeen max likelihood observed so far
|
||||
* @param stateTracker max likelihood observed so far
|
||||
* @param ACqueue Queue of conformations to compute
|
||||
* @param indexesToACset AC indices of objects in queue
|
||||
* @return max log likelihood
|
||||
|
|
@ -274,12 +277,12 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
final int originalPloidy,
|
||||
final int newGLPloidy,
|
||||
final AlleleFrequencyCalculationResult result,
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final StateTracker stateTracker,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||
|
||||
// compute likeihood in "set" of new set based on original likelihoods
|
||||
final int numAlleles = set.ACcounts.counts.length;
|
||||
final int numAlleles = set.getACcounts().getCounts().length;
|
||||
final int newPloidy = set.getACsum();
|
||||
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, result);
|
||||
|
||||
|
|
@ -289,24 +292,24 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
newPool.add(set);
|
||||
|
||||
// TODO -- uncomment this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
|
||||
//if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
|
||||
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
//if ( log10LofK < stateTracker.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && stateTracker.isLowerAC(set.ACcounts) ) {
|
||||
if ( log10LofK < stateTracker.getMaxLog10L() - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
if ( VERBOSE )
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L);
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.getACcounts(), log10LofK, stateTracker.getMaxLog10L());
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
// iterate over higher frequencies if possible
|
||||
// by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.
|
||||
// so, if first element is zero, it automatically means we have no wiggle since we're in a corner of the conformation space
|
||||
final int ACwiggle = set.ACcounts.counts[0];
|
||||
final int ACwiggle = set.getACcounts().getCounts()[0];
|
||||
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||
return log10LofK;
|
||||
|
||||
|
||||
// add conformations for other cases
|
||||
for ( int allele = 1; allele < numAlleles; allele++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
final int[] ACcountsClone = set.getACcounts().getCounts().clone();
|
||||
ACcountsClone[allele]++;
|
||||
// is this a valid conformation?
|
||||
int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0];
|
||||
|
|
@ -411,14 +414,14 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
if (newPloidy != totalAltK)
|
||||
throw new ReviewedStingException("BUG: inconsistent sizes of set.getACsum and passed ploidy values");
|
||||
|
||||
totalAltK -= set.ACcounts.counts[0];
|
||||
totalAltK -= set.getACcounts().getCounts()[0];
|
||||
// totalAltK has sum of alt alleles of conformation now
|
||||
|
||||
|
||||
// special case for k = 0 over all k
|
||||
if ( totalAltK == 0 ) { // all-ref case
|
||||
final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
|
||||
set.log10Likelihoods[0] = log10Lof0;
|
||||
set.getLog10Likelihoods()[0] = log10Lof0;
|
||||
|
||||
result.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
|
|
@ -430,12 +433,12 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
// ExactACset holds by convention the conformation of all alleles, and the sum of all allele count is just the ploidy.
|
||||
// To compute n!/k1!k2!k3!... we need to compute first n!/(k2!k3!...) and then further divide by k1! where k1=ploidy-sum_k_i
|
||||
|
||||
int[] currentCount = set.ACcounts.getCounts();
|
||||
int[] currentCount = set.getACcounts().getCounts();
|
||||
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
|
||||
|
||||
// for current conformation, get all possible ways to break vector K into two components G1 and G2
|
||||
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
|
||||
set.getLog10Likelihoods()[0] = Double.NEGATIVE_INFINITY;
|
||||
while (innerIterator.hasNext()) {
|
||||
// check if breaking current conformation into g1 and g2 is feasible.
|
||||
final int[] acCount2 = innerIterator.getCurrentVector();
|
||||
|
|
@ -451,19 +454,19 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
final double num2 = MathUtils.log10MultinomialCoefficient(ploidy2, acCount2);
|
||||
final double sum = firstGL + gl2 + num1 + num2;
|
||||
|
||||
set.log10Likelihoods[0] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[0], sum);
|
||||
set.getLog10Likelihoods()[0] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[0], sum);
|
||||
}
|
||||
}
|
||||
innerIterator.next();
|
||||
}
|
||||
|
||||
set.log10Likelihoods[0] += denom;
|
||||
set.getLog10Likelihoods()[0] += denom;
|
||||
}
|
||||
|
||||
double log10LofK = set.log10Likelihoods[0];
|
||||
double log10LofK = set.getLog10Likelihoods()[0];
|
||||
|
||||
// update the MLE if necessary
|
||||
final int altCounts[] = Arrays.copyOfRange(set.ACcounts.counts,1, set.ACcounts.counts.length);
|
||||
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
|
||||
result.updateMLEifNeeded(log10LofK, altCounts);
|
||||
|
||||
// apply the priors over each alternate allele
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
|
@ -128,7 +129,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final int nPriorValues = 2*nSamples+1;
|
||||
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||
final double[] humanPriors = new double[nPriorValues];
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001);
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001);
|
||||
|
||||
for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) {
|
||||
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc, optDiploidCalc) ) {
|
||||
|
|
@ -375,7 +376,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final int nSamples = 10;
|
||||
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact;
|
||||
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact;
|
||||
|
||||
for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) {
|
||||
final int nChrom = (nSamples - nNonInformative) * 2;
|
||||
|
|
@ -400,7 +401,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
ExactAFCalculationTestBuilder.PriorType.human);
|
||||
|
||||
final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
|
||||
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
|
||||
final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalculation)testBuilder.makeModel()).computeMaxACs(vc);
|
||||
|
||||
testExpectedACs(vc, maxACsToVisit);
|
||||
}
|
||||
|
|
@ -461,11 +462,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) {
|
||||
final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make();
|
||||
|
||||
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact;
|
||||
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact;
|
||||
final ExactAFCalculationTestBuilder testBuilder
|
||||
= new ExactAFCalculationTestBuilder(1, vc.getNAlleles()-1, modelType,
|
||||
ExactAFCalculationTestBuilder.PriorType.human);
|
||||
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
|
||||
final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalculation)testBuilder.makeModel()).computeMaxACs(vc);
|
||||
testExpectedACs(vc, maxACsToVisit);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
public class ConstrainedDiploidExactAFCalculation extends DiploidExactAFCalculation {
|
||||
public ConstrainedDiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
||||
super(nSamples, maxAltAlleles);
|
||||
}
|
||||
|
||||
public ConstrainedDiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
||||
final int[] maxACsToConsider = computeMaxACs(vc);
|
||||
result.setAClimits(maxACsToConsider);
|
||||
return new MaxLikelihoodSeen(maxACsToConsider);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,328 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Uses the Exact calculation of Heng Li
|
||||
*/
|
||||
abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
protected ExactAFCalculation(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) {
|
||||
super(UAC, nSamples, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected ExactAFCalculation(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, File exactCallsLog, Logger logger, PrintStream verboseWriter) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, exactCallsLog, logger, verboseWriter);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper class that compares two likelihoods associated with two alleles
|
||||
*/
|
||||
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
||||
public double sum = 0.0;
|
||||
public Allele allele;
|
||||
|
||||
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
||||
|
||||
public int compareTo(LikelihoodSum other) {
|
||||
final double diff = sum - other.sum;
|
||||
return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpack GenotypesContext into arraylist of doubel values
|
||||
* @param GLs Input genotype context
|
||||
* @return ArrayList of doubles corresponding to GL vectors
|
||||
*/
|
||||
protected static ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
|
||||
|
||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
}
|
||||
}
|
||||
|
||||
return genotypeLikelihoods;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the maximum ACs we need to consider for each alt allele
|
||||
*
|
||||
* Walks over the genotypes in VC, and computes for each alt allele the maximum
|
||||
* AC we need to consider in that alt allele dimension. Does the calculation
|
||||
* based on the PLs in each genotype g, choosing to update the max AC for the
|
||||
* alt alleles corresponding to that PL. Only takes the first lowest PL,
|
||||
* if there are multiple genotype configurations with the same PL value. It
|
||||
* takes values in the order of the alt alleles.
|
||||
*
|
||||
* @param vc the variant context we will compute max alt alleles for
|
||||
* @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the
|
||||
* first alt allele.
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
protected int[] computeMaxACs(final VariantContext vc) {
|
||||
final int[] maxACs = new int[vc.getNAlleles()-1];
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
updateMaxACs(g, maxACs);
|
||||
|
||||
return maxACs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the maximum achievable allele counts in maxAC according to the PLs in g
|
||||
*
|
||||
* Selects the maximum genotype configuration from the PLs in g, and updates
|
||||
* the maxAC for this configure. For example, if the lowest PL is for 0/1, updates
|
||||
* the maxAC for the alt allele 1 by 1. If it's 1/1, update is 2. Works for
|
||||
* many number of alt alleles (determined by length of maxACs).
|
||||
*
|
||||
* If the max PL occurs at 0/0, updates nothing
|
||||
* Note that this function greedily takes the first min PL, so that if 0/1 and 1/1 have
|
||||
* the same PL value, then updates the first one.
|
||||
*
|
||||
* Also, only will update 1 alt allele, so if 0/1 and 0/2 both have the same PL,
|
||||
* then only first one (1) will be updated
|
||||
*
|
||||
* @param g the genotype to update
|
||||
* @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele)
|
||||
*/
|
||||
@Requires({
|
||||
"g != null",
|
||||
"maxACs != null",
|
||||
"MathUtils.sum(maxACs) >= 0"})
|
||||
private void updateMaxACs(final Genotype g, final int[] maxACs) {
|
||||
final int[] PLs = g.getLikelihoods().getAsPLs();
|
||||
|
||||
int minPLi = 0;
|
||||
int minPL = PLs[0];
|
||||
|
||||
for ( int i = 0; i < PLs.length; i++ ) {
|
||||
if ( PLs[i] < minPL ) {
|
||||
minPL = PLs[i];
|
||||
minPLi = i;
|
||||
}
|
||||
}
|
||||
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(minPLi);
|
||||
updateMaxACs(maxACs, pair.alleleIndex1);
|
||||
updateMaxACs(maxACs, pair.alleleIndex2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple helper. Update max alt alleles maxACs according to the allele index (where 0 == ref)
|
||||
*
|
||||
* If alleleI == 0 => doesn't update anything
|
||||
* else maxACs[alleleI - 1]++
|
||||
*
|
||||
* @param maxACs array of max alt allele ACs
|
||||
* @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles.
|
||||
*/
|
||||
@Requires({
|
||||
"alleleI >= 0",
|
||||
"(alleleI - 1) < maxACs.length",
|
||||
"MathUtils.sum(maxACs) >= 0"})
|
||||
private void updateMaxACs(final int[] maxACs, final int alleleI) {
|
||||
if ( alleleI > 0 )
|
||||
maxACs[alleleI-1]++;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// protected classes used to store exact model matrix columns
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
|
||||
// a wrapper around the int array so that we can make it hashable
|
||||
protected static final class ExactACcounts {
|
||||
|
||||
protected final int[] counts;
|
||||
private int hashcode = -1;
|
||||
|
||||
public ExactACcounts(final int[] counts) {
|
||||
this.counts = counts;
|
||||
}
|
||||
|
||||
public int[] getCounts() {
|
||||
return counts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if ( hashcode == -1 )
|
||||
hashcode = Arrays.hashCode(counts);
|
||||
return hashcode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(counts[0]);
|
||||
for ( int i = 1; i < counts.length; i++ ) {
|
||||
sb.append("/");
|
||||
sb.append(counts[i]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// This class represents a column in the Exact AC calculation matrix
|
||||
protected static final class ExactACset {
|
||||
|
||||
// the counts of the various alternate alleles which this column represents
|
||||
final ExactACcounts ACcounts;
|
||||
|
||||
// the column of the matrix
|
||||
final double[] log10Likelihoods;
|
||||
|
||||
int sum = -1;
|
||||
|
||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||
this.ACcounts = ACcounts;
|
||||
log10Likelihoods = new double[size];
|
||||
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
// sum of all the non-reference alleles
|
||||
public int getACsum() {
|
||||
if ( sum == -1 ) {
|
||||
sum = 0;
|
||||
for ( int count : ACcounts.getCounts() )
|
||||
sum += count;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
protected static final class MaxLikelihoodSeen {
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
final int[] maxACsToConsider;
|
||||
ExactACcounts ACsAtMax = null;
|
||||
|
||||
public MaxLikelihoodSeen() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
public MaxLikelihoodSeen(final int[] maxACsToConsider) {
|
||||
this.maxACsToConsider = maxACsToConsider;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the maximum log10L seen, if log10LofKs is higher
|
||||
*
|
||||
* @param log10LofKs the likelihood of our current configuration state
|
||||
*/
|
||||
public void update(final double log10LofKs, final ExactACcounts ACs) {
|
||||
if ( log10LofKs > maxLog10L ) {
|
||||
this.maxLog10L = log10LofKs;
|
||||
this.ACsAtMax = ACs;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the likelihood of configuration K too low to consider, related to the
|
||||
* maximum likelihood seen already?
|
||||
*
|
||||
* @param log10LofK the log10 likelihood of the configuration we're considering analyzing
|
||||
* @return true if the configuration cannot meaningfully contribute to our likelihood sum
|
||||
*/
|
||||
public boolean tooLowLikelihood(final double log10LofK) {
|
||||
return log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are all ACs in otherACs less than or equal to their corresponding ACs in the maxACsToConsider?
|
||||
*
|
||||
* @param otherACs the set of otherACs that we want to know if we should consider analyzing
|
||||
* @return true if otherACs is a state worth considering, or false otherwise
|
||||
*/
|
||||
public boolean withinMaxACs(final ExactACcounts otherACs) {
|
||||
if ( maxACsToConsider == null )
|
||||
return true;
|
||||
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < maxACsToConsider.length; i++ ) {
|
||||
// consider one more than the max AC to collect a bit more likelihood mass
|
||||
if ( otherACcounts[i] > maxACsToConsider[i] + 1 )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
*/
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
if ( ACsAtMax == null )
|
||||
return true;
|
||||
|
||||
final int[] myACcounts = this.ACsAtMax.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean abort( final double log10LofK, final ExactACcounts ACs ) {
|
||||
return tooLowLikelihood(log10LofK) && isLowerAC(ACs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AlleleFrequencyCalculation;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
|
|
@ -156,7 +157,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
|
||||
*/
|
||||
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
|
||||
int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||
public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AlleleFrequencyCalculation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AlleleFrequencyCalculationResult;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
|
|
@ -104,8 +106,6 @@ public class UnifiedGenotyperEngine {
|
|||
private final GenomeLocParser genomeLocParser;
|
||||
private final boolean BAQEnabledOnCMDLine;
|
||||
|
||||
protected static final double SUM_GL_THRESH_NOCALL = VariantContextUtils.SUM_GL_THRESH_NOCALL;
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Public interface functions
|
||||
|
|
@ -689,7 +689,7 @@ public class UnifiedGenotyperEngine {
|
|||
return models;
|
||||
}
|
||||
|
||||
protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
|
||||
public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
|
||||
|
||||
double sum = 0.0;
|
||||
|
||||
|
|
|
|||
|
|
@ -23,11 +23,12 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -54,7 +55,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
/** The default model with the best performance in all cases */
|
||||
EXACT("ExactAFCalculation");
|
||||
|
||||
final String implementationName;
|
||||
public final String implementationName;
|
||||
|
||||
private Model(String implementationName) {
|
||||
this.implementationName = implementationName;
|
||||
|
|
@ -101,7 +102,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
* Allocates a new results object. Useful for testing but slow in practice.
|
||||
*/
|
||||
public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(getMaxAltAlleles()));
|
||||
}
|
||||
|
||||
|
|
@ -165,9 +166,9 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
* @param result (pre-allocated) object to store results
|
||||
*/
|
||||
// TODO -- add consistent requires among args
|
||||
protected abstract void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result);
|
||||
public abstract void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result);
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
|
|
@ -178,10 +179,10 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
* @param ploidy
|
||||
* @return GenotypesContext object
|
||||
*/
|
||||
protected abstract GenotypesContext subsetAlleles(final VariantContext vc,
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy);
|
||||
public abstract GenotypesContext subsetAlleles(final VariantContext vc,
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
public class ConstrainedDiploidExactAFCalculation extends DiploidExactAFCalculation {
|
||||
public ConstrainedDiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
||||
super(nSamples, maxAltAlleles);
|
||||
}
|
||||
|
||||
public ConstrainedDiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
||||
final int[] maxACsToConsider = computeMaxACs(vc);
|
||||
result.setAClimits(maxACsToConsider);
|
||||
return new StateTracker(maxACsToConsider);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the maximum ACs we need to consider for each alt allele
|
||||
*
|
||||
* Walks over the genotypes in VC, and computes for each alt allele the maximum
|
||||
* AC we need to consider in that alt allele dimension. Does the calculation
|
||||
* based on the PLs in each genotype g, choosing to update the max AC for the
|
||||
* alt alleles corresponding to that PL. Only takes the first lowest PL,
|
||||
* if there are multiple genotype configurations with the same PL value. It
|
||||
* takes values in the order of the alt alleles.
|
||||
*
|
||||
* @param vc the variant context we will compute max alt alleles for
|
||||
* @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the
|
||||
* first alt allele.
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
protected final int[] computeMaxACs(final VariantContext vc) {
|
||||
final int[] maxACs = new int[vc.getNAlleles()-1];
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
updateMaxACs(g, maxACs);
|
||||
|
||||
return maxACs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the maximum achievable allele counts in maxAC according to the PLs in g
|
||||
*
|
||||
* Selects the maximum genotype configuration from the PLs in g, and updates
|
||||
* the maxAC for this configure. For example, if the lowest PL is for 0/1, updates
|
||||
* the maxAC for the alt allele 1 by 1. If it's 1/1, update is 2. Works for
|
||||
* many number of alt alleles (determined by length of maxACs).
|
||||
*
|
||||
* If the max PL occurs at 0/0, updates nothing
|
||||
* Note that this function greedily takes the first min PL, so that if 0/1 and 1/1 have
|
||||
* the same PL value, then updates the first one.
|
||||
*
|
||||
* Also, only will update 1 alt allele, so if 0/1 and 0/2 both have the same PL,
|
||||
* then only first one (1) will be updated
|
||||
*
|
||||
* @param g the genotype to update
|
||||
* @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele)
|
||||
*/
|
||||
@Requires({
|
||||
"g != null",
|
||||
"maxACs != null",
|
||||
"MathUtils.sum(maxACs) >= 0"})
|
||||
private void updateMaxACs(final Genotype g, final int[] maxACs) {
|
||||
final int[] PLs = g.getLikelihoods().getAsPLs();
|
||||
|
||||
int minPLi = 0;
|
||||
int minPL = PLs[0];
|
||||
|
||||
for ( int i = 0; i < PLs.length; i++ ) {
|
||||
if ( PLs[i] < minPL ) {
|
||||
minPL = PLs[i];
|
||||
minPLi = i;
|
||||
}
|
||||
}
|
||||
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(minPLi);
|
||||
updateMaxACs(maxACs, pair.alleleIndex1);
|
||||
updateMaxACs(maxACs, pair.alleleIndex2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple helper. Update max alt alleles maxACs according to the allele index (where 0 == ref)
|
||||
*
|
||||
* If alleleI == 0 => doesn't update anything
|
||||
* else maxACs[alleleI - 1]++
|
||||
*
|
||||
* @param maxACs array of max alt allele ACs
|
||||
* @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles.
|
||||
*/
|
||||
@Requires({
|
||||
"alleleI >= 0",
|
||||
"(alleleI - 1) < maxACs.length",
|
||||
"MathUtils.sum(maxACs) >= 0"})
|
||||
private void updateMaxACs(final int[] maxACs, final int alleleI) {
|
||||
if ( alleleI > 0 )
|
||||
maxACs[alleleI-1]++;
|
||||
}
|
||||
}
|
||||
|
|
@ -23,9 +23,10 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
|
|
@ -41,7 +42,7 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected abstract MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result);
|
||||
protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result);
|
||||
|
||||
@Override
|
||||
public void computeLog10PNonRef(final VariantContext vc,
|
||||
|
|
@ -62,10 +63,10 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
final int[] zeroCounts = new int[numAlternateAlleles];
|
||||
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
|
||||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen = makeMaxLikelihood(vc, result);
|
||||
final StateTracker stateTracker = makeMaxLikelihood(vc, result);
|
||||
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
result.incNEvaluations(); // keep track of the number of evaluations
|
||||
|
|
@ -73,14 +74,14 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
|
||||
if ( maxLikelihoodSeen.withinMaxACs(set.ACcounts) ) {
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
if ( stateTracker.withinMaxACs(set.getACcounts()) ) {
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, stateTracker, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
|
||||
stateTracker.update(log10LofKs, set.getACcounts());
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.ACcounts);
|
||||
indexesToACset.remove(set.getACcounts());
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
||||
}
|
||||
|
|
@ -155,7 +156,7 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
|
||||
private double calculateAlleleCountConformation(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final StateTracker stateTracker,
|
||||
final int numChr,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
|
|
@ -168,10 +169,10 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
// compute the log10Likelihoods
|
||||
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, result);
|
||||
|
||||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||
|
||||
// can we abort early because the log10Likelihoods are so small?
|
||||
if ( maxLikelihoodSeen.abort(log10LofK, set.ACcounts) ) {
|
||||
if ( stateTracker.abort(log10LofK, set.getACcounts()) ) {
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
return log10LofK;
|
||||
|
|
@ -182,15 +183,15 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||
return log10LofK;
|
||||
|
||||
final int numAltAlleles = set.ACcounts.getCounts().length;
|
||||
final int numAltAlleles = set.getACcounts().getCounts().length;
|
||||
|
||||
// add conformations for the k+1 case
|
||||
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
final int[] ACcountsClone = set.getACcounts().getCounts().clone();
|
||||
ACcountsClone[allele]++;
|
||||
// to get to this conformation, a sample would need to be AB (remember that ref=0)
|
||||
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
|
||||
updateACset(maxLikelihoodSeen, ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(stateTracker, ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||
|
|
@ -200,7 +201,7 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
|
||||
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
|
||||
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
final int[] ACcountsClone = set.getACcounts().getCounts().clone();
|
||||
ACcountsClone[allele_i]++;
|
||||
ACcountsClone[allele_j]++;
|
||||
|
||||
|
|
@ -215,9 +216,9 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
|
||||
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
|
||||
for ( DependentSet dependent : differentAlleles )
|
||||
updateACset(maxLikelihoodSeen, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(stateTracker, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
for ( DependentSet dependent : sameAlleles )
|
||||
updateACset(maxLikelihoodSeen, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
updateACset(stateTracker, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
return log10LofK;
|
||||
|
|
@ -225,7 +226,7 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
|
||||
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
||||
// also pushes its value to the given callingSetIndex.
|
||||
private void updateACset(final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
private void updateACset(final StateTracker stateTracker,
|
||||
final int[] newSetCounts,
|
||||
final int numChr,
|
||||
final ExactACset dependentSet,
|
||||
|
|
@ -251,15 +252,15 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
final double[] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
set.log10Likelihoods[0] = 0.0; // the zero case
|
||||
set.getLog10Likelihoods()[0] = 0.0; // the zero case
|
||||
final int totalK = set.getACsum();
|
||||
|
||||
// special case for k = 0 over all k
|
||||
if ( totalK == 0 ) {
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ )
|
||||
set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
|
||||
for ( int j = 1; j < set.getLog10Likelihoods().length; j++ )
|
||||
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
|
||||
|
||||
final double log10Lof0 = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||
result.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
return;
|
||||
|
|
@ -268,29 +269,29 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
// if we got here, then k > 0 for at least one k.
|
||||
// the non-AA possible conformations were already dealt with by pushes from dependent sets;
|
||||
// now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||
for ( int j = 1; j < set.getLog10Likelihoods().length; j++ ) {
|
||||
|
||||
if ( totalK < 2*j-1 ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
||||
set.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[j], conformationValue);
|
||||
final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.getLog10Likelihoods()[j-1] + gl[HOM_REF_INDEX];
|
||||
set.getLog10Likelihoods()[j] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[j], conformationValue);
|
||||
}
|
||||
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator;
|
||||
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j] - logDenominator;
|
||||
}
|
||||
|
||||
double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||
|
||||
// update the MLE if necessary
|
||||
result.updateMLEifNeeded(log10LofK, set.ACcounts.counts);
|
||||
result.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
|
||||
|
||||
// apply the priors over each alternate allele
|
||||
for ( final int ACcount : set.ACcounts.getCounts() ) {
|
||||
for ( final int ACcount : set.getACcounts().getCounts() ) {
|
||||
if ( ACcount > 0 )
|
||||
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
||||
}
|
||||
result.updateMAPifNeeded(log10LofK, set.ACcounts.counts);
|
||||
result.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
|
||||
}
|
||||
|
||||
private void pushData(final ExactACset targetSet,
|
||||
|
|
@ -299,13 +300,13 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
|||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
final int totalK = targetSet.getACsum();
|
||||
|
||||
for ( int j = 1; j < targetSet.log10Likelihoods.length; j++ ) {
|
||||
for ( int j = 1; j < targetSet.getLog10Likelihoods().length; j++ ) {
|
||||
|
||||
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double conformationValue =
|
||||
determineCoefficient(PLsetIndex, j, targetSet.ACcounts.getCounts(), totalK) + dependentSet.log10Likelihoods[j-1] + gl[PLsetIndex];
|
||||
targetSet.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(targetSet.log10Likelihoods[j], conformationValue);
|
||||
determineCoefficient(PLsetIndex, j, targetSet.getACcounts().getCounts(), totalK) + dependentSet.getLog10Likelihoods()[j-1] + gl[PLsetIndex];
|
||||
targetSet.getLog10Likelihoods()[j] = MathUtils.approximateLog10SumLog10(targetSet.getLog10Likelihoods()[j], conformationValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 10/5/12
|
||||
* Time: 2:54 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/ // a wrapper around the int array so that we can make it hashable
|
||||
public final class ExactACcounts {
|
||||
private final int[] counts;
|
||||
private int hashcode = -1;
|
||||
|
||||
public ExactACcounts(final int[] counts) {
|
||||
this.counts = counts;
|
||||
}
|
||||
|
||||
public int[] getCounts() {
|
||||
return counts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACcounts) && Arrays.equals(getCounts(), ((ExactACcounts) obj).getCounts());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if ( hashcode == -1 )
|
||||
hashcode = Arrays.hashCode(getCounts());
|
||||
return hashcode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(getCounts()[0]);
|
||||
for ( int i = 1; i < getCounts().length; i++ ) {
|
||||
sb.append("/");
|
||||
sb.append(getCounts()[i]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 10/5/12
|
||||
* Time: 2:53 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/ // This class represents a column in the Exact AC calculation matrix
|
||||
public final class ExactACset {
|
||||
// the counts of the various alternate alleles which this column represents
|
||||
private final ExactACcounts ACcounts;
|
||||
|
||||
// the column of the matrix
|
||||
private final double[] log10Likelihoods;
|
||||
|
||||
int sum = -1;
|
||||
|
||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||
this.ACcounts = ACcounts;
|
||||
log10Likelihoods = new double[size];
|
||||
Arrays.fill(getLog10Likelihoods(), Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
// sum of all the non-reference alleles
|
||||
public int getACsum() {
|
||||
if ( sum == -1 ) {
|
||||
sum = 0;
|
||||
for ( int count : getACcounts().getCounts() )
|
||||
sum += count;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACset) && getACcounts().equals(((ExactACset)obj).getACcounts());
|
||||
}
|
||||
|
||||
public ExactACcounts getACcounts() {
|
||||
return ACcounts;
|
||||
}
|
||||
|
||||
public double[] getLog10Likelihoods() {
|
||||
return log10Likelihoods;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Uses the Exact calculation of Heng Li
|
||||
*/
|
||||
abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
|
||||
protected ExactAFCalculation(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) {
|
||||
super(UAC, nSamples, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected ExactAFCalculation(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, File exactCallsLog, Logger logger, PrintStream verboseWriter) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, exactCallsLog, logger, verboseWriter);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper class that compares two likelihoods associated with two alleles
|
||||
*/
|
||||
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
||||
public double sum = 0.0;
|
||||
public Allele allele;
|
||||
|
||||
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
||||
|
||||
public int compareTo(LikelihoodSum other) {
|
||||
final double diff = sum - other.sum;
|
||||
return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpack GenotypesContext into arraylist of doubel values
|
||||
* @param GLs Input genotype context
|
||||
* @return ArrayList of doubles corresponding to GL vectors
|
||||
*/
|
||||
protected static ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
|
||||
|
||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
}
|
||||
}
|
||||
|
||||
return genotypeLikelihoods;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -14,7 +15,7 @@ public class ReferenceDiploidExactAFCalculation extends DiploidExactAFCalculatio
|
|||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
protected MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
||||
return new ExactAFCalculation.MaxLikelihoodSeen();
|
||||
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
||||
return new StateTracker();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
/**
|
||||
* Keeps track of the best state seen by the exact model and the max states to visit
|
||||
* allowing us to abort the search before we visit the entire matrix of AC x samples
|
||||
*/
|
||||
final class StateTracker {
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
final private int[] maxACsToConsider;
|
||||
|
||||
private ExactACcounts ACsAtMax = null;
|
||||
private double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
|
||||
public StateTracker() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
public StateTracker(final int[] maxACsToConsider) {
|
||||
this.maxACsToConsider = maxACsToConsider;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the maximum log10L seen, if log10LofKs is higher
|
||||
*
|
||||
* @param log10LofKs the likelihood of our current configuration state
|
||||
*/
|
||||
public void update(final double log10LofKs, final ExactACcounts ACs) {
|
||||
if ( log10LofKs > getMaxLog10L()) {
|
||||
this.setMaxLog10L(log10LofKs);
|
||||
this.ACsAtMax = ACs;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the likelihood of configuration K too low to consider, related to the
|
||||
* maximum likelihood seen already?
|
||||
*
|
||||
* @param log10LofK the log10 likelihood of the configuration we're considering analyzing
|
||||
* @return true if the configuration cannot meaningfully contribute to our likelihood sum
|
||||
*/
|
||||
public boolean tooLowLikelihood(final double log10LofK) {
|
||||
return log10LofK < getMaxLog10L() - MAX_LOG10_ERROR_TO_STOP_EARLY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are all ACs in otherACs less than or equal to their corresponding ACs in the maxACsToConsider?
|
||||
*
|
||||
* @param otherACs the set of otherACs that we want to know if we should consider analyzing
|
||||
* @return true if otherACs is a state worth considering, or false otherwise
|
||||
*/
|
||||
public boolean withinMaxACs(final ExactACcounts otherACs) {
|
||||
if ( maxACsToConsider == null )
|
||||
return true;
|
||||
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < maxACsToConsider.length; i++ ) {
|
||||
// consider one more than the max AC to collect a bit more likelihood mass
|
||||
if ( otherACcounts[i] > maxACsToConsider[i] + 1 )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
*/
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
if ( ACsAtMax == null )
|
||||
return true;
|
||||
|
||||
final int[] myACcounts = this.ACsAtMax.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean abort( final double log10LofK, final ExactACcounts ACs ) {
|
||||
return tooLowLikelihood(log10LofK) && isLowerAC(ACs);
|
||||
}
|
||||
|
||||
public double getMaxLog10L() {
|
||||
return maxLog10L;
|
||||
}
|
||||
|
||||
public void setMaxLog10L(double maxLog10L) {
|
||||
this.maxLog10L = maxLog10L;
|
||||
}
|
||||
}
|
||||
|
|
@ -23,9 +23,9 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidExactAFCalculation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ReferenceDiploidExactAFCalculation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AlleleFrequencyCalculationResult;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.DiploidExactAFCalculation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ReferenceDiploidExactAFCalculation;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.TreeSet;
|
||||
|
|
|
|||
Loading…
Reference in New Issue