Reorganize and cleanup AFCalculations
-- Now contained in a package called afcalc -- Extracted standard alone classes from private static classes in ExactAF -- Most fields are now private, with accessors -- Overall cleaner organization now
This commit is contained in:
parent
13211231c7
commit
cf3f9d6ee8
|
|
@ -26,6 +26,8 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
@ -123,7 +125,7 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
protected static class SumIterator {
|
public static class SumIterator {
|
||||||
private int[] currentState;
|
private int[] currentState;
|
||||||
private final int[] finalState;
|
private final int[] finalState;
|
||||||
private final int restrictSumTo;
|
private final int restrictSumTo;
|
||||||
|
|
@ -491,32 +493,32 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
// If neighbors fall below maximum - threshold, we don't queue up THEIR own neighbors
|
// If neighbors fall below maximum - threshold, we don't queue up THEIR own neighbors
|
||||||
// and we repeat until queue is empty
|
// and we repeat until queue is empty
|
||||||
// queue of AC conformations to process
|
// queue of AC conformations to process
|
||||||
final LinkedList<ExactAFCalculation.ExactACset> ACqueue = new LinkedList<ExactAFCalculation.ExactACset>();
|
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||||
// mapping of ExactACset indexes to the objects
|
// mapping of ExactACset indexes to the objects
|
||||||
final HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset> indexesToACset = new HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset>(likelihoodDim);
|
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(likelihoodDim);
|
||||||
// add AC=0 to the queue
|
// add AC=0 to the queue
|
||||||
final int[] zeroCounts = new int[nAlleles];
|
final int[] zeroCounts = new int[nAlleles];
|
||||||
zeroCounts[0] = numChromosomes;
|
zeroCounts[0] = numChromosomes;
|
||||||
|
|
||||||
ExactAFCalculation.ExactACset zeroSet =
|
ExactACset zeroSet =
|
||||||
new ExactAFCalculation.ExactACset(1, new ExactAFCalculation.ExactACcounts(zeroCounts));
|
new ExactACset(1, new ExactACcounts(zeroCounts));
|
||||||
|
|
||||||
ACqueue.add(zeroSet);
|
ACqueue.add(zeroSet);
|
||||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
|
||||||
|
|
||||||
// keep processing while we have AC conformations that need to be calculated
|
// keep processing while we have AC conformations that need to be calculated
|
||||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
while ( !ACqueue.isEmpty() ) {
|
while ( !ACqueue.isEmpty() ) {
|
||||||
// compute log10Likelihoods
|
// compute log10Likelihoods
|
||||||
final ExactAFCalculation.ExactACset ACset = ACqueue.remove();
|
final ExactACset ACset = ACqueue.remove();
|
||||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, errorModel, alleleList, numObservations, maxLog10L, ACqueue, indexesToACset, pileup);
|
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, errorModel, alleleList, numObservations, maxLog10L, ACqueue, indexesToACset, pileup);
|
||||||
|
|
||||||
// adjust max likelihood seen if needed
|
// adjust max likelihood seen if needed
|
||||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||||
// clean up memory
|
// clean up memory
|
||||||
indexesToACset.remove(ACset.ACcounts);
|
indexesToACset.remove(ACset.getACcounts());
|
||||||
if ( VERBOSE )
|
if ( VERBOSE )
|
||||||
System.out.printf(" *** removing used set=%s%n", ACset.ACcounts);
|
System.out.printf(" *** removing used set=%s%n", ACset.getACcounts());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -525,13 +527,13 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
int plIdx = 0;
|
int plIdx = 0;
|
||||||
SumIterator iterator = new SumIterator(nAlleles, numChromosomes);
|
SumIterator iterator = new SumIterator(nAlleles, numChromosomes);
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
ExactAFCalculation.ExactACset ACset =
|
ExactACset ACset =
|
||||||
new ExactAFCalculation.ExactACset(1, new ExactAFCalculation.ExactACcounts(iterator.getCurrentVector()));
|
new ExactACset(1, new ExactACcounts(iterator.getCurrentVector()));
|
||||||
// for observed base X, add Q(jX,k) to likelihood vector for all k in error model
|
// for observed base X, add Q(jX,k) to likelihood vector for all k in error model
|
||||||
//likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k))
|
//likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k))
|
||||||
getLikelihoodOfConformation(ACset, errorModel, alleleList, numObservations, pileup);
|
getLikelihoodOfConformation(ACset, errorModel, alleleList, numObservations, pileup);
|
||||||
|
|
||||||
setLogPLs(plIdx++, ACset.log10Likelihoods[0]);
|
setLogPLs(plIdx++, ACset.getLog10Likelihoods()[0]);
|
||||||
iterator.next();
|
iterator.next();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -540,40 +542,40 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateACConformationAndUpdateQueue(final ExactAFCalculation.ExactACset set,
|
private double calculateACConformationAndUpdateQueue(final ExactACset set,
|
||||||
final ErrorModel errorModel,
|
final ErrorModel errorModel,
|
||||||
final List<Allele> alleleList,
|
final List<Allele> alleleList,
|
||||||
final List<Integer> numObservations,
|
final List<Integer> numObservations,
|
||||||
final double maxLog10L,
|
final double maxLog10L,
|
||||||
final LinkedList<ExactAFCalculation.ExactACset> ACqueue,
|
final LinkedList<ExactACset> ACqueue,
|
||||||
final HashMap<ExactAFCalculation.ExactACcounts,
|
final HashMap<ExactACcounts,
|
||||||
ExactAFCalculation.ExactACset> indexesToACset,
|
ExactACset> indexesToACset,
|
||||||
final ReadBackedPileup pileup) {
|
final ReadBackedPileup pileup) {
|
||||||
// compute likelihood of set
|
// compute likelihood of set
|
||||||
getLikelihoodOfConformation(set, errorModel, alleleList, numObservations, pileup);
|
getLikelihoodOfConformation(set, errorModel, alleleList, numObservations, pileup);
|
||||||
final double log10LofK = set.log10Likelihoods[0];
|
final double log10LofK = set.getLog10Likelihoods()[0];
|
||||||
|
|
||||||
// log result in PL vector
|
// log result in PL vector
|
||||||
int idx = getLinearIndex(set.ACcounts.getCounts(), nAlleles, numChromosomes);
|
int idx = getLinearIndex(set.getACcounts().getCounts(), nAlleles, numChromosomes);
|
||||||
setLogPLs(idx, log10LofK);
|
setLogPLs(idx, log10LofK);
|
||||||
|
|
||||||
// can we abort early because the log10Likelihoods are so small?
|
// can we abort early because the log10Likelihoods are so small?
|
||||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||||
if ( VERBOSE )
|
if ( VERBOSE )
|
||||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.getACcounts(), log10LofK, maxLog10L);
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterate over higher frequencies if possible
|
// iterate over higher frequencies if possible
|
||||||
// by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.
|
// by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.
|
||||||
final int ACwiggle = numChromosomes - set.getACsum() + set.ACcounts.counts[0];
|
final int ACwiggle = numChromosomes - set.getACsum() + set.getACcounts().getCounts()[0];
|
||||||
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
|
|
||||||
|
|
||||||
// add conformations for other cases
|
// add conformations for other cases
|
||||||
for ( int allele = 1; allele < nAlleles; allele++ ) {
|
for ( int allele = 1; allele < nAlleles; allele++ ) {
|
||||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
final int[] ACcountsClone = set.getACcounts().getCounts().clone();
|
||||||
ACcountsClone[allele]++;
|
ACcountsClone[allele]++;
|
||||||
// is this a valid conformation?
|
// is this a valid conformation?
|
||||||
int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0];
|
int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0];
|
||||||
|
|
@ -597,7 +599,7 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
* @param numObservations Number of observations for each allele
|
* @param numObservations Number of observations for each allele
|
||||||
* @param pileup Read backed pileup in case it's necessary
|
* @param pileup Read backed pileup in case it's necessary
|
||||||
*/
|
*/
|
||||||
public abstract void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
public abstract void getLikelihoodOfConformation(final ExactACset ACset,
|
||||||
final ErrorModel errorModel,
|
final ErrorModel errorModel,
|
||||||
final List<Allele> alleleList,
|
final List<Allele> alleleList,
|
||||||
final List<Integer> numObservations,
|
final List<Integer> numObservations,
|
||||||
|
|
@ -608,12 +610,12 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
|
|
||||||
// Static methods
|
// Static methods
|
||||||
public static void updateACset(final int[] newSetCounts,
|
public static void updateACset(final int[] newSetCounts,
|
||||||
final LinkedList<ExactAFCalculation.ExactACset> ACqueue,
|
final LinkedList<ExactACset> ACqueue,
|
||||||
final HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset> indexesToACset) {
|
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||||
|
|
||||||
final ExactAFCalculation.ExactACcounts index = new ExactAFCalculation.ExactACcounts(newSetCounts);
|
final ExactACcounts index = new ExactACcounts(newSetCounts);
|
||||||
if ( !indexesToACset.containsKey(index) ) {
|
if ( !indexesToACset.containsKey(index) ) {
|
||||||
ExactAFCalculation.ExactACset newSet = new ExactAFCalculation.ExactACset(1, index);
|
ExactACset newSet = new ExactACset(1, index);
|
||||||
indexesToACset.put(index, newSet);
|
indexesToACset.put(index, newSet);
|
||||||
ACqueue.add(newSet);
|
ACqueue.add(newSet);
|
||||||
if (VERBOSE)
|
if (VERBOSE)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||||
import org.broadinstitute.sting.utils.Haplotype;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -188,12 +189,12 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
||||||
* @param alleleList List of alleles
|
* @param alleleList List of alleles
|
||||||
* @param numObservations Number of observations for each allele in alleleList
|
* @param numObservations Number of observations for each allele in alleleList
|
||||||
*/
|
*/
|
||||||
public void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
public void getLikelihoodOfConformation(final ExactACset ACset,
|
||||||
final ErrorModel errorModel,
|
final ErrorModel errorModel,
|
||||||
final List<Allele> alleleList,
|
final List<Allele> alleleList,
|
||||||
final List<Integer> numObservations,
|
final List<Integer> numObservations,
|
||||||
final ReadBackedPileup pileup) {
|
final ReadBackedPileup pileup) {
|
||||||
final int[] currentCnt = Arrays.copyOf(ACset.ACcounts.counts, alleleList.size());
|
final int[] currentCnt = Arrays.copyOf(ACset.getACcounts().getCounts(), alleleList.size());
|
||||||
double p1 = 0.0;
|
double p1 = 0.0;
|
||||||
|
|
||||||
if (!hasReferenceSampleData) {
|
if (!hasReferenceSampleData) {
|
||||||
|
|
@ -218,6 +219,6 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
||||||
}
|
}
|
||||||
p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ, maxQ), acVec);
|
p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ, maxQ), acVec);
|
||||||
}
|
}
|
||||||
ACset.log10Likelihoods[0] = p1;
|
ACset.getLog10Likelihoods()[0] = p1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
|
@ -221,12 +222,12 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
||||||
* @param alleleList List of alleles
|
* @param alleleList List of alleles
|
||||||
* @param numObservations Number of observations for each allele in alleleList
|
* @param numObservations Number of observations for each allele in alleleList
|
||||||
*/
|
*/
|
||||||
public void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
public void getLikelihoodOfConformation(final ExactACset ACset,
|
||||||
final ErrorModel errorModel,
|
final ErrorModel errorModel,
|
||||||
final List<Allele> alleleList,
|
final List<Allele> alleleList,
|
||||||
final List<Integer> numObservations,
|
final List<Integer> numObservations,
|
||||||
final ReadBackedPileup pileup) {
|
final ReadBackedPileup pileup) {
|
||||||
final int[] currentCnt = Arrays.copyOf(ACset.ACcounts.counts, BaseUtils.BASES.length);
|
final int[] currentCnt = Arrays.copyOf(ACset.getACcounts().getCounts(), BaseUtils.BASES.length);
|
||||||
final int[] ac = new int[BaseUtils.BASES.length];
|
final int[] ac = new int[BaseUtils.BASES.length];
|
||||||
|
|
||||||
for (int k=0; k < BaseUtils.BASES.length; k++ )
|
for (int k=0; k < BaseUtils.BASES.length; k++ )
|
||||||
|
|
@ -241,9 +242,9 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
||||||
final byte qual = qualToUse(elt, true, true, mbq);
|
final byte qual = qualToUse(elt, true, true, mbq);
|
||||||
if ( qual == 0 )
|
if ( qual == 0 )
|
||||||
continue;
|
continue;
|
||||||
final double acc[] = new double[ACset.ACcounts.counts.length];
|
final double acc[] = new double[ACset.getACcounts().getCounts().length];
|
||||||
for (int k=0; k < acc.length; k++ )
|
for (int k=0; k < acc.length; k++ )
|
||||||
acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +MathUtils.log10Cache[ACset.ACcounts.counts[k]]
|
acc[k] = qualLikelihoodCache[BaseUtils.simpleBaseToBaseIndex(alleleList.get(k).getBases()[0])][BaseUtils.simpleBaseToBaseIndex(obsBase)][qual] +MathUtils.log10Cache[ACset.getACcounts().getCounts()[k]]
|
||||||
- LOG10_PLOIDY;
|
- LOG10_PLOIDY;
|
||||||
p1 += MathUtils.log10sumLog10(acc);
|
p1 += MathUtils.log10sumLog10(acc);
|
||||||
}
|
}
|
||||||
|
|
@ -267,7 +268,7 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
||||||
|
|
||||||
p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ), acVec);
|
p1 = MathUtils.logDotProduct(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ), acVec);
|
||||||
}
|
}
|
||||||
ACset.log10Likelihoods[0] = p1;
|
ACset.getLog10Likelihoods()[0] = p1;
|
||||||
/* System.out.println(Arrays.toString(ACset.ACcounts.getCounts())+" "+String.valueOf(p1));
|
/* System.out.println(Arrays.toString(ACset.ACcounts.getCounts())+" "+String.valueOf(p1));
|
||||||
System.out.println(Arrays.toString(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ)));
|
System.out.println(Arrays.toString(errorModel.getErrorModelVector().getProbabilityVector(minQ,maxQ)));
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.apache.log4j.ConsoleAppender;
|
import org.apache.log4j.ConsoleAppender;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
@ -175,8 +175,8 @@ public class ExactAFCalculationPerformanceTest {
|
||||||
final boolean USE_GENERAL = false;
|
final boolean USE_GENERAL = false;
|
||||||
final List<ExactAFCalculationTestBuilder.ModelType> modelTypes = USE_GENERAL
|
final List<ExactAFCalculationTestBuilder.ModelType> modelTypes = USE_GENERAL
|
||||||
? Arrays.asList(ExactAFCalculationTestBuilder.ModelType.values())
|
? Arrays.asList(ExactAFCalculationTestBuilder.ModelType.values())
|
||||||
: Arrays.asList(ExactAFCalculationTestBuilder.ModelType.OptimizedDiploidExact);
|
: Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact);
|
||||||
// : Arrays.asList(ExactAFCalculationTestBuilder.ModelType.DiploidExact, ExactAFCalculationTestBuilder.ModelType.OptimizedDiploidExact);
|
// : Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ReferenceDiploidExact, ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact);
|
||||||
|
|
||||||
final boolean ONLY_HUMAN_PRIORS = false;
|
final boolean ONLY_HUMAN_PRIORS = false;
|
||||||
final List<ExactAFCalculationTestBuilder.PriorType> priorTypes = ONLY_HUMAN_PRIORS
|
final List<ExactAFCalculationTestBuilder.PriorType> priorTypes = ONLY_HUMAN_PRIORS
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
@ -32,8 +33,8 @@ public class ExactAFCalculationTestBuilder {
|
||||||
}
|
}
|
||||||
|
|
||||||
public enum ModelType {
|
public enum ModelType {
|
||||||
DiploidExact,
|
ReferenceDiploidExact,
|
||||||
OptimizedDiploidExact,
|
ConstrainedDiploidExact,
|
||||||
GeneralExact
|
GeneralExact
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -48,8 +49,8 @@ public class ExactAFCalculationTestBuilder {
|
||||||
|
|
||||||
public ExactAFCalculation makeModel() {
|
public ExactAFCalculation makeModel() {
|
||||||
switch (modelType) {
|
switch (modelType) {
|
||||||
case DiploidExact: return new ReferenceDiploidExactAFCalculation(nSamples, 4);
|
case ReferenceDiploidExact: return new ReferenceDiploidExactAFCalculation(nSamples, 4);
|
||||||
case OptimizedDiploidExact: return new ConstrainedDiploidExactAFCalculation(nSamples, 4);
|
case ConstrainedDiploidExact: return new ConstrainedDiploidExactAFCalculation(nSamples, 4);
|
||||||
case GeneralExact: return new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
case GeneralExact: return new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
||||||
default: throw new RuntimeException("Unexpected type " + modelType);
|
default: throw new RuntimeException("Unexpected type " + modelType);
|
||||||
}
|
}
|
||||||
|
|
@ -63,7 +64,7 @@ public class ExactAFCalculationTestBuilder {
|
||||||
return MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
return MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||||
case human:
|
case human:
|
||||||
final double[] humanPriors = new double[nPriorValues];
|
final double[] humanPriors = new double[nPriorValues];
|
||||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001);
|
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001);
|
||||||
return humanPriors;
|
return humanPriors;
|
||||||
default:
|
default:
|
||||||
throw new RuntimeException("Unexpected type " + priorType);
|
throw new RuntimeException("Unexpected type " + priorType);
|
||||||
|
|
@ -23,9 +23,12 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.ProbabilityVector;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -100,8 +103,8 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
|
|
||||||
public void add(ExactACset set) {
|
public void add(ExactACset set) {
|
||||||
alleleCountSetList.add(set);
|
alleleCountSetList.add(set);
|
||||||
conformationMap.put(set.ACcounts, set);
|
conformationMap.put(set.getACcounts(), set);
|
||||||
final double likelihood = set.log10Likelihoods[0];
|
final double likelihood = set.getLog10Likelihoods()[0];
|
||||||
|
|
||||||
if (likelihood > maxLikelihood )
|
if (likelihood > maxLikelihood )
|
||||||
maxLikelihood = likelihood;
|
maxLikelihood = likelihood;
|
||||||
|
|
@ -114,11 +117,11 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getLikelihoodOfConformation(int[] ac) {
|
public double getLikelihoodOfConformation(int[] ac) {
|
||||||
return conformationMap.get(new ExactACcounts(ac)).log10Likelihoods[0];
|
return conformationMap.get(new ExactACcounts(ac)).getLog10Likelihoods()[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getGLOfACZero() {
|
public double getGLOfACZero() {
|
||||||
return alleleCountSetList.get(0).log10Likelihoods[0]; // AC 0 is always at beginning of list
|
return alleleCountSetList.get(0).getLog10Likelihoods()[0]; // AC 0 is always at beginning of list
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getLength() {
|
public int getLength() {
|
||||||
|
|
@ -196,7 +199,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
// first element: zero ploidy, e.g. trivial degenerate distribution
|
// first element: zero ploidy, e.g. trivial degenerate distribution
|
||||||
final int[] zeroCounts = new int[numAlleles];
|
final int[] zeroCounts = new int[numAlleles];
|
||||||
final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts));
|
final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts));
|
||||||
set.log10Likelihoods[0] = 0.0;
|
set.getLog10Likelihoods()[0] = 0.0;
|
||||||
|
|
||||||
combinedPoolLikelihoods.add(set);
|
combinedPoolLikelihoods.add(set);
|
||||||
for (int p=1; p<genotypeLikelihoods.size(); p++) {
|
for (int p=1; p<genotypeLikelihoods.size(); p++) {
|
||||||
|
|
@ -227,24 +230,24 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
ExactACset zeroSet = new ExactACset(1, new ExactACcounts(zeroCounts));
|
ExactACset zeroSet = new ExactACset(1, new ExactACcounts(zeroCounts));
|
||||||
|
|
||||||
ACqueue.add(zeroSet);
|
ACqueue.add(zeroSet);
|
||||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
|
||||||
|
|
||||||
// keep processing while we have AC conformations that need to be calculated
|
// keep processing while we have AC conformations that need to be calculated
|
||||||
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
|
StateTracker stateTracker = new StateTracker();
|
||||||
while ( !ACqueue.isEmpty() ) {
|
while ( !ACqueue.isEmpty() ) {
|
||||||
result.incNEvaluations();
|
result.incNEvaluations();
|
||||||
// compute log10Likelihoods
|
// compute log10Likelihoods
|
||||||
final ExactACset ACset = ACqueue.remove();
|
final ExactACset ACset = ACqueue.remove();
|
||||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset);
|
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, stateTracker, ACqueue, indexesToACset);
|
||||||
|
|
||||||
// adjust max likelihood seen if needed
|
// adjust max likelihood seen if needed
|
||||||
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
|
if ( log10LofKs > stateTracker.getMaxLog10L())
|
||||||
maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts);
|
stateTracker.update(log10LofKs, ACset.getACcounts());
|
||||||
|
|
||||||
// clean up memory
|
// clean up memory
|
||||||
indexesToACset.remove(ACset.ACcounts);
|
indexesToACset.remove(ACset.getACcounts());
|
||||||
if ( VERBOSE )
|
if ( VERBOSE )
|
||||||
System.out.printf(" *** removing used set=%s%n", ACset.ACcounts);
|
System.out.printf(" *** removing used set=%s%n", ACset.getACcounts());
|
||||||
|
|
||||||
}
|
}
|
||||||
return newPool;
|
return newPool;
|
||||||
|
|
@ -261,7 +264,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
* @param originalPloidy Total ploidy of original combined pool
|
* @param originalPloidy Total ploidy of original combined pool
|
||||||
* @param newGLPloidy Ploidy of GL vector
|
* @param newGLPloidy Ploidy of GL vector
|
||||||
* @param result AFResult object
|
* @param result AFResult object
|
||||||
* @param maxLikelihoodSeen max likelihood observed so far
|
* @param stateTracker max likelihood observed so far
|
||||||
* @param ACqueue Queue of conformations to compute
|
* @param ACqueue Queue of conformations to compute
|
||||||
* @param indexesToACset AC indices of objects in queue
|
* @param indexesToACset AC indices of objects in queue
|
||||||
* @return max log likelihood
|
* @return max log likelihood
|
||||||
|
|
@ -274,12 +277,12 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
final int originalPloidy,
|
final int originalPloidy,
|
||||||
final int newGLPloidy,
|
final int newGLPloidy,
|
||||||
final AlleleFrequencyCalculationResult result,
|
final AlleleFrequencyCalculationResult result,
|
||||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
final StateTracker stateTracker,
|
||||||
final LinkedList<ExactACset> ACqueue,
|
final LinkedList<ExactACset> ACqueue,
|
||||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||||
|
|
||||||
// compute likeihood in "set" of new set based on original likelihoods
|
// compute likeihood in "set" of new set based on original likelihoods
|
||||||
final int numAlleles = set.ACcounts.counts.length;
|
final int numAlleles = set.getACcounts().getCounts().length;
|
||||||
final int newPloidy = set.getACsum();
|
final int newPloidy = set.getACsum();
|
||||||
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, result);
|
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, result);
|
||||||
|
|
||||||
|
|
@ -289,24 +292,24 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
newPool.add(set);
|
newPool.add(set);
|
||||||
|
|
||||||
// TODO -- uncomment this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
|
// TODO -- uncomment this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
|
||||||
//if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
|
//if ( log10LofK < stateTracker.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && stateTracker.isLowerAC(set.ACcounts) ) {
|
||||||
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
if ( log10LofK < stateTracker.getMaxLog10L() - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||||
if ( VERBOSE )
|
if ( VERBOSE )
|
||||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L);
|
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.getACcounts(), log10LofK, stateTracker.getMaxLog10L());
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterate over higher frequencies if possible
|
// iterate over higher frequencies if possible
|
||||||
// by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.
|
// by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.
|
||||||
// so, if first element is zero, it automatically means we have no wiggle since we're in a corner of the conformation space
|
// so, if first element is zero, it automatically means we have no wiggle since we're in a corner of the conformation space
|
||||||
final int ACwiggle = set.ACcounts.counts[0];
|
final int ACwiggle = set.getACcounts().getCounts()[0];
|
||||||
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
|
|
||||||
|
|
||||||
// add conformations for other cases
|
// add conformations for other cases
|
||||||
for ( int allele = 1; allele < numAlleles; allele++ ) {
|
for ( int allele = 1; allele < numAlleles; allele++ ) {
|
||||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
final int[] ACcountsClone = set.getACcounts().getCounts().clone();
|
||||||
ACcountsClone[allele]++;
|
ACcountsClone[allele]++;
|
||||||
// is this a valid conformation?
|
// is this a valid conformation?
|
||||||
int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0];
|
int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0];
|
||||||
|
|
@ -411,14 +414,14 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
if (newPloidy != totalAltK)
|
if (newPloidy != totalAltK)
|
||||||
throw new ReviewedStingException("BUG: inconsistent sizes of set.getACsum and passed ploidy values");
|
throw new ReviewedStingException("BUG: inconsistent sizes of set.getACsum and passed ploidy values");
|
||||||
|
|
||||||
totalAltK -= set.ACcounts.counts[0];
|
totalAltK -= set.getACcounts().getCounts()[0];
|
||||||
// totalAltK has sum of alt alleles of conformation now
|
// totalAltK has sum of alt alleles of conformation now
|
||||||
|
|
||||||
|
|
||||||
// special case for k = 0 over all k
|
// special case for k = 0 over all k
|
||||||
if ( totalAltK == 0 ) { // all-ref case
|
if ( totalAltK == 0 ) { // all-ref case
|
||||||
final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
|
final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
|
||||||
set.log10Likelihoods[0] = log10Lof0;
|
set.getLog10Likelihoods()[0] = log10Lof0;
|
||||||
|
|
||||||
result.setLog10LikelihoodOfAFzero(log10Lof0);
|
result.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||||
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||||
|
|
@ -430,12 +433,12 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
// ExactACset holds by convention the conformation of all alleles, and the sum of all allele count is just the ploidy.
|
// ExactACset holds by convention the conformation of all alleles, and the sum of all allele count is just the ploidy.
|
||||||
// To compute n!/k1!k2!k3!... we need to compute first n!/(k2!k3!...) and then further divide by k1! where k1=ploidy-sum_k_i
|
// To compute n!/k1!k2!k3!... we need to compute first n!/(k2!k3!...) and then further divide by k1! where k1=ploidy-sum_k_i
|
||||||
|
|
||||||
int[] currentCount = set.ACcounts.getCounts();
|
int[] currentCount = set.getACcounts().getCounts();
|
||||||
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
|
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
|
||||||
|
|
||||||
// for current conformation, get all possible ways to break vector K into two components G1 and G2
|
// for current conformation, get all possible ways to break vector K into two components G1 and G2
|
||||||
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||||
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
|
set.getLog10Likelihoods()[0] = Double.NEGATIVE_INFINITY;
|
||||||
while (innerIterator.hasNext()) {
|
while (innerIterator.hasNext()) {
|
||||||
// check if breaking current conformation into g1 and g2 is feasible.
|
// check if breaking current conformation into g1 and g2 is feasible.
|
||||||
final int[] acCount2 = innerIterator.getCurrentVector();
|
final int[] acCount2 = innerIterator.getCurrentVector();
|
||||||
|
|
@ -451,19 +454,19 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
final double num2 = MathUtils.log10MultinomialCoefficient(ploidy2, acCount2);
|
final double num2 = MathUtils.log10MultinomialCoefficient(ploidy2, acCount2);
|
||||||
final double sum = firstGL + gl2 + num1 + num2;
|
final double sum = firstGL + gl2 + num1 + num2;
|
||||||
|
|
||||||
set.log10Likelihoods[0] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[0], sum);
|
set.getLog10Likelihoods()[0] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[0], sum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
innerIterator.next();
|
innerIterator.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
set.log10Likelihoods[0] += denom;
|
set.getLog10Likelihoods()[0] += denom;
|
||||||
}
|
}
|
||||||
|
|
||||||
double log10LofK = set.log10Likelihoods[0];
|
double log10LofK = set.getLog10Likelihoods()[0];
|
||||||
|
|
||||||
// update the MLE if necessary
|
// update the MLE if necessary
|
||||||
final int altCounts[] = Arrays.copyOfRange(set.ACcounts.counts,1, set.ACcounts.counts.length);
|
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
|
||||||
result.updateMLEifNeeded(log10LofK, altCounts);
|
result.updateMLEifNeeded(log10LofK, altCounts);
|
||||||
|
|
||||||
// apply the priors over each alternate allele
|
// apply the priors over each alternate allele
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
@ -128,7 +129,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
final int nPriorValues = 2*nSamples+1;
|
final int nPriorValues = 2*nSamples+1;
|
||||||
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||||
final double[] humanPriors = new double[nPriorValues];
|
final double[] humanPriors = new double[nPriorValues];
|
||||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001);
|
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001);
|
||||||
|
|
||||||
for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) {
|
for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) {
|
||||||
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc, optDiploidCalc) ) {
|
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc, optDiploidCalc) ) {
|
||||||
|
|
@ -375,7 +376,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
final int nSamples = 10;
|
final int nSamples = 10;
|
||||||
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact;
|
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact;
|
||||||
|
|
||||||
for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) {
|
for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) {
|
||||||
final int nChrom = (nSamples - nNonInformative) * 2;
|
final int nChrom = (nSamples - nNonInformative) * 2;
|
||||||
|
|
@ -400,7 +401,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
ExactAFCalculationTestBuilder.PriorType.human);
|
ExactAFCalculationTestBuilder.PriorType.human);
|
||||||
|
|
||||||
final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
|
final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
|
||||||
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
|
final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalculation)testBuilder.makeModel()).computeMaxACs(vc);
|
||||||
|
|
||||||
testExpectedACs(vc, maxACsToVisit);
|
testExpectedACs(vc, maxACsToVisit);
|
||||||
}
|
}
|
||||||
|
|
@ -461,11 +462,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) {
|
private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) {
|
||||||
final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make();
|
final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make();
|
||||||
|
|
||||||
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact;
|
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact;
|
||||||
final ExactAFCalculationTestBuilder testBuilder
|
final ExactAFCalculationTestBuilder testBuilder
|
||||||
= new ExactAFCalculationTestBuilder(1, vc.getNAlleles()-1, modelType,
|
= new ExactAFCalculationTestBuilder(1, vc.getNAlleles()-1, modelType,
|
||||||
ExactAFCalculationTestBuilder.PriorType.human);
|
ExactAFCalculationTestBuilder.PriorType.human);
|
||||||
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
|
final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalculation)testBuilder.makeModel()).computeMaxACs(vc);
|
||||||
testExpectedACs(vc, maxACsToVisit);
|
testExpectedACs(vc, maxACsToVisit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.io.PrintStream;
|
|
||||||
|
|
||||||
public class ConstrainedDiploidExactAFCalculation extends DiploidExactAFCalculation {
|
|
||||||
public ConstrainedDiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
|
||||||
super(nSamples, maxAltAlleles);
|
|
||||||
}
|
|
||||||
|
|
||||||
public ConstrainedDiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
|
||||||
super(UAC, N, logger, verboseWriter);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
|
||||||
final int[] maxACsToConsider = computeMaxACs(vc);
|
|
||||||
result.setAClimits(maxACsToConsider);
|
|
||||||
return new MaxLikelihoodSeen(maxACsToConsider);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,328 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
|
||||||
import com.google.java.contract.Requires;
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Uses the Exact calculation of Heng Li
|
|
||||||
*/
|
|
||||||
abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
|
||||||
|
|
||||||
protected ExactAFCalculation(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) {
|
|
||||||
super(UAC, nSamples, logger, verboseWriter);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected ExactAFCalculation(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, File exactCallsLog, Logger logger, PrintStream verboseWriter) {
|
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, exactCallsLog, logger, verboseWriter);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Wrapper class that compares two likelihoods associated with two alleles
|
|
||||||
*/
|
|
||||||
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
|
||||||
public double sum = 0.0;
|
|
||||||
public Allele allele;
|
|
||||||
|
|
||||||
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
|
||||||
|
|
||||||
public int compareTo(LikelihoodSum other) {
|
|
||||||
final double diff = sum - other.sum;
|
|
||||||
return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Unpack GenotypesContext into arraylist of doubel values
|
|
||||||
* @param GLs Input genotype context
|
|
||||||
* @return ArrayList of doubles corresponding to GL vectors
|
|
||||||
*/
|
|
||||||
protected static ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
|
||||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
|
|
||||||
|
|
||||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
|
||||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
|
||||||
if ( sample.hasLikelihoods() ) {
|
|
||||||
double[] gls = sample.getLikelihoods().getAsVector();
|
|
||||||
|
|
||||||
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
|
||||||
genotypeLikelihoods.add(gls);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return genotypeLikelihoods;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the maximum ACs we need to consider for each alt allele
|
|
||||||
*
|
|
||||||
* Walks over the genotypes in VC, and computes for each alt allele the maximum
|
|
||||||
* AC we need to consider in that alt allele dimension. Does the calculation
|
|
||||||
* based on the PLs in each genotype g, choosing to update the max AC for the
|
|
||||||
* alt alleles corresponding to that PL. Only takes the first lowest PL,
|
|
||||||
* if there are multiple genotype configurations with the same PL value. It
|
|
||||||
* takes values in the order of the alt alleles.
|
|
||||||
*
|
|
||||||
* @param vc the variant context we will compute max alt alleles for
|
|
||||||
* @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the
|
|
||||||
* first alt allele.
|
|
||||||
*/
|
|
||||||
@Ensures("result != null")
|
|
||||||
protected int[] computeMaxACs(final VariantContext vc) {
|
|
||||||
final int[] maxACs = new int[vc.getNAlleles()-1];
|
|
||||||
|
|
||||||
for ( final Genotype g : vc.getGenotypes() )
|
|
||||||
updateMaxACs(g, maxACs);
|
|
||||||
|
|
||||||
return maxACs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Update the maximum achievable allele counts in maxAC according to the PLs in g
|
|
||||||
*
|
|
||||||
* Selects the maximum genotype configuration from the PLs in g, and updates
|
|
||||||
* the maxAC for this configure. For example, if the lowest PL is for 0/1, updates
|
|
||||||
* the maxAC for the alt allele 1 by 1. If it's 1/1, update is 2. Works for
|
|
||||||
* many number of alt alleles (determined by length of maxACs).
|
|
||||||
*
|
|
||||||
* If the max PL occurs at 0/0, updates nothing
|
|
||||||
* Note that this function greedily takes the first min PL, so that if 0/1 and 1/1 have
|
|
||||||
* the same PL value, then updates the first one.
|
|
||||||
*
|
|
||||||
* Also, only will update 1 alt allele, so if 0/1 and 0/2 both have the same PL,
|
|
||||||
* then only first one (1) will be updated
|
|
||||||
*
|
|
||||||
* @param g the genotype to update
|
|
||||||
* @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele)
|
|
||||||
*/
|
|
||||||
@Requires({
|
|
||||||
"g != null",
|
|
||||||
"maxACs != null",
|
|
||||||
"MathUtils.sum(maxACs) >= 0"})
|
|
||||||
private void updateMaxACs(final Genotype g, final int[] maxACs) {
|
|
||||||
final int[] PLs = g.getLikelihoods().getAsPLs();
|
|
||||||
|
|
||||||
int minPLi = 0;
|
|
||||||
int minPL = PLs[0];
|
|
||||||
|
|
||||||
for ( int i = 0; i < PLs.length; i++ ) {
|
|
||||||
if ( PLs[i] < minPL ) {
|
|
||||||
minPL = PLs[i];
|
|
||||||
minPLi = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(minPLi);
|
|
||||||
updateMaxACs(maxACs, pair.alleleIndex1);
|
|
||||||
updateMaxACs(maxACs, pair.alleleIndex2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Simple helper. Update max alt alleles maxACs according to the allele index (where 0 == ref)
|
|
||||||
*
|
|
||||||
* If alleleI == 0 => doesn't update anything
|
|
||||||
* else maxACs[alleleI - 1]++
|
|
||||||
*
|
|
||||||
* @param maxACs array of max alt allele ACs
|
|
||||||
* @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles.
|
|
||||||
*/
|
|
||||||
@Requires({
|
|
||||||
"alleleI >= 0",
|
|
||||||
"(alleleI - 1) < maxACs.length",
|
|
||||||
"MathUtils.sum(maxACs) >= 0"})
|
|
||||||
private void updateMaxACs(final int[] maxACs, final int alleleI) {
|
|
||||||
if ( alleleI > 0 )
|
|
||||||
maxACs[alleleI-1]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// protected classes used to store exact model matrix columns
|
|
||||||
//
|
|
||||||
// -------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
|
||||||
|
|
||||||
// a wrapper around the int array so that we can make it hashable
|
|
||||||
protected static final class ExactACcounts {
|
|
||||||
|
|
||||||
protected final int[] counts;
|
|
||||||
private int hashcode = -1;
|
|
||||||
|
|
||||||
public ExactACcounts(final int[] counts) {
|
|
||||||
this.counts = counts;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int[] getCounts() {
|
|
||||||
return counts;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object obj) {
|
|
||||||
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
if ( hashcode == -1 )
|
|
||||||
hashcode = Arrays.hashCode(counts);
|
|
||||||
return hashcode;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
sb.append(counts[0]);
|
|
||||||
for ( int i = 1; i < counts.length; i++ ) {
|
|
||||||
sb.append("/");
|
|
||||||
sb.append(counts[i]);
|
|
||||||
}
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This class represents a column in the Exact AC calculation matrix
|
|
||||||
protected static final class ExactACset {
|
|
||||||
|
|
||||||
// the counts of the various alternate alleles which this column represents
|
|
||||||
final ExactACcounts ACcounts;
|
|
||||||
|
|
||||||
// the column of the matrix
|
|
||||||
final double[] log10Likelihoods;
|
|
||||||
|
|
||||||
int sum = -1;
|
|
||||||
|
|
||||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
|
||||||
this.ACcounts = ACcounts;
|
|
||||||
log10Likelihoods = new double[size];
|
|
||||||
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
|
|
||||||
}
|
|
||||||
|
|
||||||
// sum of all the non-reference alleles
|
|
||||||
public int getACsum() {
|
|
||||||
if ( sum == -1 ) {
|
|
||||||
sum = 0;
|
|
||||||
for ( int count : ACcounts.getCounts() )
|
|
||||||
sum += count;
|
|
||||||
}
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean equals(Object obj) {
|
|
||||||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static final class MaxLikelihoodSeen {
|
|
||||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
|
||||||
final int[] maxACsToConsider;
|
|
||||||
ExactACcounts ACsAtMax = null;
|
|
||||||
|
|
||||||
public MaxLikelihoodSeen() {
|
|
||||||
this(null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public MaxLikelihoodSeen(final int[] maxACsToConsider) {
|
|
||||||
this.maxACsToConsider = maxACsToConsider;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Update the maximum log10L seen, if log10LofKs is higher
|
|
||||||
*
|
|
||||||
* @param log10LofKs the likelihood of our current configuration state
|
|
||||||
*/
|
|
||||||
public void update(final double log10LofKs, final ExactACcounts ACs) {
|
|
||||||
if ( log10LofKs > maxLog10L ) {
|
|
||||||
this.maxLog10L = log10LofKs;
|
|
||||||
this.ACsAtMax = ACs;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Is the likelihood of configuration K too low to consider, related to the
|
|
||||||
* maximum likelihood seen already?
|
|
||||||
*
|
|
||||||
* @param log10LofK the log10 likelihood of the configuration we're considering analyzing
|
|
||||||
* @return true if the configuration cannot meaningfully contribute to our likelihood sum
|
|
||||||
*/
|
|
||||||
public boolean tooLowLikelihood(final double log10LofK) {
|
|
||||||
return log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Are all ACs in otherACs less than or equal to their corresponding ACs in the maxACsToConsider?
|
|
||||||
*
|
|
||||||
* @param otherACs the set of otherACs that we want to know if we should consider analyzing
|
|
||||||
* @return true if otherACs is a state worth considering, or false otherwise
|
|
||||||
*/
|
|
||||||
public boolean withinMaxACs(final ExactACcounts otherACs) {
|
|
||||||
if ( maxACsToConsider == null )
|
|
||||||
return true;
|
|
||||||
|
|
||||||
final int[] otherACcounts = otherACs.getCounts();
|
|
||||||
|
|
||||||
for ( int i = 0; i < maxACsToConsider.length; i++ ) {
|
|
||||||
// consider one more than the max AC to collect a bit more likelihood mass
|
|
||||||
if ( otherACcounts[i] > maxACsToConsider[i] + 1 )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
|
||||||
*/
|
|
||||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
|
||||||
if ( ACsAtMax == null )
|
|
||||||
return true;
|
|
||||||
|
|
||||||
final int[] myACcounts = this.ACsAtMax.getCounts();
|
|
||||||
final int[] otherACcounts = otherACs.getCounts();
|
|
||||||
|
|
||||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
|
||||||
if ( myACcounts[i] > otherACcounts[i] )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean abort( final double log10LofK, final ExactACcounts ACs ) {
|
|
||||||
return tooLowLikelihood(log10LofK) && isLowerAC(ACs);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AlleleFrequencyCalculation;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
|
|
@ -156,7 +157,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
|
Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
|
||||||
*/
|
*/
|
||||||
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
|
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
|
||||||
int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY;
|
public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)
|
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AlleleFrequencyCalculation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AlleleFrequencyCalculationResult;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
|
|
@ -104,8 +106,6 @@ public class UnifiedGenotyperEngine {
|
||||||
private final GenomeLocParser genomeLocParser;
|
private final GenomeLocParser genomeLocParser;
|
||||||
private final boolean BAQEnabledOnCMDLine;
|
private final boolean BAQEnabledOnCMDLine;
|
||||||
|
|
||||||
protected static final double SUM_GL_THRESH_NOCALL = VariantContextUtils.SUM_GL_THRESH_NOCALL;
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Public interface functions
|
// Public interface functions
|
||||||
|
|
@ -689,7 +689,7 @@ public class UnifiedGenotyperEngine {
|
||||||
return models;
|
return models;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
|
public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
|
||||||
|
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,11 +23,12 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -54,7 +55,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
||||||
/** The default model with the best performance in all cases */
|
/** The default model with the best performance in all cases */
|
||||||
EXACT("ExactAFCalculation");
|
EXACT("ExactAFCalculation");
|
||||||
|
|
||||||
final String implementationName;
|
public final String implementationName;
|
||||||
|
|
||||||
private Model(String implementationName) {
|
private Model(String implementationName) {
|
||||||
this.implementationName = implementationName;
|
this.implementationName = implementationName;
|
||||||
|
|
@ -101,7 +102,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
||||||
* Allocates a new results object. Useful for testing but slow in practice.
|
* Allocates a new results object. Useful for testing but slow in practice.
|
||||||
*/
|
*/
|
||||||
public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||||
final double[] log10AlleleFrequencyPriors) {
|
final double[] log10AlleleFrequencyPriors) {
|
||||||
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(getMaxAltAlleles()));
|
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(getMaxAltAlleles()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -165,9 +166,9 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
||||||
* @param result (pre-allocated) object to store results
|
* @param result (pre-allocated) object to store results
|
||||||
*/
|
*/
|
||||||
// TODO -- add consistent requires among args
|
// TODO -- add consistent requires among args
|
||||||
protected abstract void computeLog10PNonRef(final VariantContext vc,
|
public abstract void computeLog10PNonRef(final VariantContext vc,
|
||||||
final double[] log10AlleleFrequencyPriors,
|
final double[] log10AlleleFrequencyPriors,
|
||||||
final AlleleFrequencyCalculationResult result);
|
final AlleleFrequencyCalculationResult result);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Must be overridden by concrete subclasses
|
* Must be overridden by concrete subclasses
|
||||||
|
|
@ -178,10 +179,10 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
||||||
* @param ploidy
|
* @param ploidy
|
||||||
* @return GenotypesContext object
|
* @return GenotypesContext object
|
||||||
*/
|
*/
|
||||||
protected abstract GenotypesContext subsetAlleles(final VariantContext vc,
|
public abstract GenotypesContext subsetAlleles(final VariantContext vc,
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final boolean assignGenotypes,
|
final boolean assignGenotypes,
|
||||||
final int ploidy);
|
final int ploidy);
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -0,0 +1,109 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
public class ConstrainedDiploidExactAFCalculation extends DiploidExactAFCalculation {
|
||||||
|
public ConstrainedDiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
||||||
|
super(nSamples, maxAltAlleles);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ConstrainedDiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
|
super(UAC, N, logger, verboseWriter);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
||||||
|
final int[] maxACsToConsider = computeMaxACs(vc);
|
||||||
|
result.setAClimits(maxACsToConsider);
|
||||||
|
return new StateTracker(maxACsToConsider);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the maximum ACs we need to consider for each alt allele
|
||||||
|
*
|
||||||
|
* Walks over the genotypes in VC, and computes for each alt allele the maximum
|
||||||
|
* AC we need to consider in that alt allele dimension. Does the calculation
|
||||||
|
* based on the PLs in each genotype g, choosing to update the max AC for the
|
||||||
|
* alt alleles corresponding to that PL. Only takes the first lowest PL,
|
||||||
|
* if there are multiple genotype configurations with the same PL value. It
|
||||||
|
* takes values in the order of the alt alleles.
|
||||||
|
*
|
||||||
|
* @param vc the variant context we will compute max alt alleles for
|
||||||
|
* @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the
|
||||||
|
* first alt allele.
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
protected final int[] computeMaxACs(final VariantContext vc) {
|
||||||
|
final int[] maxACs = new int[vc.getNAlleles()-1];
|
||||||
|
|
||||||
|
for ( final Genotype g : vc.getGenotypes() )
|
||||||
|
updateMaxACs(g, maxACs);
|
||||||
|
|
||||||
|
return maxACs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the maximum achievable allele counts in maxAC according to the PLs in g
|
||||||
|
*
|
||||||
|
* Selects the maximum genotype configuration from the PLs in g, and updates
|
||||||
|
* the maxAC for this configure. For example, if the lowest PL is for 0/1, updates
|
||||||
|
* the maxAC for the alt allele 1 by 1. If it's 1/1, update is 2. Works for
|
||||||
|
* many number of alt alleles (determined by length of maxACs).
|
||||||
|
*
|
||||||
|
* If the max PL occurs at 0/0, updates nothing
|
||||||
|
* Note that this function greedily takes the first min PL, so that if 0/1 and 1/1 have
|
||||||
|
* the same PL value, then updates the first one.
|
||||||
|
*
|
||||||
|
* Also, only will update 1 alt allele, so if 0/1 and 0/2 both have the same PL,
|
||||||
|
* then only first one (1) will be updated
|
||||||
|
*
|
||||||
|
* @param g the genotype to update
|
||||||
|
* @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele)
|
||||||
|
*/
|
||||||
|
@Requires({
|
||||||
|
"g != null",
|
||||||
|
"maxACs != null",
|
||||||
|
"MathUtils.sum(maxACs) >= 0"})
|
||||||
|
private void updateMaxACs(final Genotype g, final int[] maxACs) {
|
||||||
|
final int[] PLs = g.getLikelihoods().getAsPLs();
|
||||||
|
|
||||||
|
int minPLi = 0;
|
||||||
|
int minPL = PLs[0];
|
||||||
|
|
||||||
|
for ( int i = 0; i < PLs.length; i++ ) {
|
||||||
|
if ( PLs[i] < minPL ) {
|
||||||
|
minPL = PLs[i];
|
||||||
|
minPLi = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(minPLi);
|
||||||
|
updateMaxACs(maxACs, pair.alleleIndex1);
|
||||||
|
updateMaxACs(maxACs, pair.alleleIndex2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple helper. Update max alt alleles maxACs according to the allele index (where 0 == ref)
|
||||||
|
*
|
||||||
|
* If alleleI == 0 => doesn't update anything
|
||||||
|
* else maxACs[alleleI - 1]++
|
||||||
|
*
|
||||||
|
* @param maxACs array of max alt allele ACs
|
||||||
|
* @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles.
|
||||||
|
*/
|
||||||
|
@Requires({
|
||||||
|
"alleleI >= 0",
|
||||||
|
"(alleleI - 1) < maxACs.length",
|
||||||
|
"MathUtils.sum(maxACs) >= 0"})
|
||||||
|
private void updateMaxACs(final int[] maxACs, final int alleleI) {
|
||||||
|
if ( alleleI > 0 )
|
||||||
|
maxACs[alleleI-1]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -23,9 +23,10 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
|
@ -41,7 +42,7 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result);
|
protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void computeLog10PNonRef(final VariantContext vc,
|
public void computeLog10PNonRef(final VariantContext vc,
|
||||||
|
|
@ -62,10 +63,10 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
final int[] zeroCounts = new int[numAlternateAlleles];
|
final int[] zeroCounts = new int[numAlternateAlleles];
|
||||||
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
|
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
|
||||||
ACqueue.add(zeroSet);
|
ACqueue.add(zeroSet);
|
||||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
|
||||||
|
|
||||||
// keep processing while we have AC conformations that need to be calculated
|
// keep processing while we have AC conformations that need to be calculated
|
||||||
final MaxLikelihoodSeen maxLikelihoodSeen = makeMaxLikelihood(vc, result);
|
final StateTracker stateTracker = makeMaxLikelihood(vc, result);
|
||||||
|
|
||||||
while ( !ACqueue.isEmpty() ) {
|
while ( !ACqueue.isEmpty() ) {
|
||||||
result.incNEvaluations(); // keep track of the number of evaluations
|
result.incNEvaluations(); // keep track of the number of evaluations
|
||||||
|
|
@ -73,14 +74,14 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
// compute log10Likelihoods
|
// compute log10Likelihoods
|
||||||
final ExactACset set = ACqueue.remove();
|
final ExactACset set = ACqueue.remove();
|
||||||
|
|
||||||
if ( maxLikelihoodSeen.withinMaxACs(set.ACcounts) ) {
|
if ( stateTracker.withinMaxACs(set.getACcounts()) ) {
|
||||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, stateTracker, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||||
|
|
||||||
// adjust max likelihood seen if needed
|
// adjust max likelihood seen if needed
|
||||||
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
|
stateTracker.update(log10LofKs, set.getACcounts());
|
||||||
|
|
||||||
// clean up memory
|
// clean up memory
|
||||||
indexesToACset.remove(set.ACcounts);
|
indexesToACset.remove(set.getACcounts());
|
||||||
//if ( DEBUG )
|
//if ( DEBUG )
|
||||||
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
||||||
}
|
}
|
||||||
|
|
@ -155,7 +156,7 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
|
|
||||||
private double calculateAlleleCountConformation(final ExactACset set,
|
private double calculateAlleleCountConformation(final ExactACset set,
|
||||||
final ArrayList<double[]> genotypeLikelihoods,
|
final ArrayList<double[]> genotypeLikelihoods,
|
||||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
final StateTracker stateTracker,
|
||||||
final int numChr,
|
final int numChr,
|
||||||
final LinkedList<ExactACset> ACqueue,
|
final LinkedList<ExactACset> ACqueue,
|
||||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||||
|
|
@ -168,10 +169,10 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
// compute the log10Likelihoods
|
// compute the log10Likelihoods
|
||||||
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, result);
|
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, result);
|
||||||
|
|
||||||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||||
|
|
||||||
// can we abort early because the log10Likelihoods are so small?
|
// can we abort early because the log10Likelihoods are so small?
|
||||||
if ( maxLikelihoodSeen.abort(log10LofK, set.ACcounts) ) {
|
if ( stateTracker.abort(log10LofK, set.getACcounts()) ) {
|
||||||
//if ( DEBUG )
|
//if ( DEBUG )
|
||||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
|
|
@ -182,15 +183,15 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
|
|
||||||
final int numAltAlleles = set.ACcounts.getCounts().length;
|
final int numAltAlleles = set.getACcounts().getCounts().length;
|
||||||
|
|
||||||
// add conformations for the k+1 case
|
// add conformations for the k+1 case
|
||||||
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
|
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
|
||||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
final int[] ACcountsClone = set.getACcounts().getCounts().clone();
|
||||||
ACcountsClone[allele]++;
|
ACcountsClone[allele]++;
|
||||||
// to get to this conformation, a sample would need to be AB (remember that ref=0)
|
// to get to this conformation, a sample would need to be AB (remember that ref=0)
|
||||||
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
|
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
|
||||||
updateACset(maxLikelihoodSeen, ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
updateACset(stateTracker, ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||||
}
|
}
|
||||||
|
|
||||||
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||||
|
|
@ -200,7 +201,7 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
|
|
||||||
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
|
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
|
||||||
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
|
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
|
||||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
final int[] ACcountsClone = set.getACcounts().getCounts().clone();
|
||||||
ACcountsClone[allele_i]++;
|
ACcountsClone[allele_i]++;
|
||||||
ACcountsClone[allele_j]++;
|
ACcountsClone[allele_j]++;
|
||||||
|
|
||||||
|
|
@ -215,9 +216,9 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
|
|
||||||
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
|
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
|
||||||
for ( DependentSet dependent : differentAlleles )
|
for ( DependentSet dependent : differentAlleles )
|
||||||
updateACset(maxLikelihoodSeen, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
updateACset(stateTracker, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||||
for ( DependentSet dependent : sameAlleles )
|
for ( DependentSet dependent : sameAlleles )
|
||||||
updateACset(maxLikelihoodSeen, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
updateACset(stateTracker, dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||||
}
|
}
|
||||||
|
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
|
|
@ -225,7 +226,7 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
|
|
||||||
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
||||||
// also pushes its value to the given callingSetIndex.
|
// also pushes its value to the given callingSetIndex.
|
||||||
private void updateACset(final MaxLikelihoodSeen maxLikelihoodSeen,
|
private void updateACset(final StateTracker stateTracker,
|
||||||
final int[] newSetCounts,
|
final int[] newSetCounts,
|
||||||
final int numChr,
|
final int numChr,
|
||||||
final ExactACset dependentSet,
|
final ExactACset dependentSet,
|
||||||
|
|
@ -251,15 +252,15 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
final double[] log10AlleleFrequencyPriors,
|
final double[] log10AlleleFrequencyPriors,
|
||||||
final AlleleFrequencyCalculationResult result) {
|
final AlleleFrequencyCalculationResult result) {
|
||||||
|
|
||||||
set.log10Likelihoods[0] = 0.0; // the zero case
|
set.getLog10Likelihoods()[0] = 0.0; // the zero case
|
||||||
final int totalK = set.getACsum();
|
final int totalK = set.getACsum();
|
||||||
|
|
||||||
// special case for k = 0 over all k
|
// special case for k = 0 over all k
|
||||||
if ( totalK == 0 ) {
|
if ( totalK == 0 ) {
|
||||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ )
|
for ( int j = 1; j < set.getLog10Likelihoods().length; j++ )
|
||||||
set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
|
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
|
||||||
|
|
||||||
final double log10Lof0 = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||||
result.setLog10LikelihoodOfAFzero(log10Lof0);
|
result.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||||
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||||
return;
|
return;
|
||||||
|
|
@ -268,29 +269,29 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
// if we got here, then k > 0 for at least one k.
|
// if we got here, then k > 0 for at least one k.
|
||||||
// the non-AA possible conformations were already dealt with by pushes from dependent sets;
|
// the non-AA possible conformations were already dealt with by pushes from dependent sets;
|
||||||
// now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
|
// now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
|
||||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
for ( int j = 1; j < set.getLog10Likelihoods().length; j++ ) {
|
||||||
|
|
||||||
if ( totalK < 2*j-1 ) {
|
if ( totalK < 2*j-1 ) {
|
||||||
final double[] gl = genotypeLikelihoods.get(j);
|
final double[] gl = genotypeLikelihoods.get(j);
|
||||||
final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.getLog10Likelihoods()[j-1] + gl[HOM_REF_INDEX];
|
||||||
set.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[j], conformationValue);
|
set.getLog10Likelihoods()[j] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[j], conformationValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||||
set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator;
|
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j] - logDenominator;
|
||||||
}
|
}
|
||||||
|
|
||||||
double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
|
||||||
|
|
||||||
// update the MLE if necessary
|
// update the MLE if necessary
|
||||||
result.updateMLEifNeeded(log10LofK, set.ACcounts.counts);
|
result.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
|
||||||
|
|
||||||
// apply the priors over each alternate allele
|
// apply the priors over each alternate allele
|
||||||
for ( final int ACcount : set.ACcounts.getCounts() ) {
|
for ( final int ACcount : set.getACcounts().getCounts() ) {
|
||||||
if ( ACcount > 0 )
|
if ( ACcount > 0 )
|
||||||
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
||||||
}
|
}
|
||||||
result.updateMAPifNeeded(log10LofK, set.ACcounts.counts);
|
result.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void pushData(final ExactACset targetSet,
|
private void pushData(final ExactACset targetSet,
|
||||||
|
|
@ -299,13 +300,13 @@ public abstract class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
final ArrayList<double[]> genotypeLikelihoods) {
|
final ArrayList<double[]> genotypeLikelihoods) {
|
||||||
final int totalK = targetSet.getACsum();
|
final int totalK = targetSet.getACsum();
|
||||||
|
|
||||||
for ( int j = 1; j < targetSet.log10Likelihoods.length; j++ ) {
|
for ( int j = 1; j < targetSet.getLog10Likelihoods().length; j++ ) {
|
||||||
|
|
||||||
if ( totalK <= 2*j ) { // skip impossible conformations
|
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||||
final double[] gl = genotypeLikelihoods.get(j);
|
final double[] gl = genotypeLikelihoods.get(j);
|
||||||
final double conformationValue =
|
final double conformationValue =
|
||||||
determineCoefficient(PLsetIndex, j, targetSet.ACcounts.getCounts(), totalK) + dependentSet.log10Likelihoods[j-1] + gl[PLsetIndex];
|
determineCoefficient(PLsetIndex, j, targetSet.getACcounts().getCounts(), totalK) + dependentSet.getLog10Likelihoods()[j-1] + gl[PLsetIndex];
|
||||||
targetSet.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(targetSet.log10Likelihoods[j], conformationValue);
|
targetSet.getLog10Likelihoods()[j] = MathUtils.approximateLog10SumLog10(targetSet.getLog10Likelihoods()[j], conformationValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created with IntelliJ IDEA.
|
||||||
|
* User: depristo
|
||||||
|
* Date: 10/5/12
|
||||||
|
* Time: 2:54 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/ // a wrapper around the int array so that we can make it hashable
|
||||||
|
public final class ExactACcounts {
|
||||||
|
private final int[] counts;
|
||||||
|
private int hashcode = -1;
|
||||||
|
|
||||||
|
public ExactACcounts(final int[] counts) {
|
||||||
|
this.counts = counts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int[] getCounts() {
|
||||||
|
return counts;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return (obj instanceof ExactACcounts) && Arrays.equals(getCounts(), ((ExactACcounts) obj).getCounts());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
if ( hashcode == -1 )
|
||||||
|
hashcode = Arrays.hashCode(getCounts());
|
||||||
|
return hashcode;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
sb.append(getCounts()[0]);
|
||||||
|
for ( int i = 1; i < getCounts().length; i++ ) {
|
||||||
|
sb.append("/");
|
||||||
|
sb.append(getCounts()[i]);
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created with IntelliJ IDEA.
|
||||||
|
* User: depristo
|
||||||
|
* Date: 10/5/12
|
||||||
|
* Time: 2:53 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/ // This class represents a column in the Exact AC calculation matrix
|
||||||
|
public final class ExactACset {
|
||||||
|
// the counts of the various alternate alleles which this column represents
|
||||||
|
private final ExactACcounts ACcounts;
|
||||||
|
|
||||||
|
// the column of the matrix
|
||||||
|
private final double[] log10Likelihoods;
|
||||||
|
|
||||||
|
int sum = -1;
|
||||||
|
|
||||||
|
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||||
|
this.ACcounts = ACcounts;
|
||||||
|
log10Likelihoods = new double[size];
|
||||||
|
Arrays.fill(getLog10Likelihoods(), Double.NEGATIVE_INFINITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
// sum of all the non-reference alleles
|
||||||
|
public int getACsum() {
|
||||||
|
if ( sum == -1 ) {
|
||||||
|
sum = 0;
|
||||||
|
for ( int count : getACcounts().getCounts() )
|
||||||
|
sum += count;
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return (obj instanceof ExactACset) && getACcounts().equals(((ExactACset)obj).getACcounts());
|
||||||
|
}
|
||||||
|
|
||||||
|
public ExactACcounts getACcounts() {
|
||||||
|
return ACcounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double[] getLog10Likelihoods() {
|
||||||
|
return log10Likelihoods;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,89 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uses the Exact calculation of Heng Li
|
||||||
|
*/
|
||||||
|
abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
||||||
|
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||||
|
|
||||||
|
protected ExactAFCalculation(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) {
|
||||||
|
super(UAC, nSamples, logger, verboseWriter);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ExactAFCalculation(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, File exactCallsLog, Logger logger, PrintStream verboseWriter) {
|
||||||
|
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, exactCallsLog, logger, verboseWriter);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper class that compares two likelihoods associated with two alleles
|
||||||
|
*/
|
||||||
|
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
||||||
|
public double sum = 0.0;
|
||||||
|
public Allele allele;
|
||||||
|
|
||||||
|
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
||||||
|
|
||||||
|
public int compareTo(LikelihoodSum other) {
|
||||||
|
final double diff = sum - other.sum;
|
||||||
|
return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unpack GenotypesContext into arraylist of doubel values
|
||||||
|
* @param GLs Input genotype context
|
||||||
|
* @return ArrayList of doubles corresponding to GL vectors
|
||||||
|
*/
|
||||||
|
protected static ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||||
|
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
|
||||||
|
|
||||||
|
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||||
|
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||||
|
if ( sample.hasLikelihoods() ) {
|
||||||
|
double[] gls = sample.getLikelihoods().getAsVector();
|
||||||
|
|
||||||
|
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||||
|
genotypeLikelihoods.add(gls);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return genotypeLikelihoods;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -14,7 +15,7 @@ public class ReferenceDiploidExactAFCalculation extends DiploidExactAFCalculatio
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected MaxLikelihoodSeen makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AlleleFrequencyCalculationResult result) {
|
||||||
return new ExactAFCalculation.MaxLikelihoodSeen();
|
return new StateTracker();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,96 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Keeps track of the best state seen by the exact model and the max states to visit
|
||||||
|
* allowing us to abort the search before we visit the entire matrix of AC x samples
|
||||||
|
*/
|
||||||
|
final class StateTracker {
|
||||||
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
|
|
||||||
|
final private int[] maxACsToConsider;
|
||||||
|
|
||||||
|
private ExactACcounts ACsAtMax = null;
|
||||||
|
private double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
|
public StateTracker() {
|
||||||
|
this(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public StateTracker(final int[] maxACsToConsider) {
|
||||||
|
this.maxACsToConsider = maxACsToConsider;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the maximum log10L seen, if log10LofKs is higher
|
||||||
|
*
|
||||||
|
* @param log10LofKs the likelihood of our current configuration state
|
||||||
|
*/
|
||||||
|
public void update(final double log10LofKs, final ExactACcounts ACs) {
|
||||||
|
if ( log10LofKs > getMaxLog10L()) {
|
||||||
|
this.setMaxLog10L(log10LofKs);
|
||||||
|
this.ACsAtMax = ACs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is the likelihood of configuration K too low to consider, related to the
|
||||||
|
* maximum likelihood seen already?
|
||||||
|
*
|
||||||
|
* @param log10LofK the log10 likelihood of the configuration we're considering analyzing
|
||||||
|
* @return true if the configuration cannot meaningfully contribute to our likelihood sum
|
||||||
|
*/
|
||||||
|
public boolean tooLowLikelihood(final double log10LofK) {
|
||||||
|
return log10LofK < getMaxLog10L() - MAX_LOG10_ERROR_TO_STOP_EARLY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are all ACs in otherACs less than or equal to their corresponding ACs in the maxACsToConsider?
|
||||||
|
*
|
||||||
|
* @param otherACs the set of otherACs that we want to know if we should consider analyzing
|
||||||
|
* @return true if otherACs is a state worth considering, or false otherwise
|
||||||
|
*/
|
||||||
|
public boolean withinMaxACs(final ExactACcounts otherACs) {
|
||||||
|
if ( maxACsToConsider == null )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
final int[] otherACcounts = otherACs.getCounts();
|
||||||
|
|
||||||
|
for ( int i = 0; i < maxACsToConsider.length; i++ ) {
|
||||||
|
// consider one more than the max AC to collect a bit more likelihood mass
|
||||||
|
if ( otherACcounts[i] > maxACsToConsider[i] + 1 )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||||
|
*/
|
||||||
|
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||||
|
if ( ACsAtMax == null )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
final int[] myACcounts = this.ACsAtMax.getCounts();
|
||||||
|
final int[] otherACcounts = otherACs.getCounts();
|
||||||
|
|
||||||
|
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||||
|
if ( myACcounts[i] > otherACcounts[i] )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean abort( final double log10LofK, final ExactACcounts ACs ) {
|
||||||
|
return tooLowLikelihood(log10LofK) && isLowerAC(ACs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getMaxLog10L() {
|
||||||
|
return maxLog10L;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxLog10L(double maxLog10L) {
|
||||||
|
this.maxLog10L = maxLog10L;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -23,9 +23,9 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AlleleFrequencyCalculationResult;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidExactAFCalculation;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.DiploidExactAFCalculation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ReferenceDiploidExactAFCalculation;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ReferenceDiploidExactAFCalculation;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue