Clean up AlleleFrequencyCalculation classes
-- Added a true base class that only does truly common tasks (like manage call logging) -- This base class provides the only public method (getLog10PNonRef) and calls into a protected compute function that's abstract -- Split ExactAF into superclass ExactAF with common data structures and two subclasses: DiploidExact and GeneralPloidyExact -- Added an abstract reduceScope function that manages the simplification of the input VariantContext in the case where there are too many alleles or other constraints require us to only attempt a smaller computation -- All unit tests pass
This commit is contained in:
parent
1c52db4cdd
commit
3e01a76590
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
||||||
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
|
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
|
||||||
final protected UnifiedArgumentCollection UAC;
|
final protected UnifiedArgumentCollection UAC;
|
||||||
|
|
||||||
|
|
@ -42,35 +42,38 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
private final static boolean VERBOSE = false;
|
private final static boolean VERBOSE = false;
|
||||||
|
|
||||||
protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected GeneralPloidyExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
ploidy = UAC.samplePloidy;
|
ploidy = UAC.samplePloidy;
|
||||||
this.UAC = UAC;
|
this.UAC = UAC;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Allele> getLog10PNonRef(final VariantContext vc,
|
@Override
|
||||||
final double[] log10AlleleFrequencyPriors,
|
protected VariantContext reduceScope(VariantContext vc) {
|
||||||
final AlleleFrequencyCalculationResult result) {
|
|
||||||
|
|
||||||
GenotypesContext GLs = vc.getGenotypes();
|
|
||||||
List<Allele> alleles = vc.getAlleles();
|
|
||||||
|
|
||||||
// don't try to genotype too many alternate alleles
|
// don't try to genotype too many alternate alleles
|
||||||
if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) {
|
if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) {
|
||||||
logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||||
|
|
||||||
alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
|
final List<Allele> alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
|
||||||
alleles.add(vc.getReference());
|
alleles.add(vc.getReference());
|
||||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE, ploidy));
|
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE, ploidy));
|
||||||
|
|
||||||
|
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||||
|
builder.alleles(alleles);
|
||||||
|
builder.genotypes(subsetAlleles(vc, alleles, false, ploidy));
|
||||||
|
return builder.make();
|
||||||
|
|
||||||
GLs = subsetAlleles(vc, alleles, false, ploidy);
|
} else {
|
||||||
|
return vc;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
combineSinglePools(GLs, alleles.size(), ploidy, log10AlleleFrequencyPriors, result);
|
@Override
|
||||||
|
public void computeLog10PNonRef(final VariantContext vc,
|
||||||
return alleles;
|
final double[] log10AlleleFrequencyPriors,
|
||||||
|
final AlleleFrequencyCalculationResult result) {
|
||||||
|
combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -491,15 +491,15 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
// If neighbors fall below maximum - threshold, we don't queue up THEIR own neighbors
|
// If neighbors fall below maximum - threshold, we don't queue up THEIR own neighbors
|
||||||
// and we repeat until queue is empty
|
// and we repeat until queue is empty
|
||||||
// queue of AC conformations to process
|
// queue of AC conformations to process
|
||||||
final LinkedList<AlleleFrequencyCalculationModel.ExactACset> ACqueue = new LinkedList<AlleleFrequencyCalculationModel.ExactACset>();
|
final LinkedList<ExactAFCalculation.ExactACset> ACqueue = new LinkedList<ExactAFCalculation.ExactACset>();
|
||||||
// mapping of ExactACset indexes to the objects
|
// mapping of ExactACset indexes to the objects
|
||||||
final HashMap<AlleleFrequencyCalculationModel.ExactACcounts, AlleleFrequencyCalculationModel.ExactACset> indexesToACset = new HashMap<AlleleFrequencyCalculationModel.ExactACcounts, AlleleFrequencyCalculationModel.ExactACset>(likelihoodDim);
|
final HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset> indexesToACset = new HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset>(likelihoodDim);
|
||||||
// add AC=0 to the queue
|
// add AC=0 to the queue
|
||||||
final int[] zeroCounts = new int[nAlleles];
|
final int[] zeroCounts = new int[nAlleles];
|
||||||
zeroCounts[0] = numChromosomes;
|
zeroCounts[0] = numChromosomes;
|
||||||
|
|
||||||
AlleleFrequencyCalculationModel.ExactACset zeroSet =
|
ExactAFCalculation.ExactACset zeroSet =
|
||||||
new AlleleFrequencyCalculationModel.ExactACset(1, new AlleleFrequencyCalculationModel.ExactACcounts(zeroCounts));
|
new ExactAFCalculation.ExactACset(1, new ExactAFCalculation.ExactACcounts(zeroCounts));
|
||||||
|
|
||||||
ACqueue.add(zeroSet);
|
ACqueue.add(zeroSet);
|
||||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||||
|
|
@ -508,7 +508,7 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
while ( !ACqueue.isEmpty() ) {
|
while ( !ACqueue.isEmpty() ) {
|
||||||
// compute log10Likelihoods
|
// compute log10Likelihoods
|
||||||
final AlleleFrequencyCalculationModel.ExactACset ACset = ACqueue.remove();
|
final ExactAFCalculation.ExactACset ACset = ACqueue.remove();
|
||||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, errorModel, alleleList, numObservations, maxLog10L, ACqueue, indexesToACset, pileup);
|
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, errorModel, alleleList, numObservations, maxLog10L, ACqueue, indexesToACset, pileup);
|
||||||
|
|
||||||
// adjust max likelihood seen if needed
|
// adjust max likelihood seen if needed
|
||||||
|
|
@ -525,8 +525,8 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
int plIdx = 0;
|
int plIdx = 0;
|
||||||
SumIterator iterator = new SumIterator(nAlleles, numChromosomes);
|
SumIterator iterator = new SumIterator(nAlleles, numChromosomes);
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
AlleleFrequencyCalculationModel.ExactACset ACset =
|
ExactAFCalculation.ExactACset ACset =
|
||||||
new AlleleFrequencyCalculationModel.ExactACset(1, new AlleleFrequencyCalculationModel.ExactACcounts(iterator.getCurrentVector()));
|
new ExactAFCalculation.ExactACset(1, new ExactAFCalculation.ExactACcounts(iterator.getCurrentVector()));
|
||||||
// for observed base X, add Q(jX,k) to likelihood vector for all k in error model
|
// for observed base X, add Q(jX,k) to likelihood vector for all k in error model
|
||||||
//likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k))
|
//likelihood(jA,jC,jG,jT) = logsum(logPr (errorModel[k],nA*Q(jA,k) + nC*Q(jC,k) + nG*Q(jG,k) + nT*Q(jT,k))
|
||||||
getLikelihoodOfConformation(ACset, errorModel, alleleList, numObservations, pileup);
|
getLikelihoodOfConformation(ACset, errorModel, alleleList, numObservations, pileup);
|
||||||
|
|
@ -540,14 +540,14 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateACConformationAndUpdateQueue(final ExactAFCalculationModel.ExactACset set,
|
private double calculateACConformationAndUpdateQueue(final DiploidExactAFCalculation.ExactACset set,
|
||||||
final ErrorModel errorModel,
|
final ErrorModel errorModel,
|
||||||
final List<Allele> alleleList,
|
final List<Allele> alleleList,
|
||||||
final List<Integer> numObservations,
|
final List<Integer> numObservations,
|
||||||
final double maxLog10L,
|
final double maxLog10L,
|
||||||
final LinkedList<AlleleFrequencyCalculationModel.ExactACset> ACqueue,
|
final LinkedList<ExactAFCalculation.ExactACset> ACqueue,
|
||||||
final HashMap<AlleleFrequencyCalculationModel.ExactACcounts,
|
final HashMap<ExactAFCalculation.ExactACcounts,
|
||||||
AlleleFrequencyCalculationModel.ExactACset> indexesToACset,
|
ExactAFCalculation.ExactACset> indexesToACset,
|
||||||
final ReadBackedPileup pileup) {
|
final ReadBackedPileup pileup) {
|
||||||
// compute likelihood of set
|
// compute likelihood of set
|
||||||
getLikelihoodOfConformation(set, errorModel, alleleList, numObservations, pileup);
|
getLikelihoodOfConformation(set, errorModel, alleleList, numObservations, pileup);
|
||||||
|
|
@ -597,7 +597,7 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
* @param numObservations Number of observations for each allele
|
* @param numObservations Number of observations for each allele
|
||||||
* @param pileup Read backed pileup in case it's necessary
|
* @param pileup Read backed pileup in case it's necessary
|
||||||
*/
|
*/
|
||||||
public abstract void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset,
|
public abstract void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
||||||
final ErrorModel errorModel,
|
final ErrorModel errorModel,
|
||||||
final List<Allele> alleleList,
|
final List<Allele> alleleList,
|
||||||
final List<Integer> numObservations,
|
final List<Integer> numObservations,
|
||||||
|
|
@ -608,12 +608,12 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||||
|
|
||||||
// Static methods
|
// Static methods
|
||||||
public static void updateACset(final int[] newSetCounts,
|
public static void updateACset(final int[] newSetCounts,
|
||||||
final LinkedList<AlleleFrequencyCalculationModel.ExactACset> ACqueue,
|
final LinkedList<ExactAFCalculation.ExactACset> ACqueue,
|
||||||
final HashMap<AlleleFrequencyCalculationModel.ExactACcounts, AlleleFrequencyCalculationModel.ExactACset> indexesToACset) {
|
final HashMap<ExactAFCalculation.ExactACcounts, ExactAFCalculation.ExactACset> indexesToACset) {
|
||||||
|
|
||||||
final AlleleFrequencyCalculationModel.ExactACcounts index = new AlleleFrequencyCalculationModel.ExactACcounts(newSetCounts);
|
final ExactAFCalculation.ExactACcounts index = new ExactAFCalculation.ExactACcounts(newSetCounts);
|
||||||
if ( !indexesToACset.containsKey(index) ) {
|
if ( !indexesToACset.containsKey(index) ) {
|
||||||
AlleleFrequencyCalculationModel.ExactACset newSet = new AlleleFrequencyCalculationModel.ExactACset(1, index);
|
ExactAFCalculation.ExactACset newSet = new ExactAFCalculation.ExactACset(1, index);
|
||||||
indexesToACset.put(index, newSet);
|
indexesToACset.put(index, newSet);
|
||||||
ACqueue.add(newSet);
|
ACqueue.add(newSet);
|
||||||
if (VERBOSE)
|
if (VERBOSE)
|
||||||
|
|
|
||||||
|
|
@ -188,7 +188,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
||||||
* @param alleleList List of alleles
|
* @param alleleList List of alleles
|
||||||
* @param numObservations Number of observations for each allele in alleleList
|
* @param numObservations Number of observations for each allele in alleleList
|
||||||
*/
|
*/
|
||||||
public void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset,
|
public void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
||||||
final ErrorModel errorModel,
|
final ErrorModel errorModel,
|
||||||
final List<Allele> alleleList,
|
final List<Allele> alleleList,
|
||||||
final List<Integer> numObservations,
|
final List<Integer> numObservations,
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,10 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import static java.lang.Math.log10;
|
import static java.lang.Math.log10;
|
||||||
import static java.lang.Math.pow;
|
import static java.lang.Math.pow;
|
||||||
|
|
@ -218,7 +221,7 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
||||||
* @param alleleList List of alleles
|
* @param alleleList List of alleles
|
||||||
* @param numObservations Number of observations for each allele in alleleList
|
* @param numObservations Number of observations for each allele in alleleList
|
||||||
*/
|
*/
|
||||||
public void getLikelihoodOfConformation(final AlleleFrequencyCalculationModel.ExactACset ACset,
|
public void getLikelihoodOfConformation(final ExactAFCalculation.ExactACset ACset,
|
||||||
final ErrorModel errorModel,
|
final ErrorModel errorModel,
|
||||||
final List<Allele> alleleList,
|
final List<Allele> alleleList,
|
||||||
final List<Integer> numObservations,
|
final List<Integer> numObservations,
|
||||||
|
|
|
||||||
|
|
@ -141,7 +141,7 @@ public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
|
||||||
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
|
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
|
||||||
double[] priors = new double[len]; // flat priors
|
double[] priors = new double[len]; // flat priors
|
||||||
|
|
||||||
GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
|
GeneralPloidyExactAFCalculation.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
|
||||||
int nameIndex = 1;
|
int nameIndex = 1;
|
||||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||||
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,230 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
|
||||||
|
*/
|
||||||
|
public abstract class AlleleFrequencyCalculation implements Cloneable {
|
||||||
|
private final static Logger defaultLogger = Logger.getLogger(AlleleFrequencyCalculation.class);
|
||||||
|
|
||||||
|
public enum Model {
|
||||||
|
/** The default model with the best performance in all cases */
|
||||||
|
EXACT("ExactAFCalculation");
|
||||||
|
|
||||||
|
final String implementationName;
|
||||||
|
|
||||||
|
private Model(String implementationName) {
|
||||||
|
this.implementationName = implementationName;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int nSamples;
|
||||||
|
protected int MAX_ALTERNATE_ALLELES_TO_GENOTYPE;
|
||||||
|
protected boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS;
|
||||||
|
|
||||||
|
protected Logger logger;
|
||||||
|
protected PrintStream verboseWriter;
|
||||||
|
|
||||||
|
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
|
private SimpleTimer callTimer = new SimpleTimer();
|
||||||
|
private PrintStream callReport = null;
|
||||||
|
|
||||||
|
protected AlleleFrequencyCalculation(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) {
|
||||||
|
this(nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS, UAC.exactCallsLog, logger, verboseWriter);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected AlleleFrequencyCalculation(final int nSamples,
|
||||||
|
final int maxAltAlleles,
|
||||||
|
final boolean capMaxAltsForIndels,
|
||||||
|
final File exactCallsLog,
|
||||||
|
final Logger logger,
|
||||||
|
final PrintStream verboseWriter) {
|
||||||
|
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
||||||
|
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
|
||||||
|
|
||||||
|
this.nSamples = nSamples;
|
||||||
|
this.MAX_ALTERNATE_ALLELES_TO_GENOTYPE = maxAltAlleles;
|
||||||
|
this.CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = capMaxAltsForIndels;
|
||||||
|
this.logger = logger == null ? defaultLogger : logger;
|
||||||
|
this.verboseWriter = verboseWriter;
|
||||||
|
if ( exactCallsLog != null )
|
||||||
|
initializeOutputFile(exactCallsLog);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see #getLog10PNonRef(org.broadinstitute.sting.utils.variantcontext.VariantContext, double[], AlleleFrequencyCalculationResult)
|
||||||
|
*
|
||||||
|
* Allocates a new results object. Useful for testing but slow in practice.
|
||||||
|
*/
|
||||||
|
public AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||||
|
final double[] log10AlleleFrequencyPriors) {
|
||||||
|
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
|
||||||
|
*
|
||||||
|
* @param vc the VariantContext holding the alleles and sample information
|
||||||
|
* @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i)
|
||||||
|
* @param result a pre-allocated (for efficiency) object to hold the result of the calculation
|
||||||
|
* @return result (for programming convenience)
|
||||||
|
*/
|
||||||
|
public AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||||
|
final double[] log10AlleleFrequencyPriors,
|
||||||
|
final AlleleFrequencyCalculationResult result) {
|
||||||
|
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
|
||||||
|
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
|
||||||
|
if ( result == null ) throw new IllegalArgumentException("Results object cannot be null");
|
||||||
|
|
||||||
|
final VariantContext vcWorking = reduceScope(vc);
|
||||||
|
result.setAllelesUsedInGenotyping(vcWorking.getAlleles());
|
||||||
|
|
||||||
|
callTimer.start();
|
||||||
|
computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors, result);
|
||||||
|
final long nanoTime = callTimer.getElapsedTimeNano();
|
||||||
|
|
||||||
|
if ( callReport != null )
|
||||||
|
printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result.getLog10PosteriorOfAFzero());
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Abstract methods that should be implemented by concrete implementations
|
||||||
|
// to actually calculate the AF
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Look at VC and perhaps return a new one of reduced complexity, if that's necessary
|
||||||
|
*
|
||||||
|
* Used before the call to computeLog10PNonRef to simply the calculation job at hand,
|
||||||
|
* if vc exceeds bounds. For example, if VC has 100 alt alleles this function
|
||||||
|
* may decide to only genotype the best 2 of them.
|
||||||
|
*
|
||||||
|
* @param vc the initial VC provided by the caller to this AFcalculation
|
||||||
|
* @return a potentially simpler VC that's more tractable to genotype
|
||||||
|
*/
|
||||||
|
@Requires("vc != null")
|
||||||
|
@Ensures("result != null")
|
||||||
|
protected abstract VariantContext reduceScope(final VariantContext vc);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Actually carry out the log10PNonRef calculation on vc, storing results in results
|
||||||
|
*
|
||||||
|
* @param vc variant context with alleles and genotype likelihoods
|
||||||
|
* @param log10AlleleFrequencyPriors priors
|
||||||
|
* @param result (pre-allocated) object to store results
|
||||||
|
*/
|
||||||
|
// TODO -- add consistent requires among args
|
||||||
|
protected abstract void computeLog10PNonRef(final VariantContext vc,
|
||||||
|
final double[] log10AlleleFrequencyPriors,
|
||||||
|
final AlleleFrequencyCalculationResult result);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Must be overridden by concrete subclasses
|
||||||
|
*
|
||||||
|
* @param vc variant context with alleles and genotype likelihoods
|
||||||
|
* @param allelesToUse alleles to subset
|
||||||
|
* @param assignGenotypes
|
||||||
|
* @param ploidy
|
||||||
|
* @return GenotypesContext object
|
||||||
|
*/
|
||||||
|
protected abstract GenotypesContext subsetAlleles(final VariantContext vc,
|
||||||
|
final List<Allele> allelesToUse,
|
||||||
|
final boolean assignGenotypes,
|
||||||
|
final int ploidy);
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Print information about the call to the calls log
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void initializeOutputFile(final File outputFile) {
|
||||||
|
try {
|
||||||
|
if (outputFile != null) {
|
||||||
|
callReport = new PrintStream( new FileOutputStream(outputFile) );
|
||||||
|
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
|
||||||
|
}
|
||||||
|
} catch ( FileNotFoundException e ) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printCallInfo(final VariantContext vc,
|
||||||
|
final double[] log10AlleleFrequencyPriors,
|
||||||
|
final long runtimeNano,
|
||||||
|
final double log10PosteriorOfAFzero) {
|
||||||
|
printCallElement(vc, "type", "ignore", vc.getType());
|
||||||
|
|
||||||
|
int allelei = 0;
|
||||||
|
for ( final Allele a : vc.getAlleles() )
|
||||||
|
printCallElement(vc, "allele", allelei++, a.getDisplayString());
|
||||||
|
|
||||||
|
for ( final Genotype g : vc.getGenotypes() )
|
||||||
|
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
|
||||||
|
|
||||||
|
for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ )
|
||||||
|
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
|
||||||
|
|
||||||
|
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
|
||||||
|
printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero);
|
||||||
|
|
||||||
|
callReport.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printCallElement(final VariantContext vc,
|
||||||
|
final Object variable,
|
||||||
|
final Object key,
|
||||||
|
final Object value) {
|
||||||
|
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
|
||||||
|
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -26,8 +26,10 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -54,6 +56,7 @@ public class AlleleFrequencyCalculationResult {
|
||||||
private double log10LikelihoodOfAFzero;
|
private double log10LikelihoodOfAFzero;
|
||||||
private double log10PosteriorOfAFzero;
|
private double log10PosteriorOfAFzero;
|
||||||
|
|
||||||
|
private List<Allele> allelesUsedInGenotyping;
|
||||||
|
|
||||||
public AlleleFrequencyCalculationResult(final int maxAltAlleles) {
|
public AlleleFrequencyCalculationResult(final int maxAltAlleles) {
|
||||||
alleleCountsOfMLE = new int[maxAltAlleles];
|
alleleCountsOfMLE = new int[maxAltAlleles];
|
||||||
|
|
@ -93,13 +96,14 @@ public class AlleleFrequencyCalculationResult {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset() {
|
public void reset() {
|
||||||
log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AlleleFrequencyCalculation.VALUE_NOT_CALCULATED;
|
||||||
for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) {
|
for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) {
|
||||||
alleleCountsOfMLE[i] = 0;
|
alleleCountsOfMLE[i] = 0;
|
||||||
alleleCountsOfMAP[i] = 0;
|
alleleCountsOfMAP[i] = 0;
|
||||||
}
|
}
|
||||||
currentPosteriorsCacheIndex = 0;
|
currentPosteriorsCacheIndex = 0;
|
||||||
log10PosteriorMatrixSum = null;
|
log10PosteriorMatrixSum = null;
|
||||||
|
allelesUsedInGenotyping = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
public void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||||
|
|
@ -147,4 +151,12 @@ public class AlleleFrequencyCalculationResult {
|
||||||
Arrays.fill(alleleCountsOfMAP, 0);
|
Arrays.fill(alleleCountsOfMAP, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<Allele> getAllelesUsedInGenotyping() {
|
||||||
|
return allelesUsedInGenotyping;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAllelesUsedInGenotyping(List<Allele> allelesUsedInGenotyping) {
|
||||||
|
this.allelesUsedInGenotyping = allelesUsedInGenotyping;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -27,98 +27,49 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
public class DiploidExactAFCalculation extends ExactAFCalculation {
|
||||||
private SimpleTimer callTimer = new SimpleTimer();
|
|
||||||
private PrintStream callReport = null;
|
|
||||||
|
|
||||||
// private final static boolean DEBUG = false;
|
// private final static boolean DEBUG = false;
|
||||||
|
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
|
|
||||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
public DiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) {
|
||||||
|
super(nSamples, maxAltAlleles, false, null, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DiploidExactAFCalculation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
if ( UAC.exactCallsLog != null )
|
|
||||||
initializeOutputFile(UAC.exactCallsLog);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initializeOutputFile(final File outputFile) {
|
@Override
|
||||||
try {
|
public void computeLog10PNonRef(final VariantContext vc,
|
||||||
if (outputFile != null) {
|
final double[] log10AlleleFrequencyPriors,
|
||||||
callReport = new PrintStream( new FileOutputStream(outputFile) );
|
final AlleleFrequencyCalculationResult result) {
|
||||||
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
|
linearExactMultiAllelic(vc.getGenotypes(), vc.getNAlleles() - 1, log10AlleleFrequencyPriors, result);
|
||||||
}
|
|
||||||
} catch ( FileNotFoundException e ) {
|
|
||||||
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Allele> getLog10PNonRef(final VariantContext vc,
|
@Override
|
||||||
final double[] log10AlleleFrequencyPriors,
|
protected VariantContext reduceScope(final VariantContext vc) {
|
||||||
final AlleleFrequencyCalculationResult result) {
|
|
||||||
|
|
||||||
GenotypesContext GLs = vc.getGenotypes();
|
|
||||||
List<Allele> alleles = vc.getAlleles();
|
|
||||||
|
|
||||||
final int myMaxAltAllelesToGenotype = CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS && vc.getType().equals(VariantContext.Type.INDEL) ? 2 : MAX_ALTERNATE_ALLELES_TO_GENOTYPE;
|
final int myMaxAltAllelesToGenotype = CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS && vc.getType().equals(VariantContext.Type.INDEL) ? 2 : MAX_ALTERNATE_ALLELES_TO_GENOTYPE;
|
||||||
|
|
||||||
// don't try to genotype too many alternate alleles
|
// don't try to genotype too many alternate alleles
|
||||||
if ( vc.getAlternateAlleles().size() > myMaxAltAllelesToGenotype ) {
|
if ( vc.getAlternateAlleles().size() > myMaxAltAllelesToGenotype ) {
|
||||||
logger.warn("this tool is currently set to genotype at most " + myMaxAltAllelesToGenotype + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
logger.warn("this tool is currently set to genotype at most " + myMaxAltAllelesToGenotype + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||||
|
|
||||||
alleles = new ArrayList<Allele>(myMaxAltAllelesToGenotype + 1);
|
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||||
|
List<Allele> alleles = new ArrayList<Allele>(myMaxAltAllelesToGenotype + 1);
|
||||||
alleles.add(vc.getReference());
|
alleles.add(vc.getReference());
|
||||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, myMaxAltAllelesToGenotype));
|
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, myMaxAltAllelesToGenotype));
|
||||||
GLs = VariantContextUtils.subsetDiploidAlleles(vc, alleles, false);
|
builder.alleles(alleles);
|
||||||
|
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||||
|
return builder.make();
|
||||||
|
} else {
|
||||||
|
return vc;
|
||||||
}
|
}
|
||||||
|
|
||||||
callTimer.start();
|
|
||||||
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
|
|
||||||
final long nanoTime = callTimer.getElapsedTimeNano();
|
|
||||||
|
|
||||||
if ( callReport != null )
|
|
||||||
printCallInfo(vc, alleles, GLs, log10AlleleFrequencyPriors, nanoTime, result.getLog10PosteriorOfAFzero());
|
|
||||||
|
|
||||||
return alleles;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void printCallInfo(final VariantContext vc,
|
|
||||||
final List<Allele> alleles,
|
|
||||||
final GenotypesContext GLs,
|
|
||||||
final double[] log10AlleleFrequencyPriors,
|
|
||||||
final long runtimeNano,
|
|
||||||
final double log10PosteriorOfAFzero) {
|
|
||||||
printCallElement(vc, "type", "ignore", vc.getType());
|
|
||||||
|
|
||||||
int allelei = 0;
|
|
||||||
for ( final Allele a : alleles )
|
|
||||||
printCallElement(vc, "allele", allelei++, a.getDisplayString());
|
|
||||||
|
|
||||||
for ( final Genotype g : GLs )
|
|
||||||
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
|
|
||||||
|
|
||||||
for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ )
|
|
||||||
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
|
|
||||||
|
|
||||||
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
|
|
||||||
printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero);
|
|
||||||
|
|
||||||
callReport.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void printCallElement(final VariantContext vc, final Object variable, final Object key, final Object value) {
|
|
||||||
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
|
|
||||||
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||||
|
|
@ -30,40 +30,23 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The model representing how we calculate a genotype given the priors and a pile
|
* Uses the Exact calculation of Heng Li
|
||||||
* of bases and quality scores
|
|
||||||
*/
|
*/
|
||||||
public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
||||||
|
protected ExactAFCalculation(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) {
|
||||||
public enum Model {
|
super(UAC, nSamples, logger, verboseWriter);
|
||||||
/** The default model with the best performance in all cases */
|
|
||||||
EXACT
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int N;
|
protected ExactAFCalculation(final int nSamples, int maxAltAlleles, boolean capMaxAltsForIndels, File exactCallsLog, Logger logger, PrintStream verboseWriter) {
|
||||||
protected int MAX_ALTERNATE_ALLELES_TO_GENOTYPE;
|
super(nSamples, maxAltAlleles, capMaxAltsForIndels, exactCallsLog, logger, verboseWriter);
|
||||||
protected boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS;
|
|
||||||
|
|
||||||
protected Logger logger;
|
|
||||||
protected PrintStream verboseWriter;
|
|
||||||
|
|
||||||
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
protected AlleleFrequencyCalculationModel(final UnifiedArgumentCollection UAC, final int N, final Logger logger, final PrintStream verboseWriter) {
|
|
||||||
this.N = N;
|
|
||||||
this.MAX_ALTERNATE_ALLELES_TO_GENOTYPE = UAC.MAX_ALTERNATE_ALLELES;
|
|
||||||
this.CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = UAC.CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS;
|
|
||||||
this.logger = logger;
|
|
||||||
this.verboseWriter = verboseWriter;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -102,31 +85,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
return genotypeLikelihoods;
|
return genotypeLikelihoods;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Must be overridden by concrete subclasses
|
|
||||||
* @param vc variant context with alleles and genotype likelihoods
|
|
||||||
* @param log10AlleleFrequencyPriors priors
|
|
||||||
* @param result (pre-allocated) object to store likelihoods results
|
|
||||||
* @return the alleles used for genotyping
|
|
||||||
*/
|
|
||||||
protected abstract List<Allele> getLog10PNonRef(final VariantContext vc,
|
|
||||||
final double[] log10AlleleFrequencyPriors,
|
|
||||||
final AlleleFrequencyCalculationResult result);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Must be overridden by concrete subclasses
|
|
||||||
* @param vc variant context with alleles and genotype likelihoods
|
|
||||||
* @param allelesToUse alleles to subset
|
|
||||||
* @param assignGenotypes
|
|
||||||
* @param ploidy
|
|
||||||
* @return GenotypesContext object
|
|
||||||
*/
|
|
||||||
protected abstract GenotypesContext subsetAlleles(final VariantContext vc,
|
|
||||||
final List<Allele> allelesToUse,
|
|
||||||
final boolean assignGenotypes,
|
|
||||||
final int ploidy);
|
|
||||||
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// protected classes used to store exact model matrix columns
|
// protected classes used to store exact model matrix columns
|
||||||
|
|
@ -41,7 +41,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
*/
|
*/
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
||||||
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
protected AlleleFrequencyCalculation.Model AFmodel = AlleleFrequencyCalculation.Model.EXACT;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
|
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
|
||||||
|
|
|
||||||
|
|
@ -27,10 +27,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
|
||||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||||
|
|
@ -249,7 +249,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
||||||
throw new UserException("Incorrect genotype calculation model chosen. Only [POOLSNP|POOLINDEL|POOLBOTH] supported with this walker if sample ploidy != 2");
|
throw new UserException("Incorrect genotype calculation model chosen. Only [POOLSNP|POOLINDEL|POOLBOTH] supported with this walker if sample ploidy != 2");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UAC.AFmodel != AlleleFrequencyCalculationModel.Model.POOL)
|
if (UAC.AFmodel != AlleleFrequencyCalculation.Model.POOL)
|
||||||
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
|
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -78,7 +78,7 @@ public class UnifiedGenotyperEngine {
|
||||||
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
|
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
|
||||||
|
|
||||||
// the model used for calculating p(non-ref)
|
// the model used for calculating p(non-ref)
|
||||||
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
|
private ThreadLocal<AlleleFrequencyCalculation> afcm = new ThreadLocal<AlleleFrequencyCalculation>();
|
||||||
|
|
||||||
// the allele frequency likelihoods and posteriors (allocated once as an optimization)
|
// the allele frequency likelihoods and posteriors (allocated once as an optimization)
|
||||||
private ThreadLocal<AlleleFrequencyCalculationResult> alleleFrequencyCalculationResult = new ThreadLocal<AlleleFrequencyCalculationResult>();
|
private ThreadLocal<AlleleFrequencyCalculationResult> alleleFrequencyCalculationResult = new ThreadLocal<AlleleFrequencyCalculationResult>();
|
||||||
|
|
@ -371,7 +371,7 @@ public class UnifiedGenotyperEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
AFresult.reset();
|
AFresult.reset();
|
||||||
List<Allele> allelesUsedInGenotyping = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
|
afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
|
||||||
|
|
||||||
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
||||||
boolean bestGuessIsRef = true;
|
boolean bestGuessIsRef = true;
|
||||||
|
|
@ -382,7 +382,7 @@ public class UnifiedGenotyperEngine {
|
||||||
myAlleles.add(vc.getReference());
|
myAlleles.add(vc.getReference());
|
||||||
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||||
final Allele alternateAllele = vc.getAlternateAllele(i);
|
final Allele alternateAllele = vc.getAlternateAllele(i);
|
||||||
final int indexOfAllele = allelesUsedInGenotyping.indexOf(alternateAllele);
|
final int indexOfAllele = AFresult.getAllelesUsedInGenotyping().indexOf(alternateAllele);
|
||||||
// the genotyping model may have stripped it out
|
// the genotyping model may have stripped it out
|
||||||
if ( indexOfAllele == -1 )
|
if ( indexOfAllele == -1 )
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -754,32 +754,34 @@ public class UnifiedGenotyperEngine {
|
||||||
return glcm;
|
return glcm;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
|
private static AlleleFrequencyCalculation getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
|
||||||
|
|
||||||
List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
|
List<Class<? extends AlleleFrequencyCalculation>> afClasses = new PluginManager<AlleleFrequencyCalculation>(AlleleFrequencyCalculation.class).getPlugins();
|
||||||
|
|
||||||
// user-specified name
|
// user-specified name
|
||||||
String afModelName = UAC.AFmodel.name();
|
String afModelName = UAC.AFmodel.implementationName;
|
||||||
|
|
||||||
if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
|
if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
|
||||||
afModelName = GPSTRING + afModelName;
|
afModelName = GPSTRING + afModelName;
|
||||||
|
else
|
||||||
|
afModelName = "Diploid" + afModelName;
|
||||||
|
|
||||||
for (int i = 0; i < afClasses.size(); i++) {
|
for (int i = 0; i < afClasses.size(); i++) {
|
||||||
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
|
Class<? extends AlleleFrequencyCalculation> afClass = afClasses.get(i);
|
||||||
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
|
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
|
||||||
if (afModelName.equalsIgnoreCase(key)) {
|
if (afModelName.equalsIgnoreCase(key)) {
|
||||||
try {
|
try {
|
||||||
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
|
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
|
||||||
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
|
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
|
||||||
|
|
||||||
return (AlleleFrequencyCalculationModel)c.newInstance(args);
|
return (AlleleFrequencyCalculation)c.newInstance(args);
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculation " + UAC.AFmodel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculation " + UAC.AFmodel);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
|
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidExactAFCalculation;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
@ -51,7 +51,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
||||||
flatPriors = new double[1+2*samples.size()];
|
flatPriors = new double[1+2*samples.size()];
|
||||||
}
|
}
|
||||||
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size());
|
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size());
|
||||||
ExactAFCalculationModel.linearExactMultiAllelic(subContext.getGenotypes(),vc.getAlternateAlleles().size(),flatPriors,result);
|
DiploidExactAFCalculation.linearExactMultiAllelic(subContext.getGenotypes(), vc.getAlternateAlleles().size(), flatPriors, result);
|
||||||
// do we want to let this qual go up or down?
|
// do we want to let this qual go up or down?
|
||||||
if ( result.getLog10PosteriorOfAFzero() < referenceLikelihood ) {
|
if ( result.getLog10PosteriorOfAFzero() < referenceLikelihood ) {
|
||||||
return true;
|
return true;
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,14 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeSuite;
|
import org.testng.annotations.BeforeSuite;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
public class ExactAFCalculationModelUnitTest extends BaseTest {
|
public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
|
|
@ -45,6 +43,19 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
this.numAltAlleles = numAltAlleles;
|
this.numAltAlleles = numAltAlleles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public VariantContext getVC() {
|
||||||
|
VariantContextBuilder builder = new VariantContextBuilder("test", "1", 1, 1, getAlleles());
|
||||||
|
builder.genotypes(GLs);
|
||||||
|
return builder.make();
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Allele> getAlleles() {
|
||||||
|
return Arrays.asList(Allele.create("A", true),
|
||||||
|
Allele.create("C"),
|
||||||
|
Allele.create("G"),
|
||||||
|
Allele.create("T")).subList(0, numAltAlleles+1);
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("%s input=%s", super.toString(), GLs);
|
return String.format("%s input=%s", super.toString(), GLs);
|
||||||
}
|
}
|
||||||
|
|
@ -83,9 +94,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
@Test(dataProvider = "getGLs")
|
@Test(dataProvider = "getGLs")
|
||||||
public void testGLs(GetGLsTest cfg) {
|
public void testGLs(GetGLsTest cfg) {
|
||||||
|
|
||||||
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
|
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(cfg.getVC().getNSamples(), cfg.numAltAlleles);
|
||||||
|
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors);
|
||||||
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
|
|
||||||
|
|
||||||
int nameIndex = 1;
|
int nameIndex = 1;
|
||||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||||
|
|
@ -102,9 +112,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
final double[] BB = new double[]{-20000000.0, -20000000.0, 0.0};
|
final double[] BB = new double[]{-20000000.0, -20000000.0, 0.0};
|
||||||
GetGLsTest cfg = new GetGLsTest("B6", 1, createGenotype("1", BB), createGenotype("2", BB), createGenotype("3", BB));
|
GetGLsTest cfg = new GetGLsTest("B6", 1, createGenotype("1", BB), createGenotype("2", BB), createGenotype("3", BB));
|
||||||
|
|
||||||
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
|
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(1, 1);
|
||||||
|
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors);
|
||||||
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
|
|
||||||
|
|
||||||
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
|
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
|
||||||
Assert.assertEquals(calculatedAlleleCount, 6);
|
Assert.assertEquals(calculatedAlleleCount, 6);
|
||||||
|
|
@ -117,9 +126,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0};
|
final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0};
|
||||||
GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB));
|
GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB));
|
||||||
|
|
||||||
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
|
final DiploidExactAFCalculation afCalculation = new DiploidExactAFCalculation(2, 2);
|
||||||
|
final AlleleFrequencyCalculationResult result = afCalculation.getLog10PNonRef(cfg.getVC(), priors);
|
||||||
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
|
|
||||||
|
|
||||||
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
|
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
|
||||||
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);
|
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue