Haplotype caller now sends genotype likelihoods to the exact model to genotype the events found in the best haplotypes.
This commit is contained in:
parent
6f72b3de6a
commit
f5d910b8a5
|
|
@ -68,16 +68,12 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
* @param tracker rod data
|
||||
* @param ref reference context
|
||||
* @param GLs genotype likelihoods
|
||||
* @param Alleles Alleles corresponding to GLs
|
||||
* @param log10AlleleFrequencyPriors priors
|
||||
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
|
||||
*/
|
||||
protected abstract void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, Genotype> GLs, List<Allele> Alleles,
|
||||
protected abstract void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> Alleles,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors);
|
||||
|
||||
|
|
|
|||
|
|
@ -51,9 +51,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
public void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, Genotype> GLs, List<Allele> alleles,
|
||||
public void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> alleles,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors) {
|
||||
final int numAlleles = alleles.size();
|
||||
|
|
|
|||
|
|
@ -52,9 +52,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
|
|||
AFMatrix = new AlleleFrequencyMatrix(N);
|
||||
}
|
||||
|
||||
protected void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, Genotype> GLs, List<Allele> alleles,
|
||||
protected void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> alleles,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors) {
|
||||
initializeAFMatrix(GLs);
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException;
|
|||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -15,11 +16,11 @@ import java.util.ArrayList;
|
|||
public class MultiallelicGenotypeLikelihoods {
|
||||
private String sample;
|
||||
private double[] GLs;
|
||||
private ArrayList<Allele> alleleList;
|
||||
private List<Allele> alleleList;
|
||||
private int depth;
|
||||
|
||||
public MultiallelicGenotypeLikelihoods(String sample,
|
||||
ArrayList<Allele> A,
|
||||
List<Allele> A,
|
||||
double[] log10Likelihoods, int depth) {
|
||||
/* Check for consistency between likelihood vector and number of alleles */
|
||||
int numAlleles = A.size();
|
||||
|
|
@ -40,7 +41,7 @@ public class MultiallelicGenotypeLikelihoods {
|
|||
return GLs;
|
||||
}
|
||||
|
||||
public ArrayList<Allele> getAlleles() {
|
||||
public List<Allele> getAlleles() {
|
||||
return alleleList;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -325,7 +325,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// find the most likely frequency
|
||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
||||
|
|
@ -383,7 +383,7 @@ public class UnifiedGenotyperEngine {
|
|||
// the overall lod
|
||||
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
|
@ -391,7 +391,7 @@ public class UnifiedGenotyperEngine {
|
|||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
|
|
@ -400,7 +400,7 @@ public class UnifiedGenotyperEngine {
|
|||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
|
|
@ -447,6 +447,77 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
}
|
||||
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( afcm.get() == null ) {
|
||||
log10AlleleFrequencyPosteriors.set(new double[N+1]);
|
||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||
}
|
||||
|
||||
// estimate our confidence in a reference call and return
|
||||
if ( vc.getNSamples() == 0 )
|
||||
return null;
|
||||
|
||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// find the most likely frequency
|
||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// calculate p(f>0)
|
||||
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
|
||||
double sum = 0.0;
|
||||
for (int i = 1; i <= N; i++)
|
||||
sum += normalizedPosteriors[i];
|
||||
double PofF = Math.min(sum, 1.0); // deal with precision errors
|
||||
|
||||
double phredScaledConfidence;
|
||||
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
|
||||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
sum = 0.0;
|
||||
for (int i = 1; i <= N; i++) {
|
||||
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
||||
break;
|
||||
sum += log10AlleleFrequencyPosteriors.get()[i];
|
||||
}
|
||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||
}
|
||||
}
|
||||
|
||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
||||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
|
||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
||||
return null;
|
||||
}
|
||||
|
||||
// create the genotypes
|
||||
Map<String, Genotype> genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
|
||||
|
||||
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
|
||||
// strip out the alternate allele if it's a ref call
|
||||
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
||||
myAlleles = new HashSet<Allele>(1);
|
||||
myAlleles.add(vc.getReference());
|
||||
}
|
||||
VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
|
||||
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes, vc.getReferenceBaseForIndel());
|
||||
|
||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
}
|
||||
|
||||
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||
// TODO - temp fix until we can deal with extended events properly
|
||||
// for indels, stop location is one more than ref allele length
|
||||
|
|
|
|||
Loading…
Reference in New Issue