-Have the calculation models determine whether a call passes the lod/confidence thresholds (as opposed to returning everything and letting the UG decide); this way, walkers which call map() will get only the good calls.
-Do the right thing in all models for all-base-mode (for Kiran). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1940 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
64ac956885
commit
4ee1d6f733
|
|
@ -15,7 +15,6 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
// We consider the EM stable when the MAF doesn't change more than 1/10,000
|
||||
protected static final double EM_STABILITY_METRIC = 1e-4;
|
||||
|
||||
|
||||
protected EMGenotypeCalculationModel() {}
|
||||
|
||||
public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
|
|
@ -28,7 +27,10 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
// run the EM calculation
|
||||
EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL);
|
||||
double lod = overall.getPofD() - overall.getPofNull();
|
||||
logger.debug("lod=" + lod);
|
||||
|
||||
// return a null call if we don't pass the lod cutoff
|
||||
if ( !ALL_BASE_MODE && lod < LOD_THRESHOLD )
|
||||
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
|
||||
|
||||
// calculate strand score
|
||||
EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD);
|
||||
|
|
|
|||
|
|
@ -307,38 +307,40 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
}
|
||||
}
|
||||
}
|
||||
double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
|
||||
|
||||
double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
|
||||
// return a null call if we don't pass the confidence cutoff
|
||||
if ( !ALL_BASE_MODE && phredScaledConfidence < CONFIDENCE_THRESHOLD )
|
||||
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
|
||||
|
||||
double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
|
||||
ArrayList<Genotype> calls = new ArrayList<Genotype>();
|
||||
|
||||
// TODO -- generate strand score
|
||||
double strandScore = 0.0;
|
||||
GenotypeMetaData metadata = new GenotypeMetaData(phredScaledConfidence, strandScore, bestAFguess);
|
||||
|
||||
// generate calls only if we pass the threshold or we want ref calls
|
||||
if ( phredScaledConfidence >= CONFIDENCE_THRESHOLD || ALL_BASE_MODE ) {
|
||||
for ( String sample : GLs.keySet() ) {
|
||||
|
||||
for ( String sample : GLs.keySet() ) {
|
||||
// create the call
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
|
||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
// create the call
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
|
||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(context.getReads());
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
((SampleBacked)call).setSampleName(sample);
|
||||
}
|
||||
if ( call instanceof LikelihoodsBacked ) {
|
||||
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
|
||||
}
|
||||
if ( call instanceof PosteriorsBacked ) {
|
||||
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
|
||||
}
|
||||
|
||||
calls.add(call);
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(context.getReads());
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
((SampleBacked)call).setSampleName(sample);
|
||||
}
|
||||
if ( call instanceof LikelihoodsBacked ) {
|
||||
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
|
||||
}
|
||||
if ( call instanceof PosteriorsBacked ) {
|
||||
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
|
||||
}
|
||||
|
||||
calls.add(call);
|
||||
}
|
||||
|
||||
return new Pair<List<Genotype>, GenotypeMetaData>(calls, metadata);
|
||||
|
|
|
|||
|
|
@ -42,12 +42,26 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
if ( sampleContext == null )
|
||||
return null;
|
||||
|
||||
// create the genotype call object
|
||||
// create the call
|
||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
// get the genotype likelihoods
|
||||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);
|
||||
|
||||
// are we above the lod threshold for emitting calls (and not in all-bases-mode)?
|
||||
if ( !ALL_BASE_MODE ) {
|
||||
double[] posteriors = discoveryGL.second.getPosteriors();
|
||||
Integer sortedPosteriors[] = Utils.SortPermutation(posteriors);
|
||||
double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]];
|
||||
double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]];
|
||||
double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
|
||||
double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype));
|
||||
|
||||
// return a null call
|
||||
if ( lodConfidence < LOD_THRESHOLD )
|
||||
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
|
||||
}
|
||||
|
||||
// we can now create the genotype call object
|
||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(discoveryGL.first.getReads());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -212,43 +212,28 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeM
|
|||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Pair<List<Genotype>, GenotypeMetaData> value, Integer sum) {
|
||||
if ( value == null || value.first == null )
|
||||
// can't call the locus because of no coverage
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
callsMetrics.nCalledBases++;
|
||||
|
||||
if ( value.first.size() == 0 )
|
||||
// can't make a confident variant call here
|
||||
if ( value.first == null || value.first.size() == 0 ) {
|
||||
callsMetrics.nNonConfidentCalls++;
|
||||
return sum;
|
||||
}
|
||||
|
||||
// special-case for single-sample using PointEstimate model
|
||||
if ( value.second == null ) {
|
||||
Genotype call = value.first.get(0);
|
||||
if ( UAC.GENOTYPE || call.isVariant(call.getReference()) ) {
|
||||
double confidence = (UAC.GENOTYPE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
|
||||
if ( confidence >= UAC.LOD_THRESHOLD ) {
|
||||
callsMetrics.nConfidentCalls++;
|
||||
writer.addGenotypeCall(call);
|
||||
}
|
||||
} else {
|
||||
callsMetrics.nNonConfidentCalls++;
|
||||
}
|
||||
callsMetrics.nConfidentCalls++;
|
||||
|
||||
// if we have a single-sample call (single sample from PointEstimate model returns no genotype locus data)
|
||||
if ( value.second == null || (!writer.supportsMultiSample() && samples.size() == 1) ) {
|
||||
writer.addGenotypeCall(value.first.get(0));
|
||||
}
|
||||
|
||||
// use multi-sample mode if we have multiple samples or the output type allows it
|
||||
else if ( writer.supportsMultiSample() || samples.size() > 1 ) {
|
||||
|
||||
if ( UAC.CONFIDENCE_THRESHOLD <= value.second.getLOD() && UAC.LOD_THRESHOLD <= value.second.getLOD() ) {
|
||||
callsMetrics.nConfidentCalls++;
|
||||
writer.addMultiSampleCall(value.first, value.second);
|
||||
} else {
|
||||
callsMetrics.nNonConfidentCalls++;
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise, use single sample mode
|
||||
else {
|
||||
callsMetrics.nConfidentCalls++;
|
||||
writer.addGenotypeCall(value.first.get(0));
|
||||
writer.addMultiSampleCall(value.first, value.second);
|
||||
}
|
||||
|
||||
return sum + 1;
|
||||
|
|
|
|||
|
|
@ -374,7 +374,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
|
||||
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
Pair<List<Genotype>, GenotypeMetaData> calls = ug.map(tracker,ref,context);
|
||||
if (calls == null)
|
||||
if (calls == null || calls.first == null)
|
||||
return false;
|
||||
return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase());
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue