-Have the calculation models determine whether a call passes the lod/confidence thresholds (as opposed to returning everything and letting the UG decide); this way, walkers which call map() will get only the good calls.

-Do the right thing in all models for all-base-mode (for Kiran).


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1940 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-10-30 02:35:51 +00:00
parent 64ac956885
commit 4ee1d6f733
5 changed files with 60 additions and 57 deletions

View File

@ -15,7 +15,6 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
// We consider the EM stable when the MAF doesn't change more than 1/10,000
protected static final double EM_STABILITY_METRIC = 1e-4;
protected EMGenotypeCalculationModel() {}
public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
@ -28,7 +27,10 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
// run the EM calculation
EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL);
double lod = overall.getPofD() - overall.getPofNull();
logger.debug("lod=" + lod);
// return a null call if we don't pass the lod cutoff
if ( !ALL_BASE_MODE && lod < LOD_THRESHOLD )
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
// calculate strand score
EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD);

View File

@ -307,38 +307,40 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
}
}
}
double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
// return a null call if we don't pass the confidence cutoff
if ( !ALL_BASE_MODE && phredScaledConfidence < CONFIDENCE_THRESHOLD )
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
ArrayList<Genotype> calls = new ArrayList<Genotype>();
// TODO -- generate strand score
double strandScore = 0.0;
GenotypeMetaData metadata = new GenotypeMetaData(phredScaledConfidence, strandScore, bestAFguess);
// generate calls only if we pass the threshold or we want ref calls
if ( phredScaledConfidence >= CONFIDENCE_THRESHOLD || ALL_BASE_MODE ) {
for ( String sample : GLs.keySet() ) {
for ( String sample : GLs.keySet() ) {
// create the call
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
// create the call
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
if ( call instanceof ReadBacked ) {
((ReadBacked)call).setReads(context.getReads());
}
if ( call instanceof SampleBacked ) {
((SampleBacked)call).setSampleName(sample);
}
if ( call instanceof LikelihoodsBacked ) {
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
}
if ( call instanceof PosteriorsBacked ) {
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
}
calls.add(call);
if ( call instanceof ReadBacked ) {
((ReadBacked)call).setReads(context.getReads());
}
if ( call instanceof SampleBacked ) {
((SampleBacked)call).setSampleName(sample);
}
if ( call instanceof LikelihoodsBacked ) {
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
}
if ( call instanceof PosteriorsBacked ) {
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
}
calls.add(call);
}
return new Pair<List<Genotype>, GenotypeMetaData>(calls, metadata);

View File

@ -42,12 +42,26 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
if ( sampleContext == null )
return null;
// create the genotype call object
// create the call
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
// get the genotype likelihoods
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);
// are we above the lod threshold for emitting calls (and not in all-bases-mode)?
if ( !ALL_BASE_MODE ) {
double[] posteriors = discoveryGL.second.getPosteriors();
Integer sortedPosteriors[] = Utils.SortPermutation(posteriors);
double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]];
double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]];
double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype));
// return a null call
if ( lodConfidence < LOD_THRESHOLD )
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
}
// we can now create the genotype call object
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
if ( call instanceof ReadBacked ) {
((ReadBacked)call).setReads(discoveryGL.first.getReads());
}

View File

@ -212,43 +212,28 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeM
public Integer reduceInit() { return 0; }
public Integer reduce(Pair<List<Genotype>, GenotypeMetaData> value, Integer sum) {
if ( value == null || value.first == null )
// can't call the locus because of no coverage
if ( value == null )
return sum;
callsMetrics.nCalledBases++;
if ( value.first.size() == 0 )
// can't make a confident variant call here
if ( value.first == null || value.first.size() == 0 ) {
callsMetrics.nNonConfidentCalls++;
return sum;
}
// special-case for single-sample using PointEstimate model
if ( value.second == null ) {
Genotype call = value.first.get(0);
if ( UAC.GENOTYPE || call.isVariant(call.getReference()) ) {
double confidence = (UAC.GENOTYPE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
if ( confidence >= UAC.LOD_THRESHOLD ) {
callsMetrics.nConfidentCalls++;
writer.addGenotypeCall(call);
}
} else {
callsMetrics.nNonConfidentCalls++;
}
callsMetrics.nConfidentCalls++;
// if we have a single-sample call (single sample from PointEstimate model returns no genotype locus data)
if ( value.second == null || (!writer.supportsMultiSample() && samples.size() == 1) ) {
writer.addGenotypeCall(value.first.get(0));
}
// use multi-sample mode if we have multiple samples or the output type allows it
else if ( writer.supportsMultiSample() || samples.size() > 1 ) {
if ( UAC.CONFIDENCE_THRESHOLD <= value.second.getLOD() && UAC.LOD_THRESHOLD <= value.second.getLOD() ) {
callsMetrics.nConfidentCalls++;
writer.addMultiSampleCall(value.first, value.second);
} else {
callsMetrics.nNonConfidentCalls++;
}
}
// otherwise, use single sample mode
else {
callsMetrics.nConfidentCalls++;
writer.addGenotypeCall(value.first.get(0));
writer.addMultiSampleCall(value.first, value.second);
}
return sum + 1;

View File

@ -374,7 +374,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
Pair<List<Genotype>, GenotypeMetaData> calls = ug.map(tracker,ref,context);
if (calls == null)
if (calls == null || calls.first == null)
return false;
return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase());