-Have the calculation models determine whether a call passes the lod/confidence thresholds (as opposed to returning everything and letting the UG decide); this way, walkers which call map() will get only the good calls.

-Do the right thing in all models for all-base-mode (for Kiran). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1940 348d0f76-0448-11de-a6fe-93d51630548a
2009-10-30 02:35:51 +00:00 · 2009-10-30 02:35:51 +00:00 · 4ee1d6f733
parent 64ac956885
commit 4ee1d6f733
5 changed files with 60 additions and 57 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java
@ -15,7 +15,6 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
    // We consider the EM stable when the MAF doesn't change more than 1/10,000
    protected static final double EM_STABILITY_METRIC = 1e-4;

-
    protected EMGenotypeCalculationModel() {}

    public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
@ -28,7 +27,10 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
        // run the EM calculation
        EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL);
        double lod = overall.getPofD() - overall.getPofNull();
-        logger.debug("lod=" + lod);
+
+        // return a null call if we don't pass the lod cutoff
+        if ( !ALL_BASE_MODE && lod < LOD_THRESHOLD )
+            return new Pair<List<Genotype>, GenotypeMetaData>(null, null);

        // calculate strand score
        EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD);
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java
@ -307,38 +307,40 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
                }
            }
        }
-        double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
-        double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);

+        double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
+
+        // return a null call if we don't pass the confidence cutoff
+        if ( !ALL_BASE_MODE && phredScaledConfidence < CONFIDENCE_THRESHOLD )
+            return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
+
+        double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
        ArrayList<Genotype> calls = new ArrayList<Genotype>();
+
        // TODO -- generate strand score
        double strandScore = 0.0;
        GenotypeMetaData metadata = new GenotypeMetaData(phredScaledConfidence, strandScore, bestAFguess);

-        // generate calls only if we pass the threshold or we want ref calls
-        if ( phredScaledConfidence >= CONFIDENCE_THRESHOLD || ALL_BASE_MODE ) {
+        for ( String sample : GLs.keySet() ) {

-            for ( String sample : GLs.keySet() ) {
+            // create the call
+            AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
+            Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());

-                // create the call
-                AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
-                Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
-
-                if ( call instanceof ReadBacked ) {
-                    ((ReadBacked)call).setReads(context.getReads());
-                }
-                if ( call instanceof SampleBacked ) {
-                    ((SampleBacked)call).setSampleName(sample);
-                }
-                if ( call instanceof LikelihoodsBacked ) {
-                    ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
-                }
-                if ( call instanceof PosteriorsBacked ) {
-                    ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
-                }
-
-                calls.add(call);
+            if ( call instanceof ReadBacked ) {
+                ((ReadBacked)call).setReads(context.getReads());
            }
+            if ( call instanceof SampleBacked ) {
+                ((SampleBacked)call).setSampleName(sample);
+            }
+            if ( call instanceof LikelihoodsBacked ) {
+                ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
+            }
+            if ( call instanceof PosteriorsBacked ) {
+                ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
+            }
+
+            calls.add(call);
        }

        return new Pair<List<Genotype>, GenotypeMetaData>(calls, metadata);
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java
@ -42,12 +42,26 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
            if ( sampleContext == null )
                return null;

-            // create the genotype call object
-            // create the call
-            Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
-
+            // get the genotype likelihoods
            Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);

+            // are we above the lod threshold for emitting calls (and not in all-bases-mode)?
+            if ( !ALL_BASE_MODE ) {
+                double[] posteriors = discoveryGL.second.getPosteriors();
+                Integer sortedPosteriors[] = Utils.SortPermutation(posteriors);
+                double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]];
+                double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]];
+                double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
+                double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype));
+
+                // return a null call
+                if ( lodConfidence < LOD_THRESHOLD )
+                    return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
+            }
+
+            // we can now create the genotype call object
+            Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
+
            if ( call instanceof ReadBacked ) {
                ((ReadBacked)call).setReads(discoveryGL.first.getReads());
            }
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@ -212,43 +212,28 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeM
    public Integer reduceInit() { return 0; }

    public Integer reduce(Pair<List<Genotype>, GenotypeMetaData> value, Integer sum) {
-        if ( value == null || value.first == null )
+        // can't call the locus because of no coverage
+        if ( value == null )
            return sum;

        callsMetrics.nCalledBases++;

-        if ( value.first.size() == 0 )
+        // can't make a confident variant call here
+        if ( value.first == null || value.first.size() == 0 ) {
+            callsMetrics.nNonConfidentCalls++;
            return sum;
+        }

-        // special-case for single-sample using PointEstimate model
-        if ( value.second == null ) {
-            Genotype call = value.first.get(0);
-            if ( UAC.GENOTYPE || call.isVariant(call.getReference()) ) {
-                double confidence = (UAC.GENOTYPE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
-                if ( confidence >= UAC.LOD_THRESHOLD ) {
-                    callsMetrics.nConfidentCalls++;
-                    writer.addGenotypeCall(call);
-                }
-            } else {
-                callsMetrics.nNonConfidentCalls++;
-            }
+        callsMetrics.nConfidentCalls++;
+
+        // if we have a single-sample call (single sample from PointEstimate model returns no genotype locus data)
+        if ( value.second == null || (!writer.supportsMultiSample() && samples.size() == 1) ) {
+            writer.addGenotypeCall(value.first.get(0));
        }

        // use multi-sample mode if we have multiple samples or the output type allows it
-        else if ( writer.supportsMultiSample() || samples.size() > 1 ) {
-
-            if ( UAC.CONFIDENCE_THRESHOLD <= value.second.getLOD() && UAC.LOD_THRESHOLD <= value.second.getLOD() ) {
-                callsMetrics.nConfidentCalls++;
-                writer.addMultiSampleCall(value.first, value.second);
-            } else {
-                callsMetrics.nNonConfidentCalls++;
-            }
-        }
-
-        // otherwise, use single sample mode
        else {
-            callsMetrics.nConfidentCalls++;
-            writer.addGenotypeCall(value.first.get(0));
+            writer.addMultiSampleCall(value.first, value.second);
        }

        return sum + 1;
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java
@ -374,7 +374,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas

    public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
        Pair<List<Genotype>, GenotypeMetaData> calls = ug.map(tracker,ref,context);
-        if (calls == null)
+        if (calls == null || calls.first == null)
            return false;
        return  (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase());