No reason to sum the normalized posteriors array to get Pr(AF>0) given that we can just compute 1.0 - array[0]. Integration tests change only because of trivial precision artifacts for reference calls using EMIT_ALL_SITES.

2011-12-18 00:31:47 -05:00 · 2011-12-18 00:31:47 -05:00 · c5ffe0ab04
parent 6dc52d42bf
commit c5ffe0ab04
2 changed files with 10 additions and 14 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -335,12 +335,8 @@ public class UnifiedGenotyperEngine {

        // calculate p(f>0):
        // because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them
-        double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get());
-        // TODO -- why not just 1.0 - normalizedPosteriors[0]?
-        double sum = 0.0;
-        for (int i = 1; i <= N+1; i++) // N+1 because the cell at position zero in the original posteriors array is actually probability of non-ref (since it's marginalized over all alleles)
-            sum += normalizedPosteriors[i];
-        double PofF = Math.min(sum, 1.0); // deal with precision errors
+        final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get());
+        final double PofF = 1.0 - normalizedPosteriors[0];

        double phredScaledConfidence;
        if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
@ -350,7 +346,7 @@ public class UnifiedGenotyperEngine {
        } else {
            phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
            if ( Double.isInfinite(phredScaledConfidence) ) {
-                sum = AFresult.log10AlleleFrequencyPosteriors[0][0];
+                double sum = AFresult.log10AlleleFrequencyPosteriors[0][0];
                if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
                    sum = 0.0;
                for (int i = 1; i <= N; i++) {
@ -370,7 +366,7 @@ public class UnifiedGenotyperEngine {
        }

        // strip out any alleles that aren't going to be used in the VariantContext
-        List<Allele> myAlleles;
+        final List<Allele> myAlleles;
        if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
            myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
            myAlleles.add(vc.getReference());
@ -384,8 +380,8 @@ public class UnifiedGenotyperEngine {
        }

        // start constructing the resulting VC
-        GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
-        VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
+        final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
+        final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
        builder.log10PError(phredScaledConfidence/-10.0);
        if ( ! passesCallThreshold(phredScaledConfidence) )
            builder.filters(filter);
@ -396,14 +392,14 @@ public class UnifiedGenotyperEngine {
        }

        // create the genotypes
-        GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles);
+        final GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles);

        // print out stats if we have a writer
        if ( verboseWriter != null && !limitedContext )
            printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);

        // *** note that calculating strand bias involves overwriting data structures, so we do that last
-        HashMap<String, Object> attributes = new HashMap<String, Object>();
+        final HashMap<String, Object> attributes = new HashMap<String, Object>();

        // if the site was downsampled, record that fact
        if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testOutputParameter() {
        HashMap<String, String> e = new HashMap<String, String>();
        e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
-        e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "274bd9d1b9c7857690fa5f0228ffc6d7" );
-        e.put( "--output_mode EMIT_ALL_SITES", "594c6d3c48bbc73289de7727d768644d" );
+        e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "d971d392956aea69c3707da64d24485b" );
+        e.put( "--output_mode EMIT_ALL_SITES", "21993e71ca1a06a0d1f11d58e3cc26c3" );

        for ( Map.Entry<String, String> entry : e.entrySet() ) {
            WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(