From dfe7d694710a45d49c44470dd789f7c63286b620 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 23 Nov 2009 02:55:43 +0000 Subject: [PATCH] 1. VCF: don't print slod if it's never set 2. UG: don't print slod if lods are infinite (todo: figure out a good guess instead) 3. UG: if probF=0 for 2 alt alleles are both 0 (because of precision), use log values to discriminate git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2116 348d0f76-0448-11de-a6fe-93d51630548a --- ...JointEstimateGenotypeCalculationModel.java | 53 +++++++++++-------- .../sting/utils/genotype/SLODBacked.java | 4 +- .../genotype/vcf/VCFGenotypeLocusData.java | 4 +- .../vcf/VCFGenotypeWriterAdapter.java | 3 +- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index b262e5ede..58b0b3431 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -63,7 +63,6 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc } protected void initializeVerboseWriter(PrintWriter verboseWriter) { - // by default, no initialization is done StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t"); for ( char altAllele : BaseUtils.BASES ) { char base = Character.toLowerCase(altAllele); @@ -229,7 +228,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc for ( char altAllele : BaseUtils.BASES ) { if ( altAllele != ref ) { int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele); - if ( PofFs[baseIndex] > maxConfidence ) { + if ( PofFs[baseIndex] > maxConfidence || + (MathUtils.compareDoubles(PofFs[baseIndex], maxConfidence) == 0 && log10PofDgivenAFi[baseIndex][0] < log10PofDgivenAFi[indexOfMax][0]) ) { indexOfMax = baseIndex; baseOfMax = altAllele; maxConfidence = PofFs[baseIndex]; @@ -273,31 +273,38 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc double overallLog10PofF = Math.log10(PofFs[indexOfMax]); double lod = overallLog10PofF - overallLog10PofNull; - // the forward lod - initialize(ref, contexts, StratifiedContext.FORWARD); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.FORWARD); - calculatePofFs(ref, frequencyEstimationPoints); - double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); - double forwardLog10PofF = Math.log10(PofFs[indexOfMax]); + // I'm not sure what to do when the numbers are so small as to make the lod infinite. + // For now, we'll just not compute strand bias... + if ( !Double.isInfinite(lod) ) { - // the reverse lod - initialize(ref, contexts, StratifiedContext.REVERSE); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.REVERSE); - calculatePofFs(ref, frequencyEstimationPoints); - double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); - double reverseLog10PofF = Math.log10(PofFs[indexOfMax]); + // the forward lod + initialize(ref, contexts, StratifiedContext.FORWARD); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.FORWARD); + calculatePofFs(ref, frequencyEstimationPoints); + double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); + double forwardLog10PofF = Math.log10(PofFs[indexOfMax]); + if ( Double.isInfinite(lod) ) + lod = -1.0 * log10PofDgivenAFi[indexOfMax][0]; - double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull; - double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull; - //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + // the reverse lod + initialize(ref, contexts, StratifiedContext.REVERSE); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.REVERSE); + calculatePofFs(ref, frequencyEstimationPoints); + double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); + double reverseLog10PofF = Math.log10(PofFs[indexOfMax]); - // strand score is max bias between forward and reverse strands - double strandScore = Math.max(forwardLod - lod, reverseLod - lod); - // rescale by a factor of 10 - strandScore *= 10.0; - //logger.debug(String.format("SLOD=%f", strandScore)); + double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull; + double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull; + //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); - ((SLODBacked)locusdata).setSLOD(strandScore); + // strand score is max bias between forward and reverse strands + double strandScore = Math.max(forwardLod - lod, reverseLod - lod); + // rescale by a factor of 10 + strandScore *= 10.0; + //logger.debug(String.format("SLOD=%f", strandScore)); + + ((SLODBacked)locusdata).setSLOD(strandScore); + } } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java index c0ca132e9..6884bb3f8 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java @@ -11,9 +11,9 @@ public interface SLODBacked { /** * - * @return returns the strand lod for this genotype + * @return returns the strand lod for this genotype or null if not set */ - public double getSLOD(); + public Double getSLOD(); /** * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java index 83c80ea07..acca3aae7 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java @@ -19,7 +19,7 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked private double mConfidence = 0.0; // the strand score lod - private double mSLOD = 0.0; + private Double mSLOD = null; // the allele frequency field private double mAlleleFrequency = 0.0; @@ -103,7 +103,7 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked * * @return the strand lod */ - public double getSLOD() { + public Double getSLOD() { return mSLOD; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index afb978e06..d1a67c64c 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -187,7 +187,8 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { private static Map getInfoFields(VCFGenotypeLocusData locusdata, VCFParameters params) { Map infoFields = new HashMap(); if ( locusdata != null ) { - infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); + if ( locusdata.getSLOD() != null ) + infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency())); Map otherFields = locusdata.getFields(); if ( otherFields != null ) {