From 555fb975de77f957069e3558ea2bd7f761a9efd2 Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 13 Nov 2009 03:59:13 +0000 Subject: [PATCH] 1. Print out allele frequency range (from joint estimation model only). 2. Don't print verbose output from SLOD calculation (it's just a repeat of previous output). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2032 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/GenotypeCalculationModel.java | 4 ++ ...JointEstimateGenotypeCalculationModel.java | 31 +++++++++++++--- .../genotyper/UnifiedArgumentCollection.java | 5 ++- .../utils/genotype/AlleleFrequencyBacked.java | 37 ++++++++++++++++++- .../genotype/vcf/VCFGenotypeLocusData.java | 20 +++++++++- .../vcf/VCFGenotypeWriterAdapter.java | 3 ++ 6 files changed, 92 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 6b2be5560..a2fca9df7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -38,6 +38,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected int POOL_SIZE; protected double CONFIDENCE_THRESHOLD; protected double MINIMUM_ALLELE_FREQUENCY; + protected double ALLELE_FREQUENCY_RANGE; protected int maxDeletionsInPileup; protected String assumedSingleSample; protected PrintWriter verboseWriter; @@ -70,6 +71,9 @@ public abstract class GenotypeCalculationModel implements Cloneable { POOL_SIZE = UAC.POOLSIZE; CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD; MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY; + ALLELE_FREQUENCY_RANGE = UAC.ALLELE_FREQUENCY_RANGE; + if ( ALLELE_FREQUENCY_RANGE < 0.0 || ALLELE_FREQUENCY_RANGE > 1.0 ) + throw new StingException("Allele frequency fraction must be a value between 0.0 and 1.0"); maxDeletionsInPileup = UAC.MAX_DELETIONS; assumedSingleSample = UAC.ASSUME_SINGLE_SAMPLE; if ( UAC.VERBOSE != null ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index f9a3f1e0c..aa227afd5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -212,7 +212,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo return HWvalues; } - private void normalizeFromLog10(double[] array) { + private static void normalizeFromLog10(double[] array) { // for precision purposes, we need to add (or really subtract, since they're // all negative) the largest value; also, we need to convert to normal-space. double maxValue = findMaxEntry(array).first; @@ -298,7 +298,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo if ( !ALL_BASE_MODE && phredScaledConfidence < CONFIDENCE_THRESHOLD ) return new Pair, GenotypeLocusData>(null, null); - double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1); ArrayList calls = new ArrayList(); // first, populate the sample-specific data @@ -332,7 +331,10 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); } if ( locusdata instanceof AlleleFrequencyBacked ) { - ((AlleleFrequencyBacked)locusdata).setAlleleFrequency(bestAFguess); + int bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second; + ((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); + AlleleFrequencyBacked.AlleleFrequencyRange range = computeAFrange(alleleFrequencyPosteriors[indexOfMax], frequencyEstimationPoints-1, bestAFguess, ALLELE_FREQUENCY_RANGE); + ((AlleleFrequencyBacked)locusdata).setAlleleFrequencyRange(range); } if ( locusdata instanceof IDBacked ) { rodDbSNP dbsnp = getDbSNP(tracker); @@ -404,13 +406,13 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo // the forward lod initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.FORWARD); - calculateAlleleFrequencyPosteriors(ref, loc); + calculateAlleleFrequencyPosteriors(ref, null); double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double forwardLog10PofF = Math.log10(PofFs[indexOfMax]); // the reverse lod initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.REVERSE); - calculateAlleleFrequencyPosteriors(ref, loc); + calculateAlleleFrequencyPosteriors(ref, null); double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double reverseLog10PofF = Math.log10(PofFs[indexOfMax]); @@ -430,4 +432,23 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo return new Pair, GenotypeLocusData>(calls, locusdata); } + + // computes the range of allele frequencies making up the given fraction of the total probability + private static AlleleFrequencyBacked.AlleleFrequencyRange computeAFrange(double[] alleleFrequencyProbs, int N, int bestAFguess, double fraction) { + double totalProb = alleleFrequencyProbs[bestAFguess]; + int lowIndex = bestAFguess; + int highIndex = bestAFguess; + while ( totalProb < fraction ) { + if ( lowIndex > 1 ) { + lowIndex--; + totalProb += alleleFrequencyProbs[lowIndex]; + } + if ( highIndex < N ) { + highIndex++; + totalProb += alleleFrequencyProbs[highIndex]; + } + } + + return new AlleleFrequencyBacked.AlleleFrequencyRange((double)lowIndex / (double)N, (double)highIndex / (double)N, fraction); + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index c2d2a0910..528769546 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -83,6 +83,9 @@ public class UnifiedArgumentCollection { @Argument(fullName = "max_coverage", shortName = "coverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false) public Integer MAX_READS_IN_PILEUP = 10000; - @Argument(fullName = "min_allele_frequency", shortName = "minFreq", doc = "The minimum possible allele frequency in a population (advanced)", required = false) + @Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false) public double MINIMUM_ALLELE_FREQUENCY = 1e-8; + + @Argument(fullName = "allele_frequency_range", shortName = "freq_range", doc = "The range/fraction to emit of the total probability over all frequencies (in JOINT_ESTIMATION model only)", required = false) + public double ALLELE_FREQUENCY_RANGE = 0.95; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java index 03bd54eba..0c413f0f8 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java @@ -11,7 +11,7 @@ public interface AlleleFrequencyBacked { /** * - * @return returns the allele frequency for this genotype + * @return returns the best allele frequency for this genotype */ public double getAlleleFrequency(); @@ -21,4 +21,39 @@ public interface AlleleFrequencyBacked { */ public void setAlleleFrequency(double frequency); + /** + * + * @return returns the allele frequency for this genotype + */ + public AlleleFrequencyRange getAlleleFrequencyRange(); + + /** + * + * @param range the allele frequency range for this genotype + */ + public void setAlleleFrequencyRange(AlleleFrequencyRange range); + + + /** + * A class representing a range of allele frequencies that make up a given + * fraction of the total probability over all frequencies. + */ + public class AlleleFrequencyRange { + + private double mLow, mHigh, mFraction; + + public AlleleFrequencyRange(double low, double high, double fraction) { + mLow = low; + mHigh = high; + mFraction = fraction; + } + + public double getLowEnd() { return mLow; } + public double getHighEnd() { return mHigh; } + public double getPercentageOfProbability() { return mFraction; } + + public String toString() { + return String.format("%.2f-%.2f,%.0f%%", mLow, mHigh, 100*mFraction); + } + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java index dcf869936..57a76e284 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java @@ -21,8 +21,9 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked // the strand score lod private double mSLOD = 0.0; - // the allele frequency + // the allele frequency fields private double mAlleleFrequency = 0.0; + private AlleleFrequencyRange mAFrange = null; // the location private GenomeLoc mLoc; @@ -115,6 +116,23 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked mAlleleFrequency = frequency; } + /** + * get the allele frequency range + * + * @return the allele frequency range + */ + public AlleleFrequencyRange getAlleleFrequencyRange() { + return mAFrange; + } + + /** + * + * @param range the allele frequency range for this genotype + */ + public void setAlleleFrequencyRange(AlleleFrequencyRange range) { + mAFrange = range; + } + /** * @return returns the dbsnp id for this genotype */ diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 4794fb5f7..487f4624a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -189,6 +189,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { if ( locusdata != null ) { infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency())); + AlleleFrequencyBacked.AlleleFrequencyRange range = locusdata.getAlleleFrequencyRange(); + if ( range != null ) + infoFields.put("AFrange", range.toString()); Map otherFields = locusdata.getFields(); if ( otherFields != null ) { infoFields.putAll(otherFields);