From effeedf1a3c8e6ae22d95d2fe3087113bcd9b08c Mon Sep 17 00:00:00 2001 From: fromer Date: Thu, 19 Aug 2010 19:55:47 +0000 Subject: [PATCH] Updated Bayesian phasing method to output per-site phasing statistics (and to not cap PQ at 40) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4064 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/ReadBackedPhasingWalker.java | 119 +++++++++--------- .../sting/utils/PreciseNonNegativeDouble.java | 60 +++++---- 2 files changed, 101 insertions(+), 78 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java index 9935af35c..2a0e256e9 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java @@ -36,14 +36,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.PreciseNonNegativeDouble; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriterImpl; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -67,13 +63,17 @@ public class ReadBackedPhasingWalker extends LocusWalker P(error) = 10^(-4.77/10) = 0.33, P(correct) = 0.66, so that we have odds ratio of >= 2 @Argument(fullName = "phasedVCFFile", shortName = "phasedVCF", doc = "The name of the phased VCF file output", required = true) protected String phasedVCFFile = null; + @Argument(fullName = "variantStatsFilePrefix", shortName = "variantStats", doc = "The prefix of the VCF/phasing statistics files", required = false) + protected String variantStatsFilePrefix = null; + private VCFWriter writer = null; + private PhasingQualityStatsWriter statsWriter = null; private LinkedList siteQueue = null; private VariantAndReads prevVr = null; // the VC emitted after phasing, and the alignment bases at the position emitted @@ -96,6 +96,9 @@ public class ReadBackedPhasingWalker extends LocusWalker(); prevVr = new VariantAndReads(null, null, true); + + if (variantStatsFilePrefix != null) + statsWriter = new PhasingQualityStatsWriter(variantStatsFilePrefix); } public boolean generateExtendedEvents() { @@ -279,14 +282,18 @@ public class ReadBackedPhasingWalker extends LocusWalker do NOT cap the quality! + logger.debug("MAX hap:\t" + maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + phaseQuality); - genotypesArePhased = (score >= phaseScoreThresh); + if (statsWriter != null) + statsWriter.addStat(samp, distance(prevVc, vc), phaseQuality); + + genotypesArePhased = (phaseQuality >= phaseQualityThresh); if (genotypesArePhased) { Biallele prevBiall = new Biallele(prevVc.getGenotype(samp)); ensurePhasing(biall, prevBiall, maxEntry.getHaplotypeClass().getRepresentative()); - gtAttribs.put("PQ", new Integer(QualityUtils.probToQual(score))); + gtAttribs.put("PQ", phaseQuality); logger.debug("CHOSE PHASE:\n" + biall + "\n\n"); } @@ -347,6 +354,15 @@ public class ReadBackedPhasingWalker extends LocusWalker sampPhaseCountEntry : result.getPhaseCounts()) { PhaseCounts pc = sampPhaseCountEntry.getValue(); out.println("Sample: " + sampPhaseCountEntry.getKey() + "\tNumber of tested sites: " + pc.numTestedSites + "\tNumber of phased sites: " + pc.numPhased); @@ -1085,56 +1103,45 @@ class PhasingStatsAndOutput { } } -class CardinalityCounter implements Iterator, Iterable { - private int[] cards; - private int[] valList; - private boolean hasNext; +class PhasingQualityStatsWriter { + private String variantStatsFilePrefix; + private HashMap sampleToStatsWriter = new HashMap(); - public CardinalityCounter(int[] cards) { - this.cards = cards; - this.valList = new int[cards.length]; - for (int i = 0; i < cards.length; i++) { - if (this.cards[i] <= 0) - throw new StingException("CANNOT have zero cardinalities!"); - this.valList[i] = 0; - } - this.hasNext = true; + public PhasingQualityStatsWriter(String variantStatsFilePrefix) { + this.variantStatsFilePrefix = variantStatsFilePrefix; } - public boolean hasNext() { - return hasNext; - } + public void addStat(String sample, int distanceFromPrevious, int phasingQuality) { + BufferedWriter sampWriter = sampleToStatsWriter.get(sample); + if (sampWriter == null) { + String fileName = variantStatsFilePrefix + "." + sample + ".distance_PQ.txt"; - public int[] next() { - if (!hasNext()) - throw new StingException("CANNOT iterate past end!"); - - // Copy the assignment to be returned: - int[] nextList = new int[valList.length]; - for (int i = 0; i < valList.length; i++) - nextList[i] = valList[i]; - - // Find the assignment after this one: - hasNext = false; - int i = cards.length - 1; - for (; i >= 0; i--) { - if (valList[i] < (cards[i] - 1)) { - valList[i]++; - hasNext = true; - break; + FileOutputStream output; + try { + output = new FileOutputStream(fileName); + } catch (FileNotFoundException e) { + throw new RuntimeException("Unable to create phasing quality stats file at location: " + fileName); } - valList[i] = 0; + sampWriter = new BufferedWriter(new OutputStreamWriter(output)); + sampleToStatsWriter.put(sample, sampWriter); + } + try { + sampWriter.write(distanceFromPrevious + "\t" + phasingQuality + "\n"); + sampWriter.flush(); + } catch (IOException e) { + throw new RuntimeException("Unable to write to per-sample phasing quality stats file", e); } - - return nextList; } - public void remove() { - throw new StingException("Cannot remove from CardinalityCounter!"); + public void close() { + for (Map.Entry sampWriterEntry : sampleToStatsWriter.entrySet()) { + BufferedWriter sampWriter = sampWriterEntry.getValue(); + try { + sampWriter.flush(); + sampWriter.close(); + } catch (IOException e) { + throw new RuntimeException("Unable to close per-sample phasing quality stats file"); + } + } } - - public Iterator iterator() { - return this; - } - -} +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/PreciseNonNegativeDouble.java b/java/src/org/broadinstitute/sting/utils/PreciseNonNegativeDouble.java index 8255b7307..c22ceb61d 100644 --- a/java/src/org/broadinstitute/sting/utils/PreciseNonNegativeDouble.java +++ b/java/src/org/broadinstitute/sting/utils/PreciseNonNegativeDouble.java @@ -32,7 +32,7 @@ public class PreciseNonNegativeDouble implements Comparable a R b, where R is one of: >, <, == double logValDiff = this.logValue - other.logValue; if (Math.abs(logValDiff) <= EQUALS_THRESH) @@ -70,23 +74,33 @@ public class PreciseNonNegativeDouble implements Comparable 0); } - public boolean lt(org.broadinstitute.sting.utils.PreciseNonNegativeDouble other) { + public boolean lt(PreciseNonNegativeDouble other) { return (this.compareTo(other) < 0); } - public org.broadinstitute.sting.utils.PreciseNonNegativeDouble plusEqual(org.broadinstitute.sting.utils.PreciseNonNegativeDouble other) { + public PreciseNonNegativeDouble plusEqual(PreciseNonNegativeDouble other) { logValue = addInLogSpace(logValue, other.logValue); return this; } + public PreciseNonNegativeDouble timesEqual(PreciseNonNegativeDouble other) { + logValue += other.logValue; + return this; + } + + public PreciseNonNegativeDouble divEqual(PreciseNonNegativeDouble other) { + logValue -= other.logValue; + return this; + } + // If x = log(a), y = log(b), returns log(a+b) public static double addInLogSpace(double x, double y) { if (x == INFINITY || y == INFINITY) return INFINITY; //log( e^INFINITY + e^y ) = INFINITY @@ -108,15 +122,17 @@ public class PreciseNonNegativeDouble implements Comparable= y) // x + log(1-e^(y-x)) = log(a) + log(1-e^(log(b)-log(a))) = log(a) + log(1-b/a) = a - b = |a-b|, since x >= y + return x + Math.log(1 - Math.exp(y-x)); + else + return y + Math.log(1 - Math.exp(x-y)); + } public String toString() { return new StringBuilder().append(getValue()).toString();