diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java index 8384890e9..227a4b2b5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java @@ -111,15 +111,15 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { samplePrevGenotypes.put(samp, null); } else { // Both comp and eval have a non-null Genotype at this site: - Biallele compBiallele = new Biallele(compSampGt); - Biallele evalBiallele = new Biallele(evalSampGt); + AllelePair compAllelePair = new AllelePair(compSampGt); + AllelePair evalAllelePair = new AllelePair(evalSampGt); boolean breakPhasing = false; if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom()) breakPhasing = true; // since they are not both het or both hom else { // both are het, or both are hom: - boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compBiallele, evalBiallele) && bottomMatchesBottom(compBiallele, evalBiallele)); - boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compBiallele, evalBiallele) && bottomMatchesTop(compBiallele, evalBiallele)); + boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compAllelePair, evalAllelePair) && bottomMatchesBottom(compAllelePair, evalAllelePair)); + boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compAllelePair, evalAllelePair) && bottomMatchesTop(compAllelePair, evalAllelePair)); if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop) breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample } @@ -154,12 +154,12 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { interesting.addReason("ONLY_EVAL", samp, group, ""); } else { // both comp and eval are phased: - Biallele prevCompBiallele = new Biallele(prevCompAndEval.getCompGenotpye()); - Biallele prevEvalBiallele = new Biallele(prevCompAndEval.getEvalGenotype()); + AllelePair prevCompAllelePair = new AllelePair(prevCompAndEval.getCompGenotpye()); + AllelePair prevEvalAllelePair = new AllelePair(prevCompAndEval.getEvalGenotype()); // Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample: - boolean topsMatch = (topMatchesTop(prevCompBiallele, prevEvalBiallele) && topMatchesTop(compBiallele, evalBiallele)); - boolean topMatchesBottom = (topMatchesBottom(prevCompBiallele, prevEvalBiallele) && topMatchesBottom(compBiallele, evalBiallele)); + boolean topsMatch = (topMatchesTop(prevCompAllelePair, prevEvalAllelePair) && topMatchesTop(compAllelePair, evalAllelePair)); + boolean topMatchesBottom = (topMatchesBottom(prevCompAllelePair, prevEvalAllelePair) && topMatchesBottom(compAllelePair, evalAllelePair)); if (topsMatch || topMatchesBottom) { ps.phasesAgree++; @@ -172,7 +172,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { else { ps.phasesDisagree++; logger.debug("SWITCHED locus: " + curLocus); - interesting.addReason("SWITCH", samp, group, toString(prevCompBiallele, compBiallele) + " -> " + toString(prevEvalBiallele, evalBiallele)); + interesting.addReason("SWITCH", samp, group, toString(prevCompAllelePair, compAllelePair) + " -> " + toString(prevEvalAllelePair, evalAllelePair)); } } } @@ -212,23 +212,23 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { return new Double(pq.toString()); } - public boolean topMatchesTop(Biallele b1, Biallele b2) { + public boolean topMatchesTop(AllelePair b1, AllelePair b2) { return b1.getTopAllele().equals(b2.getTopAllele()); } - public boolean topMatchesBottom(Biallele b1, Biallele b2) { + public boolean topMatchesBottom(AllelePair b1, AllelePair b2) { return b1.getTopAllele().equals(b2.getBottomAllele()); } - public boolean bottomMatchesTop(Biallele b1, Biallele b2) { + public boolean bottomMatchesTop(AllelePair b1, AllelePair b2) { return topMatchesBottom(b2, b1); } - public boolean bottomMatchesBottom(Biallele b1, Biallele b2) { + public boolean bottomMatchesBottom(AllelePair b1, AllelePair b2) { return b1.getBottomAllele().equals(b2.getBottomAllele()); } - public String toString(Biallele prev, Biallele cur) { + public String toString(AllelePair prev, AllelePair cur) { return prev.getTopAllele().getBaseString() + "," + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "," + cur.getBottomAllele().getBaseString(); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Biallele.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/AllelePair.java similarity index 93% rename from java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Biallele.java rename to java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/AllelePair.java index de9e549ee..4effa9767 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Biallele.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/AllelePair.java @@ -30,13 +30,13 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.List; -public class Biallele { +public class AllelePair { private Allele top; private Allele bottom; - public Biallele(Genotype gt) { + public AllelePair(Genotype gt) { if (gt.getPloidy() != 2) - throw new ReviewedStingException("Biallele must have ploidy of 2!"); + throw new ReviewedStingException("AllelePair must have ploidy of 2!"); this.top = gt.getAllele(0); this.bottom = gt.getAllele(1); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/DisjointSet.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/DisjointSet.java index 78ac92be4..a46d423d3 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/DisjointSet.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/DisjointSet.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.phasing; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /* * Copyright (c) 2010, The Broad Institute @@ -55,11 +54,11 @@ public class DisjointSet { } public boolean inSameSet(int x, int y) { - return (findSet(x) == findSet(y)); + return (x == y || nodes[x].parent == nodes[y].parent || findSet(x) == findSet(y)); } - public List inSameSetAs(int x, int[] testSet) { - List sameSetInds = new LinkedList(); + public Set inSameSetAs(int x, Collection testSet) { + Set sameSetInds = new TreeSet(); int xSet = findSet(x); for (int t : testSet) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Graph.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Graph.java index b5fe5e501..dc1389be0 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Graph.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Graph.java @@ -26,6 +26,7 @@ import java.util.*; * OTHER DEALINGS IN THE SOFTWARE. */ +// Represents an undirected graph with no self-edges: public class Graph implements Iterable { private Neighbors[] adj; @@ -36,45 +37,58 @@ public class Graph implements Iterable { } public void addEdge(GraphEdge e) { + if (e.v1 == e.v2) // do not permit self-edges + return; + adj[e.v1].addNeighbor(e); adj[e.v2].addNeighbor(e); } + public void addEdges(Collection edges) { + for (GraphEdge e : edges) + addEdge(e); + } + public void removeEdge(GraphEdge e) { adj[e.v1].removeNeighbor(e); adj[e.v2].removeNeighbor(e); } + public Collection removeAllIncidentEdges(int vertexIndex) { + Collection incidentEdges = new TreeSet(adj[vertexIndex].neighbors); // implemented GraphEdge.compareTo() + + for (GraphEdge neighbEdge : incidentEdges) { + if (vertexIndex != neighbEdge.v1) // vertexIndex == neighbEdge.v2 + adj[neighbEdge.v1].removeNeighbor(neighbEdge); + else if (vertexIndex != neighbEdge.v2) // vertexIndex == neighbEdge.v1 + adj[neighbEdge.v2].removeNeighbor(neighbEdge); + } + adj[vertexIndex].clearAllNeighbors(); + + return incidentEdges; + } + public DisjointSet getConnectedComponents() { DisjointSet cc = new DisjointSet(adj.length); - for (int i = 0; i < adj.length; i++) - for (GraphEdge e : adj[i]) - cc.setUnion(e.v1, e.v2); + for (GraphEdge e : this) + cc.setUnion(e.v1, e.v2); return cc; } - // Note that this will give each edge TWICE [since e=(v1,v2) is stored as a neighbor for both v1 and v2] public Iterator iterator() { return new AllEdgesIterator(); } - public List getAllEdges() { - Set allEdges = new TreeSet(); // implemented GraphEdge.compareTo() - for (GraphEdge e : this) - allEdges.add(e); - - return new LinkedList(allEdges); - } - public String toString() { StringBuilder sb = new StringBuilder(); for (int i = 0; i < adj.length; i++) { sb.append(i + ":"); - for (GraphEdge e : adj[i]) - sb.append(" " + e); + for (GraphEdge e : adj[i]) { + sb.append(" " + (e.v1 == i ? e.v2 : e.v1)); + } sb.append("\n"); } @@ -84,18 +98,29 @@ public class Graph implements Iterable { private class AllEdgesIterator implements Iterator { private int curInd; private Iterator innerIt; + private GraphEdge nextEdge; public AllEdgesIterator() { curInd = 0; innerIt = null; + nextEdge = null; } public boolean hasNext() { + if (nextEdge != null) + return true; + for (; curInd < adj.length; curInd++) { if (innerIt == null) innerIt = adj[curInd].iterator(); - if (innerIt.hasNext()) - return true; + + while (innerIt.hasNext()) { + GraphEdge e = innerIt.next(); + if (e.v1 == curInd) { // only want to see each edge once + nextEdge = e; + return true; + } + } innerIt = null; } @@ -107,7 +132,9 @@ public class Graph implements Iterable { if (!hasNext()) throw new NoSuchElementException(); - return innerIt.next(); + GraphEdge tmpEdge = nextEdge; + nextEdge = null; + return tmpEdge; } public void remove() { @@ -133,5 +160,9 @@ public class Graph implements Iterable { public Iterator iterator() { return neighbors.iterator(); } + + public void clearAllNeighbors() { + neighbors.clear(); + } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/ReadBackedPhasingWalker.java index a6c99c057..749ee83b8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -43,7 +43,6 @@ import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import javax.naming.OperationNotSupportedException; import java.io.*; import java.util.*; @@ -245,7 +244,7 @@ public class ReadBackedPhasingWalker extends RodWalker prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt; @@ -257,9 +256,18 @@ public class ReadBackedPhasingWalker extends RodWalker= phaseQualityThresh); + boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality); + + // + // + if (pr.phaseQuality < 0) { + logger.warn("MORE than 10% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc)); + } + // + // + if (genotypesArePhased) { - BialleleSNP prevBiall = new BialleleSNP(prevHetGenotype); + SNPallelePair prevBiall = new SNPallelePair(prevHetGenotype); logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n"); logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n"); @@ -311,6 +319,10 @@ public class ReadBackedPhasingWalker extends RodWalker= phaseQualityThresh; + } + private static class GenotypeAndReadBases { public Genotype genotype; public ReadBasesAtPosition readBases; @@ -449,7 +461,7 @@ public class ReadBackedPhasingWalker extends RodWalker nameToReads : readsAtHetSites.entrySet()) { String rdName = nameToReads.getKey(); Read rd = nameToReads.getValue(); @@ -458,85 +470,98 @@ public class ReadBackedPhasingWalker extends RodWalker rdEdges; - public int[] siteInds; + private class EdgeToReads { + private Map> edgeReads; - public ReadProperties(Read rd) { - this.siteInds = rd.getNonNullIndices(); - this.rdEdges = new LinkedList(); + public EdgeToReads() { + this.edgeReads = new TreeMap>(); // implemented GraphEdge.compareTo() + } - // sufficient to create a path linking the sites in rd, so they all end up in the same connected component: - for (int i = 0; i < siteInds.length - 1; i++) { - GraphEdge e = new GraphEdge(siteInds[i], siteInds[i + 1]); - rdEdges.add(e); + public void addRead(GraphEdge e, String readName) { + List reads = edgeReads.get(e); + if (reads == null) { + reads = new LinkedList(); + edgeReads.put(e, reads); } + reads.add(readName); + } + + public List getReads(GraphEdge e) { + return edgeReads.get(e); } } - private class EdgeCounts { - private Map counts; + private class IntegerSet implements Iterable { + private Set list; - public EdgeCounts() { - this.counts = new TreeMap(); // implemented GraphEdge.compareTo() + public IntegerSet(Set list) { + this.list = list; } - public int getCount(GraphEdge e) { - Integer count = counts.get(e); - if (count == null) - return 0; - - return count; + public boolean contains(int i) { + return list.contains(i); } - public int incrementEdge(GraphEdge e) { - Integer eCount = counts.get(e); - int cnt; - if (eCount == null) - cnt = 0; - else - cnt = eCount; - - cnt++; - counts.put(e, cnt); - return cnt; + public Iterator iterator() { + return list.iterator(); } - public int decrementEdge(GraphEdge e) { - Integer eCount = counts.get(e); - if (eCount == null) - return 0; - - int cnt = eCount - 1; - counts.put(e, cnt); - return cnt; + public String toString() { + StringBuilder sb = new StringBuilder(); + for (int i : this) { + sb.append(i + ", "); + } + return sb.toString(); } } public Set removeExtraneousReads(int numHetSites) { Graph readGraph = new Graph(numHetSites); - Map readToGraphProperties = new HashMap(); - EdgeCounts edgeCounts = new EdgeCounts(); + EdgeToReads edgeToReads = new EdgeToReads(); + Set sitesWithEdges = new TreeSet(); for (Map.Entry nameToReads : readsAtHetSites.entrySet()) { String rdName = nameToReads.getKey(); Read rd = nameToReads.getValue(); - ReadProperties rp = new ReadProperties(rd); - if (!rp.rdEdges.isEmpty()) { // otherwise, this read is clearly irrelevant since it can't link anything - for (GraphEdge e : rp.rdEdges) { - readGraph.addEdge(e); + int[] siteInds = rd.getNonNullIndices(); + // Connect each pair of non-null sites in rd: + for (int i = 0; i < siteInds.length; i++) { + for (int j = i + 1; j < siteInds.length; j++) { + GraphEdge e = new GraphEdge(siteInds[i], siteInds[j]); logger.debug("Read = " + rdName + " is adding edge: " + e); + readGraph.addEdge(e); - edgeCounts.incrementEdge(e); + edgeToReads.addRead(e, rdName); + + sitesWithEdges.add(e.v1); + sitesWithEdges.add(e.v2); } - readToGraphProperties.put(rdName, rp); } } logger.debug("Read graph:\n" + readGraph); Set keepReads = new HashSet(); - // Check which Reads are involved in paths from (phasingSiteIndex - 1) to (phasingSiteIndex): + /* Check which Reads are involved in acyclic paths from (phasingSiteIndex - 1) to (phasingSiteIndex): + + In detail: + Every Read links EACH pair of sites for which it contains bases. Then, each such edge is added to a "site connectivity graph". + A read provides non-trivial bias toward the final haplotype decision if it participates in a path from prev ---> cur. This is tested by + considering each edge that the read contributes. For edge e=(v1,v2), if there exists a path from prev ---> v1 [that doesn't include v2] and + cur ---> v2 [that doesn't include v1], then there is a path from prev ---> cur that uses e, hence making the read significant. + By excluding each vertex's edges and then calculating connected components, we are able to make the determination, for example, + if a path exists from prev ---> v1 that excludes v2. + + Furthermore, if the path DOES use other edges that exist solely due to the read, then that's fine, since adding in the read will give those edges as well. + And, if the path uses edges from other reads, then keeping all other reads that contribute those edges + [which will happen since those edges are also in paths from prev ---> cur] is sufficient for this path to exist. + + NOTE: + If we would use NON-UNIFORM priors for the various haplotypes, then this calculation would not be correct, since the equivalence of: + 1. The read affects the final marginal haplotype posterior probability (for general mapping and base quality values). + 2. The read has edges involved in a path from prev ---> cur. + DEPENDS STRONGLY on the fact that all haplotypes have the same EXACT prior. + */ int prev = phasingSiteIndex - 1; int cur = phasingSiteIndex; @@ -546,56 +571,51 @@ public class ReadBackedPhasingWalker extends RodWalker rdEdgesEntry : readToGraphProperties.entrySet()) { - String testRead = rdEdgesEntry.getKey(); - ReadProperties rp = rdEdgesEntry.getValue(); - logger.debug("Testing the connectivity of Read: " + testRead); + /* Check the connected components of prev and cur when removing each individual vertex's edges: + [Total run-time: for each vertex, calculate connected components after removing it's edges: O(V * E)] + */ + IntegerSet[] removedSiteSameCCAsPrev = new IntegerSet[numHetSites]; + IntegerSet[] removedSiteSameCCAsCur = new IntegerSet[numHetSites]; + for (int i : sitesWithEdges) { + logger.debug("Calculating CC after removing edges of site: " + i); - // Check the connected components after removing this read's UNIQUE edges: - for (GraphEdge e : rp.rdEdges) { - if (edgeCounts.getCount(e) == 1) // otherwise, the edge still exists without this read - readGraph.removeEdge(e); - } + // Remove all edges incident to i and see which positions have paths to prev and cur: + Collection removedEdges = readGraph.removeAllIncidentEdges(i); + + // Run-time for efficiently calculating connected components using DisjointSet: O(E) DisjointSet ccAfterRemove = readGraph.getConnectedComponents(); + removedSiteSameCCAsPrev[i] = new IntegerSet(ccAfterRemove.inSameSetAs(prev, sitesWithEdges)); + removedSiteSameCCAsCur[i] = new IntegerSet(ccAfterRemove.inSameSetAs(cur, sitesWithEdges)); - /* testRead contributes a path between prev and cur iff: - There exists i != j s.t. testRead[i] != null, testRead[j] != null, ccAfterRemove.inSameSet(prev,i) && ccAfterRemove.inSameSet(j,cur) - [since ALL non-null indices in testRead are connected to one another, as one clique]. + logger.debug("Same CC as previous [" + prev + "]: " + removedSiteSameCCAsPrev[i]); + logger.debug("Same CC as current [" + cur + "]: " + removedSiteSameCCAsCur[i]); + + // Add the removed edges back in: + readGraph.addEdges(removedEdges); + } + + for (GraphEdge e : readGraph) { + logger.debug("Testing the path-connectivity of Edge: " + e); + + /* Edge e={v1,v2} contributes a path between prev and cur for testRead iff: + testRead[v1] != null, testRead[v2] != null, and there is a path from prev ---> v1 -> v2 ---> cur [or vice versa]. + Note that the path from prev ---> v1 will NOT contain v2, since we removed all of v2's edges, + and the path from v2 ---> cur will NOT contain v1. */ - List sameCCasPrev = ccAfterRemove.inSameSetAs(prev, rp.siteInds); - List sameCCasCur = ccAfterRemove.inSameSetAs(cur, rp.siteInds); - if (logger.isDebugEnabled()) { - StringBuilder sb = new StringBuilder("sameCCasPrev:"); - for (int ind : sameCCasPrev) - sb.append(" " + ind); - logger.debug(sb.toString()); + boolean prevTo2and1ToCur = removedSiteSameCCAsPrev[e.v1].contains(e.v2) && removedSiteSameCCAsCur[e.v2].contains(e.v1); + boolean prevTo1and2ToCur = removedSiteSameCCAsPrev[e.v2].contains(e.v1) && removedSiteSameCCAsCur[e.v1].contains(e.v2); - sb = new StringBuilder("sameCCasCur:"); - for (int ind : sameCCasCur) - sb.append(" " + ind); - logger.debug(sb.toString()); - } + if (prevTo2and1ToCur || prevTo1and2ToCur) { + for (String readName : edgeToReads.getReads(e)) { + keepReads.add(readName); - boolean keepRead = false; - if (!sameCCasPrev.isEmpty() && !sameCCasCur.isEmpty()) { // There exists a path from prev to cur that goes through the sites in testRead - // Now, make sure that TWO DISTINCT sites, i and j, in testRead are used in the path: - Set union = new HashSet(sameCCasPrev); - union.addAll(sameCCasCur); - if (union.size() >= 2) // i != j - keepRead = true; - } - - if (keepRead) { - logger.debug("Read is part of path from " + prev + " to " + cur); - keepReads.add(testRead); - - // Add the removed edges back in, since we're keeping the read: - for (GraphEdge e : rp.rdEdges) - readGraph.addEdge(e); - } - else { // Decrease the count for the edges [note that any read-specific edges were already removed above]: - for (GraphEdge e : rp.rdEdges) - edgeCounts.decrementEdge(e); + if (logger.isDebugEnabled()) { + if (prevTo2and1ToCur) + logger.debug("Keep read " + readName + " due to path: " + prev + " ---> " + e.v2 + " -> " + e.v1 + " ---> " + cur); + else + logger.debug("Keep read " + readName + " due to path: " + prev + " ---> " + e.v1 + " -> " + e.v2 + " ---> " + cur); + } + } } } @@ -677,7 +697,7 @@ public class ReadBackedPhasingWalker extends RodWalker nameToReads : phaseWindow.readsAtHetSites.entrySet()) { Read rd = nameToReads.getValue(); @@ -704,38 +727,88 @@ public class ReadBackedPhasingWalker extends RodWalker= 0.1) { + // + // ???? + // NEED TO CHANGE phaseSite() to always output PQ field, EVEN if it's LESS THAN threshold ?????? + // ???? + // + repPhaseQuality *= -1; } - double phaseQuality = -10.0 * (sumErrorProbs.getLog10Value()); + // + // - logger.debug("MAX hap:\t" + maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + phaseQuality); + return new PhaseResult(maxHapQual.maxEntry.getHaplotypeClass().getRepresentative(), repPhaseQuality); + } - return new PhaseResult(maxEntry.getHaplotypeClass().getRepresentative(), phaseQuality); + private static class MaxHaplotypeAndQuality { + public PhasingTable.PhasingTableEntry maxEntry; + public double phaseQuality; + + public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) { + // Marginalize each haplotype to its first 2 positions: + hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable); + if (printDebug) + logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n"); + + // Determine the phase at this position: + hapTable.normalizeScores(); + if (printDebug) + logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n"); + + this.maxEntry = hapTable.maxEntry(); + + // convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb): + this.phaseQuality = getPhasingQuality(hapTable, maxEntry); + } + + // Returns the PQ of entry (within table hapTable, which MUST be normalized): + public static double getPhasingQuality(PhasingTable hapTable, PhasingTable.PhasingTableEntry entry) { + PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO); + for (PhasingTable.PhasingTableEntry pte : hapTable) { + if (pte != entry) + sumErrorProbs.plusEqual(pte.getScore()); + } + return -10.0 * (sumErrorProbs.getLog10Value()); + } } /* Ensure that curBiall is phased relative to prevBiall as specified by hap. */ - - public static void ensurePhasing(BialleleSNP curBiall, BialleleSNP prevBiall, Haplotype hap) { + public static void ensurePhasing(SNPallelePair curBiall, SNPallelePair prevBiall, Haplotype hap) { if (hap.size() < 2) throw new ReviewedStingException("LOGICAL ERROR: Only considering haplotypes of length > 2!"); @@ -784,7 +857,7 @@ public class ReadBackedPhasingWalker extends RodWalker hapMap = new TreeMap(); for (PhasingTable.PhasingTableEntry pte : table) { Haplotype rep = pte.getHaplotypeClass().getRepresentative(); @@ -1056,23 +1129,23 @@ public class ReadBackedPhasingWalker extends RodWalker