Fixed phasing algorithm to: 1. More correctly weed out irrelevant reads and sites; 2. Crudely flag sites with large phase discrepancies betweens reads

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4368 348d0f76-0448-11de-a6fe-93d51630548a
2010-09-28 23:02:53 +00:00 · 2010-09-28 23:02:53 +00:00 · 8d8980e8eb
parent 5a5c72c80d
commit 8d8980e8eb
6 changed files with 270 additions and 167 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypePhasingEvaluator.java
@ -111,15 +111,15 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
                    samplePrevGenotypes.put(samp, null);
            }
            else { // Both comp and eval have a non-null Genotype at this site:
-                Biallele compBiallele = new Biallele(compSampGt);
-                Biallele evalBiallele = new Biallele(evalSampGt);
+                AllelePair compAllelePair = new AllelePair(compSampGt);
+                AllelePair evalAllelePair = new AllelePair(evalSampGt);

                boolean breakPhasing = false;
                if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom())
                    breakPhasing = true; // since they are not both het or both hom
                else { // both are het, or both are hom:
-                    boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compBiallele, evalBiallele) && bottomMatchesBottom(compBiallele, evalBiallele));
-                    boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compBiallele, evalBiallele) && bottomMatchesTop(compBiallele, evalBiallele));
+                    boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compAllelePair, evalAllelePair) && bottomMatchesBottom(compAllelePair, evalAllelePair));
+                    boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compAllelePair, evalAllelePair) && bottomMatchesTop(compAllelePair, evalAllelePair));
                    if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop)
                        breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample
                }
@ -154,12 +154,12 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
                                interesting.addReason("ONLY_EVAL", samp, group, "");
                            }
                            else { // both comp and eval are phased:                        
-                                Biallele prevCompBiallele = new Biallele(prevCompAndEval.getCompGenotpye());
-                                Biallele prevEvalBiallele = new Biallele(prevCompAndEval.getEvalGenotype());
+                                AllelePair prevCompAllelePair = new AllelePair(prevCompAndEval.getCompGenotpye());
+                                AllelePair prevEvalAllelePair = new AllelePair(prevCompAndEval.getEvalGenotype());

                                // Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample:
-                                boolean topsMatch = (topMatchesTop(prevCompBiallele, prevEvalBiallele) && topMatchesTop(compBiallele, evalBiallele));
-                                boolean topMatchesBottom = (topMatchesBottom(prevCompBiallele, prevEvalBiallele) && topMatchesBottom(compBiallele, evalBiallele));
+                                boolean topsMatch = (topMatchesTop(prevCompAllelePair, prevEvalAllelePair) && topMatchesTop(compAllelePair, evalAllelePair));
+                                boolean topMatchesBottom = (topMatchesBottom(prevCompAllelePair, prevEvalAllelePair) && topMatchesBottom(compAllelePair, evalAllelePair));

                                if (topsMatch || topMatchesBottom) {
                                    ps.phasesAgree++;
@ -172,7 +172,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
                                else {
                                    ps.phasesDisagree++;
                                    logger.debug("SWITCHED locus: " + curLocus);
-                                    interesting.addReason("SWITCH", samp, group, toString(prevCompBiallele, compBiallele) + " -> " + toString(prevEvalBiallele, evalBiallele));
+                                    interesting.addReason("SWITCH", samp, group, toString(prevCompAllelePair, compAllelePair) + " -> " + toString(prevEvalAllelePair, evalAllelePair));
                                }
                            }
                        }
@ -212,23 +212,23 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
        return new Double(pq.toString());
    }

-    public boolean topMatchesTop(Biallele b1, Biallele b2) {
+    public boolean topMatchesTop(AllelePair b1, AllelePair b2) {
        return b1.getTopAllele().equals(b2.getTopAllele());
    }

-    public boolean topMatchesBottom(Biallele b1, Biallele b2) {
+    public boolean topMatchesBottom(AllelePair b1, AllelePair b2) {
        return b1.getTopAllele().equals(b2.getBottomAllele());
    }

-    public boolean bottomMatchesTop(Biallele b1, Biallele b2) {
+    public boolean bottomMatchesTop(AllelePair b1, AllelePair b2) {
        return topMatchesBottom(b2, b1);
    }

-    public boolean bottomMatchesBottom(Biallele b1, Biallele b2) {
+    public boolean bottomMatchesBottom(AllelePair b1, AllelePair b2) {
        return b1.getBottomAllele().equals(b2.getBottomAllele());
    }

-    public String toString(Biallele prev, Biallele cur) {
+    public String toString(AllelePair prev, AllelePair cur) {
        return prev.getTopAllele().getBaseString() + "," + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "," + cur.getBottomAllele().getBaseString();
    }

--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/AllelePair.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/AllelePair.java
@ -30,13 +30,13 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import java.util.ArrayList;
 import java.util.List;

-public class Biallele {
+public class AllelePair {
    private Allele top;
    private Allele bottom;

-    public Biallele(Genotype gt) {
+    public AllelePair(Genotype gt) {
        if (gt.getPloidy() != 2)
-            throw new ReviewedStingException("Biallele must have ploidy of 2!");
+            throw new ReviewedStingException("AllelePair must have ploidy of 2!");

        this.top = gt.getAllele(0);
        this.bottom = gt.getAllele(1);
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/DisjointSet.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/DisjointSet.java
@ -1,7 +1,6 @@
 package org.broadinstitute.sting.playground.gatk.walkers.phasing;

-import java.util.LinkedList;
-import java.util.List;
+import java.util.*;

 /*
 * Copyright (c) 2010, The Broad Institute
@ -55,11 +54,11 @@ public class DisjointSet {
    }

    public boolean inSameSet(int x, int y) {
-        return (findSet(x) == findSet(y));
+        return (x == y || nodes[x].parent == nodes[y].parent || findSet(x) == findSet(y));
    }

-    public List<Integer> inSameSetAs(int x, int[] testSet) {
-        List<Integer> sameSetInds = new LinkedList<Integer>();
+    public Set<Integer> inSameSetAs(int x, Collection<Integer> testSet) {
+        Set<Integer> sameSetInds = new TreeSet<Integer>();

        int xSet = findSet(x);
        for (int t : testSet) {
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Graph.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/Graph.java
@ -26,6 +26,7 @@ import java.util.*;
 * OTHER DEALINGS IN THE SOFTWARE.
 */

+// Represents an undirected graph with no self-edges:
 public class Graph implements Iterable<GraphEdge> {
    private Neighbors[] adj;

@ -36,45 +37,58 @@ public class Graph implements Iterable<GraphEdge> {
    }

    public void addEdge(GraphEdge e) {
+        if (e.v1 == e.v2) // do not permit self-edges
+            return;
+
        adj[e.v1].addNeighbor(e);
        adj[e.v2].addNeighbor(e);
    }

+    public void addEdges(Collection<GraphEdge> edges) {
+        for (GraphEdge e : edges)
+            addEdge(e);
+    }
+
    public void removeEdge(GraphEdge e) {
        adj[e.v1].removeNeighbor(e);
        adj[e.v2].removeNeighbor(e);
    }

+    public Collection<GraphEdge> removeAllIncidentEdges(int vertexIndex) {
+        Collection<GraphEdge> incidentEdges = new TreeSet<GraphEdge>(adj[vertexIndex].neighbors); // implemented GraphEdge.compareTo()
+
+        for (GraphEdge neighbEdge : incidentEdges) {
+            if (vertexIndex != neighbEdge.v1) // vertexIndex == neighbEdge.v2
+                adj[neighbEdge.v1].removeNeighbor(neighbEdge);
+            else if (vertexIndex != neighbEdge.v2) // vertexIndex == neighbEdge.v1
+                adj[neighbEdge.v2].removeNeighbor(neighbEdge);
+        }
+        adj[vertexIndex].clearAllNeighbors();
+
+        return incidentEdges;
+    }
+
    public DisjointSet getConnectedComponents() {
        DisjointSet cc = new DisjointSet(adj.length);

-        for (int i = 0; i < adj.length; i++)
-            for (GraphEdge e : adj[i])
-                cc.setUnion(e.v1, e.v2);
+        for (GraphEdge e : this)
+            cc.setUnion(e.v1, e.v2);

        return cc;
    }

-    // Note that this will give each edge TWICE [since e=(v1,v2) is stored as a neighbor for both v1 and v2]
    public Iterator<GraphEdge> iterator() {
        return new AllEdgesIterator();
    }

-    public List<GraphEdge> getAllEdges() {
-        Set<GraphEdge> allEdges = new TreeSet<GraphEdge>(); // implemented GraphEdge.compareTo()
-        for (GraphEdge e : this)
-            allEdges.add(e);
-
-        return new LinkedList<GraphEdge>(allEdges);
-    }
-
    public String toString() {
        StringBuilder sb = new StringBuilder();

        for (int i = 0; i < adj.length; i++) {
            sb.append(i + ":");
-            for (GraphEdge e : adj[i])
-                sb.append(" " + e);
+            for (GraphEdge e : adj[i]) {
+                sb.append(" " + (e.v1 == i ? e.v2 : e.v1));
+            }
            sb.append("\n");
        }

@ -84,18 +98,29 @@ public class Graph implements Iterable<GraphEdge> {
    private class AllEdgesIterator implements Iterator<GraphEdge> {
        private int curInd;
        private Iterator<GraphEdge> innerIt;
+        private GraphEdge nextEdge;

        public AllEdgesIterator() {
            curInd = 0;
            innerIt = null;
+            nextEdge = null;
        }

        public boolean hasNext() {
+            if (nextEdge != null)
+                return true;
+
            for (; curInd < adj.length; curInd++) {
                if (innerIt == null)
                    innerIt = adj[curInd].iterator();
-                if (innerIt.hasNext())
-                    return true;
+
+                while (innerIt.hasNext()) {
+                    GraphEdge e = innerIt.next();
+                    if (e.v1 == curInd) { // only want to see each edge once
+                        nextEdge = e;
+                        return true;
+                    }
+                }

                innerIt = null;
            }
@ -107,7 +132,9 @@ public class Graph implements Iterable<GraphEdge> {
            if (!hasNext())
                throw new NoSuchElementException();

-            return innerIt.next();
+            GraphEdge tmpEdge = nextEdge;
+            nextEdge = null;
+            return tmpEdge;
        }

        public void remove() {
@ -133,5 +160,9 @@ public class Graph implements Iterable<GraphEdge> {
        public Iterator<GraphEdge> iterator() {
            return neighbors.iterator();
        }
+
+        public void clearAllNeighbors() {
+            neighbors.clear();
+        }
    }
 }
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/ReadBackedPhasingWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/ReadBackedPhasingWalker.java
@ -43,7 +43,6 @@ import org.broadinstitute.sting.utils.vcf.VCFUtils;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

-import javax.naming.OperationNotSupportedException;
 import java.io.*;
 import java.util.*;

@ -245,7 +244,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
            if (isCalledDiploidGenotype(gt) && gt.isHet()) { // Can attempt to phase this genotype
                PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
                if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
-                    BialleleSNP biall = new BialleleSNP(gt);
+                    SNPallelePair biall = new SNPallelePair(gt);
                    logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + biall);

                    DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
@ -257,9 +256,18 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
                    Genotype prevHetGenotype = prevUvc.getGenotype(samp);

                    PhaseResult pr = phaseSample(phaseWindow);
-                    boolean genotypesArePhased = (pr.phaseQuality >= phaseQualityThresh);
+                    boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
+
+                    //
+                    //
+                    if (pr.phaseQuality < 0) {
+                        logger.warn("MORE than 10% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc));
+                    }
+                    //
+                    //
+
                    if (genotypesArePhased) {
-                        BialleleSNP prevBiall = new BialleleSNP(prevHetGenotype);
+                        SNPallelePair prevBiall = new SNPallelePair(prevHetGenotype);

                        logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n");
                        logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n");
@ -311,6 +319,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
        phaseStats.addIn(new PhasingStats(samplePhaseStats));
    }

+    public boolean passesPhasingThreshold(double PQ) {
+        return PQ >= phaseQualityThresh;
+    }
+
    private static class GenotypeAndReadBases {
        public Genotype genotype;
        public ReadBasesAtPosition readBases;
@ -449,7 +461,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
            logger.debug("Number of sites in window = " + index);

            if (logger.isDebugEnabled()) {
-                logger.debug("ALL READS:");
+                logger.debug("ALL READS [phasingSiteIndex = " + phasingSiteIndex + "]:");
                for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
                    String rdName = nameToReads.getKey();
                    Read rd = nameToReads.getValue();
@ -458,85 +470,98 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
            }
        }

-        private class ReadProperties {
-            public List<GraphEdge> rdEdges;
-            public int[] siteInds;
+        private class EdgeToReads {
+            private Map<GraphEdge, List<String>> edgeReads;

-            public ReadProperties(Read rd) {
-                this.siteInds = rd.getNonNullIndices();
-                this.rdEdges = new LinkedList<GraphEdge>();
+            public EdgeToReads() {
+                this.edgeReads = new TreeMap<GraphEdge, List<String>>(); // implemented GraphEdge.compareTo()
+            }

-                // sufficient to create a path linking the sites in rd, so they all end up in the same connected component:
-                for (int i = 0; i < siteInds.length - 1; i++) {
-                    GraphEdge e = new GraphEdge(siteInds[i], siteInds[i + 1]);
-                    rdEdges.add(e);
+            public void addRead(GraphEdge e, String readName) {
+                List<String> reads = edgeReads.get(e);
+                if (reads == null) {
+                    reads = new LinkedList<String>();
+                    edgeReads.put(e, reads);
                }
+                reads.add(readName);
+            }
+
+            public List<String> getReads(GraphEdge e) {
+                return edgeReads.get(e);
            }
        }

-        private class EdgeCounts {
-            private Map<GraphEdge, Integer> counts;
+        private class IntegerSet implements Iterable<Integer> {
+            private Set<Integer> list;

-            public EdgeCounts() {
-                this.counts = new TreeMap<GraphEdge, Integer>(); // implemented GraphEdge.compareTo()
+            public IntegerSet(Set<Integer> list) {
+                this.list = list;
            }

-            public int getCount(GraphEdge e) {
-                Integer count = counts.get(e);
-                if (count == null)
-                    return 0;
-
-                return count;
+            public boolean contains(int i) {
+                return list.contains(i);
            }

-            public int incrementEdge(GraphEdge e) {
-                Integer eCount = counts.get(e);
-                int cnt;
-                if (eCount == null)
-                    cnt = 0;
-                else
-                    cnt = eCount;
-
-                cnt++;
-                counts.put(e, cnt);
-                return cnt;
+            public Iterator<Integer> iterator() {
+                return list.iterator();
            }

-            public int decrementEdge(GraphEdge e) {
-                Integer eCount = counts.get(e);
-                if (eCount == null)
-                    return 0;
-
-                int cnt = eCount - 1;
-                counts.put(e, cnt);
-                return cnt;
+            public String toString() {
+                StringBuilder sb = new StringBuilder();
+                for (int i : this) {
+                    sb.append(i + ", ");
+                }
+                return sb.toString();
            }
        }

        public Set<String> removeExtraneousReads(int numHetSites) {
            Graph readGraph = new Graph(numHetSites);
-            Map<String, ReadProperties> readToGraphProperties = new HashMap<String, ReadProperties>();
-            EdgeCounts edgeCounts = new EdgeCounts();
+            EdgeToReads edgeToReads = new EdgeToReads();
+            Set<Integer> sitesWithEdges = new TreeSet<Integer>();

            for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
                String rdName = nameToReads.getKey();
                Read rd = nameToReads.getValue();

-                ReadProperties rp = new ReadProperties(rd);
-                if (!rp.rdEdges.isEmpty()) { // otherwise, this read is clearly irrelevant since it can't link anything
-                    for (GraphEdge e : rp.rdEdges) {
-                        readGraph.addEdge(e);
+                int[] siteInds = rd.getNonNullIndices();
+                // Connect each pair of non-null sites in rd:
+                for (int i = 0; i < siteInds.length; i++) {
+                    for (int j = i + 1; j < siteInds.length; j++) {
+                        GraphEdge e = new GraphEdge(siteInds[i], siteInds[j]);
                        logger.debug("Read = " + rdName + " is adding edge: " + e);
+                        readGraph.addEdge(e);

-                        edgeCounts.incrementEdge(e);
+                        edgeToReads.addRead(e, rdName);
+
+                        sitesWithEdges.add(e.v1);
+                        sitesWithEdges.add(e.v2);
                    }
-                    readToGraphProperties.put(rdName, rp);
                }
            }
            logger.debug("Read graph:\n" + readGraph);
            Set<String> keepReads = new HashSet<String>();

-            // Check which Reads are involved in paths from (phasingSiteIndex - 1) to (phasingSiteIndex):
+            /* Check which Reads are involved in acyclic paths from (phasingSiteIndex - 1) to (phasingSiteIndex):
+
+               In detail:
+               Every Read links EACH pair of sites for which it contains bases.  Then, each such edge is added to a "site connectivity graph".
+               A read provides non-trivial bias toward the final haplotype decision if it participates in a path from prev ---> cur.  This is tested by
+               considering each edge that the read contributes.  For edge e=(v1,v2), if there exists a path from prev ---> v1 [that doesn't include v2] and
+               cur ---> v2 [that doesn't include v1], then there is a path from prev ---> cur that uses e, hence making the read significant.
+               By excluding each vertex's edges and then calculating connected components, we are able to make the determination, for example,
+               if a path exists from prev ---> v1 that excludes v2.
+
+               Furthermore, if the path DOES use other edges that exist solely due to the read, then that's fine, since adding in the read will give those edges as well.
+               And, if the path uses edges from other reads, then keeping all other reads that contribute those edges
+               [which will happen since those edges are also in paths from prev ---> cur] is sufficient for this path to exist.
+
+               NOTE:
+               If we would use NON-UNIFORM priors for the various haplotypes, then this calculation would not be correct, since the equivalence of:
+               1. The read affects the final marginal haplotype posterior probability (for general mapping and base quality values).
+               2. The read has edges involved in a path from prev ---> cur.
+               DEPENDS STRONGLY on the fact that all haplotypes have the same EXACT prior.
+             */
            int prev = phasingSiteIndex - 1;
            int cur = phasingSiteIndex;

@ -546,56 +571,51 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
                return keepReads;
            }

-            for (Map.Entry<String, ReadProperties> rdEdgesEntry : readToGraphProperties.entrySet()) {
-                String testRead = rdEdgesEntry.getKey();
-                ReadProperties rp = rdEdgesEntry.getValue();
-                logger.debug("Testing the connectivity of Read: " + testRead);
+            /* Check the connected components of prev and cur when removing each individual vertex's edges:
+               [Total run-time: for each vertex, calculate connected components after removing it's edges: O(V * E)]
+             */
+            IntegerSet[] removedSiteSameCCAsPrev = new IntegerSet[numHetSites];
+            IntegerSet[] removedSiteSameCCAsCur = new IntegerSet[numHetSites];
+            for (int i : sitesWithEdges) {
+                logger.debug("Calculating CC after removing edges of site: " + i);

-                // Check the connected components after removing this read's UNIQUE edges:
-                for (GraphEdge e : rp.rdEdges) {
-                    if (edgeCounts.getCount(e) == 1) // otherwise, the edge still exists without this read
-                        readGraph.removeEdge(e);
-                }
+                // Remove all edges incident to i and see which positions have paths to prev and cur:
+                Collection<GraphEdge> removedEdges = readGraph.removeAllIncidentEdges(i);
+
+                // Run-time for efficiently calculating connected components using DisjointSet: O(E)
                DisjointSet ccAfterRemove = readGraph.getConnectedComponents();
+                removedSiteSameCCAsPrev[i] = new IntegerSet(ccAfterRemove.inSameSetAs(prev, sitesWithEdges));
+                removedSiteSameCCAsCur[i] = new IntegerSet(ccAfterRemove.inSameSetAs(cur, sitesWithEdges));

-                /* testRead contributes a path between prev and cur iff:
-                   There exists i != j s.t. testRead[i] != null, testRead[j] != null, ccAfterRemove.inSameSet(prev,i) && ccAfterRemove.inSameSet(j,cur)
-                   [since ALL non-null indices in testRead are connected to one another, as one clique].
+                logger.debug("Same CC as previous [" + prev + "]: " + removedSiteSameCCAsPrev[i]);
+                logger.debug("Same CC as current  [" + cur + "]: " + removedSiteSameCCAsCur[i]);
+
+                // Add the removed edges back in:
+                readGraph.addEdges(removedEdges);
+            }
+
+            for (GraphEdge e : readGraph) {
+                logger.debug("Testing the path-connectivity of Edge: " + e);
+
+                /* Edge e={v1,v2} contributes a path between prev and cur for testRead iff:
+                   testRead[v1] != null, testRead[v2] != null, and there is a path from prev ---> v1 -> v2 ---> cur  [or vice versa].
+                   Note that the path from prev ---> v1 will NOT contain v2, since we removed all of v2's edges,
+                   and the path from v2 ---> cur will NOT contain v1.
                 */
-                List<Integer> sameCCasPrev = ccAfterRemove.inSameSetAs(prev, rp.siteInds);
-                List<Integer> sameCCasCur = ccAfterRemove.inSameSetAs(cur, rp.siteInds);
-                if (logger.isDebugEnabled()) {
-                    StringBuilder sb = new StringBuilder("sameCCasPrev:");
-                    for (int ind : sameCCasPrev)
-                        sb.append(" " + ind);
-                    logger.debug(sb.toString());
+                boolean prevTo2and1ToCur = removedSiteSameCCAsPrev[e.v1].contains(e.v2) && removedSiteSameCCAsCur[e.v2].contains(e.v1);
+                boolean prevTo1and2ToCur = removedSiteSameCCAsPrev[e.v2].contains(e.v1) && removedSiteSameCCAsCur[e.v1].contains(e.v2);

-                    sb = new StringBuilder("sameCCasCur:");
-                    for (int ind : sameCCasCur)
-                        sb.append(" " + ind);
-                    logger.debug(sb.toString());
-                }
+                if (prevTo2and1ToCur || prevTo1and2ToCur) {
+                    for (String readName : edgeToReads.getReads(e)) {
+                        keepReads.add(readName);

-                boolean keepRead = false;
-                if (!sameCCasPrev.isEmpty() && !sameCCasCur.isEmpty()) { // There exists a path from prev to cur that goes through the sites in testRead
-                    // Now, make sure that TWO DISTINCT sites, i and j, in testRead are used in the path:
-                    Set<Integer> union = new HashSet<Integer>(sameCCasPrev);
-                    union.addAll(sameCCasCur);
-                    if (union.size() >= 2) // i != j
-                        keepRead = true;
-                }
-
-                if (keepRead) {
-                    logger.debug("Read is part of path from " + prev + " to " + cur);
-                    keepReads.add(testRead);
-
-                    // Add the removed edges back in, since we're keeping the read:
-                    for (GraphEdge e : rp.rdEdges)
-                        readGraph.addEdge(e);
-                }
-                else { // Decrease the count for the edges [note that any read-specific edges were already removed above]:
-                    for (GraphEdge e : rp.rdEdges)
-                        edgeCounts.decrementEdge(e);
+                        if (logger.isDebugEnabled()) {
+                            if (prevTo2and1ToCur)
+                                logger.debug("Keep read " + readName + " due to path: " + prev + " ---> " + e.v2 + " -> " + e.v1 + " ---> " + cur);
+                            else
+                                logger.debug("Keep read " + readName + " due to path: " + prev + " ---> " + e.v1 + " -> " + e.v2 + " ---> " + cur);
+                        }
+                    }
                }
            }

@ -677,7 +697,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph

    private PhaseResult phaseSample(PhasingWindow phaseWindow) {
        /* Will map a phase and its "complement" to a single representative phase,
-          and marginalizeTable() marginalizes to 2 positions [starting at the previous position, and then the current position]:
+          and marginalizeAsNewTable() marginalizes to 2 positions [starting at the previous position, and then the current position]:
        */
        HaplotypeTableCreator tabCreator = new BiallelicComplementHaplotypeTableCreator(phaseWindow.hetGenotypes, phaseWindow.phasingSiteIndex - 1, 2);
        PhasingTable sampleHaps = tabCreator.getNewTable();
@ -693,6 +713,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
        }

        // Update the phasing table based on each of the sub-reads for this sample:
+        MaxHaplotypeAndQuality prevMaxHapAndQual = null;
+        int numHighQualityIterations = 0;
+        int numInconsistentIterations = 0;
        for (Map.Entry<String, Read> nameToReads : phaseWindow.readsAtHetSites.entrySet()) {
            Read rd = nameToReads.getValue();

@ -704,38 +727,88 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph

                logger.debug("score(" + rd + ", " + pte.getHaplotypeClass() + ") = " + score);
            }
+
+            //
+            //
+            //
+            // Check the current best haplotype assignment:
+            MaxHaplotypeAndQuality curMaxHapAndQual = new MaxHaplotypeAndQuality(sampleHaps, false);
+            logger.debug("CUR MAX hap:\t" + curMaxHapAndQual.maxEntry.getHaplotypeClass() + "\tcurPhaseQuality:\t" + curMaxHapAndQual.phaseQuality);
+            if (prevMaxHapAndQual != null && passesPhasingThreshold(prevMaxHapAndQual.phaseQuality)) {
+                numHighQualityIterations++;
+                if (!curMaxHapAndQual.maxEntry.getHaplotypeClass().getRepresentative().equals(prevMaxHapAndQual.maxEntry.getHaplotypeClass().getRepresentative()) || // switched phase
+                        curMaxHapAndQual.phaseQuality < 0.9 * prevMaxHapAndQual.phaseQuality) { // a 10% ["significant"] decrease in PQ
+                    logger.debug("Inconsistent read found!");
+                    numInconsistentIterations++;
+                }
+            }
+            prevMaxHapAndQual = curMaxHapAndQual;
+            //
+            //
+            //
+
        }
        logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
+        MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
+        double posteriorProb = maxHapQual.maxEntry.getScore().getValue();

-        // Marginalize each haplotype to its first 2 positions:
-        sampleHaps = HaplotypeTableCreator.marginalizeTable(sampleHaps);
-        logger.debug("\nPhasing table [AFTER MAPPING]:\n" + sampleHaps + "\n");
+        logger.debug("MAX hap:\t" + maxHapQual.maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + maxHapQual.phaseQuality);
+        logger.debug("Number of used reads " + phaseWindow.readsAtHetSites.size() + "; number of high PQ iterations " + numHighQualityIterations + "; number of inconsistencies " + numInconsistentIterations);

-        // Determine the phase at this position:
-        sampleHaps.normalizeScores();
-        logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + sampleHaps + "\n");
+        double repPhaseQuality = maxHapQual.phaseQuality;

-        PhasingTable.PhasingTableEntry maxEntry = sampleHaps.maxEntry();
-        double posteriorProb = maxEntry.getScore().getValue();
-
-        // convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb):
-        PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO);
-        for (PhasingTable.PhasingTableEntry pte : sampleHaps) {
-            if (pte != maxEntry)
-                sumErrorProbs.plusEqual(pte.getScore());
+        //
+        //
+        if (numInconsistentIterations / (double) numHighQualityIterations >= 0.1) {
+            //
+            // ????
+            // NEED TO CHANGE phaseSite() to always output PQ field, EVEN if it's LESS THAN threshold ??????
+            // ????
+            //
+            repPhaseQuality *= -1;
        }
-        double phaseQuality = -10.0 * (sumErrorProbs.getLog10Value());
+        //
+        //

-        logger.debug("MAX hap:\t" + maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + phaseQuality);
+        return new PhaseResult(maxHapQual.maxEntry.getHaplotypeClass().getRepresentative(), repPhaseQuality);
+    }

-        return new PhaseResult(maxEntry.getHaplotypeClass().getRepresentative(), phaseQuality);
+    private static class MaxHaplotypeAndQuality {
+        public PhasingTable.PhasingTableEntry maxEntry;
+        public double phaseQuality;
+
+        public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
+            // Marginalize each haplotype to its first 2 positions:
+            hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
+            if (printDebug)
+                logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
+
+            // Determine the phase at this position:
+            hapTable.normalizeScores();
+            if (printDebug)
+                logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
+
+            this.maxEntry = hapTable.maxEntry();
+
+            // convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb):
+            this.phaseQuality = getPhasingQuality(hapTable, maxEntry);
+        }
+
+        // Returns the PQ of entry (within table hapTable, which MUST be normalized):
+        public static double getPhasingQuality(PhasingTable hapTable, PhasingTable.PhasingTableEntry entry) {
+            PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO);
+            for (PhasingTable.PhasingTableEntry pte : hapTable) {
+                if (pte != entry)
+                    sumErrorProbs.plusEqual(pte.getScore());
+            }
+            return -10.0 * (sumErrorProbs.getLog10Value());
+        }
    }

    /*
        Ensure that curBiall is phased relative to prevBiall as specified by hap.
     */
-
-    public static void ensurePhasing(BialleleSNP curBiall, BialleleSNP prevBiall, Haplotype hap) {
+    public static void ensurePhasing(SNPallelePair curBiall, SNPallelePair prevBiall, Haplotype hap) {
        if (hap.size() < 2)
            throw new ReviewedStingException("LOGICAL ERROR: Only considering haplotypes of length > 2!");

@ -784,7 +857,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
        byte refBase;
        if (!vc.isIndel()) {
            Allele varAllele = vc.getReference();
-            refBase = BialleleSNP.getSingleBase(varAllele);
+            refBase = SNPallelePair.getSingleBase(varAllele);
        }
        else {
            refBase = vc.getReferenceBaseForIndel();
@ -1023,14 +1096,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
                byte[] hapBases = new byte[numSites];
                for (int i = 0; i < numSites; i++) {
                    Allele alleleI = genotypes[i].getAllele(alleleInds[i]);
-                    hapBases[i] = BialleleSNP.getSingleBase(alleleI);
+                    hapBases[i] = SNPallelePair.getSingleBase(alleleI);
                }
                allHaps.add(new Haplotype(hapBases));
            }
            return allHaps;
        }

-        public static PhasingTable marginalizeTable(PhasingTable table) {
+        public static PhasingTable marginalizeAsNewTable(PhasingTable table) {
            Map<Haplotype, PreciseNonNegativeDouble> hapMap = new TreeMap<Haplotype, PreciseNonNegativeDouble>();
            for (PhasingTable.PhasingTableEntry pte : table) {
                Haplotype rep = pte.getHaplotypeClass().getRepresentative();
@ -1056,23 +1129,23 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
    }

    private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator {
-        private BialleleSNP[] bialleleSNPs;
+        private SNPallelePair[] bialleleSNPs;
        private int startIndex;
        private int marginalizeLength;

        public BiallelicComplementHaplotypeTableCreator(Genotype[] hetGenotypes, int startIndex, int marginalizeLength) {
            super(hetGenotypes);

-            this.bialleleSNPs = new BialleleSNP[genotypes.length];
+            this.bialleleSNPs = new SNPallelePair[genotypes.length];
            for (int i = 0; i < genotypes.length; i++)
-                bialleleSNPs[i] = new BialleleSNP(genotypes[i]);
+                bialleleSNPs[i] = new SNPallelePair(genotypes[i]);

            this.startIndex = startIndex;
            this.marginalizeLength = marginalizeLength;
        }

        public PhasingTable getNewTable() {
-            double hapClassPrior = 1.0; // can change later
+            double hapClassPrior = 1.0; // can change later, BUT see NOTE above in removeExtraneousReads()

            PhasingTable table = new PhasingTable();
            for (Haplotype hap : getAllHaplotypes()) {
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/SNPallelePair.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/phasing/SNPallelePair.java
@ -28,15 +28,15 @@ import org.broad.tribble.util.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

-public class BialleleSNP extends Biallele {
+public class SNPallelePair extends AllelePair {

-    public BialleleSNP(Genotype gt) {
+    public SNPallelePair(Genotype gt) {
        super(gt);

        if (getTopAllele().getBases().length != 1)
-            throw new ReviewedStingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
+            throw new ReviewedStingException("LOGICAL ERROR: SNPallelePair may not contain non-SNP site!");
        if (getBottomAllele().getBases().length != 1)
-            throw new ReviewedStingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
+            throw new ReviewedStingException("LOGICAL ERROR: SNPallelePair may not contain non-SNP site!");
    }

    public byte getTopBase() {