Renamed and documented some phasing-specific classes to make their purpose clearer to someone browsing through the code

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4989 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2011-01-13 16:17:36 +00:00
parent 6b5474a00a
commit e8f0ae4b09
4 changed files with 71 additions and 60 deletions

View File

@ -25,12 +25,17 @@ package org.broadinstitute.sting.gatk.walkers.phasing;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
public class DoublyLinkedList<E> { /*
private DoublyLinkedNode<E> first; DoublyLinkedList class is a doubly-linked list, which allows O(1) traversal to next and previous elements in the list.
private DoublyLinkedNode<E> last; It is UNIQUE in the fact that its iterator (BidirectionalIterator) can be cloned
to save the current pointer for a later time (while the original iterator can continue to iterate).
*/
public class CloneableIteratorLinkedList<E> {
private CloneableIteratorDoublyLinkedNode<E> first;
private CloneableIteratorDoublyLinkedNode<E> last;
private int size; private int size;
public DoublyLinkedList() { public CloneableIteratorLinkedList() {
this.first = null; this.first = null;
this.last = null; this.last = null;
this.size = 0; this.size = 0;
@ -45,7 +50,7 @@ public class DoublyLinkedList<E> {
} }
public void addFirst(E e) { public void addFirst(E e) {
DoublyLinkedNode<E> newNode = new DoublyLinkedNode<E>(e); CloneableIteratorDoublyLinkedNode<E> newNode = new CloneableIteratorDoublyLinkedNode<E>(e);
if (isEmpty()) if (isEmpty())
last = newNode; last = newNode;
@ -59,7 +64,7 @@ public class DoublyLinkedList<E> {
} }
public void addLast(E e) { public void addLast(E e) {
DoublyLinkedNode<E> newNode = new DoublyLinkedNode<E>(e); CloneableIteratorDoublyLinkedNode<E> newNode = new CloneableIteratorDoublyLinkedNode<E>(e);
if (isEmpty()) if (isEmpty())
first = newNode; first = newNode;
@ -132,17 +137,17 @@ public class DoublyLinkedList<E> {
return true; return true;
} }
public BidirectionalIterator<E> iterator() { public CloneableIterator<E> iterator() {
return new BidirectionalIterator<E>(this); return new CloneableIterator<E>(this);
} }
private static class DoublyLinkedNode<E> { private static class CloneableIteratorDoublyLinkedNode<E> {
private E element = null; private E element = null;
private DoublyLinkedNode<E> next = null; private CloneableIteratorDoublyLinkedNode<E> next = null;
private DoublyLinkedNode<E> previous = null; private CloneableIteratorDoublyLinkedNode<E> previous = null;
public DoublyLinkedNode(E element) { public CloneableIteratorDoublyLinkedNode(E element) {
this.element = element; this.element = element;
this.next = null; this.next = null;
this.previous = null; this.previous = null;
@ -150,16 +155,19 @@ public class DoublyLinkedList<E> {
} }
public static class BidirectionalIterator<E> implements Cloneable { /*
private DoublyLinkedNode<E> nextNode; This iterator is unique since it can be cloned to save the current pointer for a later time (while the original iterator can continue to iterate).
private DoublyLinkedNode<E> lastNode; */
public static class CloneableIterator<E> implements Cloneable {
private CloneableIteratorDoublyLinkedNode<E> nextNode;
private CloneableIteratorDoublyLinkedNode<E> lastNode;
private BidirectionalIterator(DoublyLinkedNode<E> nextNode, DoublyLinkedNode<E> lastNode) { private CloneableIterator(CloneableIteratorDoublyLinkedNode<E> nextNode, CloneableIteratorDoublyLinkedNode<E> lastNode) {
this.nextNode = nextNode; this.nextNode = nextNode;
this.lastNode = lastNode; this.lastNode = lastNode;
} }
private BidirectionalIterator(DoublyLinkedList<E> list) { private CloneableIterator(CloneableIteratorLinkedList<E> list) {
this(list.first, list.last); this(list.first, list.last);
} }
@ -195,13 +203,13 @@ public class DoublyLinkedList<E> {
return nextNode.element; return nextNode.element;
} }
public BidirectionalIterator<E> clone() { public CloneableIterator<E> clone() {
try { try {
super.clone(); super.clone();
} catch (CloneNotSupportedException e) { } catch (CloneNotSupportedException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
} }
return new BidirectionalIterator<E>(nextNode, lastNode); return new CloneableIterator<E>(nextNode, lastNode);
} }
} }
} }

View File

@ -28,16 +28,16 @@ import org.broadinstitute.sting.utils.DisjointSet;
import java.util.*; import java.util.*;
// Represents an undirected graph with no self-edges: // Represents an undirected graph with no self-edges:
public class Graph implements Iterable<GraphEdge> { public class PhasingGraph implements Iterable<PhasingGraphEdge> {
private Neighbors[] adj; private Neighbors[] adj;
public Graph(int numVertices) { public PhasingGraph(int numVertices) {
adj = new Neighbors[numVertices]; adj = new Neighbors[numVertices];
for (int i = 0; i < numVertices; i++) for (int i = 0; i < numVertices; i++)
adj[i] = new Neighbors(); adj[i] = new Neighbors();
} }
public void addEdge(GraphEdge e) { public void addEdge(PhasingGraphEdge e) {
if (e.v1 == e.v2) // do not permit self-edges if (e.v1 == e.v2) // do not permit self-edges
return; return;
@ -45,20 +45,20 @@ public class Graph implements Iterable<GraphEdge> {
adj[e.v2].addNeighbor(e); adj[e.v2].addNeighbor(e);
} }
public void addEdges(Collection<GraphEdge> edges) { public void addEdges(Collection<PhasingGraphEdge> edges) {
for (GraphEdge e : edges) for (PhasingGraphEdge e : edges)
addEdge(e); addEdge(e);
} }
public void removeEdge(GraphEdge e) { public void removeEdge(PhasingGraphEdge e) {
adj[e.v1].removeNeighbor(e); adj[e.v1].removeNeighbor(e);
adj[e.v2].removeNeighbor(e); adj[e.v2].removeNeighbor(e);
} }
public Collection<GraphEdge> removeAllIncidentEdges(int vertexIndex) { public Collection<PhasingGraphEdge> removeAllIncidentEdges(int vertexIndex) {
Collection<GraphEdge> incidentEdges = new TreeSet<GraphEdge>(adj[vertexIndex].neighbors); // implemented GraphEdge.compareTo() Collection<PhasingGraphEdge> incidentEdges = new TreeSet<PhasingGraphEdge>(adj[vertexIndex].neighbors); // implemented GraphEdge.compareTo()
for (GraphEdge neighbEdge : incidentEdges) { for (PhasingGraphEdge neighbEdge : incidentEdges) {
if (vertexIndex != neighbEdge.v1) // vertexIndex == neighbEdge.v2 if (vertexIndex != neighbEdge.v1) // vertexIndex == neighbEdge.v2
adj[neighbEdge.v1].removeNeighbor(neighbEdge); adj[neighbEdge.v1].removeNeighbor(neighbEdge);
else if (vertexIndex != neighbEdge.v2) // vertexIndex == neighbEdge.v1 else if (vertexIndex != neighbEdge.v2) // vertexIndex == neighbEdge.v1
@ -72,13 +72,13 @@ public class Graph implements Iterable<GraphEdge> {
public DisjointSet getConnectedComponents() { public DisjointSet getConnectedComponents() {
DisjointSet cc = new DisjointSet(adj.length); DisjointSet cc = new DisjointSet(adj.length);
for (GraphEdge e : this) for (PhasingGraphEdge e : this)
cc.setUnion(e.v1, e.v2); cc.setUnion(e.v1, e.v2);
return cc; return cc;
} }
public Iterator<GraphEdge> iterator() { public Iterator<PhasingGraphEdge> iterator() {
return new AllEdgesIterator(); return new AllEdgesIterator();
} }
@ -87,7 +87,7 @@ public class Graph implements Iterable<GraphEdge> {
for (int i = 0; i < adj.length; i++) { for (int i = 0; i < adj.length; i++) {
sb.append(i + ":"); sb.append(i + ":");
for (GraphEdge e : adj[i]) { for (PhasingGraphEdge e : adj[i]) {
sb.append(" " + (e.v1 == i ? e.v2 : e.v1)); sb.append(" " + (e.v1 == i ? e.v2 : e.v1));
} }
sb.append("\n"); sb.append("\n");
@ -96,10 +96,10 @@ public class Graph implements Iterable<GraphEdge> {
return sb.toString(); return sb.toString();
} }
private class AllEdgesIterator implements Iterator<GraphEdge> { private class AllEdgesIterator implements Iterator<PhasingGraphEdge> {
private int curInd; private int curInd;
private Iterator<GraphEdge> innerIt; private Iterator<PhasingGraphEdge> innerIt;
private GraphEdge nextEdge; private PhasingGraphEdge nextEdge;
public AllEdgesIterator() { public AllEdgesIterator() {
curInd = 0; curInd = 0;
@ -116,7 +116,7 @@ public class Graph implements Iterable<GraphEdge> {
innerIt = adj[curInd].iterator(); innerIt = adj[curInd].iterator();
while (innerIt.hasNext()) { while (innerIt.hasNext()) {
GraphEdge e = innerIt.next(); PhasingGraphEdge e = innerIt.next();
if (e.v1 == curInd) { // only want to see each edge once if (e.v1 == curInd) { // only want to see each edge once
nextEdge = e; nextEdge = e;
return true; return true;
@ -129,11 +129,11 @@ public class Graph implements Iterable<GraphEdge> {
return false; return false;
} }
public GraphEdge next() { public PhasingGraphEdge next() {
if (!hasNext()) if (!hasNext())
throw new NoSuchElementException(); throw new NoSuchElementException();
GraphEdge tmpEdge = nextEdge; PhasingGraphEdge tmpEdge = nextEdge;
nextEdge = null; nextEdge = null;
return tmpEdge; return tmpEdge;
} }
@ -143,22 +143,22 @@ public class Graph implements Iterable<GraphEdge> {
} }
} }
private class Neighbors implements Iterable<GraphEdge> { private class Neighbors implements Iterable<PhasingGraphEdge> {
private Set<GraphEdge> neighbors; private Set<PhasingGraphEdge> neighbors;
public Neighbors() { public Neighbors() {
this.neighbors = new TreeSet<GraphEdge>(); // implemented GraphEdge.compareTo() this.neighbors = new TreeSet<PhasingGraphEdge>(); // implemented GraphEdge.compareTo()
} }
public void addNeighbor(GraphEdge e) { public void addNeighbor(PhasingGraphEdge e) {
neighbors.add(e); neighbors.add(e);
} }
public void removeNeighbor(GraphEdge e) { public void removeNeighbor(PhasingGraphEdge e) {
neighbors.remove(e); neighbors.remove(e);
} }
public Iterator<GraphEdge> iterator() { public Iterator<PhasingGraphEdge> iterator() {
return neighbors.iterator(); return neighbors.iterator();
} }

View File

@ -23,11 +23,14 @@
*/ */
package org.broadinstitute.sting.gatk.walkers.phasing; package org.broadinstitute.sting.gatk.walkers.phasing;
public class GraphEdge implements Comparable<GraphEdge> { /*
Edge class for PhasingGraph
*/
public class PhasingGraphEdge implements Comparable<PhasingGraphEdge> {
protected int v1; protected int v1;
protected int v2; protected int v2;
public GraphEdge(int v1, int v2) { public PhasingGraphEdge(int v1, int v2) {
this.v1 = v1; this.v1 = v1;
this.v2 = v2; this.v2 = v2;
} }
@ -40,7 +43,7 @@ public class GraphEdge implements Comparable<GraphEdge> {
return v2; return v2;
} }
public int compareTo(GraphEdge that) { public int compareTo(PhasingGraphEdge that) {
if (this.v1 != that.v1) if (this.v1 != that.v1)
return (this.v1 - that.v1); return (this.v1 - that.v1);
@ -48,7 +51,7 @@ public class GraphEdge implements Comparable<GraphEdge> {
return (this.v2 - that.v2); return (this.v2 - that.v2);
} }
public boolean equals(GraphEdge other) { public boolean equals(PhasingGraphEdge other) {
return (this.compareTo(other) == 0); return (this.compareTo(other) == 0);
} }

View File

@ -93,7 +93,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private GenomeLoc mostDownstreamLocusReached = null; private GenomeLoc mostDownstreamLocusReached = null;
private LinkedList<VariantAndReads> unphasedSiteQueue = null; private LinkedList<VariantAndReads> unphasedSiteQueue = null;
private DoublyLinkedList<UnfinishedVariantAndReads> partiallyPhasedSites = null; // the phased VCs to be emitted, and the alignment bases at these positions private CloneableIteratorLinkedList<UnfinishedVariantAndReads> partiallyPhasedSites = null; // the phased VCs to be emitted, and the alignment bases at these positions
private static PreciseNonNegativeDouble ZERO = new PreciseNonNegativeDouble(0.0); private static PreciseNonNegativeDouble ZERO = new PreciseNonNegativeDouble(0.0);
@ -135,7 +135,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
MIN_MAPPING_QUALITY_SCORE = Math.max(MIN_MAPPING_QUALITY_SCORE, MIN_BASE_QUALITY_SCORE); MIN_MAPPING_QUALITY_SCORE = Math.max(MIN_MAPPING_QUALITY_SCORE, MIN_BASE_QUALITY_SCORE);
unphasedSiteQueue = new LinkedList<VariantAndReads>(); unphasedSiteQueue = new LinkedList<VariantAndReads>();
partiallyPhasedSites = new DoublyLinkedList<UnfinishedVariantAndReads>(); partiallyPhasedSites = new CloneableIteratorLinkedList<UnfinishedVariantAndReads>();
initializeVcfWriter(); initializeVcfWriter();
@ -340,7 +340,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
SNPallelePair allelePair = new SNPallelePair(gt); SNPallelePair allelePair = new SNPallelePair(gt);
if (DEBUG) logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair); if (DEBUG) logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair);
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt; CloneableIteratorLinkedList.CloneableIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
/* Notes: /* Notes:
1. Call to next() advances iterator to next position in partiallyPhasedSites. 1. Call to next() advances iterator to next position in partiallyPhasedSites.
2. prevHetGenotype != null, since otherwise prevHetAndInteriorIt would not have been chosen to point to its UnfinishedVariantAndReads. 2. prevHetGenotype != null, since otherwise prevHetAndInteriorIt would not have been chosen to point to its UnfinishedVariantAndReads.
@ -438,7 +438,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private class PhasingWindow { private class PhasingWindow {
private Genotype[] hetGenotypes = null; private Genotype[] hetGenotypes = null;
private DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = null; private CloneableIteratorLinkedList.CloneableIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = null;
private int phasingSiteIndex = -1; private int phasingSiteIndex = -1;
private Map<String, PhasingRead> readsAtHetSites = null; private Map<String, PhasingRead> readsAtHetSites = null;
@ -452,7 +452,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
List<GenotypeAndReadBases> listHetGenotypes = new LinkedList<GenotypeAndReadBases>(); List<GenotypeAndReadBases> listHetGenotypes = new LinkedList<GenotypeAndReadBases>();
// Include previously phased sites in the phasing computation: // Include previously phased sites in the phasing computation:
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> phasedIt = partiallyPhasedSites.iterator(); CloneableIteratorLinkedList.CloneableIterator<UnfinishedVariantAndReads> phasedIt = partiallyPhasedSites.iterator();
while (phasedIt.hasNext()) { while (phasedIt.hasNext()) {
UnfinishedVariantAndReads phasedVr = phasedIt.next(); UnfinishedVariantAndReads phasedVr = phasedIt.next();
Genotype gt = phasedVr.unfinishedVariant.getGenotype(sample); Genotype gt = phasedVr.unfinishedVariant.getGenotype(sample);
@ -577,13 +577,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
private class EdgeToReads { private class EdgeToReads {
private TreeMap<GraphEdge, List<String>> edgeReads; private TreeMap<PhasingGraphEdge, List<String>> edgeReads;
public EdgeToReads() { public EdgeToReads() {
this.edgeReads = new TreeMap<GraphEdge, List<String>>(); // implemented GraphEdge.compareTo() this.edgeReads = new TreeMap<PhasingGraphEdge, List<String>>(); // implemented GraphEdge.compareTo()
} }
public void addRead(GraphEdge e, String readName) { public void addRead(PhasingGraphEdge e, String readName) {
List<String> reads = edgeReads.get(e); List<String> reads = edgeReads.get(e);
if (reads == null) { if (reads == null) {
reads = new LinkedList<String>(); reads = new LinkedList<String>();
@ -592,7 +592,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
reads.add(readName); reads.add(readName);
} }
public List<String> getReads(GraphEdge e) { public List<String> getReads(PhasingGraphEdge e) {
return edgeReads.get(e); return edgeReads.get(e);
} }
} }
@ -622,7 +622,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
public Set<String> removeExtraneousReads(int numHetSites) { public Set<String> removeExtraneousReads(int numHetSites) {
Graph readGraph = new Graph(numHetSites); PhasingGraph readGraph = new PhasingGraph(numHetSites);
EdgeToReads edgeToReads = new EdgeToReads(); EdgeToReads edgeToReads = new EdgeToReads();
Set<Integer> sitesWithEdges = new TreeSet<Integer>(); Set<Integer> sitesWithEdges = new TreeSet<Integer>();
@ -634,7 +634,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// Connect each pair of non-null sites in rd: // Connect each pair of non-null sites in rd:
for (int i = 0; i < siteInds.length; i++) { for (int i = 0; i < siteInds.length; i++) {
for (int j = i + 1; j < siteInds.length; j++) { for (int j = i + 1; j < siteInds.length; j++) {
GraphEdge e = new GraphEdge(siteInds[i], siteInds[j]); PhasingGraphEdge e = new PhasingGraphEdge(siteInds[i], siteInds[j]);
if (DEBUG) logger.debug("Read = " + rdName + " is adding edge: " + e); if (DEBUG) logger.debug("Read = " + rdName + " is adding edge: " + e);
readGraph.addEdge(e); readGraph.addEdge(e);
@ -713,7 +713,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
if (DEBUG) logger.debug("Calculating CC after removing edges of site: " + i); if (DEBUG) logger.debug("Calculating CC after removing edges of site: " + i);
// Remove all edges incident to i and see which positions have paths to prev and cur: // Remove all edges incident to i and see which positions have paths to prev and cur:
Collection<GraphEdge> removedEdges = readGraph.removeAllIncidentEdges(i); Collection<PhasingGraphEdge> removedEdges = readGraph.removeAllIncidentEdges(i);
// Run-time for efficiently calculating connected components using DisjointSet: O(E) // Run-time for efficiently calculating connected components using DisjointSet: O(E)
DisjointSet ccAfterRemove = readGraph.getConnectedComponents(); DisjointSet ccAfterRemove = readGraph.getConnectedComponents();
@ -727,7 +727,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
readGraph.addEdges(removedEdges); readGraph.addEdges(removedEdges);
} }
for (GraphEdge e : readGraph) { for (PhasingGraphEdge e : readGraph) {
if (DEBUG) logger.debug("Testing the path-connectivity of Edge: " + e); if (DEBUG) logger.debug("Testing the path-connectivity of Edge: " + e);
/* Edge e={v1,v2} contributes a path between prev and cur for testRead iff: /* Edge e={v1,v2} contributes a path between prev and cur for testRead iff: