Updated Phasing algorithm + evaluation module to properly implement haplotypes [including homozygous genotypes]; Implemented dynamic window phasing model for LARGE increase in efficiency

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4332 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2010-09-22 21:29:58 +00:00
parent 192757d1e0
commit 44ccfc3531
6 changed files with 1123 additions and 249 deletions

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.*;
@ -104,9 +105,9 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
if (evalSampGenotypes != null)
evalSampGt = evalSampGenotypes.get(samp);
if (compSampGt == null || evalSampGt == null) {
// Having a hom site (or an unphased het site) breaks the phasing for the sample - hence, must reset phasing knowledge for both comp and eval [put a null CompEvalGenotypes]:
if ((compSampGt != null && !permitsTransitivePhasing(compSampGt)) || (evalSampGt != null && !permitsTransitivePhasing(evalSampGt)))
if (compSampGt == null || evalSampGt == null) { // Since either comp or eval (or both) are missing the site, the best we can do is hope to preserve phase [if the non-missing one preserves phase]
// Having an unphased site breaks the phasing for the sample [does NOT permit "transitive phasing"] - hence, must reset phasing knowledge for both comp and eval [put a null CompEvalGenotypes]:
if (isNonNullButUnphased(compSampGt) || isNonNullButUnphased(evalSampGt))
samplePrevGenotypes.put(samp, null);
}
else { // Both comp and eval have a non-null Genotype at this site:
@ -114,9 +115,9 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
Biallele evalBiallele = new Biallele(evalSampGt);
boolean breakPhasing = false;
if (!compSampGt.isHet() || !evalSampGt.isHet())
breakPhasing = true;
else { // both are het
if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom())
breakPhasing = true; // since they are not both het or both hom
else { // both are het, or both are hom:
boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compBiallele, evalBiallele) && bottomMatchesBottom(compBiallele, evalBiallele));
boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compBiallele, evalBiallele) && bottomMatchesTop(compBiallele, evalBiallele));
if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop)
@ -126,16 +127,19 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
if (breakPhasing) {
samplePrevGenotypes.put(samp, null); // nothing to do for this site, AND must remove any history for the future
}
else { // comp and eval have the same het Genotype at this site:
else if (compSampGt.isHet() && evalSampGt.isHet()) {
/* comp and eval have the same HET Genotype at this site:
[Note that if both are hom, then nothing is done here, but the het history IS preserved].
*/
CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp);
if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome
prevCompAndEval = null;
// Replace the previous with current:
// Replace the previous hets with the current hets:
samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt);
if (prevCompAndEval != null) {
logger.debug("Potentially phaseable locus: " + curLocus);
logger.debug("Potentially phaseable het locus: " + curLocus + " [relative to previous het locus: " + prevCompAndEval.getLocus() + "]");
PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp);
boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt);
@ -149,7 +153,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
ps.onlyEvalPhased++;
interesting.addReason("ONLY_EVAL", samp, group, "");
}
else {
else { // both comp and eval are phased:
Biallele prevCompBiallele = new Biallele(prevCompAndEval.getCompGenotpye());
Biallele prevEvalBiallele = new Biallele(prevCompAndEval.getEvalGenotype());
@ -159,6 +163,11 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
if (topsMatch || topMatchesBottom) {
ps.phasesAgree++;
Double compPQ = getPQ(compSampGt);
Double evalPQ = getPQ(evalSampGt);
if (compPQ != null && evalPQ != null && MathUtils.compareDoubles(compPQ, evalPQ) != 0)
interesting.addReason("PQ_CHANGE", samp, group, compPQ + " -> " + evalPQ);
}
else {
ps.phasesDisagree++;
@ -183,16 +192,24 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
return (vc != null && !vc.isFiltered());
}
/**
 * Tells whether a genotype is present but fails the phasing check.
 *
 * @param gt the genotype to inspect; may be null
 * @return false when gt is null; otherwise the negation of
 *         genotypesArePhasedAboveThreshold(gt)
 */
public boolean isNonNullButUnphased(Genotype gt) {
    if (gt == null)
        return false; // a missing genotype is never reported as "unphased"
    return !genotypesArePhasedAboveThreshold(gt);
}
public boolean genotypesArePhasedAboveThreshold(Genotype gt) {
if (!gt.genotypesArePhased())
return false;
Object pq = gt.getAttributes().get("PQ");
return (pq == null || (new Double(pq.toString()) >= getVEWalker().minPhaseQuality));
Double pq = getPQ(gt);
return (pq == null || pq >= getVEWalker().minPhaseQuality);
}
public boolean permitsTransitivePhasing(Genotype gt) {
return (gt != null && gt.isHet() && genotypesArePhasedAboveThreshold(gt)); // only a phased het site lets the phase pass through
/**
 * Reads the "PQ" attribute of a genotype as a Double.
 *
 * The attribute value is converted through its toString() form, so it may be
 * stored as any object whose string form parses as a double.
 *
 * @param gt the genotype whose attributes are consulted; must not be null
 * @return the parsed "PQ" value, or null when the genotype has no "PQ" attribute
 * @throws NumberFormatException if the attribute's string form is not a parsable double
 */
public static Double getPQ(Genotype gt) {
    Object pq = gt.getAttributes().get("PQ");
    if (pq == null)
        return null;
    // Double.valueOf replaces the deprecated boxing constructor; parsing rules are identical.
    return Double.valueOf(pq.toString());
}
public boolean topMatchesTop(Biallele b1, Biallele b2) {

View File

@ -0,0 +1,102 @@
package org.broadinstitute.sting.playground.gatk.walkers.phasing;
import java.util.LinkedList;
import java.util.List;
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Disjoint-set (union-find) structure over the items 0 .. numItems-1.
 *
 * Every item starts in its own singleton set. Sets are merged with
 * {@link #setUnion(int, int)} (union by rank) and looked up with
 * {@link #findSet(int)} (which compresses the traversed path).
 */
public class DisjointSet {
    private ItemNode[] nodes;

    /**
     * Creates numItems singleton sets, one per item index.
     *
     * @param numItems number of items; valid item indices are 0 .. numItems-1
     */
    public DisjointSet(int numItems) {
        ItemNode[] singletons = new ItemNode[numItems];
        int idx = 0;
        while (idx < numItems) {
            singletons[idx] = new ItemNode(idx);
            idx++;
        }
        this.nodes = singletons;
    }

    /**
     * Returns the representative (root item index) of the set containing itemIndex.
     *
     * As a side effect, every node visited on the way to the root is re-parented
     * directly onto the root.
     *
     * @param itemIndex index of the item to look up
     * @return the item index stored at the root of itemIndex's set
     */
    public int findSet(int itemIndex) {
        final ItemNode start = nodes[itemIndex];

        // First pass: climb to the root (the only node without a parent).
        ItemNode root = start;
        while (root.parent != null) {
            root = root.parent;
        }

        // Second pass: path compression - hang every visited node directly off the root.
        ItemNode walker = start;
        while (walker != root) {
            ItemNode above = walker.parent;
            walker.parent = root;
            walker = above;
        }

        return root.itemIndex;
    }

    /**
     * @return true iff x and y currently have the same representative
     */
    public boolean inSameSet(int x, int y) {
        int repX = findSet(x);
        int repY = findSet(y);
        return repX == repY;
    }

    /**
     * Filters testSet down to the items that share a set with x.
     *
     * @param x       the reference item
     * @param testSet candidate item indices
     * @return the members of testSet (in their original order) whose set is x's set
     */
    public List<Integer> inSameSetAs(int x, int[] testSet) {
        List<Integer> matches = new LinkedList<Integer>();
        final int target = findSet(x);
        for (int pos = 0; pos < testSet.length; pos++) {
            int candidate = testSet[pos];
            if (findSet(candidate) == target) {
                matches.add(candidate);
            }
        }
        return matches;
    }

    /**
     * Merges the set containing x with the set containing y.
     */
    public void setUnion(int x, int y) {
        int rootOfX = findSet(x);
        int rootOfY = findSet(y);
        link(rootOfX, rootOfY);
    }

    /**
     * Joins two roots using union by rank; a no-op when both are the same root.
     */
    private void link(int x, int y) {
        if (x != y) {
            ItemNode a = nodes[x];
            ItemNode b = nodes[y];
            if (a.rank > b.rank) {
                // a is the taller tree, so b goes underneath it.
                b.parent = a;
            }
            else {
                // a.rank <= b.rank: a goes underneath b; a tie makes b one level taller.
                if (a.rank == b.rank) {
                    b.rank++;
                }
                a.parent = b;
            }
        }
    }

    /**
     * Tree node for a single item: its index, its parent (null for a root), and its rank.
     */
    private class ItemNode {
        private int itemIndex;
        private ItemNode parent;
        private int rank;

        public ItemNode(int itemIndex) {
            this.itemIndex = itemIndex;
            this.parent = null;
            this.rank = 0;
        }
    }
}

View File

@ -0,0 +1,208 @@
package org.broadinstitute.sting.playground.gatk.walkers.phasing;
import java.util.NoSuchElementException;
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Minimal doubly-linked list supporting insertion and removal at both ends,
 * plus a cloneable iterator that can move in both directions.
 *
 * @param <E> element type
 */
public class DoublyLinkedList<E> {
    private DoublyLinkedNode<E> first;
    private DoublyLinkedNode<E> last;
    private int size;

    /** Creates an empty list. */
    public DoublyLinkedList() {
        this.first = null;
        this.last = null;
        this.size = 0;
    }

    /** @return true iff the list holds no elements */
    public boolean isEmpty() {
        return first == null;
    }

    /** @return the number of elements currently in the list */
    public int size() {
        return size;
    }

    /** Inserts e at the head of the list. */
    public void addFirst(E e) {
        DoublyLinkedNode<E> newNode = new DoublyLinkedNode<E>(e);

        if (isEmpty())
            last = newNode; // a lone node is both head and tail
        else {
            first.previous = newNode;
            newNode.next = first;
        }
        first = newNode;

        size++;
    }

    /** Appends e at the tail of the list. */
    public void addLast(E e) {
        DoublyLinkedNode<E> newNode = new DoublyLinkedNode<E>(e);

        if (isEmpty())
            first = newNode; // a lone node is both head and tail
        else {
            last.next = newNode;
            newNode.previous = last;
        }
        last = newNode;

        size++;
    }

    /**
     * Removes and returns the head element.
     *
     * @throws NoSuchElementException if the list is empty
     */
    public E removeFirst() {
        if (isEmpty())
            throw new NoSuchElementException();
        E e = first.element;

        if (first.next == null)
            last = null; // removing the only node empties the list
        else
            first.next.previous = null;
        first = first.next;

        size--;
        return e;
    }

    /**
     * Removes and returns the tail element.
     *
     * @throws NoSuchElementException if the list is empty
     */
    public E removeLast() {
        if (isEmpty())
            throw new NoSuchElementException();
        E e = last.element;

        if (last.previous == null)
            first = null; // removing the only node empties the list
        else
            last.previous.next = null;
        last = last.previous;

        size--;
        return e;
    }

    /**
     * @return the head element, without removing it
     * @throws NoSuchElementException if the list is empty
     */
    public E getFirst() {
        if (isEmpty())
            throw new NoSuchElementException();

        return first.element;
    }

    /**
     * @return the tail element, without removing it
     * @throws NoSuchElementException if the list is empty
     */
    public E getLast() {
        if (isEmpty())
            throw new NoSuchElementException();

        return last.element;
    }

    /** @return the head element, or null when the list is empty */
    public E peek() {
        if (isEmpty())
            return null;

        return getFirst();
    }

    /**
     * Queue-style removal; equivalent to {@link #removeFirst()}.
     *
     * @throws NoSuchElementException if the list is empty
     */
    public E remove() {
        return removeFirst();
    }

    /**
     * Queue-style insertion; equivalent to {@link #addLast(Object)}.
     *
     * @return always true
     */
    public boolean add(E e) {
        addLast(e);
        return true;
    }

    /** @return a new iterator positioned before the current head of the list */
    public BidirectionalIterator<E> iterator() {
        return new BidirectionalIterator<E>(this);
    }

    /** A list cell: the stored element plus links to its neighbours. */
    private static class DoublyLinkedNode<E> {
        private E element = null;
        private DoublyLinkedNode<E> next = null;
        private DoublyLinkedNode<E> previous = null;

        public DoublyLinkedNode(E element) {
            this.element = element;
            this.next = null;
            this.previous = null;
        }
    }

    /**
     * Cursor over a DoublyLinkedList that can step forwards and backwards.
     *
     * The cursor sits just before nextNode; a null nextNode means it is past the
     * end. The tail reference used to step back from the past-the-end position
     * is the one captured when the iterator was created.
     */
    public static class BidirectionalIterator<E> implements Cloneable {
        private DoublyLinkedNode<E> nextNode;
        private DoublyLinkedNode<E> lastNode;

        private BidirectionalIterator(DoublyLinkedNode<E> nextNode, DoublyLinkedNode<E> lastNode) {
            this.nextNode = nextNode;
            this.lastNode = lastNode;
        }

        private BidirectionalIterator(DoublyLinkedList<E> list) {
            this(list.first, list.last);
        }

        /** @return true iff {@link #next()} would return an element */
        public boolean hasNext() {
            return nextNode != null;
        }

        /**
         * Returns the element after the cursor and advances past it.
         *
         * @throws NoSuchElementException if there is no next element
         */
        public E next() {
            if (!hasNext())
                throw new NoSuchElementException();

            E e = nextNode.element;
            nextNode = nextNode.next;
            return e;
        }

        /** @return true iff {@link #previous()} would return an element */
        public boolean hasPrevious() {
            if (nextNode != null)
                return nextNode.previous != null;

            // Past the end: there is a previous element iff a tail was captured.
            return lastNode != null;
        }

        /**
         * Moves the cursor back one position and returns the element it stepped over.
         *
         * @throws NoSuchElementException if there is no previous element
         */
        public E previous() {
            if (!hasPrevious())
                throw new NoSuchElementException();

            if (nextNode != null)
                nextNode = nextNode.previous;
            else
                nextNode = lastNode; // stepping back from the past-the-end position

            return nextNode.element;
        }

        /**
         * Returns an independent iterator at the same position; advancing one
         * does not move the other.
         */
        @Override
        public BidirectionalIterator<E> clone() {
            try {
                // A field-by-field copy is exactly right: both fields are node references.
                @SuppressWarnings("unchecked")
                BidirectionalIterator<E> copy = (BidirectionalIterator<E>) super.clone();
                return copy;
            } catch (CloneNotSupportedException e) {
                // Cannot happen because this class implements Cloneable; fail loudly rather than hide it.
                throw new IllegalStateException("clone() unexpectedly unsupported on a Cloneable iterator", e);
            }
        }
    }
}

View File

@ -0,0 +1,137 @@
package org.broadinstitute.sting.playground.gatk.walkers.phasing;
import java.util.*;
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Graph over vertices 0 .. numVertices-1, stored as one sorted edge set per vertex.
 *
 * Each edge is registered under both of its endpoints, so iterating the graph
 * directly yields every edge twice; use {@link #getAllEdges()} for a
 * de-duplicated view.
 */
public class Graph implements Iterable<GraphEdge> {
    private Neighbors[] adj;

    /**
     * Creates a graph with numVertices vertices and no edges.
     */
    public Graph(int numVertices) {
        adj = new Neighbors[numVertices];
        int v = 0;
        while (v < numVertices) {
            adj[v] = new Neighbors();
            v++;
        }
    }

    /** Registers e under its first endpoint, then under its second. */
    public void addEdge(GraphEdge e) {
        for (int endpoint : new int[]{e.v1, e.v2})
            adj[endpoint].addNeighbor(e);
    }

    /** Unregisters e from its first endpoint, then from its second. */
    public void removeEdge(GraphEdge e) {
        for (int endpoint : new int[]{e.v1, e.v2})
            adj[endpoint].removeNeighbor(e);
    }

    /**
     * Builds a DisjointSet over all vertices in which the two endpoints of
     * every stored edge have been merged.
     */
    public DisjointSet getConnectedComponents() {
        DisjointSet components = new DisjointSet(adj.length);

        for (Neighbors incident : adj) {
            for (GraphEdge edge : incident) {
                components.setUnion(edge.v1, edge.v2);
            }
        }

        return components;
    }

    // Note that this will give each edge TWICE [since e=(v1,v2) is stored as a neighbor for both v1 and v2]
    public Iterator<GraphEdge> iterator() {
        return new AllEdgesIterator();
    }

    /**
     * @return every edge once, in GraphEdge.compareTo() order
     */
    public List<GraphEdge> getAllEdges() {
        // The sorted set collapses the duplicates produced by iterating the graph.
        Set<GraphEdge> unique = new TreeSet<GraphEdge>();
        Iterator<GraphEdge> edges = iterator();
        while (edges.hasNext()) {
            unique.add(edges.next());
        }

        return new LinkedList<GraphEdge>(unique);
    }

    /**
     * One line per vertex: the vertex index, a colon, then each incident edge
     * preceded by a space.
     */
    public String toString() {
        StringBuilder out = new StringBuilder();

        int v = 0;
        for (Neighbors incident : adj) {
            out.append(v).append(":");
            for (GraphEdge edge : incident) {
                out.append(" ").append(edge);
            }
            out.append("\n");
            v++;
        }

        return out.toString();
    }

    /** Walks the per-vertex edge sets one after another, in vertex order. */
    private class AllEdgesIterator implements Iterator<GraphEdge> {
        private int curInd;
        private Iterator<GraphEdge> innerIt;

        public AllEdgesIterator() {
            curInd = 0;
            innerIt = null;
        }

        public boolean hasNext() {
            while (curInd < adj.length) {
                // Lazily open the current vertex's edge set.
                if (innerIt == null) {
                    innerIt = adj[curInd].iterator();
                }
                if (innerIt.hasNext()) {
                    return true;
                }

                // This vertex is exhausted: move on to the next one.
                innerIt = null;
                curInd++;
            }

            return false;
        }

        public GraphEdge next() {
            if (hasNext()) {
                return innerIt.next();
            }
            throw new NoSuchElementException();
        }

        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /** The sorted set of edges incident to a single vertex. */
    private class Neighbors implements Iterable<GraphEdge> {
        private Set<GraphEdge> neighbors;

        public Neighbors() {
            this.neighbors = new TreeSet<GraphEdge>(); // implemented GraphEdge.compareTo()
        }

        public void addNeighbor(GraphEdge e) {
            neighbors.add(e);
        }

        public void removeNeighbor(GraphEdge e) {
            neighbors.remove(e);
        }

        public Iterator<GraphEdge> iterator() {
            return neighbors.iterator();
        }
    }
}

View File

@ -0,0 +1,61 @@
package org.broadinstitute.sting.playground.gatk.walkers.phasing;
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * An edge between two integer vertex indices.
 *
 * Edges are ordered by v1 and then by v2, and two edges are equal exactly when
 * they compare as 0, so (1, 2) and (2, 1) are distinct. equals(Object) and
 * hashCode() are consistent with compareTo(), which lets edges be used in both
 * sorted and hash-based collections.
 */
public class GraphEdge implements Comparable<GraphEdge> {
    protected int v1;
    protected int v2;

    /**
     * @param v1 index of the first endpoint
     * @param v2 index of the second endpoint
     */
    public GraphEdge(int v1, int v2) {
        this.v1 = v1;
        this.v2 = v2;
    }

    /** @return the first endpoint */
    public int getV1() {
        return v1;
    }

    /** @return the second endpoint */
    public int getV2() {
        return v2;
    }

    /**
     * Orders edges by v1, breaking ties by v2.
     *
     * Uses explicit comparisons rather than subtraction, because a difference
     * such as Integer.MIN_VALUE - Integer.MAX_VALUE overflows and flips sign.
     *
     * @return -1, 0 or 1 as this edge is less than, equal to, or greater than that
     */
    public int compareTo(GraphEdge that) {
        if (this.v1 != that.v1)
            return (this.v1 < that.v1) ? -1 : 1;

        // this.v1 == that.v1:
        if (this.v2 != that.v2)
            return (this.v2 < that.v2) ? -1 : 1;

        return 0;
    }

    /**
     * Typed overload kept for existing callers; agrees with {@link #equals(Object)}.
     *
     * @return true iff other is non-null and compares as 0 to this edge
     */
    public boolean equals(GraphEdge other) {
        return equals((Object) other);
    }

    /**
     * @return true iff obj is a GraphEdge that compares as 0 to this edge
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (!(obj instanceof GraphEdge))
            return false;

        return this.compareTo((GraphEdge) obj) == 0;
    }

    /** Hash over both endpoints, so edges that are equal hash alike. */
    @Override
    public int hashCode() {
        return 31 * v1 + v2;
    }

    /** @return the edge formatted as "(v1, v2)" */
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("(").append(v1).append(", ").append(v2).append(")");
        return sb.toString();
    }
}