Fixed phasing algorithm to: 1. More correctly weed out irrelevant reads and sites; 2. Crudely flag sites with large phase discrepancies betweens reads
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4368 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5a5c72c80d
commit
8d8980e8eb
|
|
@ -111,15 +111,15 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||||
samplePrevGenotypes.put(samp, null);
|
samplePrevGenotypes.put(samp, null);
|
||||||
}
|
}
|
||||||
else { // Both comp and eval have a non-null Genotype at this site:
|
else { // Both comp and eval have a non-null Genotype at this site:
|
||||||
Biallele compBiallele = new Biallele(compSampGt);
|
AllelePair compAllelePair = new AllelePair(compSampGt);
|
||||||
Biallele evalBiallele = new Biallele(evalSampGt);
|
AllelePair evalAllelePair = new AllelePair(evalSampGt);
|
||||||
|
|
||||||
boolean breakPhasing = false;
|
boolean breakPhasing = false;
|
||||||
if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom())
|
if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom())
|
||||||
breakPhasing = true; // since they are not both het or both hom
|
breakPhasing = true; // since they are not both het or both hom
|
||||||
else { // both are het, or both are hom:
|
else { // both are het, or both are hom:
|
||||||
boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compBiallele, evalBiallele) && bottomMatchesBottom(compBiallele, evalBiallele));
|
boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compAllelePair, evalAllelePair) && bottomMatchesBottom(compAllelePair, evalAllelePair));
|
||||||
boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compBiallele, evalBiallele) && bottomMatchesTop(compBiallele, evalBiallele));
|
boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compAllelePair, evalAllelePair) && bottomMatchesTop(compAllelePair, evalAllelePair));
|
||||||
if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop)
|
if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop)
|
||||||
breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample
|
breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample
|
||||||
}
|
}
|
||||||
|
|
@ -154,12 +154,12 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||||
interesting.addReason("ONLY_EVAL", samp, group, "");
|
interesting.addReason("ONLY_EVAL", samp, group, "");
|
||||||
}
|
}
|
||||||
else { // both comp and eval are phased:
|
else { // both comp and eval are phased:
|
||||||
Biallele prevCompBiallele = new Biallele(prevCompAndEval.getCompGenotpye());
|
AllelePair prevCompAllelePair = new AllelePair(prevCompAndEval.getCompGenotpye());
|
||||||
Biallele prevEvalBiallele = new Biallele(prevCompAndEval.getEvalGenotype());
|
AllelePair prevEvalAllelePair = new AllelePair(prevCompAndEval.getEvalGenotype());
|
||||||
|
|
||||||
// Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample:
|
// Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample:
|
||||||
boolean topsMatch = (topMatchesTop(prevCompBiallele, prevEvalBiallele) && topMatchesTop(compBiallele, evalBiallele));
|
boolean topsMatch = (topMatchesTop(prevCompAllelePair, prevEvalAllelePair) && topMatchesTop(compAllelePair, evalAllelePair));
|
||||||
boolean topMatchesBottom = (topMatchesBottom(prevCompBiallele, prevEvalBiallele) && topMatchesBottom(compBiallele, evalBiallele));
|
boolean topMatchesBottom = (topMatchesBottom(prevCompAllelePair, prevEvalAllelePair) && topMatchesBottom(compAllelePair, evalAllelePair));
|
||||||
|
|
||||||
if (topsMatch || topMatchesBottom) {
|
if (topsMatch || topMatchesBottom) {
|
||||||
ps.phasesAgree++;
|
ps.phasesAgree++;
|
||||||
|
|
@ -172,7 +172,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||||
else {
|
else {
|
||||||
ps.phasesDisagree++;
|
ps.phasesDisagree++;
|
||||||
logger.debug("SWITCHED locus: " + curLocus);
|
logger.debug("SWITCHED locus: " + curLocus);
|
||||||
interesting.addReason("SWITCH", samp, group, toString(prevCompBiallele, compBiallele) + " -> " + toString(prevEvalBiallele, evalBiallele));
|
interesting.addReason("SWITCH", samp, group, toString(prevCompAllelePair, compAllelePair) + " -> " + toString(prevEvalAllelePair, evalAllelePair));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -212,23 +212,23 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||||
return new Double(pq.toString());
|
return new Double(pq.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean topMatchesTop(Biallele b1, Biallele b2) {
|
public boolean topMatchesTop(AllelePair b1, AllelePair b2) {
|
||||||
return b1.getTopAllele().equals(b2.getTopAllele());
|
return b1.getTopAllele().equals(b2.getTopAllele());
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean topMatchesBottom(Biallele b1, Biallele b2) {
|
public boolean topMatchesBottom(AllelePair b1, AllelePair b2) {
|
||||||
return b1.getTopAllele().equals(b2.getBottomAllele());
|
return b1.getTopAllele().equals(b2.getBottomAllele());
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean bottomMatchesTop(Biallele b1, Biallele b2) {
|
public boolean bottomMatchesTop(AllelePair b1, AllelePair b2) {
|
||||||
return topMatchesBottom(b2, b1);
|
return topMatchesBottom(b2, b1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean bottomMatchesBottom(Biallele b1, Biallele b2) {
|
public boolean bottomMatchesBottom(AllelePair b1, AllelePair b2) {
|
||||||
return b1.getBottomAllele().equals(b2.getBottomAllele());
|
return b1.getBottomAllele().equals(b2.getBottomAllele());
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString(Biallele prev, Biallele cur) {
|
public String toString(AllelePair prev, AllelePair cur) {
|
||||||
return prev.getTopAllele().getBaseString() + "," + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "," + cur.getBottomAllele().getBaseString();
|
return prev.getTopAllele().getBaseString() + "," + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "," + cur.getBottomAllele().getBaseString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,13 +30,13 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class Biallele {
|
public class AllelePair {
|
||||||
private Allele top;
|
private Allele top;
|
||||||
private Allele bottom;
|
private Allele bottom;
|
||||||
|
|
||||||
public Biallele(Genotype gt) {
|
public AllelePair(Genotype gt) {
|
||||||
if (gt.getPloidy() != 2)
|
if (gt.getPloidy() != 2)
|
||||||
throw new ReviewedStingException("Biallele must have ploidy of 2!");
|
throw new ReviewedStingException("AllelePair must have ploidy of 2!");
|
||||||
|
|
||||||
this.top = gt.getAllele(0);
|
this.top = gt.getAllele(0);
|
||||||
this.bottom = gt.getAllele(1);
|
this.bottom = gt.getAllele(1);
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.phasing;
|
package org.broadinstitute.sting.playground.gatk.walkers.phasing;
|
||||||
|
|
||||||
import java.util.LinkedList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010, The Broad Institute
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
|
@ -55,11 +54,11 @@ public class DisjointSet {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean inSameSet(int x, int y) {
|
public boolean inSameSet(int x, int y) {
|
||||||
return (findSet(x) == findSet(y));
|
return (x == y || nodes[x].parent == nodes[y].parent || findSet(x) == findSet(y));
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Integer> inSameSetAs(int x, int[] testSet) {
|
public Set<Integer> inSameSetAs(int x, Collection<Integer> testSet) {
|
||||||
List<Integer> sameSetInds = new LinkedList<Integer>();
|
Set<Integer> sameSetInds = new TreeSet<Integer>();
|
||||||
|
|
||||||
int xSet = findSet(x);
|
int xSet = findSet(x);
|
||||||
for (int t : testSet) {
|
for (int t : testSet) {
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ import java.util.*;
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Represents an undirected graph with no self-edges:
|
||||||
public class Graph implements Iterable<GraphEdge> {
|
public class Graph implements Iterable<GraphEdge> {
|
||||||
private Neighbors[] adj;
|
private Neighbors[] adj;
|
||||||
|
|
||||||
|
|
@ -36,45 +37,58 @@ public class Graph implements Iterable<GraphEdge> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addEdge(GraphEdge e) {
|
public void addEdge(GraphEdge e) {
|
||||||
|
if (e.v1 == e.v2) // do not permit self-edges
|
||||||
|
return;
|
||||||
|
|
||||||
adj[e.v1].addNeighbor(e);
|
adj[e.v1].addNeighbor(e);
|
||||||
adj[e.v2].addNeighbor(e);
|
adj[e.v2].addNeighbor(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addEdges(Collection<GraphEdge> edges) {
|
||||||
|
for (GraphEdge e : edges)
|
||||||
|
addEdge(e);
|
||||||
|
}
|
||||||
|
|
||||||
public void removeEdge(GraphEdge e) {
|
public void removeEdge(GraphEdge e) {
|
||||||
adj[e.v1].removeNeighbor(e);
|
adj[e.v1].removeNeighbor(e);
|
||||||
adj[e.v2].removeNeighbor(e);
|
adj[e.v2].removeNeighbor(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Collection<GraphEdge> removeAllIncidentEdges(int vertexIndex) {
|
||||||
|
Collection<GraphEdge> incidentEdges = new TreeSet<GraphEdge>(adj[vertexIndex].neighbors); // implemented GraphEdge.compareTo()
|
||||||
|
|
||||||
|
for (GraphEdge neighbEdge : incidentEdges) {
|
||||||
|
if (vertexIndex != neighbEdge.v1) // vertexIndex == neighbEdge.v2
|
||||||
|
adj[neighbEdge.v1].removeNeighbor(neighbEdge);
|
||||||
|
else if (vertexIndex != neighbEdge.v2) // vertexIndex == neighbEdge.v1
|
||||||
|
adj[neighbEdge.v2].removeNeighbor(neighbEdge);
|
||||||
|
}
|
||||||
|
adj[vertexIndex].clearAllNeighbors();
|
||||||
|
|
||||||
|
return incidentEdges;
|
||||||
|
}
|
||||||
|
|
||||||
public DisjointSet getConnectedComponents() {
|
public DisjointSet getConnectedComponents() {
|
||||||
DisjointSet cc = new DisjointSet(adj.length);
|
DisjointSet cc = new DisjointSet(adj.length);
|
||||||
|
|
||||||
for (int i = 0; i < adj.length; i++)
|
for (GraphEdge e : this)
|
||||||
for (GraphEdge e : adj[i])
|
|
||||||
cc.setUnion(e.v1, e.v2);
|
cc.setUnion(e.v1, e.v2);
|
||||||
|
|
||||||
return cc;
|
return cc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note that this will give each edge TWICE [since e=(v1,v2) is stored as a neighbor for both v1 and v2]
|
|
||||||
public Iterator<GraphEdge> iterator() {
|
public Iterator<GraphEdge> iterator() {
|
||||||
return new AllEdgesIterator();
|
return new AllEdgesIterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<GraphEdge> getAllEdges() {
|
|
||||||
Set<GraphEdge> allEdges = new TreeSet<GraphEdge>(); // implemented GraphEdge.compareTo()
|
|
||||||
for (GraphEdge e : this)
|
|
||||||
allEdges.add(e);
|
|
||||||
|
|
||||||
return new LinkedList<GraphEdge>(allEdges);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
for (int i = 0; i < adj.length; i++) {
|
for (int i = 0; i < adj.length; i++) {
|
||||||
sb.append(i + ":");
|
sb.append(i + ":");
|
||||||
for (GraphEdge e : adj[i])
|
for (GraphEdge e : adj[i]) {
|
||||||
sb.append(" " + e);
|
sb.append(" " + (e.v1 == i ? e.v2 : e.v1));
|
||||||
|
}
|
||||||
sb.append("\n");
|
sb.append("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -84,18 +98,29 @@ public class Graph implements Iterable<GraphEdge> {
|
||||||
private class AllEdgesIterator implements Iterator<GraphEdge> {
|
private class AllEdgesIterator implements Iterator<GraphEdge> {
|
||||||
private int curInd;
|
private int curInd;
|
||||||
private Iterator<GraphEdge> innerIt;
|
private Iterator<GraphEdge> innerIt;
|
||||||
|
private GraphEdge nextEdge;
|
||||||
|
|
||||||
public AllEdgesIterator() {
|
public AllEdgesIterator() {
|
||||||
curInd = 0;
|
curInd = 0;
|
||||||
innerIt = null;
|
innerIt = null;
|
||||||
|
nextEdge = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
|
if (nextEdge != null)
|
||||||
|
return true;
|
||||||
|
|
||||||
for (; curInd < adj.length; curInd++) {
|
for (; curInd < adj.length; curInd++) {
|
||||||
if (innerIt == null)
|
if (innerIt == null)
|
||||||
innerIt = adj[curInd].iterator();
|
innerIt = adj[curInd].iterator();
|
||||||
if (innerIt.hasNext())
|
|
||||||
|
while (innerIt.hasNext()) {
|
||||||
|
GraphEdge e = innerIt.next();
|
||||||
|
if (e.v1 == curInd) { // only want to see each edge once
|
||||||
|
nextEdge = e;
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
innerIt = null;
|
innerIt = null;
|
||||||
}
|
}
|
||||||
|
|
@ -107,7 +132,9 @@ public class Graph implements Iterable<GraphEdge> {
|
||||||
if (!hasNext())
|
if (!hasNext())
|
||||||
throw new NoSuchElementException();
|
throw new NoSuchElementException();
|
||||||
|
|
||||||
return innerIt.next();
|
GraphEdge tmpEdge = nextEdge;
|
||||||
|
nextEdge = null;
|
||||||
|
return tmpEdge;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void remove() {
|
public void remove() {
|
||||||
|
|
@ -133,5 +160,9 @@ public class Graph implements Iterable<GraphEdge> {
|
||||||
public Iterator<GraphEdge> iterator() {
|
public Iterator<GraphEdge> iterator() {
|
||||||
return neighbors.iterator();
|
return neighbors.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void clearAllNeighbors() {
|
||||||
|
neighbors.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,6 @@ import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
import javax.naming.OperationNotSupportedException;
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -245,7 +244,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
if (isCalledDiploidGenotype(gt) && gt.isHet()) { // Can attempt to phase this genotype
|
if (isCalledDiploidGenotype(gt) && gt.isHet()) { // Can attempt to phase this genotype
|
||||||
PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
|
PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
|
||||||
if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
|
if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
|
||||||
BialleleSNP biall = new BialleleSNP(gt);
|
SNPallelePair biall = new SNPallelePair(gt);
|
||||||
logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + biall);
|
logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + biall);
|
||||||
|
|
||||||
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
|
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
|
||||||
|
|
@ -257,9 +256,18 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
Genotype prevHetGenotype = prevUvc.getGenotype(samp);
|
Genotype prevHetGenotype = prevUvc.getGenotype(samp);
|
||||||
|
|
||||||
PhaseResult pr = phaseSample(phaseWindow);
|
PhaseResult pr = phaseSample(phaseWindow);
|
||||||
boolean genotypesArePhased = (pr.phaseQuality >= phaseQualityThresh);
|
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
|
||||||
|
|
||||||
|
//
|
||||||
|
//
|
||||||
|
if (pr.phaseQuality < 0) {
|
||||||
|
logger.warn("MORE than 10% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc));
|
||||||
|
}
|
||||||
|
//
|
||||||
|
//
|
||||||
|
|
||||||
if (genotypesArePhased) {
|
if (genotypesArePhased) {
|
||||||
BialleleSNP prevBiall = new BialleleSNP(prevHetGenotype);
|
SNPallelePair prevBiall = new SNPallelePair(prevHetGenotype);
|
||||||
|
|
||||||
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n");
|
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n");
|
||||||
logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n");
|
logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n");
|
||||||
|
|
@ -311,6 +319,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
phaseStats.addIn(new PhasingStats(samplePhaseStats));
|
phaseStats.addIn(new PhasingStats(samplePhaseStats));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean passesPhasingThreshold(double PQ) {
|
||||||
|
return PQ >= phaseQualityThresh;
|
||||||
|
}
|
||||||
|
|
||||||
private static class GenotypeAndReadBases {
|
private static class GenotypeAndReadBases {
|
||||||
public Genotype genotype;
|
public Genotype genotype;
|
||||||
public ReadBasesAtPosition readBases;
|
public ReadBasesAtPosition readBases;
|
||||||
|
|
@ -449,7 +461,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
logger.debug("Number of sites in window = " + index);
|
logger.debug("Number of sites in window = " + index);
|
||||||
|
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
logger.debug("ALL READS:");
|
logger.debug("ALL READS [phasingSiteIndex = " + phasingSiteIndex + "]:");
|
||||||
for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
|
for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
|
||||||
String rdName = nameToReads.getKey();
|
String rdName = nameToReads.getKey();
|
||||||
Read rd = nameToReads.getValue();
|
Read rd = nameToReads.getValue();
|
||||||
|
|
@ -458,85 +470,98 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class ReadProperties {
|
private class EdgeToReads {
|
||||||
public List<GraphEdge> rdEdges;
|
private Map<GraphEdge, List<String>> edgeReads;
|
||||||
public int[] siteInds;
|
|
||||||
|
|
||||||
public ReadProperties(Read rd) {
|
public EdgeToReads() {
|
||||||
this.siteInds = rd.getNonNullIndices();
|
this.edgeReads = new TreeMap<GraphEdge, List<String>>(); // implemented GraphEdge.compareTo()
|
||||||
this.rdEdges = new LinkedList<GraphEdge>();
|
|
||||||
|
|
||||||
// sufficient to create a path linking the sites in rd, so they all end up in the same connected component:
|
|
||||||
for (int i = 0; i < siteInds.length - 1; i++) {
|
|
||||||
GraphEdge e = new GraphEdge(siteInds[i], siteInds[i + 1]);
|
|
||||||
rdEdges.add(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private class EdgeCounts {
|
public void addRead(GraphEdge e, String readName) {
|
||||||
private Map<GraphEdge, Integer> counts;
|
List<String> reads = edgeReads.get(e);
|
||||||
|
if (reads == null) {
|
||||||
public EdgeCounts() {
|
reads = new LinkedList<String>();
|
||||||
this.counts = new TreeMap<GraphEdge, Integer>(); // implemented GraphEdge.compareTo()
|
edgeReads.put(e, reads);
|
||||||
|
}
|
||||||
|
reads.add(readName);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getCount(GraphEdge e) {
|
public List<String> getReads(GraphEdge e) {
|
||||||
Integer count = counts.get(e);
|
return edgeReads.get(e);
|
||||||
if (count == null)
|
}
|
||||||
return 0;
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int incrementEdge(GraphEdge e) {
|
private class IntegerSet implements Iterable<Integer> {
|
||||||
Integer eCount = counts.get(e);
|
private Set<Integer> list;
|
||||||
int cnt;
|
|
||||||
if (eCount == null)
|
|
||||||
cnt = 0;
|
|
||||||
else
|
|
||||||
cnt = eCount;
|
|
||||||
|
|
||||||
cnt++;
|
public IntegerSet(Set<Integer> list) {
|
||||||
counts.put(e, cnt);
|
this.list = list;
|
||||||
return cnt;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int decrementEdge(GraphEdge e) {
|
public boolean contains(int i) {
|
||||||
Integer eCount = counts.get(e);
|
return list.contains(i);
|
||||||
if (eCount == null)
|
}
|
||||||
return 0;
|
|
||||||
|
|
||||||
int cnt = eCount - 1;
|
public Iterator<Integer> iterator() {
|
||||||
counts.put(e, cnt);
|
return list.iterator();
|
||||||
return cnt;
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int i : this) {
|
||||||
|
sb.append(i + ", ");
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Set<String> removeExtraneousReads(int numHetSites) {
|
public Set<String> removeExtraneousReads(int numHetSites) {
|
||||||
Graph readGraph = new Graph(numHetSites);
|
Graph readGraph = new Graph(numHetSites);
|
||||||
Map<String, ReadProperties> readToGraphProperties = new HashMap<String, ReadProperties>();
|
EdgeToReads edgeToReads = new EdgeToReads();
|
||||||
EdgeCounts edgeCounts = new EdgeCounts();
|
Set<Integer> sitesWithEdges = new TreeSet<Integer>();
|
||||||
|
|
||||||
for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
|
for (Map.Entry<String, Read> nameToReads : readsAtHetSites.entrySet()) {
|
||||||
String rdName = nameToReads.getKey();
|
String rdName = nameToReads.getKey();
|
||||||
Read rd = nameToReads.getValue();
|
Read rd = nameToReads.getValue();
|
||||||
|
|
||||||
ReadProperties rp = new ReadProperties(rd);
|
int[] siteInds = rd.getNonNullIndices();
|
||||||
if (!rp.rdEdges.isEmpty()) { // otherwise, this read is clearly irrelevant since it can't link anything
|
// Connect each pair of non-null sites in rd:
|
||||||
for (GraphEdge e : rp.rdEdges) {
|
for (int i = 0; i < siteInds.length; i++) {
|
||||||
readGraph.addEdge(e);
|
for (int j = i + 1; j < siteInds.length; j++) {
|
||||||
|
GraphEdge e = new GraphEdge(siteInds[i], siteInds[j]);
|
||||||
logger.debug("Read = " + rdName + " is adding edge: " + e);
|
logger.debug("Read = " + rdName + " is adding edge: " + e);
|
||||||
|
readGraph.addEdge(e);
|
||||||
|
|
||||||
edgeCounts.incrementEdge(e);
|
edgeToReads.addRead(e, rdName);
|
||||||
|
|
||||||
|
sitesWithEdges.add(e.v1);
|
||||||
|
sitesWithEdges.add(e.v2);
|
||||||
}
|
}
|
||||||
readToGraphProperties.put(rdName, rp);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
logger.debug("Read graph:\n" + readGraph);
|
logger.debug("Read graph:\n" + readGraph);
|
||||||
Set<String> keepReads = new HashSet<String>();
|
Set<String> keepReads = new HashSet<String>();
|
||||||
|
|
||||||
// Check which Reads are involved in paths from (phasingSiteIndex - 1) to (phasingSiteIndex):
|
/* Check which Reads are involved in acyclic paths from (phasingSiteIndex - 1) to (phasingSiteIndex):
|
||||||
|
|
||||||
|
In detail:
|
||||||
|
Every Read links EACH pair of sites for which it contains bases. Then, each such edge is added to a "site connectivity graph".
|
||||||
|
A read provides non-trivial bias toward the final haplotype decision if it participates in a path from prev ---> cur. This is tested by
|
||||||
|
considering each edge that the read contributes. For edge e=(v1,v2), if there exists a path from prev ---> v1 [that doesn't include v2] and
|
||||||
|
cur ---> v2 [that doesn't include v1], then there is a path from prev ---> cur that uses e, hence making the read significant.
|
||||||
|
By excluding each vertex's edges and then calculating connected components, we are able to make the determination, for example,
|
||||||
|
if a path exists from prev ---> v1 that excludes v2.
|
||||||
|
|
||||||
|
Furthermore, if the path DOES use other edges that exist solely due to the read, then that's fine, since adding in the read will give those edges as well.
|
||||||
|
And, if the path uses edges from other reads, then keeping all other reads that contribute those edges
|
||||||
|
[which will happen since those edges are also in paths from prev ---> cur] is sufficient for this path to exist.
|
||||||
|
|
||||||
|
NOTE:
|
||||||
|
If we would use NON-UNIFORM priors for the various haplotypes, then this calculation would not be correct, since the equivalence of:
|
||||||
|
1. The read affects the final marginal haplotype posterior probability (for general mapping and base quality values).
|
||||||
|
2. The read has edges involved in a path from prev ---> cur.
|
||||||
|
DEPENDS STRONGLY on the fact that all haplotypes have the same EXACT prior.
|
||||||
|
*/
|
||||||
int prev = phasingSiteIndex - 1;
|
int prev = phasingSiteIndex - 1;
|
||||||
int cur = phasingSiteIndex;
|
int cur = phasingSiteIndex;
|
||||||
|
|
||||||
|
|
@ -546,56 +571,51 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
return keepReads;
|
return keepReads;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Map.Entry<String, ReadProperties> rdEdgesEntry : readToGraphProperties.entrySet()) {
|
/* Check the connected components of prev and cur when removing each individual vertex's edges:
|
||||||
String testRead = rdEdgesEntry.getKey();
|
[Total run-time: for each vertex, calculate connected components after removing it's edges: O(V * E)]
|
||||||
ReadProperties rp = rdEdgesEntry.getValue();
|
|
||||||
logger.debug("Testing the connectivity of Read: " + testRead);
|
|
||||||
|
|
||||||
// Check the connected components after removing this read's UNIQUE edges:
|
|
||||||
for (GraphEdge e : rp.rdEdges) {
|
|
||||||
if (edgeCounts.getCount(e) == 1) // otherwise, the edge still exists without this read
|
|
||||||
readGraph.removeEdge(e);
|
|
||||||
}
|
|
||||||
DisjointSet ccAfterRemove = readGraph.getConnectedComponents();
|
|
||||||
|
|
||||||
/* testRead contributes a path between prev and cur iff:
|
|
||||||
There exists i != j s.t. testRead[i] != null, testRead[j] != null, ccAfterRemove.inSameSet(prev,i) && ccAfterRemove.inSameSet(j,cur)
|
|
||||||
[since ALL non-null indices in testRead are connected to one another, as one clique].
|
|
||||||
*/
|
*/
|
||||||
List<Integer> sameCCasPrev = ccAfterRemove.inSameSetAs(prev, rp.siteInds);
|
IntegerSet[] removedSiteSameCCAsPrev = new IntegerSet[numHetSites];
|
||||||
List<Integer> sameCCasCur = ccAfterRemove.inSameSetAs(cur, rp.siteInds);
|
IntegerSet[] removedSiteSameCCAsCur = new IntegerSet[numHetSites];
|
||||||
|
for (int i : sitesWithEdges) {
|
||||||
|
logger.debug("Calculating CC after removing edges of site: " + i);
|
||||||
|
|
||||||
|
// Remove all edges incident to i and see which positions have paths to prev and cur:
|
||||||
|
Collection<GraphEdge> removedEdges = readGraph.removeAllIncidentEdges(i);
|
||||||
|
|
||||||
|
// Run-time for efficiently calculating connected components using DisjointSet: O(E)
|
||||||
|
DisjointSet ccAfterRemove = readGraph.getConnectedComponents();
|
||||||
|
removedSiteSameCCAsPrev[i] = new IntegerSet(ccAfterRemove.inSameSetAs(prev, sitesWithEdges));
|
||||||
|
removedSiteSameCCAsCur[i] = new IntegerSet(ccAfterRemove.inSameSetAs(cur, sitesWithEdges));
|
||||||
|
|
||||||
|
logger.debug("Same CC as previous [" + prev + "]: " + removedSiteSameCCAsPrev[i]);
|
||||||
|
logger.debug("Same CC as current [" + cur + "]: " + removedSiteSameCCAsCur[i]);
|
||||||
|
|
||||||
|
// Add the removed edges back in:
|
||||||
|
readGraph.addEdges(removedEdges);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (GraphEdge e : readGraph) {
|
||||||
|
logger.debug("Testing the path-connectivity of Edge: " + e);
|
||||||
|
|
||||||
|
/* Edge e={v1,v2} contributes a path between prev and cur for testRead iff:
|
||||||
|
testRead[v1] != null, testRead[v2] != null, and there is a path from prev ---> v1 -> v2 ---> cur [or vice versa].
|
||||||
|
Note that the path from prev ---> v1 will NOT contain v2, since we removed all of v2's edges,
|
||||||
|
and the path from v2 ---> cur will NOT contain v1.
|
||||||
|
*/
|
||||||
|
boolean prevTo2and1ToCur = removedSiteSameCCAsPrev[e.v1].contains(e.v2) && removedSiteSameCCAsCur[e.v2].contains(e.v1);
|
||||||
|
boolean prevTo1and2ToCur = removedSiteSameCCAsPrev[e.v2].contains(e.v1) && removedSiteSameCCAsCur[e.v1].contains(e.v2);
|
||||||
|
|
||||||
|
if (prevTo2and1ToCur || prevTo1and2ToCur) {
|
||||||
|
for (String readName : edgeToReads.getReads(e)) {
|
||||||
|
keepReads.add(readName);
|
||||||
|
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
StringBuilder sb = new StringBuilder("sameCCasPrev:");
|
if (prevTo2and1ToCur)
|
||||||
for (int ind : sameCCasPrev)
|
logger.debug("Keep read " + readName + " due to path: " + prev + " ---> " + e.v2 + " -> " + e.v1 + " ---> " + cur);
|
||||||
sb.append(" " + ind);
|
else
|
||||||
logger.debug(sb.toString());
|
logger.debug("Keep read " + readName + " due to path: " + prev + " ---> " + e.v1 + " -> " + e.v2 + " ---> " + cur);
|
||||||
|
|
||||||
sb = new StringBuilder("sameCCasCur:");
|
|
||||||
for (int ind : sameCCasCur)
|
|
||||||
sb.append(" " + ind);
|
|
||||||
logger.debug(sb.toString());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean keepRead = false;
|
|
||||||
if (!sameCCasPrev.isEmpty() && !sameCCasCur.isEmpty()) { // There exists a path from prev to cur that goes through the sites in testRead
|
|
||||||
// Now, make sure that TWO DISTINCT sites, i and j, in testRead are used in the path:
|
|
||||||
Set<Integer> union = new HashSet<Integer>(sameCCasPrev);
|
|
||||||
union.addAll(sameCCasCur);
|
|
||||||
if (union.size() >= 2) // i != j
|
|
||||||
keepRead = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (keepRead) {
|
|
||||||
logger.debug("Read is part of path from " + prev + " to " + cur);
|
|
||||||
keepReads.add(testRead);
|
|
||||||
|
|
||||||
// Add the removed edges back in, since we're keeping the read:
|
|
||||||
for (GraphEdge e : rp.rdEdges)
|
|
||||||
readGraph.addEdge(e);
|
|
||||||
}
|
|
||||||
else { // Decrease the count for the edges [note that any read-specific edges were already removed above]:
|
|
||||||
for (GraphEdge e : rp.rdEdges)
|
|
||||||
edgeCounts.decrementEdge(e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -677,7 +697,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
private PhaseResult phaseSample(PhasingWindow phaseWindow) {
|
private PhaseResult phaseSample(PhasingWindow phaseWindow) {
|
||||||
/* Will map a phase and its "complement" to a single representative phase,
|
/* Will map a phase and its "complement" to a single representative phase,
|
||||||
and marginalizeTable() marginalizes to 2 positions [starting at the previous position, and then the current position]:
|
and marginalizeAsNewTable() marginalizes to 2 positions [starting at the previous position, and then the current position]:
|
||||||
*/
|
*/
|
||||||
HaplotypeTableCreator tabCreator = new BiallelicComplementHaplotypeTableCreator(phaseWindow.hetGenotypes, phaseWindow.phasingSiteIndex - 1, 2);
|
HaplotypeTableCreator tabCreator = new BiallelicComplementHaplotypeTableCreator(phaseWindow.hetGenotypes, phaseWindow.phasingSiteIndex - 1, 2);
|
||||||
PhasingTable sampleHaps = tabCreator.getNewTable();
|
PhasingTable sampleHaps = tabCreator.getNewTable();
|
||||||
|
|
@ -693,6 +713,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the phasing table based on each of the sub-reads for this sample:
|
// Update the phasing table based on each of the sub-reads for this sample:
|
||||||
|
MaxHaplotypeAndQuality prevMaxHapAndQual = null;
|
||||||
|
int numHighQualityIterations = 0;
|
||||||
|
int numInconsistentIterations = 0;
|
||||||
for (Map.Entry<String, Read> nameToReads : phaseWindow.readsAtHetSites.entrySet()) {
|
for (Map.Entry<String, Read> nameToReads : phaseWindow.readsAtHetSites.entrySet()) {
|
||||||
Read rd = nameToReads.getValue();
|
Read rd = nameToReads.getValue();
|
||||||
|
|
||||||
|
|
@ -704,38 +727,88 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
logger.debug("score(" + rd + ", " + pte.getHaplotypeClass() + ") = " + score);
|
logger.debug("score(" + rd + ", " + pte.getHaplotypeClass() + ") = " + score);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Check the current best haplotype assignment:
|
||||||
|
MaxHaplotypeAndQuality curMaxHapAndQual = new MaxHaplotypeAndQuality(sampleHaps, false);
|
||||||
|
logger.debug("CUR MAX hap:\t" + curMaxHapAndQual.maxEntry.getHaplotypeClass() + "\tcurPhaseQuality:\t" + curMaxHapAndQual.phaseQuality);
|
||||||
|
if (prevMaxHapAndQual != null && passesPhasingThreshold(prevMaxHapAndQual.phaseQuality)) {
|
||||||
|
numHighQualityIterations++;
|
||||||
|
if (!curMaxHapAndQual.maxEntry.getHaplotypeClass().getRepresentative().equals(prevMaxHapAndQual.maxEntry.getHaplotypeClass().getRepresentative()) || // switched phase
|
||||||
|
curMaxHapAndQual.phaseQuality < 0.9 * prevMaxHapAndQual.phaseQuality) { // a 10% ["significant"] decrease in PQ
|
||||||
|
logger.debug("Inconsistent read found!");
|
||||||
|
numInconsistentIterations++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prevMaxHapAndQual = curMaxHapAndQual;
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
|
||||||
}
|
}
|
||||||
logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
|
logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
|
||||||
|
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
|
||||||
|
double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
|
||||||
|
|
||||||
|
logger.debug("MAX hap:\t" + maxHapQual.maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + maxHapQual.phaseQuality);
|
||||||
|
logger.debug("Number of used reads " + phaseWindow.readsAtHetSites.size() + "; number of high PQ iterations " + numHighQualityIterations + "; number of inconsistencies " + numInconsistentIterations);
|
||||||
|
|
||||||
|
double repPhaseQuality = maxHapQual.phaseQuality;
|
||||||
|
|
||||||
|
//
|
||||||
|
//
|
||||||
|
if (numInconsistentIterations / (double) numHighQualityIterations >= 0.1) {
|
||||||
|
//
|
||||||
|
// ????
|
||||||
|
// NEED TO CHANGE phaseSite() to always output PQ field, EVEN if it's LESS THAN threshold ??????
|
||||||
|
// ????
|
||||||
|
//
|
||||||
|
repPhaseQuality *= -1;
|
||||||
|
}
|
||||||
|
//
|
||||||
|
//
|
||||||
|
|
||||||
|
return new PhaseResult(maxHapQual.maxEntry.getHaplotypeClass().getRepresentative(), repPhaseQuality);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class MaxHaplotypeAndQuality {
|
||||||
|
public PhasingTable.PhasingTableEntry maxEntry;
|
||||||
|
public double phaseQuality;
|
||||||
|
|
||||||
|
public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
|
||||||
// Marginalize each haplotype to its first 2 positions:
|
// Marginalize each haplotype to its first 2 positions:
|
||||||
sampleHaps = HaplotypeTableCreator.marginalizeTable(sampleHaps);
|
hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
|
||||||
logger.debug("\nPhasing table [AFTER MAPPING]:\n" + sampleHaps + "\n");
|
if (printDebug)
|
||||||
|
logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
|
||||||
|
|
||||||
// Determine the phase at this position:
|
// Determine the phase at this position:
|
||||||
sampleHaps.normalizeScores();
|
hapTable.normalizeScores();
|
||||||
logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + sampleHaps + "\n");
|
if (printDebug)
|
||||||
|
logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
|
||||||
|
|
||||||
PhasingTable.PhasingTableEntry maxEntry = sampleHaps.maxEntry();
|
this.maxEntry = hapTable.maxEntry();
|
||||||
double posteriorProb = maxEntry.getScore().getValue();
|
|
||||||
|
|
||||||
// convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb):
|
// convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb):
|
||||||
|
this.phaseQuality = getPhasingQuality(hapTable, maxEntry);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the PQ of entry (within table hapTable, which MUST be normalized):
|
||||||
|
public static double getPhasingQuality(PhasingTable hapTable, PhasingTable.PhasingTableEntry entry) {
|
||||||
PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO);
|
PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO);
|
||||||
for (PhasingTable.PhasingTableEntry pte : sampleHaps) {
|
for (PhasingTable.PhasingTableEntry pte : hapTable) {
|
||||||
if (pte != maxEntry)
|
if (pte != entry)
|
||||||
sumErrorProbs.plusEqual(pte.getScore());
|
sumErrorProbs.plusEqual(pte.getScore());
|
||||||
}
|
}
|
||||||
double phaseQuality = -10.0 * (sumErrorProbs.getLog10Value());
|
return -10.0 * (sumErrorProbs.getLog10Value());
|
||||||
|
}
|
||||||
logger.debug("MAX hap:\t" + maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + phaseQuality);
|
|
||||||
|
|
||||||
return new PhaseResult(maxEntry.getHaplotypeClass().getRepresentative(), phaseQuality);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Ensure that curBiall is phased relative to prevBiall as specified by hap.
|
Ensure that curBiall is phased relative to prevBiall as specified by hap.
|
||||||
*/
|
*/
|
||||||
|
public static void ensurePhasing(SNPallelePair curBiall, SNPallelePair prevBiall, Haplotype hap) {
|
||||||
public static void ensurePhasing(BialleleSNP curBiall, BialleleSNP prevBiall, Haplotype hap) {
|
|
||||||
if (hap.size() < 2)
|
if (hap.size() < 2)
|
||||||
throw new ReviewedStingException("LOGICAL ERROR: Only considering haplotypes of length > 2!");
|
throw new ReviewedStingException("LOGICAL ERROR: Only considering haplotypes of length > 2!");
|
||||||
|
|
||||||
|
|
@ -784,7 +857,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
byte refBase;
|
byte refBase;
|
||||||
if (!vc.isIndel()) {
|
if (!vc.isIndel()) {
|
||||||
Allele varAllele = vc.getReference();
|
Allele varAllele = vc.getReference();
|
||||||
refBase = BialleleSNP.getSingleBase(varAllele);
|
refBase = SNPallelePair.getSingleBase(varAllele);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
refBase = vc.getReferenceBaseForIndel();
|
refBase = vc.getReferenceBaseForIndel();
|
||||||
|
|
@ -1023,14 +1096,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
byte[] hapBases = new byte[numSites];
|
byte[] hapBases = new byte[numSites];
|
||||||
for (int i = 0; i < numSites; i++) {
|
for (int i = 0; i < numSites; i++) {
|
||||||
Allele alleleI = genotypes[i].getAllele(alleleInds[i]);
|
Allele alleleI = genotypes[i].getAllele(alleleInds[i]);
|
||||||
hapBases[i] = BialleleSNP.getSingleBase(alleleI);
|
hapBases[i] = SNPallelePair.getSingleBase(alleleI);
|
||||||
}
|
}
|
||||||
allHaps.add(new Haplotype(hapBases));
|
allHaps.add(new Haplotype(hapBases));
|
||||||
}
|
}
|
||||||
return allHaps;
|
return allHaps;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static PhasingTable marginalizeTable(PhasingTable table) {
|
public static PhasingTable marginalizeAsNewTable(PhasingTable table) {
|
||||||
Map<Haplotype, PreciseNonNegativeDouble> hapMap = new TreeMap<Haplotype, PreciseNonNegativeDouble>();
|
Map<Haplotype, PreciseNonNegativeDouble> hapMap = new TreeMap<Haplotype, PreciseNonNegativeDouble>();
|
||||||
for (PhasingTable.PhasingTableEntry pte : table) {
|
for (PhasingTable.PhasingTableEntry pte : table) {
|
||||||
Haplotype rep = pte.getHaplotypeClass().getRepresentative();
|
Haplotype rep = pte.getHaplotypeClass().getRepresentative();
|
||||||
|
|
@ -1056,23 +1129,23 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator {
|
private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator {
|
||||||
private BialleleSNP[] bialleleSNPs;
|
private SNPallelePair[] bialleleSNPs;
|
||||||
private int startIndex;
|
private int startIndex;
|
||||||
private int marginalizeLength;
|
private int marginalizeLength;
|
||||||
|
|
||||||
public BiallelicComplementHaplotypeTableCreator(Genotype[] hetGenotypes, int startIndex, int marginalizeLength) {
|
public BiallelicComplementHaplotypeTableCreator(Genotype[] hetGenotypes, int startIndex, int marginalizeLength) {
|
||||||
super(hetGenotypes);
|
super(hetGenotypes);
|
||||||
|
|
||||||
this.bialleleSNPs = new BialleleSNP[genotypes.length];
|
this.bialleleSNPs = new SNPallelePair[genotypes.length];
|
||||||
for (int i = 0; i < genotypes.length; i++)
|
for (int i = 0; i < genotypes.length; i++)
|
||||||
bialleleSNPs[i] = new BialleleSNP(genotypes[i]);
|
bialleleSNPs[i] = new SNPallelePair(genotypes[i]);
|
||||||
|
|
||||||
this.startIndex = startIndex;
|
this.startIndex = startIndex;
|
||||||
this.marginalizeLength = marginalizeLength;
|
this.marginalizeLength = marginalizeLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
public PhasingTable getNewTable() {
|
public PhasingTable getNewTable() {
|
||||||
double hapClassPrior = 1.0; // can change later
|
double hapClassPrior = 1.0; // can change later, BUT see NOTE above in removeExtraneousReads()
|
||||||
|
|
||||||
PhasingTable table = new PhasingTable();
|
PhasingTable table = new PhasingTable();
|
||||||
for (Haplotype hap : getAllHaplotypes()) {
|
for (Haplotype hap : getAllHaplotypes()) {
|
||||||
|
|
|
||||||
|
|
@ -28,15 +28,15 @@ import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
public class BialleleSNP extends Biallele {
|
public class SNPallelePair extends AllelePair {
|
||||||
|
|
||||||
public BialleleSNP(Genotype gt) {
|
public SNPallelePair(Genotype gt) {
|
||||||
super(gt);
|
super(gt);
|
||||||
|
|
||||||
if (getTopAllele().getBases().length != 1)
|
if (getTopAllele().getBases().length != 1)
|
||||||
throw new ReviewedStingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
|
throw new ReviewedStingException("LOGICAL ERROR: SNPallelePair may not contain non-SNP site!");
|
||||||
if (getBottomAllele().getBases().length != 1)
|
if (getBottomAllele().getBases().length != 1)
|
||||||
throw new ReviewedStingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
|
throw new ReviewedStingException("LOGICAL ERROR: SNPallelePair may not contain non-SNP site!");
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte getTopBase() {
|
public byte getTopBase() {
|
||||||
Loading…
Reference in New Issue