Added detection and INFO field marking of phasing inconsistencies (and optional filtration using --filterInconsistentSites)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4384 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2010-09-29 19:28:56 +00:00
parent a6c7de95c8
commit 20ffe484bc
2 changed files with 121 additions and 96 deletions

View File

@ -68,7 +68,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
protected Integer cacheWindow = 20000; protected Integer cacheWindow = 20000;
@Argument(fullName = "maxPhaseSites", shortName = "maxSites", doc = "The maximum number of successive heterozygous sites permitted to be used by the phasing algorithm; [default:10]", required = false) @Argument(fullName = "maxPhaseSites", shortName = "maxSites", doc = "The maximum number of successive heterozygous sites permitted to be used by the phasing algorithm; [default:10]", required = false)
protected Integer maxPhaseSites = 10; // 2^10 == 10^3 biallelic haplotypes protected Integer maxPhaseSites = 10; // 2^10 == 10^3 diploid haplotypes
@Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing; [default:10.0]", required = false) @Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing; [default:10.0]", required = false)
protected Double phaseQualityThresh = 10.0; // PQ = 10.0 <=> P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9 protected Double phaseQualityThresh = 10.0; // PQ = 10.0 <=> P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9
@ -84,6 +84,17 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private LinkedList<String> rodNames = null; private LinkedList<String> rodNames = null;
private PhasingQualityStatsWriter statsWriter = null; private PhasingQualityStatsWriter statsWriter = null;
public static final String PQ_KEY = "PQ";
// In order to detect phase inconsistencies:
private static final double FRACTION_OF_MEAN_PQ_CHANGES = 0.5; // If the PQ decreases by this fraction of the mean PQ changes (thus far), then this read is inconsistent with previous reads
private static final double MAX_FRACTION_OF_INCONSISTENT_READS = 0.1; // If there are more than this fraction of inconsistent reads, then flag this site
@Argument(fullName = "filterInconsistentSites", shortName = "filterInconsistentSites", doc = "Mark sites with inconsistent phasing as filtered [default:false]", required = false)
protected boolean filterInconsistentSites = false;
public static final String PHASING_INCONSISTENT_KEY = "PhasingInconsistent";
public void initialize() { public void initialize() {
if (maxPhaseSites <= 2) if (maxPhaseSites <= 2)
maxPhaseSites = 2; // by definition, must phase a site relative to previous site [thus, 2 in total] maxPhaseSites = 2; // by definition, must phase a site relative to previous site [thus, 2 in total]
@ -105,7 +116,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
hInfo.add(new VCFFormatHeaderLine("PQ", 1, VCFHeaderLineType.Float, "Read-backed phasing quality"));
// Phasing-specific INFO fields:
hInfo.add(new VCFFormatHeaderLine(PQ_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality"));
hInfo.add(new VCFInfoHeaderLine(PHASING_INCONSISTENT_KEY, 0, VCFHeaderLineType.Flag, "Are the reads significantly haplotype-inconsistent?"));
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames); Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames);
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples()))); writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples())));
@ -244,8 +258,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
if (isCalledDiploidGenotype(gt) && gt.isHet()) { // Can attempt to phase this genotype if (isCalledDiploidGenotype(gt) && gt.isHet()) { // Can attempt to phase this genotype
PhasingWindow phaseWindow = new PhasingWindow(vr, samp); PhasingWindow phaseWindow = new PhasingWindow(vr, samp);
if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against if (phaseWindow.hasPreviousHets()) { // Otherwise, nothing to phase this against
SNPallelePair biall = new SNPallelePair(gt); SNPallelePair allelePair = new SNPallelePair(gt);
logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + biall); logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair);
DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt; DoublyLinkedList.BidirectionalIterator<UnfinishedVariantAndReads> prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt;
/* Notes: /* Notes:
@ -258,24 +272,21 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
PhaseResult pr = phaseSample(phaseWindow); PhaseResult pr = phaseSample(phaseWindow);
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality); boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
// if (pr.phasingContainsInconsistencies) {
// logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc));
if (pr.phaseQuality < 0) { uvc.setPhasingInconsistent(filterInconsistentSites);
logger.warn("MORE than 10% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc));
} }
//
//
if (genotypesArePhased) { if (genotypesArePhased) {
SNPallelePair prevBiall = new SNPallelePair(prevHetGenotype); SNPallelePair prevAllelePair = new SNPallelePair(prevHetGenotype);
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n"); logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevAllelePair + "\n");
logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n"); logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
ensurePhasing(biall, prevBiall, pr.haplotype); ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes()); Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
gtAttribs.put("PQ", pr.phaseQuality); gtAttribs.put(PQ_KEY, pr.phaseQuality);
Genotype phasedGt = new Genotype(gt.getSampleName(), biall.getAllelesAsList(), gt.getNegLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased); Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getNegLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased);
uvc.setGenotype(samp, phasedGt); uvc.setGenotype(samp, phasedGt);
} }
@ -290,10 +301,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
if (!handledGt.isHom()) if (!handledGt.isHom())
throw new ReviewedStingException("LOGICAL error: should not have anything besides hom sites IN BETWEEN two het sites"); throw new ReviewedStingException("LOGICAL error: should not have anything besides hom sites IN BETWEEN two het sites");
// Use the same PQ for each hom site in the "interior" as for the het-het phase: // Use the same phasing consistency and PQ for each hom site in the "interior" as for the het-het phase:
if (pr.phasingContainsInconsistencies)
interiorUvc.setPhasingInconsistent(filterInconsistentSites);
if (genotypesArePhased) { if (genotypesArePhased) {
Map<String, Object> handledGtAttribs = new HashMap<String, Object>(handledGt.getAttributes()); Map<String, Object> handledGtAttribs = new HashMap<String, Object>(handledGt.getAttributes());
handledGtAttribs.put("PQ", pr.phaseQuality); handledGtAttribs.put(PQ_KEY, pr.phaseQuality);
Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getNegLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased); Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getNegLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased);
interiorUvc.setGenotype(samp, phasedHomGt); interiorUvc.setGenotype(samp, phasedHomGt);
} }
@ -557,10 +571,36 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
[which will happen since those edges are also in paths from prev ---> cur] is sufficient for this path to exist. [which will happen since those edges are also in paths from prev ---> cur] is sufficient for this path to exist.
NOTE: NOTE:
If we would use NON-UNIFORM priors for the various haplotypes, then this calculation would not be correct, since the equivalence of: If we would use NON-UNIFORM priors for the various haplotypes consistent with a margnialized haplotype, then this calculation would not be correct, since the equivalence of:
1. The read affects the final marginal haplotype posterior probability (for general mapping and base quality values). 1. The read affects the final marginal haplotype posterior probability (for general mapping and base quality values).
2. The read has edges involved in a path from prev ---> cur. 2. The read has edges involved in a path from prev ---> cur.
DEPENDS STRONGLY on the fact that all haplotypes have the same EXACT prior. DEPENDS STRONGLY on the fact that all haplotypes have the same EXACT prior.
This is due to the following:
[We denote:
R = set of all reads
r = a single read
"AA + CC" = AA on top chromosome, CC on bottom chromosome]
Note that since there are only two haplotype possibilities:
P(AA + CC | R) + P(AC + CA | R) = 1
Now, if we assume that all haplotypes consistent with AA + CC have the same prior probability [P(AA + CC | R)], then:
P(AA + CC | R)
= P(AAAA + CCCC | R) + ... + P(AACC + CCAA | R)
= [P(AAAA + CCCC , R) + ... + P(AACC + CCAA , R)] / P(R)
\propto P(AAAA + CCCC , R) + ... + P(AACC + CCAA , R)
= P(R | AAAA + CCCC)*P(AAAA + CCCC) + ... + P(R | AACC + CCAA)*P(AACC + CCAA)
= P(AA + CC | R) * [P(R | AAAA + CCCC) + ... + P(R | AACC + CCAA)]
Since we assume independence between reads given a particular haplotype [P(R | AAAA + CCCC) = \prod_r P(r | AAAA + CCCC)],
a new read r affects P(AA + CC | R) by multiplying each of the terms in the sum by, e.g., P(r | AAAA + CCCC).
Therefore, if these values do not affect the ratio of:
(I) [P(R | AAAA + CCCC) + ... + P(R | AACC + CCAA)] / [P(R | ACAA + CACC) + ... + P(R | ACCC + CAAA)]
then they do not affect the value of:
(II) P(AA + CC | R) / P(AC + CA | R) [which uniquely defines their values, since they sum to 1]
And, the P(r | AAAA + CCCC), ..., P(r | ACCC + CAAA) do not affect ratio (I) iff r's edges do not take part in a path from prev to cur in combination with the other reads in R.
*/ */
int prev = phasingSiteIndex - 1; int prev = phasingSiteIndex - 1;
int cur = phasingSiteIndex; int cur = phasingSiteIndex;
@ -699,7 +739,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
/* Will map a phase and its "complement" to a single representative phase, /* Will map a phase and its "complement" to a single representative phase,
and marginalizeAsNewTable() marginalizes to 2 positions [starting at the previous position, and then the current position]: and marginalizeAsNewTable() marginalizes to 2 positions [starting at the previous position, and then the current position]:
*/ */
HaplotypeTableCreator tabCreator = new BiallelicComplementHaplotypeTableCreator(phaseWindow.hetGenotypes, phaseWindow.phasingSiteIndex - 1, 2); HaplotypeTableCreator tabCreator = new TableCreatorOfHaplotypeAndComplementForDiploidAlleles(phaseWindow.hetGenotypes, phaseWindow.phasingSiteIndex - 1, 2);
PhasingTable sampleHaps = tabCreator.getNewTable(); PhasingTable sampleHaps = tabCreator.getNewTable();
logger.debug("Number of USED reads [connecting the two positions to be phased] at sites: " + phaseWindow.readsAtHetSites.size()); logger.debug("Number of USED reads [connecting the two positions to be phased] at sites: " + phaseWindow.readsAtHetSites.size());
@ -714,40 +754,44 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// Update the phasing table based on each of the sub-reads for this sample: // Update the phasing table based on each of the sub-reads for this sample:
MaxHaplotypeAndQuality prevMaxHapAndQual = null; MaxHaplotypeAndQuality prevMaxHapAndQual = null;
int numHighQualityIterations = 0; int numHighQualityIterations = 0;
int numInconsistentIterations = 0; int numInconsistentIterations = 0;
double totalAbsPQchange = 0;
int numPQchangesObserved = 0;
for (Map.Entry<String, Read> nameToReads : phaseWindow.readsAtHetSites.entrySet()) { for (Map.Entry<String, Read> nameToReads : phaseWindow.readsAtHetSites.entrySet()) {
Read rd = nameToReads.getValue(); Read rd = nameToReads.getValue();
logger.debug("rd = " + rd + "\tname = " + nameToReads.getKey());
logger.debug("rd = " + rd + "\tname = " + nameToReads.getKey() + (rd.isGapped() ? "\tGAPPED" : ""));
for (PhasingTable.PhasingTableEntry pte : sampleHaps) { for (PhasingTable.PhasingTableEntry pte : sampleHaps) {
PhasingScore score = rd.matchHaplotypeClassScore(pte.getHaplotypeClass()); PhasingScore score = rd.matchHaplotypeClassScore(pte.getHaplotypeClass());
pte.getScore().integrateReadScore(score); pte.getScore().integrateReadScore(score);
logger.debug("score(" + rd + ", " + pte.getHaplotypeClass() + ") = " + score); logger.debug("score(" + rd + ", " + pte.getHaplotypeClass() + ") = " + score);
} }
// // Check the current best haplotype assignment and compare it to the previous one:
//
//
// Check the current best haplotype assignment:
MaxHaplotypeAndQuality curMaxHapAndQual = new MaxHaplotypeAndQuality(sampleHaps, false); MaxHaplotypeAndQuality curMaxHapAndQual = new MaxHaplotypeAndQuality(sampleHaps, false);
logger.debug("CUR MAX hap:\t" + curMaxHapAndQual.maxEntry.getHaplotypeClass() + "\tcurPhaseQuality:\t" + curMaxHapAndQual.phaseQuality); logger.debug("CUR MAX hap:\t" + curMaxHapAndQual.maxEntry.getHaplotypeClass() + "\tcurPhaseQuality:\t" + curMaxHapAndQual.phaseQuality);
if (prevMaxHapAndQual != null && passesPhasingThreshold(prevMaxHapAndQual.phaseQuality)) { if (prevMaxHapAndQual != null) {
numHighQualityIterations++; double changeInPQ = prevMaxHapAndQual.phaseQuality - curMaxHapAndQual.phaseQuality;
if (!curMaxHapAndQual.maxEntry.getHaplotypeClass().getRepresentative().equals(prevMaxHapAndQual.maxEntry.getHaplotypeClass().getRepresentative()) || // switched phase
curMaxHapAndQual.phaseQuality < 0.9 * prevMaxHapAndQual.phaseQuality) { // a 10% ["significant"] decrease in PQ if (passesPhasingThreshold(prevMaxHapAndQual.phaseQuality)) {
logger.debug("Inconsistent read found!"); numHighQualityIterations++;
numInconsistentIterations++; if (!curMaxHapAndQual.hasSameRepresentativeHaplotype(prevMaxHapAndQual) || // switched phase
(numPQchangesObserved > 0 && changeInPQ > FRACTION_OF_MEAN_PQ_CHANGES * (totalAbsPQchange / numPQchangesObserved))) { // a "significant" decrease in PQ
logger.debug("Inconsistent read found!");
numInconsistentIterations++;
}
} }
totalAbsPQchange += Math.abs(changeInPQ);
numPQchangesObserved++;
} }
prevMaxHapAndQual = curMaxHapAndQual; prevMaxHapAndQual = curMaxHapAndQual;
//
//
//
} }
logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n"); logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true); MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
double posteriorProb = maxHapQual.maxEntry.getScore().getValue(); double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
@ -755,22 +799,11 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
logger.debug("MAX hap:\t" + maxHapQual.maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + maxHapQual.phaseQuality); logger.debug("MAX hap:\t" + maxHapQual.maxEntry.getHaplotypeClass() + "\tposteriorProb:\t" + posteriorProb + "\tphaseQuality:\t" + maxHapQual.phaseQuality);
logger.debug("Number of used reads " + phaseWindow.readsAtHetSites.size() + "; number of high PQ iterations " + numHighQualityIterations + "; number of inconsistencies " + numInconsistentIterations); logger.debug("Number of used reads " + phaseWindow.readsAtHetSites.size() + "; number of high PQ iterations " + numHighQualityIterations + "; number of inconsistencies " + numInconsistentIterations);
double repPhaseQuality = maxHapQual.phaseQuality; boolean phasingContainsInconsistencies = false;
if (numInconsistentIterations / (double) numHighQualityIterations > MAX_FRACTION_OF_INCONSISTENT_READS)
phasingContainsInconsistencies = true;
// return new PhaseResult(maxHapQual.maxEntry.getHaplotypeClass().getRepresentative(), maxHapQual.phaseQuality, phasingContainsInconsistencies);
//
if (numInconsistentIterations / (double) numHighQualityIterations >= 0.1) {
//
// ????
// NEED TO CHANGE phaseSite() to always output PQ field, EVEN if it's LESS THAN threshold ??????
// ????
//
repPhaseQuality *= -1;
}
//
//
return new PhaseResult(maxHapQual.maxEntry.getHaplotypeClass().getRepresentative(), repPhaseQuality);
} }
private static class MaxHaplotypeAndQuality { private static class MaxHaplotypeAndQuality {
@ -803,22 +836,30 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
return -10.0 * (sumErrorProbs.getLog10Value()); return -10.0 * (sumErrorProbs.getLog10Value());
} }
public boolean hasSameRepresentativeHaplotype(MaxHaplotypeAndQuality that) {
return this.getRepresentative().equals(that.getRepresentative());
}
private Haplotype getRepresentative() {
return maxEntry.getHaplotypeClass().getRepresentative();
}
} }
/* /*
Ensure that curBiall is phased relative to prevBiall as specified by hap. Ensure that curAllelePair is phased relative to prevAllelePair as specified by hap.
*/ */
public static void ensurePhasing(SNPallelePair curBiall, SNPallelePair prevBiall, Haplotype hap) { public static void ensurePhasing(SNPallelePair curAllelePair, SNPallelePair prevAllelePair, Haplotype hap) {
if (hap.size() < 2) if (hap.size() < 2)
throw new ReviewedStingException("LOGICAL ERROR: Only considering haplotypes of length > 2!"); throw new ReviewedStingException("LOGICAL ERROR: Only considering haplotypes of length > 2!");
byte prevBase = hap.getBase(0); // The 1st base in the haplotype byte prevBase = hap.getBase(0); // The 1st base in the haplotype
byte curBase = hap.getBase(1); // The 2nd base in the haplotype byte curBase = hap.getBase(1); // The 2nd base in the haplotype
boolean chosePrevTopChrom = prevBiall.matchesTopBase(prevBase); boolean chosePrevTopChrom = prevAllelePair.matchesTopBase(prevBase);
boolean choseCurTopChrom = curBiall.matchesTopBase(curBase); boolean choseCurTopChrom = curAllelePair.matchesTopBase(curBase);
if (chosePrevTopChrom != choseCurTopChrom) if (chosePrevTopChrom != choseCurTopChrom)
curBiall.swapAlleles(); curAllelePair.swapAlleles();
} }
private boolean isInWindowRange(VariantContext vc1, VariantContext vc2) { private boolean isInWindowRange(VariantContext vc1, VariantContext vc2) {
@ -1002,7 +1043,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private Map<String, Genotype> genotypes; private Map<String, Genotype> genotypes;
private double negLog10PError; private double negLog10PError;
private Set<String> filters; private Set<String> filters;
private Map<String, ?> attributes; private Map<String, Object> attributes;
public UnfinishedVariantContext(VariantContext vc) { public UnfinishedVariantContext(VariantContext vc) {
this.name = vc.getName(); this.name = vc.getName();
@ -1012,8 +1053,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
this.alleles = vc.getAlleles(); this.alleles = vc.getAlleles();
this.genotypes = new HashMap<String, Genotype>(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable this.genotypes = new HashMap<String, Genotype>(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable
this.negLog10PError = vc.getNegLog10PError(); this.negLog10PError = vc.getNegLog10PError();
this.filters = vc.getFilters(); this.filters = new HashSet<String>(vc.getFilters());
this.attributes = vc.getAttributes(); this.attributes = new HashMap<String, Object>(vc.getAttributes());
} }
public VariantContext toVariantContext() { public VariantContext toVariantContext() {
@ -1031,6 +1072,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
public void setGenotype(String sample, Genotype newGt) { public void setGenotype(String sample, Genotype newGt) {
genotypes.put(sample, newGt); genotypes.put(sample, newGt);
} }
public void setPhasingInconsistent(boolean addAsFilter) {
attributes.put(PHASING_INCONSISTENT_KEY, true);
if (addAsFilter)
filters.add(PHASING_INCONSISTENT_KEY);
}
} }
private static class ReadBase { private static class ReadBase {
@ -1128,17 +1176,17 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
} }
private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator { private static class TableCreatorOfHaplotypeAndComplementForDiploidAlleles extends HaplotypeTableCreator {
private SNPallelePair[] bialleleSNPs; private SNPallelePair[] SNPallelePairs;
private int startIndex; private int startIndex;
private int marginalizeLength; private int marginalizeLength;
public BiallelicComplementHaplotypeTableCreator(Genotype[] hetGenotypes, int startIndex, int marginalizeLength) { public TableCreatorOfHaplotypeAndComplementForDiploidAlleles(Genotype[] hetGenotypes, int startIndex, int marginalizeLength) {
super(hetGenotypes); super(hetGenotypes);
this.bialleleSNPs = new SNPallelePair[genotypes.length]; this.SNPallelePairs = new SNPallelePair[genotypes.length];
for (int i = 0; i < genotypes.length; i++) for (int i = 0; i < genotypes.length; i++)
bialleleSNPs[i] = new SNPallelePair(genotypes[i]); SNPallelePairs[i] = new SNPallelePair(genotypes[i]);
this.startIndex = startIndex; this.startIndex = startIndex;
this.marginalizeLength = marginalizeLength; this.marginalizeLength = marginalizeLength;
@ -1149,7 +1197,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
PhasingTable table = new PhasingTable(); PhasingTable table = new PhasingTable();
for (Haplotype hap : getAllHaplotypes()) { for (Haplotype hap : getAllHaplotypes()) {
if (bialleleSNPs[startIndex].matchesTopBase(hap.getBase(startIndex))) { if (SNPallelePairs[startIndex].matchesTopBase(hap.getBase(startIndex))) {
/* hap is the "representative" haplotype [DEFINED here to be /* hap is the "representative" haplotype [DEFINED here to be
the one with the top base at the startIndex position. the one with the top base at the startIndex position.
NOTE that it is CRITICAL that this definition be consistent with the representative sub-haplotypes defined below!] NOTE that it is CRITICAL that this definition be consistent with the representative sub-haplotypes defined below!]
@ -1169,14 +1217,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
private Haplotype complement(Haplotype hap) { private Haplotype complement(Haplotype hap) {
int numSites = bialleleSNPs.length; int numSites = SNPallelePairs.length;
if (hap.size() != numSites) if (hap.size() != numSites)
throw new ReviewedStingException("INTERNAL ERROR: hap.size() != numSites"); throw new ReviewedStingException("INTERNAL ERROR: hap.size() != numSites");
// Take the other base at EACH position of the Haplotype: // Take the other base at EACH position of the Haplotype:
byte[] complementBases = new byte[numSites]; byte[] complementBases = new byte[numSites];
for (int i = 0; i < numSites; i++) for (int i = 0; i < numSites; i++)
complementBases[i] = bialleleSNPs[i].getOtherBase(hap.getBase(i)); complementBases[i] = SNPallelePairs[i].getOtherBase(hap.getBase(i));
return new Haplotype(complementBases); return new Haplotype(complementBases);
} }
@ -1268,10 +1316,12 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private static class PhaseResult { private static class PhaseResult {
public Haplotype haplotype; public Haplotype haplotype;
public double phaseQuality; public double phaseQuality;
public boolean phasingContainsInconsistencies;
public PhaseResult(Haplotype haplotype, double phaseQuality) { public PhaseResult(Haplotype haplotype, double phaseQuality, boolean phasingContainsInconsistencies) {
this.haplotype = haplotype; this.haplotype = haplotype;
this.phaseQuality = phaseQuality; this.phaseQuality = phaseQuality;
this.phasingContainsInconsistencies = phasingContainsInconsistencies;
} }
} }
@ -1425,7 +1475,7 @@ class HaplotypeClass implements Iterable<Haplotype> {
if (isFirst) if (isFirst)
isFirst = false; isFirst = false;
else else
sb.append("|"); sb.append(" + ");
sb.append(h); sb.append(h);
} }
@ -1488,31 +1538,6 @@ class Read extends BaseArray {
return score; return score;
} }
private enum ReadStage {
BEFORE_BASES, BASES_1, NO_BASES, BASES_2
}
public boolean isGapped() {
ReadStage s = ReadStage.BEFORE_BASES;
for (int i = 0; i < bases.length; i++) {
if (getBase(i) != null) { // has a base at i
if (s == ReadStage.BEFORE_BASES)
s = ReadStage.BASES_1;
else if (s == ReadStage.NO_BASES) {
s = ReadStage.BASES_2;
break;
}
}
else { // no base at i
if (s == ReadStage.BASES_1)
s = ReadStage.NO_BASES;
}
}
return (s == ReadStage.BASES_2);
}
public int[] getNonNullIndices() { public int[] getNonNullIndices() {
List<Integer> nonNull = new LinkedList<Integer>(); List<Integer> nonNull = new LinkedList<Integer>();
for (int i = 0; i < bases.length; i++) { for (int i = 0; i < bases.length; i++) {

View File

@ -205,7 +205,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
} }
public static Double getPQ(Genotype gt) { public static Double getPQ(Genotype gt) {
Object pq = gt.getAttributes().get("PQ"); Object pq = gt.getAttributes().get(ReadBackedPhasingWalker.PQ_KEY);
if (pq == null) if (pq == null)
return null; return null;