Added basic version of phasing evaluation: GenotypePhasingEvaluator

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4196 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2010-09-02 22:09:50 +00:00
parent fd5970fdd4
commit a1cf3398a5
4 changed files with 596 additions and 94 deletions

View File

@ -0,0 +1,431 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.playground.gatk.walkers.Biallele;
import org.broadinstitute.sting.playground.gatk.walkers.BialleleSNP;
import org.broadinstitute.sting.playground.gatk.walkers.ReadBackedPhasingWalker;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
@Analysis(name = "Genotype Phasing Evaluation", description = "Evaluates the phasing of genotypes in different tracks")
public class GenotypePhasingEvaluator extends VariantEvaluator {
protected final static Logger logger = Logger.getLogger(GenotypePhasingEvaluator.class);
//
//@Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to consider eval track phasing; [default:20.0]", required = false)
//
protected Double phaseQualityThresh = 20.0; // PQ = 20.0 <=> P(error) = 10^(-20/10) = 0.01, P(correct) = 0.99
//
//
private VariantEvalWalker.EvaluationContext group = null;
// a mapping from sample to stats
@DataPoint(name = "samples", description = "the phasing statistics for each sample")
SamplePhasingStatistics samplePhasingStatistics = null;
SamplePreviousGenotypes samplePrevGenotypes = null;
//
//
//
HashMap<String, Genotype> prevCompGenotypes = new HashMap<String, Genotype>();
HashMap<String, Genotype> prevEvalGenotypes = new HashMap<String, Genotype>();
//
//
//
public GenotypePhasingEvaluator(VariantEvalWalker parent) {
super(parent);
this.samplePhasingStatistics = new SamplePhasingStatistics(phaseQualityThresh);
this.samplePrevGenotypes = new SamplePreviousGenotypes();
}
public String getName() {
return "GenotypePhasingEvaluator";
}
public int getComparisonOrder() {
return 2; // we only need to see pairs of (comp, eval)
}
public boolean enabled() {
return true;
}
public String toString() {
return getName() + ": <table>";
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
String interesting = null;
if (ref == null)
return interesting;
GenomeLoc curLocus = ref.getLocus();
this.group = group;
logger.debug("update2() locus: " + curLocus);
logger.debug("comp = " + comp + " eval = " + eval);
if (comp == null || eval == null || comp.isFiltered()) // ignore existence of filtered variants in comp [NOTE that eval will be checked BOTH filtered and unfiltered (by VariantEvalWalker)]
return interesting;
if (!comp.isBiallelic() || !eval.isBiallelic() || !comp.getAlternateAllele(0).equals(eval.getAlternateAllele(0))) // these are not the same biallelic variants
return interesting;
Map<String, Genotype> compSampGenotypes = comp.getGenotypes();
Map<String, Genotype> evalSampGenotypes = eval.getGenotypes();
Set<String> allSamples = new HashSet<String>();
allSamples.addAll(comp.getSampleNames());
allSamples.addAll(eval.getSampleNames());
for (String samp : allSamples) {
logger.debug("sample = " + samp);
Genotype compSampGt = compSampGenotypes.get(samp);
Genotype evalSampGt = evalSampGenotypes.get(samp);
if (compSampGt != null && evalSampGt != null && compSampGt.isHet() && evalSampGt.isHet()) {
CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp);
if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome
prevCompAndEval = null;
// Replace the previous with current:
samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt);
if (prevCompAndEval != null) {
logger.debug("Potentially phaseable locus: " + curLocus);
Genotype prevCompSampGt = prevCompAndEval.getCompGenotpye();
Genotype prevEvalSampGt = prevCompAndEval.getEvalGenotype();
PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp);
boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt);
boolean evalSampIsPhased = genotypesArePhasedAboveThreshold(evalSampGt);
if (compSampIsPhased || evalSampIsPhased) {
if (!evalSampIsPhased)
ps.onlyCompPhased++;
else if (!compSampIsPhased)
ps.onlyEvalPhased++;
else {
Biallele prevCompBiallele = new Biallele(prevCompSampGt);
Biallele compBiallele = new Biallele(compSampGt);
Biallele prevEvalBiallele = new Biallele(prevEvalSampGt);
Biallele evalBiallele = new Biallele(evalSampGt);
boolean topsMatch = (prevCompBiallele.getTopAllele().equals(prevEvalBiallele.getTopAllele()) && compBiallele.getTopAllele().equals(evalBiallele.getTopAllele()));
boolean topMatchesBottom = (prevCompBiallele.getTopAllele().equals(prevEvalBiallele.getBottomAllele()) && compBiallele.getTopAllele().equals(evalBiallele.getBottomAllele()));
if (topsMatch || topMatchesBottom) {
ps.phasesAgree++;
}
else {
ps.phasesDisagree++;
}
}
}
else {
ps.neitherPhased++;
}
}
}
else { // This site (either non-existent or homozygous for at least one of them) breaks the phase of the next position:
samplePrevGenotypes.put(samp, null);
}
}
logger.debug("\n" + samplePhasingStatistics + "\n");
return interesting;
}
public boolean genotypesArePhasedAboveThreshold(Genotype gt) {
if (!gt.genotypesArePhased())
return false;
Object pq = gt.getAttributes().get("PQ");
return (pq == null || (new Double(pq.toString()) >= phaseQualityThresh));
}
//
//
//
public String update3(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
this.group = group;
String interesting = null;
if (comp == null && eval == null)
return interesting;
if (comp != null && comp.isFiltered()) // ignore existence of filtered variants in comp [NOTE that eval will be checked BOTH filtered and unfiltered (by VariantEvalWalker)]
return interesting;
Set<String> allSamples = new HashSet<String>();
boolean compIsUsable = (comp != null && comp.isBiallelic());
boolean evalIsUsable = (eval != null && eval.isBiallelic());
if (compIsUsable && evalIsUsable && !comp.getAlternateAllele(0).equals(eval.getAlternateAllele(0))) { // these are not the same biallelic variants
compIsUsable = false;
evalIsUsable = false;
}
Map<String, Genotype> compSampGenotypes = null;
if (compIsUsable) {
compSampGenotypes = comp.getGenotypes();
allSamples.addAll(comp.getSampleNames());
}
Map<String, Genotype> evalSampGenotypes = null;
if (evalIsUsable) {
evalSampGenotypes = eval.getGenotypes();
allSamples.addAll(eval.getSampleNames());
}
// Note that missing or incompatible VariantContext (e.g., comp) might break the phase of the other VariantContext (e.g., eval):
for (String samp : allSamples) {
Genotype compSampGt = null;
if (compSampGenotypes != null)
compSampGt = compSampGenotypes.get(samp);
boolean compIsHetForSample = (compIsUsable && compSampGt != null && compSampGt.isHet());
Genotype evalSampGt = null;
if (evalSampGenotypes != null)
evalSampGt = evalSampGenotypes.get(samp);
boolean evalIsHetForSample = (evalIsUsable && evalSampGt != null && evalSampGt.isHet());
// Only compare phasing at het positions after het positions:
if (compIsHetForSample && evalIsHetForSample) {
// Put het genotypes into previous Genotypes:
prevCompGenotypes.put(samp, compSampGt);
prevEvalGenotypes.put(samp, evalSampGt);
}
else { // at least one is not a biallelic het site for samp:
breakPhaseForUnphasedOrHomozygousSite(evalSampGt, samp, prevEvalGenotypes);
breakPhaseForUnphasedOrHomozygousSite(compSampGt, samp, prevCompGenotypes);
}
}
/*
Biallele compBiallele = new Biallele();
Biallele evalBiallele = null;
*/
//interesting = determineStats(eval, validation);
return interesting; // we don't capture any interesting sites
}
//
//
//
private void breakPhaseForUnphasedOrHomozygousSite(Genotype gt, String samp, HashMap<String, Genotype> prevVcGenotypes) {
if (gt == null)
return;
// Break the phase of gt [i.e., set the previous Genotype to null] since it is unphased (or homozygous):
if (!genotypesArePhasedAboveThreshold(gt) || gt.isHom()) // otherwise, a phased het site lets the phase pass through
prevVcGenotypes.put(samp, null);
}
public void finalizeEvaluation() {
}
}
class CompEvalGenotypes {
private GenomeLoc loc;
private Genotype compGt;
private Genotype evalGt;
public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
this.loc = loc;
this.compGt = compGt;
this.evalGt = evalGt;
}
public GenomeLoc getLocus() {
return loc;
}
public Genotype getCompGenotpye() {
return compGt;
}
public Genotype getEvalGenotype() {
return evalGt;
}
public void setCompGenotype(Genotype compGt) {
this.compGt = compGt;
}
public void setEvalGenotype(Genotype evalGt) {
this.evalGt = evalGt;
}
}
class SamplePreviousGenotypes {
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
public SamplePreviousGenotypes() {
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
}
public CompEvalGenotypes get(String sample) {
return sampleGenotypes.get(sample);
}
public void put(String sample, CompEvalGenotypes compEvalGts) {
sampleGenotypes.put(sample, compEvalGts);
}
public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) {
sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt));
}
}
class PhaseStats {
public int neitherPhased;
public int onlyCompPhased;
public int onlyEvalPhased;
public int phasesAgree;
public int phasesDisagree;
public PhaseStats() {
this.neitherPhased = 0;
this.onlyCompPhased = 0;
this.onlyEvalPhased = 0;
this.phasesAgree = 0;
this.phasesDisagree = 0;
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree);
return sb.toString();
}
public static String[] getFieldNamesArray() {
return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"};
}
public Object getField(int index) {
switch (index) {
case (0):
return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree);
case (1):
return neitherPhased;
case (2):
return onlyCompPhased;
case (3):
return onlyEvalPhased;
case (4):
return (phasesAgree + phasesDisagree);
case (5):
return phasesAgree;
case (6):
return phasesDisagree;
case (7):
return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree));
default:
return -1;
}
}
}
/**
* a table of sample names to genotype phasing statistics
*/
class SamplePhasingStatistics implements TableType {
private HashMap<String, PhaseStats> sampleStats = null;
private double phaseQualityThresh;
public SamplePhasingStatistics(double phaseQualityThresh) {
this.sampleStats = new HashMap<String, PhaseStats>();
this.phaseQualityThresh = phaseQualityThresh;
}
public PhaseStats ensureSampleStats(String samp) {
PhaseStats ps = sampleStats.get(samp);
if (ps == null) {
ps = new PhaseStats();
sampleStats.put(samp, ps);
}
return ps;
}
/**
* @return one row per sample
*/
public String[] getRowKeys() {
return sampleStats.keySet().toArray(new String[sampleStats.size()]);
}
/**
* get the column keys
*
* @return a list of objects, in this case strings, that are the column names
*/
public String[] getColumnKeys() {
return PhaseStats.getFieldNamesArray();
}
public Object getCell(int x, int y) {
String[] rowKeys = getRowKeys();
PhaseStats ps = sampleStats.get(rowKeys[x]);
return ps.getField(y);
}
public String getName() {
return "Sample Phasing Statistics (for PQ >= " + phaseQualityThresh + ")";
}
public String toString() {
StringBuilder sb = new StringBuilder();
for (Map.Entry<String, PhaseStats> sampPhaseStatsEnt : sampleStats.entrySet()) {
String sample = sampPhaseStatsEnt.getKey();
PhaseStats ps = sampPhaseStatsEnt.getValue();
sb.append(sample + "\t" + ps);
}
return sb.toString();
}
}

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broadinstitute.sting.utils.StingException;
import java.util.ArrayList;
import java.util.List;
public class Biallele {
private Allele top;
private Allele bottom;
public Biallele(Genotype gt) {
if (gt.getPloidy() != 2)
throw new StingException("Biallele must have ploidy of 2!");
this.top = gt.getAllele(0);
this.bottom = gt.getAllele(1);
}
public Allele getTopAllele() {
return top;
}
public Allele getBottomAllele() {
return bottom;
}
public void swapAlleles() {
Allele tmp = top;
top = bottom;
bottom = tmp;
}
public List<Allele> getAllelesAsList() {
List<Allele> allList = new ArrayList<Allele>(2);
allList.add(0, top);
allList.add(1, bottom);
return allList;
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Top:\t" + top.getBaseString() + "\n");
sb.append("Bot:\t" + bottom.getBaseString() + "\n");
return sb.toString();
}
}

View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import java.util.ArrayList;
import java.util.List;
public class BialleleSNP extends Biallele {
public BialleleSNP(Genotype gt) {
super(gt);
if (getTopAllele().getBases().length != 1)
throw new StingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
if (getBottomAllele().getBases().length != 1)
throw new StingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
}
public byte getTopBase() {
byte[] topBases = getTopAllele().getBases();
return getSingleBase(topBases);
}
public byte getBottomBase() {
byte[] bottomBases = getBottomAllele().getBases();
return getSingleBase(bottomBases);
}
public boolean matchesTopBase(byte base) {
return BaseUtils.basesAreEqual(base, getTopBase());
}
public byte getOtherBase(byte base) {
byte topBase = getTopBase();
byte botBase = getBottomBase();
if (BaseUtils.basesAreEqual(base, topBase))
return botBase;
else if (BaseUtils.basesAreEqual(base, botBase))
return topBase;
else
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
}
public static byte getSingleBase(byte[] bases) {
return bases[0];
}
public static byte getSingleBase(Allele all) {
return getSingleBase(all.getBases());
}
}

View File

@ -134,7 +134,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
boolean takeFirstOnly = false; // take as many entries as the VCF file has boolean takeFirstOnly = false; // take as many entries as the VCF file has
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) { for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
boolean processVariant = true; boolean processVariant = true;
if (!vc.isSNP() || !vc.isBiallelic() || vc.isFiltered()) if (!isUnfilteredBiallelicSNP(vc))
processVariant = false; processVariant = false;
VariantAndReads vr = new VariantAndReads(vc, context, processVariant); VariantAndReads vr = new VariantAndReads(vc, context, processVariant);
@ -272,7 +272,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
String samp = sampGtEntry.getKey(); String samp = sampGtEntry.getKey();
Genotype gt = sampGtEntry.getValue(); Genotype gt = sampGtEntry.getValue();
Biallele biall = new Biallele(gt); BialleleSNP biall = new BialleleSNP(gt);
HashMap<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes()); HashMap<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
if (gt.isHet()) { if (gt.isHet()) {
@ -331,7 +331,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
PhaseResult pr = phaseSample(samp, sampleWindowVaList, phasingSiteIndex); PhaseResult pr = phaseSample(samp, sampleWindowVaList, phasingSiteIndex);
genotypesArePhased = (pr.phaseQuality >= phaseQualityThresh); genotypesArePhased = (pr.phaseQuality >= phaseQualityThresh);
if (genotypesArePhased) { if (genotypesArePhased) {
Biallele prevBiall = new Biallele(prevGenotype); BialleleSNP prevBiall = new BialleleSNP(prevGenotype);
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n"); logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n");
logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n"); logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n");
@ -429,7 +429,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
Ensure that curBiall is phased relative to prevBiall as specified by hap. Ensure that curBiall is phased relative to prevBiall as specified by hap.
*/ */
public static void ensurePhasing(Biallele curBiall, Biallele prevBiall, Haplotype hap) { public static void ensurePhasing(BialleleSNP curBiall, BialleleSNP prevBiall, Haplotype hap) {
if (hap.size() < 2) if (hap.size() < 2)
throw new StingException("LOGICAL ERROR: Only considering haplotypes of length > 2!"); throw new StingException("LOGICAL ERROR: Only considering haplotypes of length > 2!");
@ -466,7 +466,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
byte refBase; byte refBase;
if (!vc.isIndel()) { if (!vc.isIndel()) {
Allele varAllele = vc.getReference(); Allele varAllele = vc.getReference();
refBase = getSingleBase(varAllele); refBase = BialleleSNP.getSingleBase(varAllele);
} }
else { else {
refBase = vc.getReferenceBaseForIndel(); refBase = vc.getReferenceBaseForIndel();
@ -532,92 +532,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
return reads; return reads;
} }
protected static byte getSingleBase(byte[] bases) {
return bases[0];
}
protected static byte getSingleBase(Allele all) {
return getSingleBase(all.getBases());
}
/* /*
Inner classes: Inner classes:
*/ */
private static class Biallele {
public Allele top;
public Allele bottom;
public Biallele(Genotype gt) {
if (gt.getPloidy() != 2)
throw new StingException("Doesn't support phasing for ploidy that is not 2!");
this.top = gt.getAllele(0);
this.bottom = gt.getAllele(1);
}
public void swapAlleles() {
Allele tmp = top;
top = bottom;
bottom = tmp;
}
public List<Allele> getAllelesAsList() {
List<Allele> allList = new ArrayList<Allele>(2);
allList.add(0, top);
allList.add(1, bottom);
return allList;
}
public byte getTopBase() {
byte[] topBases = top.getBases();
if (topBases.length != 1)
throw new StingException("LOGICAL ERROR: should not process non-SNP sites!");
return getSingleBase(topBases);
}
public byte getBottomBase() {
byte[] bottomBases = bottom.getBases();
if (bottomBases.length != 1)
throw new StingException("LOGICAL ERROR: should not process non-SNP sites!");
return getSingleBase(bottomBases);
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Top:\t" + top.getBaseString() + "\n");
sb.append("Bot:\t" + bottom.getBaseString() + "\n");
return sb.toString();
}
public boolean matchesTopBase(byte base) {
boolean matchesTop;
if (BaseUtils.basesAreEqual(base, getTopBase()))
matchesTop = true;
else if (BaseUtils.basesAreEqual(base, getBottomBase()))
matchesTop = false;
else
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
return matchesTop;
}
public byte getOtherBase(byte base) {
byte topBase = getTopBase();
byte botBase = getBottomBase();
if (BaseUtils.basesAreEqual(base, topBase))
return botBase;
else if (BaseUtils.basesAreEqual(base, botBase))
return topBase;
else
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
}
}
private static class VariantAndReads { private static class VariantAndReads {
public VariantContext variant; public VariantContext variant;
public HashMap<String, ReadBasesAtPosition> sampleReadBases; public HashMap<String, ReadBasesAtPosition> sampleReadBases;
@ -751,7 +669,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
byte[] hapBases = new byte[numSites]; byte[] hapBases = new byte[numSites];
for (int i = 0; i < numSites; i++) { for (int i = 0; i < numSites; i++) {
Allele alleleI = genotypes[i].getAllele(alleleInds[i]); Allele alleleI = genotypes[i].getAllele(alleleInds[i]);
hapBases[i] = getSingleBase(alleleI); hapBases[i] = BialleleSNP.getSingleBase(alleleI);
} }
allHaps.add(new Haplotype(hapBases)); allHaps.add(new Haplotype(hapBases));
} }
@ -784,16 +702,16 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator { private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator {
private Biallele[] bialleles; private BialleleSNP[] bialleleSNPs;
private int startIndex; private int startIndex;
private int marginalizeLength; private int marginalizeLength;
public BiallelicComplementHaplotypeTableCreator(List<VariantAndReads> vaList, String sample, int startIndex, int marginalizeLength) { public BiallelicComplementHaplotypeTableCreator(List<VariantAndReads> vaList, String sample, int startIndex, int marginalizeLength) {
super(vaList, sample); super(vaList, sample);
this.bialleles = new Biallele[genotypes.length]; this.bialleleSNPs = new BialleleSNP[genotypes.length];
for (int i = 0; i < genotypes.length; i++) for (int i = 0; i < genotypes.length; i++)
bialleles[i] = new Biallele(genotypes[i]); bialleleSNPs[i] = new BialleleSNP(genotypes[i]);
this.startIndex = startIndex; this.startIndex = startIndex;
this.marginalizeLength = marginalizeLength; this.marginalizeLength = marginalizeLength;
@ -804,7 +722,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
PhasingTable table = new PhasingTable(); PhasingTable table = new PhasingTable();
for (Haplotype hap : getAllHaplotypes()) { for (Haplotype hap : getAllHaplotypes()) {
if (bialleles[startIndex].matchesTopBase(hap.getBase(startIndex))) { if (bialleleSNPs[startIndex].matchesTopBase(hap.getBase(startIndex))) {
/* hap is the "representative" haplotype [DEFINED here to be /* hap is the "representative" haplotype [DEFINED here to be
the one with the top base at the startIndex position. the one with the top base at the startIndex position.
NOTE that it is CRITICAL that this definition be consistent with the representative sub-haplotypes defined below!] NOTE that it is CRITICAL that this definition be consistent with the representative sub-haplotypes defined below!]
@ -824,14 +742,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
private Haplotype complement(Haplotype hap) { private Haplotype complement(Haplotype hap) {
int numSites = bialleles.length; int numSites = bialleleSNPs.length;
if (hap.size() != numSites) if (hap.size() != numSites)
throw new StingException("INTERNAL ERROR: hap.size() != numSites"); throw new StingException("INTERNAL ERROR: hap.size() != numSites");
// Take the other base at EACH position of the Haplotype: // Take the other base at EACH position of the Haplotype:
byte[] complementBases = new byte[numSites]; byte[] complementBases = new byte[numSites];
for (int i = 0; i < numSites; i++) for (int i = 0; i < numSites; i++)
complementBases[i] = bialleles[i].getOtherBase(hap.getBase(i)); complementBases[i] = bialleleSNPs[i].getOtherBase(hap.getBase(i));
return new Haplotype(complementBases); return new Haplotype(complementBases);
} }
@ -931,6 +849,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
this.numReads = numReads; this.numReads = numReads;
} }
} }
public static boolean isUnfilteredBiallelicSNP(VariantContext vc) {
return (vc.isSNP() && vc.isBiallelic() && !vc.isFiltered());
}
} }