Added basic version of phasing evaluation: GenotypePhasingEvaluator
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4196 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fd5970fdd4
commit
a1cf3398a5
|
|
@ -0,0 +1,431 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.Biallele;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.BialleleSNP;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.ReadBackedPhasingWalker;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@Analysis(name = "Genotype Phasing Evaluation", description = "Evaluates the phasing of genotypes in different tracks")
|
||||
public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||
protected final static Logger logger = Logger.getLogger(GenotypePhasingEvaluator.class);
|
||||
|
||||
//
|
||||
//@Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to consider eval track phasing; [default:20.0]", required = false)
|
||||
//
|
||||
protected Double phaseQualityThresh = 20.0; // PQ = 20.0 <=> P(error) = 10^(-20/10) = 0.01, P(correct) = 0.99
|
||||
//
|
||||
//
|
||||
|
||||
private VariantEvalWalker.EvaluationContext group = null;
|
||||
|
||||
// a mapping from sample to stats
|
||||
@DataPoint(name = "samples", description = "the phasing statistics for each sample")
|
||||
SamplePhasingStatistics samplePhasingStatistics = null;
|
||||
|
||||
SamplePreviousGenotypes samplePrevGenotypes = null;
|
||||
|
||||
//
|
||||
//
|
||||
//
|
||||
HashMap<String, Genotype> prevCompGenotypes = new HashMap<String, Genotype>();
|
||||
HashMap<String, Genotype> prevEvalGenotypes = new HashMap<String, Genotype>();
|
||||
//
|
||||
//
|
||||
//
|
||||
|
||||
public GenotypePhasingEvaluator(VariantEvalWalker parent) {
|
||||
super(parent);
|
||||
this.samplePhasingStatistics = new SamplePhasingStatistics(phaseQualityThresh);
|
||||
this.samplePrevGenotypes = new SamplePreviousGenotypes();
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "GenotypePhasingEvaluator";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 2; // we only need to see pairs of (comp, eval)
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getName() + ": <table>";
|
||||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||
String interesting = null;
|
||||
if (ref == null)
|
||||
return interesting;
|
||||
GenomeLoc curLocus = ref.getLocus();
|
||||
this.group = group;
|
||||
|
||||
logger.debug("update2() locus: " + curLocus);
|
||||
logger.debug("comp = " + comp + " eval = " + eval);
|
||||
|
||||
if (comp == null || eval == null || comp.isFiltered()) // ignore existence of filtered variants in comp [NOTE that eval will be checked BOTH filtered and unfiltered (by VariantEvalWalker)]
|
||||
return interesting;
|
||||
|
||||
if (!comp.isBiallelic() || !eval.isBiallelic() || !comp.getAlternateAllele(0).equals(eval.getAlternateAllele(0))) // these are not the same biallelic variants
|
||||
return interesting;
|
||||
|
||||
Map<String, Genotype> compSampGenotypes = comp.getGenotypes();
|
||||
Map<String, Genotype> evalSampGenotypes = eval.getGenotypes();
|
||||
|
||||
Set<String> allSamples = new HashSet<String>();
|
||||
allSamples.addAll(comp.getSampleNames());
|
||||
allSamples.addAll(eval.getSampleNames());
|
||||
|
||||
for (String samp : allSamples) {
|
||||
logger.debug("sample = " + samp);
|
||||
|
||||
Genotype compSampGt = compSampGenotypes.get(samp);
|
||||
Genotype evalSampGt = evalSampGenotypes.get(samp);
|
||||
if (compSampGt != null && evalSampGt != null && compSampGt.isHet() && evalSampGt.isHet()) {
|
||||
CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp);
|
||||
if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome
|
||||
prevCompAndEval = null;
|
||||
|
||||
// Replace the previous with current:
|
||||
samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt);
|
||||
if (prevCompAndEval != null) {
|
||||
logger.debug("Potentially phaseable locus: " + curLocus);
|
||||
Genotype prevCompSampGt = prevCompAndEval.getCompGenotpye();
|
||||
Genotype prevEvalSampGt = prevCompAndEval.getEvalGenotype();
|
||||
|
||||
PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp);
|
||||
|
||||
boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt);
|
||||
boolean evalSampIsPhased = genotypesArePhasedAboveThreshold(evalSampGt);
|
||||
if (compSampIsPhased || evalSampIsPhased) {
|
||||
if (!evalSampIsPhased)
|
||||
ps.onlyCompPhased++;
|
||||
else if (!compSampIsPhased)
|
||||
ps.onlyEvalPhased++;
|
||||
else {
|
||||
Biallele prevCompBiallele = new Biallele(prevCompSampGt);
|
||||
Biallele compBiallele = new Biallele(compSampGt);
|
||||
|
||||
Biallele prevEvalBiallele = new Biallele(prevEvalSampGt);
|
||||
Biallele evalBiallele = new Biallele(evalSampGt);
|
||||
|
||||
boolean topsMatch = (prevCompBiallele.getTopAllele().equals(prevEvalBiallele.getTopAllele()) && compBiallele.getTopAllele().equals(evalBiallele.getTopAllele()));
|
||||
boolean topMatchesBottom = (prevCompBiallele.getTopAllele().equals(prevEvalBiallele.getBottomAllele()) && compBiallele.getTopAllele().equals(evalBiallele.getBottomAllele()));
|
||||
|
||||
if (topsMatch || topMatchesBottom) {
|
||||
ps.phasesAgree++;
|
||||
}
|
||||
else {
|
||||
ps.phasesDisagree++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
ps.neitherPhased++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else { // This site (either non-existent or homozygous for at least one of them) breaks the phase of the next position:
|
||||
samplePrevGenotypes.put(samp, null);
|
||||
}
|
||||
}
|
||||
logger.debug("\n" + samplePhasingStatistics + "\n");
|
||||
|
||||
return interesting;
|
||||
}
|
||||
|
||||
public boolean genotypesArePhasedAboveThreshold(Genotype gt) {
|
||||
if (!gt.genotypesArePhased())
|
||||
return false;
|
||||
|
||||
Object pq = gt.getAttributes().get("PQ");
|
||||
return (pq == null || (new Double(pq.toString()) >= phaseQualityThresh));
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
//
|
||||
public String update3(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||
this.group = group;
|
||||
|
||||
String interesting = null;
|
||||
if (comp == null && eval == null)
|
||||
return interesting;
|
||||
|
||||
if (comp != null && comp.isFiltered()) // ignore existence of filtered variants in comp [NOTE that eval will be checked BOTH filtered and unfiltered (by VariantEvalWalker)]
|
||||
return interesting;
|
||||
|
||||
Set<String> allSamples = new HashSet<String>();
|
||||
|
||||
boolean compIsUsable = (comp != null && comp.isBiallelic());
|
||||
boolean evalIsUsable = (eval != null && eval.isBiallelic());
|
||||
if (compIsUsable && evalIsUsable && !comp.getAlternateAllele(0).equals(eval.getAlternateAllele(0))) { // these are not the same biallelic variants
|
||||
compIsUsable = false;
|
||||
evalIsUsable = false;
|
||||
}
|
||||
|
||||
Map<String, Genotype> compSampGenotypes = null;
|
||||
if (compIsUsable) {
|
||||
compSampGenotypes = comp.getGenotypes();
|
||||
allSamples.addAll(comp.getSampleNames());
|
||||
}
|
||||
|
||||
Map<String, Genotype> evalSampGenotypes = null;
|
||||
if (evalIsUsable) {
|
||||
evalSampGenotypes = eval.getGenotypes();
|
||||
allSamples.addAll(eval.getSampleNames());
|
||||
}
|
||||
|
||||
// Note that missing or incompatible VariantContext (e.g., comp) might break the phase of the other VariantContext (e.g., eval):
|
||||
for (String samp : allSamples) {
|
||||
Genotype compSampGt = null;
|
||||
if (compSampGenotypes != null)
|
||||
compSampGt = compSampGenotypes.get(samp);
|
||||
boolean compIsHetForSample = (compIsUsable && compSampGt != null && compSampGt.isHet());
|
||||
|
||||
Genotype evalSampGt = null;
|
||||
if (evalSampGenotypes != null)
|
||||
evalSampGt = evalSampGenotypes.get(samp);
|
||||
boolean evalIsHetForSample = (evalIsUsable && evalSampGt != null && evalSampGt.isHet());
|
||||
|
||||
// Only compare phasing at het positions after het positions:
|
||||
if (compIsHetForSample && evalIsHetForSample) {
|
||||
|
||||
|
||||
// Put het genotypes into previous Genotypes:
|
||||
prevCompGenotypes.put(samp, compSampGt);
|
||||
prevEvalGenotypes.put(samp, evalSampGt);
|
||||
}
|
||||
else { // at least one is not a biallelic het site for samp:
|
||||
breakPhaseForUnphasedOrHomozygousSite(evalSampGt, samp, prevEvalGenotypes);
|
||||
breakPhaseForUnphasedOrHomozygousSite(compSampGt, samp, prevCompGenotypes);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Biallele compBiallele = new Biallele();
|
||||
Biallele evalBiallele = null;
|
||||
*/
|
||||
|
||||
//interesting = determineStats(eval, validation);
|
||||
|
||||
return interesting; // we don't capture any interesting sites
|
||||
}
|
||||
//
|
||||
//
|
||||
//
|
||||
|
||||
private void breakPhaseForUnphasedOrHomozygousSite(Genotype gt, String samp, HashMap<String, Genotype> prevVcGenotypes) {
|
||||
if (gt == null)
|
||||
return;
|
||||
|
||||
// Break the phase of gt [i.e., set the previous Genotype to null] since it is unphased (or homozygous):
|
||||
if (!genotypesArePhasedAboveThreshold(gt) || gt.isHom()) // otherwise, a phased het site lets the phase pass through
|
||||
prevVcGenotypes.put(samp, null);
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
class CompEvalGenotypes {
|
||||
private GenomeLoc loc;
|
||||
private Genotype compGt;
|
||||
private Genotype evalGt;
|
||||
|
||||
public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
|
||||
this.loc = loc;
|
||||
this.compGt = compGt;
|
||||
this.evalGt = evalGt;
|
||||
}
|
||||
|
||||
public GenomeLoc getLocus() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
public Genotype getCompGenotpye() {
|
||||
return compGt;
|
||||
}
|
||||
public Genotype getEvalGenotype() {
|
||||
return evalGt;
|
||||
}
|
||||
|
||||
public void setCompGenotype(Genotype compGt) {
|
||||
this.compGt = compGt;
|
||||
}
|
||||
|
||||
public void setEvalGenotype(Genotype evalGt) {
|
||||
this.evalGt = evalGt;
|
||||
}
|
||||
}
|
||||
|
||||
class SamplePreviousGenotypes {
|
||||
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
|
||||
|
||||
public SamplePreviousGenotypes() {
|
||||
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
|
||||
}
|
||||
|
||||
public CompEvalGenotypes get(String sample) {
|
||||
return sampleGenotypes.get(sample);
|
||||
}
|
||||
|
||||
public void put(String sample, CompEvalGenotypes compEvalGts) {
|
||||
sampleGenotypes.put(sample, compEvalGts);
|
||||
}
|
||||
|
||||
public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) {
|
||||
sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt));
|
||||
}
|
||||
}
|
||||
|
||||
class PhaseStats {
|
||||
public int neitherPhased;
|
||||
public int onlyCompPhased;
|
||||
public int onlyEvalPhased;
|
||||
public int phasesAgree;
|
||||
public int phasesDisagree;
|
||||
|
||||
public PhaseStats() {
|
||||
this.neitherPhased = 0;
|
||||
this.onlyCompPhased = 0;
|
||||
this.onlyEvalPhased = 0;
|
||||
this.phasesAgree = 0;
|
||||
this.phasesDisagree = 0;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String[] getFieldNamesArray() {
|
||||
return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"};
|
||||
}
|
||||
|
||||
public Object getField(int index) {
|
||||
switch (index) {
|
||||
case (0):
|
||||
return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree);
|
||||
case (1):
|
||||
return neitherPhased;
|
||||
case (2):
|
||||
return onlyCompPhased;
|
||||
case (3):
|
||||
return onlyEvalPhased;
|
||||
case (4):
|
||||
return (phasesAgree + phasesDisagree);
|
||||
case (5):
|
||||
return phasesAgree;
|
||||
case (6):
|
||||
return phasesDisagree;
|
||||
case (7):
|
||||
return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree));
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* a table of sample names to genotype phasing statistics
|
||||
*/
|
||||
class SamplePhasingStatistics implements TableType {
|
||||
private HashMap<String, PhaseStats> sampleStats = null;
|
||||
private double phaseQualityThresh;
|
||||
|
||||
public SamplePhasingStatistics(double phaseQualityThresh) {
|
||||
this.sampleStats = new HashMap<String, PhaseStats>();
|
||||
this.phaseQualityThresh = phaseQualityThresh;
|
||||
}
|
||||
|
||||
public PhaseStats ensureSampleStats(String samp) {
|
||||
PhaseStats ps = sampleStats.get(samp);
|
||||
if (ps == null) {
|
||||
ps = new PhaseStats();
|
||||
sampleStats.put(samp, ps);
|
||||
}
|
||||
return ps;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return one row per sample
|
||||
*/
|
||||
public String[] getRowKeys() {
|
||||
return sampleStats.keySet().toArray(new String[sampleStats.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the column keys
|
||||
*
|
||||
* @return a list of objects, in this case strings, that are the column names
|
||||
*/
|
||||
public String[] getColumnKeys() {
|
||||
return PhaseStats.getFieldNamesArray();
|
||||
}
|
||||
|
||||
public Object getCell(int x, int y) {
|
||||
String[] rowKeys = getRowKeys();
|
||||
PhaseStats ps = sampleStats.get(rowKeys[x]);
|
||||
return ps.getField(y);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "Sample Phasing Statistics (for PQ >= " + phaseQualityThresh + ")";
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (Map.Entry<String, PhaseStats> sampPhaseStatsEnt : sampleStats.entrySet()) {
|
||||
String sample = sampPhaseStatsEnt.getKey();
|
||||
PhaseStats ps = sampPhaseStatsEnt.getValue();
|
||||
|
||||
sb.append(sample + "\t" + ps);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class Biallele {
|
||||
private Allele top;
|
||||
private Allele bottom;
|
||||
|
||||
public Biallele(Genotype gt) {
|
||||
if (gt.getPloidy() != 2)
|
||||
throw new StingException("Biallele must have ploidy of 2!");
|
||||
|
||||
this.top = gt.getAllele(0);
|
||||
this.bottom = gt.getAllele(1);
|
||||
}
|
||||
|
||||
public Allele getTopAllele() {
|
||||
return top;
|
||||
}
|
||||
|
||||
public Allele getBottomAllele() {
|
||||
return bottom;
|
||||
}
|
||||
|
||||
public void swapAlleles() {
|
||||
Allele tmp = top;
|
||||
top = bottom;
|
||||
bottom = tmp;
|
||||
}
|
||||
|
||||
public List<Allele> getAllelesAsList() {
|
||||
List<Allele> allList = new ArrayList<Allele>(2);
|
||||
allList.add(0, top);
|
||||
allList.add(1, bottom);
|
||||
return allList;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("Top:\t" + top.getBaseString() + "\n");
|
||||
sb.append("Bot:\t" + bottom.getBaseString() + "\n");
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class BialleleSNP extends Biallele {
|
||||
|
||||
public BialleleSNP(Genotype gt) {
|
||||
super(gt);
|
||||
|
||||
if (getTopAllele().getBases().length != 1)
|
||||
throw new StingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
|
||||
if (getBottomAllele().getBases().length != 1)
|
||||
throw new StingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
|
||||
}
|
||||
|
||||
public byte getTopBase() {
|
||||
byte[] topBases = getTopAllele().getBases();
|
||||
return getSingleBase(topBases);
|
||||
}
|
||||
|
||||
public byte getBottomBase() {
|
||||
byte[] bottomBases = getBottomAllele().getBases();
|
||||
return getSingleBase(bottomBases);
|
||||
}
|
||||
|
||||
public boolean matchesTopBase(byte base) {
|
||||
return BaseUtils.basesAreEqual(base, getTopBase());
|
||||
}
|
||||
|
||||
public byte getOtherBase(byte base) {
|
||||
byte topBase = getTopBase();
|
||||
byte botBase = getBottomBase();
|
||||
|
||||
if (BaseUtils.basesAreEqual(base, topBase))
|
||||
return botBase;
|
||||
else if (BaseUtils.basesAreEqual(base, botBase))
|
||||
return topBase;
|
||||
else
|
||||
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
|
||||
}
|
||||
|
||||
public static byte getSingleBase(byte[] bases) {
|
||||
return bases[0];
|
||||
}
|
||||
|
||||
public static byte getSingleBase(Allele all) {
|
||||
return getSingleBase(all.getBases());
|
||||
}
|
||||
}
|
||||
|
|
@ -134,7 +134,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
boolean takeFirstOnly = false; // take as many entries as the VCF file has
|
||||
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
|
||||
boolean processVariant = true;
|
||||
if (!vc.isSNP() || !vc.isBiallelic() || vc.isFiltered())
|
||||
if (!isUnfilteredBiallelicSNP(vc))
|
||||
processVariant = false;
|
||||
|
||||
VariantAndReads vr = new VariantAndReads(vc, context, processVariant);
|
||||
|
|
@ -272,7 +272,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
|
||||
String samp = sampGtEntry.getKey();
|
||||
Genotype gt = sampGtEntry.getValue();
|
||||
Biallele biall = new Biallele(gt);
|
||||
BialleleSNP biall = new BialleleSNP(gt);
|
||||
HashMap<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||
|
||||
if (gt.isHet()) {
|
||||
|
|
@ -331,7 +331,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
PhaseResult pr = phaseSample(samp, sampleWindowVaList, phasingSiteIndex);
|
||||
genotypesArePhased = (pr.phaseQuality >= phaseQualityThresh);
|
||||
if (genotypesArePhased) {
|
||||
Biallele prevBiall = new Biallele(prevGenotype);
|
||||
BialleleSNP prevBiall = new BialleleSNP(prevGenotype);
|
||||
|
||||
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n");
|
||||
logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n");
|
||||
|
|
@ -429,7 +429,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
Ensure that curBiall is phased relative to prevBiall as specified by hap.
|
||||
*/
|
||||
|
||||
public static void ensurePhasing(Biallele curBiall, Biallele prevBiall, Haplotype hap) {
|
||||
public static void ensurePhasing(BialleleSNP curBiall, BialleleSNP prevBiall, Haplotype hap) {
|
||||
if (hap.size() < 2)
|
||||
throw new StingException("LOGICAL ERROR: Only considering haplotypes of length > 2!");
|
||||
|
||||
|
|
@ -466,7 +466,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
byte refBase;
|
||||
if (!vc.isIndel()) {
|
||||
Allele varAllele = vc.getReference();
|
||||
refBase = getSingleBase(varAllele);
|
||||
refBase = BialleleSNP.getSingleBase(varAllele);
|
||||
}
|
||||
else {
|
||||
refBase = vc.getReferenceBaseForIndel();
|
||||
|
|
@ -532,92 +532,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
return reads;
|
||||
}
|
||||
|
||||
protected static byte getSingleBase(byte[] bases) {
|
||||
return bases[0];
|
||||
}
|
||||
|
||||
protected static byte getSingleBase(Allele all) {
|
||||
return getSingleBase(all.getBases());
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Inner classes:
|
||||
*/
|
||||
|
||||
private static class Biallele {
|
||||
public Allele top;
|
||||
public Allele bottom;
|
||||
|
||||
public Biallele(Genotype gt) {
|
||||
if (gt.getPloidy() != 2)
|
||||
throw new StingException("Doesn't support phasing for ploidy that is not 2!");
|
||||
|
||||
this.top = gt.getAllele(0);
|
||||
this.bottom = gt.getAllele(1);
|
||||
}
|
||||
|
||||
public void swapAlleles() {
|
||||
Allele tmp = top;
|
||||
top = bottom;
|
||||
bottom = tmp;
|
||||
}
|
||||
|
||||
public List<Allele> getAllelesAsList() {
|
||||
List<Allele> allList = new ArrayList<Allele>(2);
|
||||
allList.add(0, top);
|
||||
allList.add(1, bottom);
|
||||
return allList;
|
||||
}
|
||||
|
||||
public byte getTopBase() {
|
||||
byte[] topBases = top.getBases();
|
||||
if (topBases.length != 1)
|
||||
throw new StingException("LOGICAL ERROR: should not process non-SNP sites!");
|
||||
|
||||
return getSingleBase(topBases);
|
||||
}
|
||||
|
||||
public byte getBottomBase() {
|
||||
byte[] bottomBases = bottom.getBases();
|
||||
if (bottomBases.length != 1)
|
||||
throw new StingException("LOGICAL ERROR: should not process non-SNP sites!");
|
||||
|
||||
return getSingleBase(bottomBases);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("Top:\t" + top.getBaseString() + "\n");
|
||||
sb.append("Bot:\t" + bottom.getBaseString() + "\n");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public boolean matchesTopBase(byte base) {
|
||||
boolean matchesTop;
|
||||
if (BaseUtils.basesAreEqual(base, getTopBase()))
|
||||
matchesTop = true;
|
||||
else if (BaseUtils.basesAreEqual(base, getBottomBase()))
|
||||
matchesTop = false;
|
||||
else
|
||||
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
|
||||
|
||||
return matchesTop;
|
||||
}
|
||||
|
||||
public byte getOtherBase(byte base) {
|
||||
byte topBase = getTopBase();
|
||||
byte botBase = getBottomBase();
|
||||
|
||||
if (BaseUtils.basesAreEqual(base, topBase))
|
||||
return botBase;
|
||||
else if (BaseUtils.basesAreEqual(base, botBase))
|
||||
return topBase;
|
||||
else
|
||||
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
|
||||
}
|
||||
}
|
||||
|
||||
private static class VariantAndReads {
|
||||
public VariantContext variant;
|
||||
public HashMap<String, ReadBasesAtPosition> sampleReadBases;
|
||||
|
|
@ -751,7 +669,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
byte[] hapBases = new byte[numSites];
|
||||
for (int i = 0; i < numSites; i++) {
|
||||
Allele alleleI = genotypes[i].getAllele(alleleInds[i]);
|
||||
hapBases[i] = getSingleBase(alleleI);
|
||||
hapBases[i] = BialleleSNP.getSingleBase(alleleI);
|
||||
}
|
||||
allHaps.add(new Haplotype(hapBases));
|
||||
}
|
||||
|
|
@ -784,16 +702,16 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
}
|
||||
|
||||
private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator {
|
||||
private Biallele[] bialleles;
|
||||
private BialleleSNP[] bialleleSNPs;
|
||||
private int startIndex;
|
||||
private int marginalizeLength;
|
||||
|
||||
public BiallelicComplementHaplotypeTableCreator(List<VariantAndReads> vaList, String sample, int startIndex, int marginalizeLength) {
|
||||
super(vaList, sample);
|
||||
|
||||
this.bialleles = new Biallele[genotypes.length];
|
||||
this.bialleleSNPs = new BialleleSNP[genotypes.length];
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
bialleles[i] = new Biallele(genotypes[i]);
|
||||
bialleleSNPs[i] = new BialleleSNP(genotypes[i]);
|
||||
|
||||
this.startIndex = startIndex;
|
||||
this.marginalizeLength = marginalizeLength;
|
||||
|
|
@ -804,7 +722,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
|
||||
PhasingTable table = new PhasingTable();
|
||||
for (Haplotype hap : getAllHaplotypes()) {
|
||||
if (bialleles[startIndex].matchesTopBase(hap.getBase(startIndex))) {
|
||||
if (bialleleSNPs[startIndex].matchesTopBase(hap.getBase(startIndex))) {
|
||||
/* hap is the "representative" haplotype [DEFINED here to be
|
||||
the one with the top base at the startIndex position.
|
||||
NOTE that it is CRITICAL that this definition be consistent with the representative sub-haplotypes defined below!]
|
||||
|
|
@ -824,14 +742,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
}
|
||||
|
||||
private Haplotype complement(Haplotype hap) {
|
||||
int numSites = bialleles.length;
|
||||
int numSites = bialleleSNPs.length;
|
||||
if (hap.size() != numSites)
|
||||
throw new StingException("INTERNAL ERROR: hap.size() != numSites");
|
||||
|
||||
// Take the other base at EACH position of the Haplotype:
|
||||
byte[] complementBases = new byte[numSites];
|
||||
for (int i = 0; i < numSites; i++)
|
||||
complementBases[i] = bialleles[i].getOtherBase(hap.getBase(i));
|
||||
complementBases[i] = bialleleSNPs[i].getOtherBase(hap.getBase(i));
|
||||
|
||||
return new Haplotype(complementBases);
|
||||
}
|
||||
|
|
@ -931,6 +849,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
this.numReads = numReads;
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isUnfilteredBiallelicSNP(VariantContext vc) {
|
||||
return (vc.isSNP() && vc.isBiallelic() && !vc.isFiltered());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue