Added basic version of phasing evaluation: GenotypePhasingEvaluator
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4196 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fd5970fdd4
commit
a1cf3398a5
|
|
@ -0,0 +1,431 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.Biallele;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.BialleleSNP;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.ReadBackedPhasingWalker;
|
||||||
|
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Analysis(name = "Genotype Phasing Evaluation", description = "Evaluates the phasing of genotypes in different tracks")
|
||||||
|
public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||||
|
protected final static Logger logger = Logger.getLogger(GenotypePhasingEvaluator.class);
|
||||||
|
|
||||||
|
//
|
||||||
|
//@Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to consider eval track phasing; [default:20.0]", required = false)
|
||||||
|
//
|
||||||
|
protected Double phaseQualityThresh = 20.0; // PQ = 20.0 <=> P(error) = 10^(-20/10) = 0.01, P(correct) = 0.99
|
||||||
|
//
|
||||||
|
//
|
||||||
|
|
||||||
|
private VariantEvalWalker.EvaluationContext group = null;
|
||||||
|
|
||||||
|
// a mapping from sample to stats
|
||||||
|
@DataPoint(name = "samples", description = "the phasing statistics for each sample")
|
||||||
|
SamplePhasingStatistics samplePhasingStatistics = null;
|
||||||
|
|
||||||
|
SamplePreviousGenotypes samplePrevGenotypes = null;
|
||||||
|
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
HashMap<String, Genotype> prevCompGenotypes = new HashMap<String, Genotype>();
|
||||||
|
HashMap<String, Genotype> prevEvalGenotypes = new HashMap<String, Genotype>();
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
|
||||||
|
public GenotypePhasingEvaluator(VariantEvalWalker parent) {
|
||||||
|
super(parent);
|
||||||
|
this.samplePhasingStatistics = new SamplePhasingStatistics(phaseQualityThresh);
|
||||||
|
this.samplePrevGenotypes = new SamplePreviousGenotypes();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "GenotypePhasingEvaluator";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 2; // we only need to see pairs of (comp, eval)
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName() + ": <table>";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||||
|
String interesting = null;
|
||||||
|
if (ref == null)
|
||||||
|
return interesting;
|
||||||
|
GenomeLoc curLocus = ref.getLocus();
|
||||||
|
this.group = group;
|
||||||
|
|
||||||
|
logger.debug("update2() locus: " + curLocus);
|
||||||
|
logger.debug("comp = " + comp + " eval = " + eval);
|
||||||
|
|
||||||
|
if (comp == null || eval == null || comp.isFiltered()) // ignore existence of filtered variants in comp [NOTE that eval will be checked BOTH filtered and unfiltered (by VariantEvalWalker)]
|
||||||
|
return interesting;
|
||||||
|
|
||||||
|
if (!comp.isBiallelic() || !eval.isBiallelic() || !comp.getAlternateAllele(0).equals(eval.getAlternateAllele(0))) // these are not the same biallelic variants
|
||||||
|
return interesting;
|
||||||
|
|
||||||
|
Map<String, Genotype> compSampGenotypes = comp.getGenotypes();
|
||||||
|
Map<String, Genotype> evalSampGenotypes = eval.getGenotypes();
|
||||||
|
|
||||||
|
Set<String> allSamples = new HashSet<String>();
|
||||||
|
allSamples.addAll(comp.getSampleNames());
|
||||||
|
allSamples.addAll(eval.getSampleNames());
|
||||||
|
|
||||||
|
for (String samp : allSamples) {
|
||||||
|
logger.debug("sample = " + samp);
|
||||||
|
|
||||||
|
Genotype compSampGt = compSampGenotypes.get(samp);
|
||||||
|
Genotype evalSampGt = evalSampGenotypes.get(samp);
|
||||||
|
if (compSampGt != null && evalSampGt != null && compSampGt.isHet() && evalSampGt.isHet()) {
|
||||||
|
CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp);
|
||||||
|
if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome
|
||||||
|
prevCompAndEval = null;
|
||||||
|
|
||||||
|
// Replace the previous with current:
|
||||||
|
samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt);
|
||||||
|
if (prevCompAndEval != null) {
|
||||||
|
logger.debug("Potentially phaseable locus: " + curLocus);
|
||||||
|
Genotype prevCompSampGt = prevCompAndEval.getCompGenotpye();
|
||||||
|
Genotype prevEvalSampGt = prevCompAndEval.getEvalGenotype();
|
||||||
|
|
||||||
|
PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp);
|
||||||
|
|
||||||
|
boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt);
|
||||||
|
boolean evalSampIsPhased = genotypesArePhasedAboveThreshold(evalSampGt);
|
||||||
|
if (compSampIsPhased || evalSampIsPhased) {
|
||||||
|
if (!evalSampIsPhased)
|
||||||
|
ps.onlyCompPhased++;
|
||||||
|
else if (!compSampIsPhased)
|
||||||
|
ps.onlyEvalPhased++;
|
||||||
|
else {
|
||||||
|
Biallele prevCompBiallele = new Biallele(prevCompSampGt);
|
||||||
|
Biallele compBiallele = new Biallele(compSampGt);
|
||||||
|
|
||||||
|
Biallele prevEvalBiallele = new Biallele(prevEvalSampGt);
|
||||||
|
Biallele evalBiallele = new Biallele(evalSampGt);
|
||||||
|
|
||||||
|
boolean topsMatch = (prevCompBiallele.getTopAllele().equals(prevEvalBiallele.getTopAllele()) && compBiallele.getTopAllele().equals(evalBiallele.getTopAllele()));
|
||||||
|
boolean topMatchesBottom = (prevCompBiallele.getTopAllele().equals(prevEvalBiallele.getBottomAllele()) && compBiallele.getTopAllele().equals(evalBiallele.getBottomAllele()));
|
||||||
|
|
||||||
|
if (topsMatch || topMatchesBottom) {
|
||||||
|
ps.phasesAgree++;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ps.phasesDisagree++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ps.neitherPhased++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // This site (either non-existent or homozygous for at least one of them) breaks the phase of the next position:
|
||||||
|
samplePrevGenotypes.put(samp, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
logger.debug("\n" + samplePhasingStatistics + "\n");
|
||||||
|
|
||||||
|
return interesting;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean genotypesArePhasedAboveThreshold(Genotype gt) {
|
||||||
|
if (!gt.genotypesArePhased())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Object pq = gt.getAttributes().get("PQ");
|
||||||
|
return (pq == null || (new Double(pq.toString()) >= phaseQualityThresh));
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
public String update3(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||||
|
this.group = group;
|
||||||
|
|
||||||
|
String interesting = null;
|
||||||
|
if (comp == null && eval == null)
|
||||||
|
return interesting;
|
||||||
|
|
||||||
|
if (comp != null && comp.isFiltered()) // ignore existence of filtered variants in comp [NOTE that eval will be checked BOTH filtered and unfiltered (by VariantEvalWalker)]
|
||||||
|
return interesting;
|
||||||
|
|
||||||
|
Set<String> allSamples = new HashSet<String>();
|
||||||
|
|
||||||
|
boolean compIsUsable = (comp != null && comp.isBiallelic());
|
||||||
|
boolean evalIsUsable = (eval != null && eval.isBiallelic());
|
||||||
|
if (compIsUsable && evalIsUsable && !comp.getAlternateAllele(0).equals(eval.getAlternateAllele(0))) { // these are not the same biallelic variants
|
||||||
|
compIsUsable = false;
|
||||||
|
evalIsUsable = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, Genotype> compSampGenotypes = null;
|
||||||
|
if (compIsUsable) {
|
||||||
|
compSampGenotypes = comp.getGenotypes();
|
||||||
|
allSamples.addAll(comp.getSampleNames());
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, Genotype> evalSampGenotypes = null;
|
||||||
|
if (evalIsUsable) {
|
||||||
|
evalSampGenotypes = eval.getGenotypes();
|
||||||
|
allSamples.addAll(eval.getSampleNames());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note that missing or incompatible VariantContext (e.g., comp) might break the phase of the other VariantContext (e.g., eval):
|
||||||
|
for (String samp : allSamples) {
|
||||||
|
Genotype compSampGt = null;
|
||||||
|
if (compSampGenotypes != null)
|
||||||
|
compSampGt = compSampGenotypes.get(samp);
|
||||||
|
boolean compIsHetForSample = (compIsUsable && compSampGt != null && compSampGt.isHet());
|
||||||
|
|
||||||
|
Genotype evalSampGt = null;
|
||||||
|
if (evalSampGenotypes != null)
|
||||||
|
evalSampGt = evalSampGenotypes.get(samp);
|
||||||
|
boolean evalIsHetForSample = (evalIsUsable && evalSampGt != null && evalSampGt.isHet());
|
||||||
|
|
||||||
|
// Only compare phasing at het positions after het positions:
|
||||||
|
if (compIsHetForSample && evalIsHetForSample) {
|
||||||
|
|
||||||
|
|
||||||
|
// Put het genotypes into previous Genotypes:
|
||||||
|
prevCompGenotypes.put(samp, compSampGt);
|
||||||
|
prevEvalGenotypes.put(samp, evalSampGt);
|
||||||
|
}
|
||||||
|
else { // at least one is not a biallelic het site for samp:
|
||||||
|
breakPhaseForUnphasedOrHomozygousSite(evalSampGt, samp, prevEvalGenotypes);
|
||||||
|
breakPhaseForUnphasedOrHomozygousSite(compSampGt, samp, prevCompGenotypes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Biallele compBiallele = new Biallele();
|
||||||
|
Biallele evalBiallele = null;
|
||||||
|
*/
|
||||||
|
|
||||||
|
//interesting = determineStats(eval, validation);
|
||||||
|
|
||||||
|
return interesting; // we don't capture any interesting sites
|
||||||
|
}
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
|
||||||
|
private void breakPhaseForUnphasedOrHomozygousSite(Genotype gt, String samp, HashMap<String, Genotype> prevVcGenotypes) {
|
||||||
|
if (gt == null)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Break the phase of gt [i.e., set the previous Genotype to null] since it is unphased (or homozygous):
|
||||||
|
if (!genotypesArePhasedAboveThreshold(gt) || gt.isHom()) // otherwise, a phased het site lets the phase pass through
|
||||||
|
prevVcGenotypes.put(samp, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void finalizeEvaluation() {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CompEvalGenotypes {
|
||||||
|
private GenomeLoc loc;
|
||||||
|
private Genotype compGt;
|
||||||
|
private Genotype evalGt;
|
||||||
|
|
||||||
|
public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
|
||||||
|
this.loc = loc;
|
||||||
|
this.compGt = compGt;
|
||||||
|
this.evalGt = evalGt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GenomeLoc getLocus() {
|
||||||
|
return loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Genotype getCompGenotpye() {
|
||||||
|
return compGt;
|
||||||
|
}
|
||||||
|
public Genotype getEvalGenotype() {
|
||||||
|
return evalGt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCompGenotype(Genotype compGt) {
|
||||||
|
this.compGt = compGt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEvalGenotype(Genotype evalGt) {
|
||||||
|
this.evalGt = evalGt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SamplePreviousGenotypes {
|
||||||
|
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
|
||||||
|
|
||||||
|
public SamplePreviousGenotypes() {
|
||||||
|
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public CompEvalGenotypes get(String sample) {
|
||||||
|
return sampleGenotypes.get(sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void put(String sample, CompEvalGenotypes compEvalGts) {
|
||||||
|
sampleGenotypes.put(sample, compEvalGts);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) {
|
||||||
|
sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class PhaseStats {
|
||||||
|
public int neitherPhased;
|
||||||
|
public int onlyCompPhased;
|
||||||
|
public int onlyEvalPhased;
|
||||||
|
public int phasesAgree;
|
||||||
|
public int phasesDisagree;
|
||||||
|
|
||||||
|
public PhaseStats() {
|
||||||
|
this.neitherPhased = 0;
|
||||||
|
this.onlyCompPhased = 0;
|
||||||
|
this.onlyEvalPhased = 0;
|
||||||
|
this.phasesAgree = 0;
|
||||||
|
this.phasesDisagree = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree);
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String[] getFieldNamesArray() {
|
||||||
|
return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"};
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getField(int index) {
|
||||||
|
switch (index) {
|
||||||
|
case (0):
|
||||||
|
return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree);
|
||||||
|
case (1):
|
||||||
|
return neitherPhased;
|
||||||
|
case (2):
|
||||||
|
return onlyCompPhased;
|
||||||
|
case (3):
|
||||||
|
return onlyEvalPhased;
|
||||||
|
case (4):
|
||||||
|
return (phasesAgree + phasesDisagree);
|
||||||
|
case (5):
|
||||||
|
return phasesAgree;
|
||||||
|
case (6):
|
||||||
|
return phasesDisagree;
|
||||||
|
case (7):
|
||||||
|
return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree));
|
||||||
|
default:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a table of sample names to genotype phasing statistics
|
||||||
|
*/
|
||||||
|
class SamplePhasingStatistics implements TableType {
|
||||||
|
private HashMap<String, PhaseStats> sampleStats = null;
|
||||||
|
private double phaseQualityThresh;
|
||||||
|
|
||||||
|
public SamplePhasingStatistics(double phaseQualityThresh) {
|
||||||
|
this.sampleStats = new HashMap<String, PhaseStats>();
|
||||||
|
this.phaseQualityThresh = phaseQualityThresh;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PhaseStats ensureSampleStats(String samp) {
|
||||||
|
PhaseStats ps = sampleStats.get(samp);
|
||||||
|
if (ps == null) {
|
||||||
|
ps = new PhaseStats();
|
||||||
|
sampleStats.put(samp, ps);
|
||||||
|
}
|
||||||
|
return ps;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return one row per sample
|
||||||
|
*/
|
||||||
|
public String[] getRowKeys() {
|
||||||
|
return sampleStats.keySet().toArray(new String[sampleStats.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the column keys
|
||||||
|
*
|
||||||
|
* @return a list of objects, in this case strings, that are the column names
|
||||||
|
*/
|
||||||
|
public String[] getColumnKeys() {
|
||||||
|
return PhaseStats.getFieldNamesArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCell(int x, int y) {
|
||||||
|
String[] rowKeys = getRowKeys();
|
||||||
|
PhaseStats ps = sampleStats.get(rowKeys[x]);
|
||||||
|
return ps.getField(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Sample Phasing Statistics (for PQ >= " + phaseQualityThresh + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (Map.Entry<String, PhaseStats> sampPhaseStatsEnt : sampleStats.entrySet()) {
|
||||||
|
String sample = sampPhaseStatsEnt.getKey();
|
||||||
|
PhaseStats ps = sampPhaseStatsEnt.getValue();
|
||||||
|
|
||||||
|
sb.append(sample + "\t" + ps);
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Biallele {
|
||||||
|
private Allele top;
|
||||||
|
private Allele bottom;
|
||||||
|
|
||||||
|
public Biallele(Genotype gt) {
|
||||||
|
if (gt.getPloidy() != 2)
|
||||||
|
throw new StingException("Biallele must have ploidy of 2!");
|
||||||
|
|
||||||
|
this.top = gt.getAllele(0);
|
||||||
|
this.bottom = gt.getAllele(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Allele getTopAllele() {
|
||||||
|
return top;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Allele getBottomAllele() {
|
||||||
|
return bottom;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void swapAlleles() {
|
||||||
|
Allele tmp = top;
|
||||||
|
top = bottom;
|
||||||
|
bottom = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Allele> getAllelesAsList() {
|
||||||
|
List<Allele> allList = new ArrayList<Allele>(2);
|
||||||
|
allList.add(0, top);
|
||||||
|
allList.add(1, bottom);
|
||||||
|
return allList;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("Top:\t" + top.getBaseString() + "\n");
|
||||||
|
sb.append("Bot:\t" + bottom.getBaseString() + "\n");
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class BialleleSNP extends Biallele {
|
||||||
|
|
||||||
|
public BialleleSNP(Genotype gt) {
|
||||||
|
super(gt);
|
||||||
|
|
||||||
|
if (getTopAllele().getBases().length != 1)
|
||||||
|
throw new StingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
|
||||||
|
if (getBottomAllele().getBases().length != 1)
|
||||||
|
throw new StingException("LOGICAL ERROR: BialleleSNP may not contain non-SNP site!");
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte getTopBase() {
|
||||||
|
byte[] topBases = getTopAllele().getBases();
|
||||||
|
return getSingleBase(topBases);
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte getBottomBase() {
|
||||||
|
byte[] bottomBases = getBottomAllele().getBases();
|
||||||
|
return getSingleBase(bottomBases);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean matchesTopBase(byte base) {
|
||||||
|
return BaseUtils.basesAreEqual(base, getTopBase());
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte getOtherBase(byte base) {
|
||||||
|
byte topBase = getTopBase();
|
||||||
|
byte botBase = getBottomBase();
|
||||||
|
|
||||||
|
if (BaseUtils.basesAreEqual(base, topBase))
|
||||||
|
return botBase;
|
||||||
|
else if (BaseUtils.basesAreEqual(base, botBase))
|
||||||
|
return topBase;
|
||||||
|
else
|
||||||
|
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte getSingleBase(byte[] bases) {
|
||||||
|
return bases[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte getSingleBase(Allele all) {
|
||||||
|
return getSingleBase(all.getBases());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -134,7 +134,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
boolean takeFirstOnly = false; // take as many entries as the VCF file has
|
boolean takeFirstOnly = false; // take as many entries as the VCF file has
|
||||||
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
|
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
|
||||||
boolean processVariant = true;
|
boolean processVariant = true;
|
||||||
if (!vc.isSNP() || !vc.isBiallelic() || vc.isFiltered())
|
if (!isUnfilteredBiallelicSNP(vc))
|
||||||
processVariant = false;
|
processVariant = false;
|
||||||
|
|
||||||
VariantAndReads vr = new VariantAndReads(vc, context, processVariant);
|
VariantAndReads vr = new VariantAndReads(vc, context, processVariant);
|
||||||
|
|
@ -272,7 +272,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
String samp = sampGtEntry.getKey();
|
String samp = sampGtEntry.getKey();
|
||||||
Genotype gt = sampGtEntry.getValue();
|
Genotype gt = sampGtEntry.getValue();
|
||||||
Biallele biall = new Biallele(gt);
|
BialleleSNP biall = new BialleleSNP(gt);
|
||||||
HashMap<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
HashMap<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||||
|
|
||||||
if (gt.isHet()) {
|
if (gt.isHet()) {
|
||||||
|
|
@ -331,7 +331,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
PhaseResult pr = phaseSample(samp, sampleWindowVaList, phasingSiteIndex);
|
PhaseResult pr = phaseSample(samp, sampleWindowVaList, phasingSiteIndex);
|
||||||
genotypesArePhased = (pr.phaseQuality >= phaseQualityThresh);
|
genotypesArePhased = (pr.phaseQuality >= phaseQualityThresh);
|
||||||
if (genotypesArePhased) {
|
if (genotypesArePhased) {
|
||||||
Biallele prevBiall = new Biallele(prevGenotype);
|
BialleleSNP prevBiall = new BialleleSNP(prevGenotype);
|
||||||
|
|
||||||
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n");
|
logger.debug("THE PHASE PREVIOUSLY CHOSEN FOR PREVIOUS:\n" + prevBiall + "\n");
|
||||||
logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n");
|
logger.debug("THE PHASE CHOSEN HERE:\n" + biall + "\n\n");
|
||||||
|
|
@ -429,7 +429,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
Ensure that curBiall is phased relative to prevBiall as specified by hap.
|
Ensure that curBiall is phased relative to prevBiall as specified by hap.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public static void ensurePhasing(Biallele curBiall, Biallele prevBiall, Haplotype hap) {
|
public static void ensurePhasing(BialleleSNP curBiall, BialleleSNP prevBiall, Haplotype hap) {
|
||||||
if (hap.size() < 2)
|
if (hap.size() < 2)
|
||||||
throw new StingException("LOGICAL ERROR: Only considering haplotypes of length > 2!");
|
throw new StingException("LOGICAL ERROR: Only considering haplotypes of length > 2!");
|
||||||
|
|
||||||
|
|
@ -466,7 +466,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
byte refBase;
|
byte refBase;
|
||||||
if (!vc.isIndel()) {
|
if (!vc.isIndel()) {
|
||||||
Allele varAllele = vc.getReference();
|
Allele varAllele = vc.getReference();
|
||||||
refBase = getSingleBase(varAllele);
|
refBase = BialleleSNP.getSingleBase(varAllele);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
refBase = vc.getReferenceBaseForIndel();
|
refBase = vc.getReferenceBaseForIndel();
|
||||||
|
|
@ -532,92 +532,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
return reads;
|
return reads;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static byte getSingleBase(byte[] bases) {
|
|
||||||
return bases[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static byte getSingleBase(Allele all) {
|
|
||||||
return getSingleBase(all.getBases());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Inner classes:
|
Inner classes:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private static class Biallele {
|
|
||||||
public Allele top;
|
|
||||||
public Allele bottom;
|
|
||||||
|
|
||||||
public Biallele(Genotype gt) {
|
|
||||||
if (gt.getPloidy() != 2)
|
|
||||||
throw new StingException("Doesn't support phasing for ploidy that is not 2!");
|
|
||||||
|
|
||||||
this.top = gt.getAllele(0);
|
|
||||||
this.bottom = gt.getAllele(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void swapAlleles() {
|
|
||||||
Allele tmp = top;
|
|
||||||
top = bottom;
|
|
||||||
bottom = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Allele> getAllelesAsList() {
|
|
||||||
List<Allele> allList = new ArrayList<Allele>(2);
|
|
||||||
allList.add(0, top);
|
|
||||||
allList.add(1, bottom);
|
|
||||||
return allList;
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte getTopBase() {
|
|
||||||
byte[] topBases = top.getBases();
|
|
||||||
if (topBases.length != 1)
|
|
||||||
throw new StingException("LOGICAL ERROR: should not process non-SNP sites!");
|
|
||||||
|
|
||||||
return getSingleBase(topBases);
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte getBottomBase() {
|
|
||||||
byte[] bottomBases = bottom.getBases();
|
|
||||||
if (bottomBases.length != 1)
|
|
||||||
throw new StingException("LOGICAL ERROR: should not process non-SNP sites!");
|
|
||||||
|
|
||||||
return getSingleBase(bottomBases);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
sb.append("Top:\t" + top.getBaseString() + "\n");
|
|
||||||
sb.append("Bot:\t" + bottom.getBaseString() + "\n");
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean matchesTopBase(byte base) {
|
|
||||||
boolean matchesTop;
|
|
||||||
if (BaseUtils.basesAreEqual(base, getTopBase()))
|
|
||||||
matchesTop = true;
|
|
||||||
else if (BaseUtils.basesAreEqual(base, getBottomBase()))
|
|
||||||
matchesTop = false;
|
|
||||||
else
|
|
||||||
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
|
|
||||||
|
|
||||||
return matchesTop;
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte getOtherBase(byte base) {
|
|
||||||
byte topBase = getTopBase();
|
|
||||||
byte botBase = getBottomBase();
|
|
||||||
|
|
||||||
if (BaseUtils.basesAreEqual(base, topBase))
|
|
||||||
return botBase;
|
|
||||||
else if (BaseUtils.basesAreEqual(base, botBase))
|
|
||||||
return topBase;
|
|
||||||
else
|
|
||||||
throw new StingException("LOGICAL ERROR: base MUST match either TOP or BOTTOM!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class VariantAndReads {
|
private static class VariantAndReads {
|
||||||
public VariantContext variant;
|
public VariantContext variant;
|
||||||
public HashMap<String, ReadBasesAtPosition> sampleReadBases;
|
public HashMap<String, ReadBasesAtPosition> sampleReadBases;
|
||||||
|
|
@ -751,7 +669,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
byte[] hapBases = new byte[numSites];
|
byte[] hapBases = new byte[numSites];
|
||||||
for (int i = 0; i < numSites; i++) {
|
for (int i = 0; i < numSites; i++) {
|
||||||
Allele alleleI = genotypes[i].getAllele(alleleInds[i]);
|
Allele alleleI = genotypes[i].getAllele(alleleInds[i]);
|
||||||
hapBases[i] = getSingleBase(alleleI);
|
hapBases[i] = BialleleSNP.getSingleBase(alleleI);
|
||||||
}
|
}
|
||||||
allHaps.add(new Haplotype(hapBases));
|
allHaps.add(new Haplotype(hapBases));
|
||||||
}
|
}
|
||||||
|
|
@ -784,16 +702,16 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator {
|
private static class BiallelicComplementHaplotypeTableCreator extends HaplotypeTableCreator {
|
||||||
private Biallele[] bialleles;
|
private BialleleSNP[] bialleleSNPs;
|
||||||
private int startIndex;
|
private int startIndex;
|
||||||
private int marginalizeLength;
|
private int marginalizeLength;
|
||||||
|
|
||||||
public BiallelicComplementHaplotypeTableCreator(List<VariantAndReads> vaList, String sample, int startIndex, int marginalizeLength) {
|
public BiallelicComplementHaplotypeTableCreator(List<VariantAndReads> vaList, String sample, int startIndex, int marginalizeLength) {
|
||||||
super(vaList, sample);
|
super(vaList, sample);
|
||||||
|
|
||||||
this.bialleles = new Biallele[genotypes.length];
|
this.bialleleSNPs = new BialleleSNP[genotypes.length];
|
||||||
for (int i = 0; i < genotypes.length; i++)
|
for (int i = 0; i < genotypes.length; i++)
|
||||||
bialleles[i] = new Biallele(genotypes[i]);
|
bialleleSNPs[i] = new BialleleSNP(genotypes[i]);
|
||||||
|
|
||||||
this.startIndex = startIndex;
|
this.startIndex = startIndex;
|
||||||
this.marginalizeLength = marginalizeLength;
|
this.marginalizeLength = marginalizeLength;
|
||||||
|
|
@ -804,7 +722,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
PhasingTable table = new PhasingTable();
|
PhasingTable table = new PhasingTable();
|
||||||
for (Haplotype hap : getAllHaplotypes()) {
|
for (Haplotype hap : getAllHaplotypes()) {
|
||||||
if (bialleles[startIndex].matchesTopBase(hap.getBase(startIndex))) {
|
if (bialleleSNPs[startIndex].matchesTopBase(hap.getBase(startIndex))) {
|
||||||
/* hap is the "representative" haplotype [DEFINED here to be
|
/* hap is the "representative" haplotype [DEFINED here to be
|
||||||
the one with the top base at the startIndex position.
|
the one with the top base at the startIndex position.
|
||||||
NOTE that it is CRITICAL that this definition be consistent with the representative sub-haplotypes defined below!]
|
NOTE that it is CRITICAL that this definition be consistent with the representative sub-haplotypes defined below!]
|
||||||
|
|
@ -824,14 +742,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private Haplotype complement(Haplotype hap) {
|
private Haplotype complement(Haplotype hap) {
|
||||||
int numSites = bialleles.length;
|
int numSites = bialleleSNPs.length;
|
||||||
if (hap.size() != numSites)
|
if (hap.size() != numSites)
|
||||||
throw new StingException("INTERNAL ERROR: hap.size() != numSites");
|
throw new StingException("INTERNAL ERROR: hap.size() != numSites");
|
||||||
|
|
||||||
// Take the other base at EACH position of the Haplotype:
|
// Take the other base at EACH position of the Haplotype:
|
||||||
byte[] complementBases = new byte[numSites];
|
byte[] complementBases = new byte[numSites];
|
||||||
for (int i = 0; i < numSites; i++)
|
for (int i = 0; i < numSites; i++)
|
||||||
complementBases[i] = bialleles[i].getOtherBase(hap.getBase(i));
|
complementBases[i] = bialleleSNPs[i].getOtherBase(hap.getBase(i));
|
||||||
|
|
||||||
return new Haplotype(complementBases);
|
return new Haplotype(complementBases);
|
||||||
}
|
}
|
||||||
|
|
@ -931,6 +849,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
this.numReads = numReads;
|
this.numReads = numReads;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean isUnfilteredBiallelicSNP(VariantContext vc) {
|
||||||
|
return (vc.isSNP() && vc.isBiallelic() && !vc.isFiltered());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue