From 3aa56037af03a2f817a480b186f90b62242754a3 Mon Sep 17 00:00:00 2001 From: kiran Date: Fri, 20 May 2011 18:48:19 +0000 Subject: [PATCH] If asked, filters out triple-het situations too (which cannot be simply phased by transmission). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5829 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/phasing/PhaseByTransmission.java | 77 +++++++++++++------ 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhaseByTransmission.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhaseByTransmission.java index 7226b38a5..3f193733b 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhaseByTransmission.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/PhaseByTransmission.java @@ -19,15 +19,18 @@ import java.util.*; * Given genotypes for a trio, phases child by transmission. Computes probability that the determined phase is correct * given that the genotypes for mom and dad are correct (useful if you want to use this to compare phasing accuracy, but * want to break that comparison down by phasing confidence in the truth set). Optionally filters out sites where the - * phasing is indeterminate. + * phasing is indeterminate. This walker assumes there are only three samples in the VCF file to begin with. */ public class PhaseByTransmission extends RodWalker { @Argument(shortName="f", fullName="familyPattern", required=true, doc="Pattern for the family structure (usage: mom+dad=child)") public String familyStr = null; - @Argument(shortName="filter", fullName="filterPhaseIndeterminateSites", required=false, doc="Filters out sites that are phase-indeterminate") + @Argument(shortName="filter_ind", fullName="filterPhaseIndeterminateSites", required=false, doc="Filters out sites that are phase-indeterminate") public Boolean filterPhaseIndeterminateSites = false; + @Argument(shortName="filter_amb", fullName="filterAmbiguousAlleleOriginSites", required=false, doc="Filters out sites where the parental origin of the alleles is ambiguous (i.e. triple-het situations)") + public Boolean filterAmbiguousAlleleOriginSites = false; + @Output protected VCFWriter vcfWriter = null; @@ -35,6 +38,9 @@ public class PhaseByTransmission extends RodWalker { private String SAMPLE_NAME_DAD; private String SAMPLE_NAME_CHILD; + private class AmbiguousAlleleOriginException extends Exception {}; + private class NoAppropriatePhasedGenotypeException extends Exception {}; + /** * Parse the familial relationship specification, and initialize VCF writer */ @@ -62,13 +68,16 @@ public class PhaseByTransmission extends RodWalker { * @param mom mom's genotype * @param dad dad's genotype * @param child child's genotype - * @return phased version of child's genotype, or null in the case that the phasing cannot be determined + * @return phased version of child's genotype + * @throws AmbiguousAlleleOriginException if the parentage of the alleles can't be determined (i.e. a triple-het situation), + * @throws NoAppropriatePhasedGenotypeException if there is insufficient information to determine the phase */ - private Genotype getPhasedChildGenotype(Genotype mom, Genotype dad, Genotype child) { - List momAlleles = mom.getAlleles(); - List dadAlleles = dad.getAlleles(); + private Genotype getPhasedChildGenotype(Genotype mom, Genotype dad, Genotype child) throws AmbiguousAlleleOriginException, NoAppropriatePhasedGenotypeException { + Set momAlleles = new HashSet(); + momAlleles.addAll(mom.getAlleles()); - Set possiblePhasedChildGenotypes = new HashSet(); + Set dadAlleles = new HashSet(); + dadAlleles.addAll(dad.getAlleles()); double transmissionProb = QualityUtils.qualToProb(mom.getNegLog10PError())*QualityUtils.qualToProb(dad.getNegLog10PError()); @@ -76,6 +85,8 @@ public class PhaseByTransmission extends RodWalker { attributes.putAll(child.getAttributes()); attributes.put("TP", transmissionProb); + Set possiblePhasedChildGenotypes = new HashSet(); + for (Allele momAllele : momAlleles) { for (Allele dadAllele : dadAlleles) { if (momAllele.isCalled() && dadAllele.isCalled()) { @@ -84,18 +95,36 @@ public class PhaseByTransmission extends RodWalker { possiblePhasedChildAlleles.add(dadAllele); Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), null, attributes, true); + possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype); } } } + Set ambiguousAlleleOriginGenotypes = new HashSet(); + + for (Genotype g1 : possiblePhasedChildGenotypes) { + for (Genotype g2 : possiblePhasedChildGenotypes) { + if (!g1.equals(g2)) { + if (g1.sameGenotype(g2, true)) { + ambiguousAlleleOriginGenotypes.add(g1); + ambiguousAlleleOriginGenotypes.add(g2); + } + } + } + } + for (Genotype possiblePhasedChildGenotype : possiblePhasedChildGenotypes) { if (child.sameGenotype(possiblePhasedChildGenotype, true)) { + if (ambiguousAlleleOriginGenotypes.contains(possiblePhasedChildGenotype)) { + throw new AmbiguousAlleleOriginException(); + } + return possiblePhasedChildGenotype; } } - return null; + throw new NoAppropriatePhasedGenotypeException(); } /** @@ -116,29 +145,31 @@ public class PhaseByTransmission extends RodWalker { Genotype dad = vc.getGenotype(SAMPLE_NAME_DAD); Genotype child = vc.getGenotype(SAMPLE_NAME_CHILD); - Genotype childPhased = getPhasedChildGenotype(mom, dad, child); - - Collection phasedGenotypes = new ArrayList(); - Collection alleles = vc.getAlleles(); Set filters = new HashSet(); - Map attributes = new HashMap(); - - phasedGenotypes.add(mom); - phasedGenotypes.add(dad); - filters.addAll(vc.getFilters()); - attributes.putAll(vc.getAttributes()); - - if (childPhased == null) { - phasedGenotypes.add(child); + try { + child = getPhasedChildGenotype(mom, dad, child); + } catch (AmbiguousAlleleOriginException e) { + if (filterAmbiguousAlleleOriginSites) { + filters.add("AMBIGUOUS_ALLELE_ORIGIN"); + } + } catch (NoAppropriatePhasedGenotypeException e) { if (filterPhaseIndeterminateSites) { filters.add("PHASE_INDETERMINATE"); } - } else { - phasedGenotypes.add(childPhased); } + Map attributes = new HashMap(); + attributes.putAll(vc.getAttributes()); + + Collection phasedGenotypes = new ArrayList(); + Collection alleles = vc.getAlleles(); + + phasedGenotypes.add(mom); + phasedGenotypes.add(dad); + phasedGenotypes.add(child); + VariantContext newvc = new VariantContext("PhaseByTransmission", ref.getLocus().getContig(), vc.getStart(), vc.getStart(), alleles, phasedGenotypes, vc.getNegLog10PError(), filters, attributes); vcfWriter.add(newvc, ref.getBase());