If asked, filters out triple-het situations too (which cannot be simply phased by transmission).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5829 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2011-05-20 18:48:19 +00:00
parent 3a2e32eef3
commit 3aa56037af
1 changed files with 54 additions and 23 deletions

View File

@ -19,15 +19,18 @@ import java.util.*;
* Given genotypes for a trio, phases child by transmission. Computes probability that the determined phase is correct * Given genotypes for a trio, phases child by transmission. Computes probability that the determined phase is correct
* given that the genotypes for mom and dad are correct (useful if you want to use this to compare phasing accuracy, but * given that the genotypes for mom and dad are correct (useful if you want to use this to compare phasing accuracy, but
* want to break that comparison down by phasing confidence in the truth set). Optionally filters out sites where the * want to break that comparison down by phasing confidence in the truth set). Optionally filters out sites where the
* phasing is indeterminate. * phasing is indeterminate. This walker assumes there are only three samples in the VCF file to begin with.
*/ */
public class PhaseByTransmission extends RodWalker<Integer, Integer> { public class PhaseByTransmission extends RodWalker<Integer, Integer> {
@Argument(shortName="f", fullName="familyPattern", required=true, doc="Pattern for the family structure (usage: mom+dad=child)") @Argument(shortName="f", fullName="familyPattern", required=true, doc="Pattern for the family structure (usage: mom+dad=child)")
public String familyStr = null; public String familyStr = null;
@Argument(shortName="filter", fullName="filterPhaseIndeterminateSites", required=false, doc="Filters out sites that are phase-indeterminate") @Argument(shortName="filter_ind", fullName="filterPhaseIndeterminateSites", required=false, doc="Filters out sites that are phase-indeterminate")
public Boolean filterPhaseIndeterminateSites = false; public Boolean filterPhaseIndeterminateSites = false;
@Argument(shortName="filter_amb", fullName="filterAmbiguousAlleleOriginSites", required=false, doc="Filters out sites where the parental origin of the alleles is ambiguous (i.e. triple-het situations)")
public Boolean filterAmbiguousAlleleOriginSites = false;
@Output @Output
protected VCFWriter vcfWriter = null; protected VCFWriter vcfWriter = null;
@ -35,6 +38,9 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
private String SAMPLE_NAME_DAD; private String SAMPLE_NAME_DAD;
private String SAMPLE_NAME_CHILD; private String SAMPLE_NAME_CHILD;
private class AmbiguousAlleleOriginException extends Exception {};
private class NoAppropriatePhasedGenotypeException extends Exception {};
/** /**
* Parse the familial relationship specification, and initialize VCF writer * Parse the familial relationship specification, and initialize VCF writer
*/ */
@ -62,13 +68,16 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
* @param mom mom's genotype * @param mom mom's genotype
* @param dad dad's genotype * @param dad dad's genotype
* @param child child's genotype * @param child child's genotype
* @return phased version of child's genotype, or null in the case that the phasing cannot be determined * @return phased version of child's genotype
* @throws AmbiguousAlleleOriginException if the parentage of the alleles can't be determined (i.e. a triple-het situation),
* @throws NoAppropriatePhasedGenotypeException if there is insufficient information to determine the phase
*/ */
private Genotype getPhasedChildGenotype(Genotype mom, Genotype dad, Genotype child) { private Genotype getPhasedChildGenotype(Genotype mom, Genotype dad, Genotype child) throws AmbiguousAlleleOriginException, NoAppropriatePhasedGenotypeException {
List<Allele> momAlleles = mom.getAlleles(); Set<Allele> momAlleles = new HashSet<Allele>();
List<Allele> dadAlleles = dad.getAlleles(); momAlleles.addAll(mom.getAlleles());
Set<Genotype> possiblePhasedChildGenotypes = new HashSet<Genotype>(); Set<Allele> dadAlleles = new HashSet<Allele>();
dadAlleles.addAll(dad.getAlleles());
double transmissionProb = QualityUtils.qualToProb(mom.getNegLog10PError())*QualityUtils.qualToProb(dad.getNegLog10PError()); double transmissionProb = QualityUtils.qualToProb(mom.getNegLog10PError())*QualityUtils.qualToProb(dad.getNegLog10PError());
@ -76,6 +85,8 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
attributes.putAll(child.getAttributes()); attributes.putAll(child.getAttributes());
attributes.put("TP", transmissionProb); attributes.put("TP", transmissionProb);
Set<Genotype> possiblePhasedChildGenotypes = new HashSet<Genotype>();
for (Allele momAllele : momAlleles) { for (Allele momAllele : momAlleles) {
for (Allele dadAllele : dadAlleles) { for (Allele dadAllele : dadAlleles) {
if (momAllele.isCalled() && dadAllele.isCalled()) { if (momAllele.isCalled() && dadAllele.isCalled()) {
@ -84,18 +95,36 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
possiblePhasedChildAlleles.add(dadAllele); possiblePhasedChildAlleles.add(dadAllele);
Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), null, attributes, true); Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), null, attributes, true);
possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype); possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype);
} }
} }
} }
Set<Genotype> ambiguousAlleleOriginGenotypes = new HashSet<Genotype>();
for (Genotype g1 : possiblePhasedChildGenotypes) {
for (Genotype g2 : possiblePhasedChildGenotypes) {
if (!g1.equals(g2)) {
if (g1.sameGenotype(g2, true)) {
ambiguousAlleleOriginGenotypes.add(g1);
ambiguousAlleleOriginGenotypes.add(g2);
}
}
}
}
for (Genotype possiblePhasedChildGenotype : possiblePhasedChildGenotypes) { for (Genotype possiblePhasedChildGenotype : possiblePhasedChildGenotypes) {
if (child.sameGenotype(possiblePhasedChildGenotype, true)) { if (child.sameGenotype(possiblePhasedChildGenotype, true)) {
if (ambiguousAlleleOriginGenotypes.contains(possiblePhasedChildGenotype)) {
throw new AmbiguousAlleleOriginException();
}
return possiblePhasedChildGenotype; return possiblePhasedChildGenotype;
} }
} }
return null; throw new NoAppropriatePhasedGenotypeException();
} }
/** /**
@ -116,29 +145,31 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
Genotype dad = vc.getGenotype(SAMPLE_NAME_DAD); Genotype dad = vc.getGenotype(SAMPLE_NAME_DAD);
Genotype child = vc.getGenotype(SAMPLE_NAME_CHILD); Genotype child = vc.getGenotype(SAMPLE_NAME_CHILD);
Genotype childPhased = getPhasedChildGenotype(mom, dad, child);
Collection<Genotype> phasedGenotypes = new ArrayList<Genotype>();
Collection<Allele> alleles = vc.getAlleles();
Set<String> filters = new HashSet<String>(); Set<String> filters = new HashSet<String>();
Map<String, Object> attributes = new HashMap<String, Object>();
phasedGenotypes.add(mom);
phasedGenotypes.add(dad);
filters.addAll(vc.getFilters()); filters.addAll(vc.getFilters());
attributes.putAll(vc.getAttributes());
if (childPhased == null) {
phasedGenotypes.add(child);
try {
child = getPhasedChildGenotype(mom, dad, child);
} catch (AmbiguousAlleleOriginException e) {
if (filterAmbiguousAlleleOriginSites) {
filters.add("AMBIGUOUS_ALLELE_ORIGIN");
}
} catch (NoAppropriatePhasedGenotypeException e) {
if (filterPhaseIndeterminateSites) { if (filterPhaseIndeterminateSites) {
filters.add("PHASE_INDETERMINATE"); filters.add("PHASE_INDETERMINATE");
} }
} else {
phasedGenotypes.add(childPhased);
} }
Map<String, Object> attributes = new HashMap<String, Object>();
attributes.putAll(vc.getAttributes());
Collection<Genotype> phasedGenotypes = new ArrayList<Genotype>();
Collection<Allele> alleles = vc.getAlleles();
phasedGenotypes.add(mom);
phasedGenotypes.add(dad);
phasedGenotypes.add(child);
VariantContext newvc = new VariantContext("PhaseByTransmission", ref.getLocus().getContig(), vc.getStart(), vc.getStart(), alleles, phasedGenotypes, vc.getNegLog10PError(), filters, attributes); VariantContext newvc = new VariantContext("PhaseByTransmission", ref.getLocus().getContig(), vc.getStart(), vc.getStart(), alleles, phasedGenotypes, vc.getNegLog10PError(), filters, attributes);
vcfWriter.add(newvc, ref.getBase()); vcfWriter.add(newvc, ref.getBase());