If asked, filters out triple-het situations too (which cannot be simply phased by transmission).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5829 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3a2e32eef3
commit
3aa56037af
|
|
@ -19,15 +19,18 @@ import java.util.*;
|
||||||
* Given genotypes for a trio, phases child by transmission. Computes probability that the determined phase is correct
|
* Given genotypes for a trio, phases child by transmission. Computes probability that the determined phase is correct
|
||||||
* given that the genotypes for mom and dad are correct (useful if you want to use this to compare phasing accuracy, but
|
* given that the genotypes for mom and dad are correct (useful if you want to use this to compare phasing accuracy, but
|
||||||
* want to break that comparison down by phasing confidence in the truth set). Optionally filters out sites where the
|
* want to break that comparison down by phasing confidence in the truth set). Optionally filters out sites where the
|
||||||
* phasing is indeterminate.
|
* phasing is indeterminate. This walker assumes there are only three samples in the VCF file to begin with.
|
||||||
*/
|
*/
|
||||||
public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
||||||
@Argument(shortName="f", fullName="familyPattern", required=true, doc="Pattern for the family structure (usage: mom+dad=child)")
|
@Argument(shortName="f", fullName="familyPattern", required=true, doc="Pattern for the family structure (usage: mom+dad=child)")
|
||||||
public String familyStr = null;
|
public String familyStr = null;
|
||||||
|
|
||||||
@Argument(shortName="filter", fullName="filterPhaseIndeterminateSites", required=false, doc="Filters out sites that are phase-indeterminate")
|
@Argument(shortName="filter_ind", fullName="filterPhaseIndeterminateSites", required=false, doc="Filters out sites that are phase-indeterminate")
|
||||||
public Boolean filterPhaseIndeterminateSites = false;
|
public Boolean filterPhaseIndeterminateSites = false;
|
||||||
|
|
||||||
|
@Argument(shortName="filter_amb", fullName="filterAmbiguousAlleleOriginSites", required=false, doc="Filters out sites where the parental origin of the alleles is ambiguous (i.e. triple-het situations)")
|
||||||
|
public Boolean filterAmbiguousAlleleOriginSites = false;
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
protected VCFWriter vcfWriter = null;
|
protected VCFWriter vcfWriter = null;
|
||||||
|
|
||||||
|
|
@ -35,6 +38,9 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
||||||
private String SAMPLE_NAME_DAD;
|
private String SAMPLE_NAME_DAD;
|
||||||
private String SAMPLE_NAME_CHILD;
|
private String SAMPLE_NAME_CHILD;
|
||||||
|
|
||||||
|
private class AmbiguousAlleleOriginException extends Exception {};
|
||||||
|
private class NoAppropriatePhasedGenotypeException extends Exception {};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse the familial relationship specification, and initialize VCF writer
|
* Parse the familial relationship specification, and initialize VCF writer
|
||||||
*/
|
*/
|
||||||
|
|
@ -62,13 +68,16 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
||||||
* @param mom mom's genotype
|
* @param mom mom's genotype
|
||||||
* @param dad dad's genotype
|
* @param dad dad's genotype
|
||||||
* @param child child's genotype
|
* @param child child's genotype
|
||||||
* @return phased version of child's genotype, or null in the case that the phasing cannot be determined
|
* @return phased version of child's genotype
|
||||||
|
* @throws AmbiguousAlleleOriginException if the parentage of the alleles can't be determined (i.e. a triple-het situation),
|
||||||
|
* @throws NoAppropriatePhasedGenotypeException if there is insufficient information to determine the phase
|
||||||
*/
|
*/
|
||||||
private Genotype getPhasedChildGenotype(Genotype mom, Genotype dad, Genotype child) {
|
private Genotype getPhasedChildGenotype(Genotype mom, Genotype dad, Genotype child) throws AmbiguousAlleleOriginException, NoAppropriatePhasedGenotypeException {
|
||||||
List<Allele> momAlleles = mom.getAlleles();
|
Set<Allele> momAlleles = new HashSet<Allele>();
|
||||||
List<Allele> dadAlleles = dad.getAlleles();
|
momAlleles.addAll(mom.getAlleles());
|
||||||
|
|
||||||
Set<Genotype> possiblePhasedChildGenotypes = new HashSet<Genotype>();
|
Set<Allele> dadAlleles = new HashSet<Allele>();
|
||||||
|
dadAlleles.addAll(dad.getAlleles());
|
||||||
|
|
||||||
double transmissionProb = QualityUtils.qualToProb(mom.getNegLog10PError())*QualityUtils.qualToProb(dad.getNegLog10PError());
|
double transmissionProb = QualityUtils.qualToProb(mom.getNegLog10PError())*QualityUtils.qualToProb(dad.getNegLog10PError());
|
||||||
|
|
||||||
|
|
@ -76,6 +85,8 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
||||||
attributes.putAll(child.getAttributes());
|
attributes.putAll(child.getAttributes());
|
||||||
attributes.put("TP", transmissionProb);
|
attributes.put("TP", transmissionProb);
|
||||||
|
|
||||||
|
Set<Genotype> possiblePhasedChildGenotypes = new HashSet<Genotype>();
|
||||||
|
|
||||||
for (Allele momAllele : momAlleles) {
|
for (Allele momAllele : momAlleles) {
|
||||||
for (Allele dadAllele : dadAlleles) {
|
for (Allele dadAllele : dadAlleles) {
|
||||||
if (momAllele.isCalled() && dadAllele.isCalled()) {
|
if (momAllele.isCalled() && dadAllele.isCalled()) {
|
||||||
|
|
@ -84,18 +95,36 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
||||||
possiblePhasedChildAlleles.add(dadAllele);
|
possiblePhasedChildAlleles.add(dadAllele);
|
||||||
|
|
||||||
Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), null, attributes, true);
|
Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), null, attributes, true);
|
||||||
|
|
||||||
possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype);
|
possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Set<Genotype> ambiguousAlleleOriginGenotypes = new HashSet<Genotype>();
|
||||||
|
|
||||||
|
for (Genotype g1 : possiblePhasedChildGenotypes) {
|
||||||
|
for (Genotype g2 : possiblePhasedChildGenotypes) {
|
||||||
|
if (!g1.equals(g2)) {
|
||||||
|
if (g1.sameGenotype(g2, true)) {
|
||||||
|
ambiguousAlleleOriginGenotypes.add(g1);
|
||||||
|
ambiguousAlleleOriginGenotypes.add(g2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (Genotype possiblePhasedChildGenotype : possiblePhasedChildGenotypes) {
|
for (Genotype possiblePhasedChildGenotype : possiblePhasedChildGenotypes) {
|
||||||
if (child.sameGenotype(possiblePhasedChildGenotype, true)) {
|
if (child.sameGenotype(possiblePhasedChildGenotype, true)) {
|
||||||
|
if (ambiguousAlleleOriginGenotypes.contains(possiblePhasedChildGenotype)) {
|
||||||
|
throw new AmbiguousAlleleOriginException();
|
||||||
|
}
|
||||||
|
|
||||||
return possiblePhasedChildGenotype;
|
return possiblePhasedChildGenotype;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
throw new NoAppropriatePhasedGenotypeException();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -116,29 +145,31 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
||||||
Genotype dad = vc.getGenotype(SAMPLE_NAME_DAD);
|
Genotype dad = vc.getGenotype(SAMPLE_NAME_DAD);
|
||||||
Genotype child = vc.getGenotype(SAMPLE_NAME_CHILD);
|
Genotype child = vc.getGenotype(SAMPLE_NAME_CHILD);
|
||||||
|
|
||||||
Genotype childPhased = getPhasedChildGenotype(mom, dad, child);
|
|
||||||
|
|
||||||
Collection<Genotype> phasedGenotypes = new ArrayList<Genotype>();
|
|
||||||
Collection<Allele> alleles = vc.getAlleles();
|
|
||||||
Set<String> filters = new HashSet<String>();
|
Set<String> filters = new HashSet<String>();
|
||||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
|
||||||
|
|
||||||
phasedGenotypes.add(mom);
|
|
||||||
phasedGenotypes.add(dad);
|
|
||||||
|
|
||||||
filters.addAll(vc.getFilters());
|
filters.addAll(vc.getFilters());
|
||||||
attributes.putAll(vc.getAttributes());
|
|
||||||
|
|
||||||
if (childPhased == null) {
|
|
||||||
phasedGenotypes.add(child);
|
|
||||||
|
|
||||||
|
try {
|
||||||
|
child = getPhasedChildGenotype(mom, dad, child);
|
||||||
|
} catch (AmbiguousAlleleOriginException e) {
|
||||||
|
if (filterAmbiguousAlleleOriginSites) {
|
||||||
|
filters.add("AMBIGUOUS_ALLELE_ORIGIN");
|
||||||
|
}
|
||||||
|
} catch (NoAppropriatePhasedGenotypeException e) {
|
||||||
if (filterPhaseIndeterminateSites) {
|
if (filterPhaseIndeterminateSites) {
|
||||||
filters.add("PHASE_INDETERMINATE");
|
filters.add("PHASE_INDETERMINATE");
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
phasedGenotypes.add(childPhased);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||||
|
attributes.putAll(vc.getAttributes());
|
||||||
|
|
||||||
|
Collection<Genotype> phasedGenotypes = new ArrayList<Genotype>();
|
||||||
|
Collection<Allele> alleles = vc.getAlleles();
|
||||||
|
|
||||||
|
phasedGenotypes.add(mom);
|
||||||
|
phasedGenotypes.add(dad);
|
||||||
|
phasedGenotypes.add(child);
|
||||||
|
|
||||||
VariantContext newvc = new VariantContext("PhaseByTransmission", ref.getLocus().getContig(), vc.getStart(), vc.getStart(), alleles, phasedGenotypes, vc.getNegLog10PError(), filters, attributes);
|
VariantContext newvc = new VariantContext("PhaseByTransmission", ref.getLocus().getContig(), vc.getStart(), vc.getStart(), alleles, phasedGenotypes, vc.getNegLog10PError(), filters, attributes);
|
||||||
|
|
||||||
vcfWriter.add(newvc, ref.getBase());
|
vcfWriter.add(newvc, ref.getBase());
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue