From f44d8b150f0e45fb7da570b983be4b6042770687 Mon Sep 17 00:00:00 2001 From: chartl Date: Fri, 18 Jun 2010 19:32:24 +0000 Subject: [PATCH] Mendelian Violation Classifier now filters violations on the fly via command line arguments; and closes unterminated homozygous regions at the end of a chromosome (so we see arms falling off in the file, rather than in the log) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3592 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/MendelianViolationClassifier.java | 199 ++++++++++++++---- 1 file changed, 162 insertions(+), 37 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java index ff38d17fd..c4a102817 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -17,8 +17,10 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -40,6 +42,17 @@ public class MendelianViolationClassifier extends LocusWalker0.6") + String deNovoParentalAllele = "-0.1-1.1"; + @Argument(fullName="oppositeHomozygoteTriAllelicQ",required=false,doc="Cutoff for quality scores of 3rd allele at opposite homozygote sites to remove it from the violation set") + int opHomTriQ = 20; + @Argument(fullName="oppositeHomozygoteParentAllele",required=false,doc="Range for the parental allele in the parents at opposite homozygote sites for it to be kept in violation set") + String opHomParentAllele = "-0.1-1.1"; + @Argument(fullName="oppositeHomozygoteChildAllele",required=false,doc="Range for the parental allele in the child at opposite homozygote sites for it to be kept in violation set") + String opHomChildAllele = "-0.1-1.1"; + /* *********** PRIVATE CLASSES @@ -91,7 +104,7 @@ public class MendelianViolationClassifier extends LocusWalker lower-epsilon && p < upper+epsilon; + } + } + /* *************** PRIVATE ENUMS */ @@ -245,13 +284,22 @@ public class MendelianViolationClassifier extends LocusWalker= 10 && e.getMappingQual() >= 10 ) { - total++; - if ( e.getBase() == parental.getBases()[0] ) { - numParental++; - } - } + + Map splitContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup()); + Double proportion = getAlleleProportion(parental,splitContext.get(trioStructure.child)); + if ( proportion != null ) { + violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(), proportion); + if ( ! deNovoRange.contains(proportion) ) { + violation.type.filter(); } - violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(), ((double) numParental)/total); } + + Pair triAl = getTriAllelicQuality(tracker,ref,trio,splitContext); + if ( triAl != null ) { + violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString()); + violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second); + if ( triAl.second >= deNovoTriQ ) { + violation.type.filter(); + } + } + + } else { + violation.type.filter(); } + return violation; } @@ -431,32 +494,85 @@ public class MendelianViolationClassifier extends LocusWalker stratCon = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup()); - int conf = 0; - Allele alt = null; - for ( Map.Entry stratifiedEntry : stratCon.entrySet() ) { - VariantCallContext call = engine.runGenotyper(tracker,ref,stratifiedEntry.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE)); - if ( call != null && call.confidentlyCalled && call.vc != null) { - if ( call.vc.isSNP() ) { - if ( ! call.vc.getAlternateAllele(0).basesMatch(trio.getAlternateAllele(0)) ) { - if ( alt == null ) { - alt = call.vc.getAlternateAllele(0); - conf = (int) Math.floor(10*call.vc.getNegLog10PError()); - } else { - conf += (int) Math.floor(10*call.vc.getNegLog10PError()); - } + if ( ! trio.isFiltered() ) { + Map splitCon = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup()); + Pair triAl = getTriAllelicQuality(tracker, ref, trio, splitCon); + if ( triAl != null ) { + violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString()); + violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),triAl.second); + if ( triAl.second >= opHomTriQ ) { + violation.type.filter(); + } + } + + Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.child)); + Double motherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.mom)); + Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.dad)); + if ( childProp != null ) { + violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(),childProp); + if ( ! opHomChildRange.contains(childProp) ) { + violation.type.filter(); + } + } + + if ( motherProp != null && ! opHomParentRange.contains(motherProp) ) { + violation.type.filter(); + } + + if ( fatherProp != null && ! opHomParentRange.contains(fatherProp) ) { + violation.type.filter(); + } + } else { + violation.type.filter(); + } + + return violation; + } + + private Double getAlleleProportion(Allele a, StratifiedAlignmentContext context) { + int numParental = 0; + int total = 0; + if ( context != null ) { + for ( PileupElement e : context.getPileupElements(StratifiedAlignmentContext.StratifiedContextType.COMPLETE)) { + if ( e.getQual() >= 10 && e.getMappingQual() >= 10 ) { + total++; + if ( e.getBase() == a.getBases()[0]) { + numParental ++; + } + } + } + return ( (double) numParental )/total; + } else { + return null; + } + + + } + + private Pair getTriAllelicQuality(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext var, Map strat) { + int conf = 0; + Allele alt = null; + for ( Map.Entry sEntry : strat.entrySet() ) { + VariantCallContext call = engine.runGenotyper(tracker, ref, sEntry.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE)); + if ( call != null && call.confidentlyCalled && call.vc != null ) { + if ( call.vc.isSNP() ) { + if ( ! call.vc.getAlternateAllele(0).basesMatch(var.getAlternateAllele(0))) { + if ( alt == null ) { + alt = call.vc.getAlternateAllele(0); + conf = (int) Math.floor(10*call.vc.getNegLog10PError()); + } else { + conf += (int) Math.floor(10*call.vc.getNegLog10PError()); } } } } - if ( alt != null ) { - violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),alt.toString()); - violation.addAttribute(MendelianInfoKey.TriAllelicQuality.getKey(),conf); - } } - return violation; + if ( alt == null ) { + return null; + } else { + return new Pair(alt,conf); + } } private String getParentalDeletion(VariantContext trio) { @@ -494,13 +610,22 @@ public class MendelianViolationClassifier extends LocusWalker regions = trioStructure.homozygousRegions; Map counts = trioStructure.homozygousRegionCounts; + List to_print = new ArrayList(3); for ( Map.Entry entryRegion : regions.entrySet() ) { if ( entryRegion.getValue() != null ) { logger.info("---------------- REGION NOT FINALIZED -----------------"); logger.info(String.format("%s,%s,%s,%d,%d",entryRegion.getKey(),entryRegion.getValue().regionStart,entryRegion.getValue().lastSeen, entryRegion.getValue().deNovoSNPsInRegion,entryRegion.getValue().oppositeHomsInRegion)); + int chr_end = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(entryRegion.getValue().getContigStr()).getSequenceLength(); + entryRegion.getValue().endedBy = GenomeLocParser.createGenomeLoc(entryRegion.getValue().getContigStr(),chr_end,chr_end); + to_print.add(entryRegion.getValue()); } } + + Collections.sort(to_print); + for ( HomozygosityRegion hr : to_print ) { + bedOutput.printf("%s%n",hr); + } } /*