From 75d47366008acf0653e9aef3b950002777e5dff4 Mon Sep 17 00:00:00 2001 From: chartl Date: Wed, 23 Jun 2010 15:49:13 +0000 Subject: [PATCH] Committing changes to comp overlap for indels. Passes all integration tests; minor changes to MVC walker. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3618 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/varianteval/CompOverlap.java | 11 +- .../walkers/MendelianViolationClassifier.java | 68 +++--- .../walkers/OppositeHomozygoteClassifier.java | 198 ------------------ 3 files changed, 43 insertions(+), 234 deletions(-) delete mode 100644 java/src/org/broadinstitute/sting/oneoffprojects/walkers/OppositeHomozygoteClassifier.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CompOverlap.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CompOverlap.java index ade80e0f5..6ddf54106 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CompOverlap.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CompOverlap.java @@ -41,8 +41,11 @@ public class CompOverlap extends VariantEvaluator { @DataPoint(name = "% concordant", description = "the concordance rate") double concordantRate = 0.0; + private boolean expectingIndels = false; + public CompOverlap(VariantEvalWalker parent) { super(parent); + expectingIndels = parent.dels; } public String getName() { @@ -84,10 +87,10 @@ public class CompOverlap extends VariantEvaluator { } public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - boolean compIsGood = comp != null && comp.isSNP() && comp.isNotFiltered(); - boolean evalIsGood = eval != null && eval.isSNP(); + boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; + boolean evalIsGood = expectingIndels ? eval != null && eval.isIndel() : eval != null && eval.isSNP() ; - if (compIsGood) nCompSNPs++; // count the number of comp events + if ( compIsGood ) nCompSNPs++; // count the number of comp events if (evalIsGood) nEvalSNPs++; // count the number of eval events if (compIsGood && evalIsGood) { @@ -99,4 +102,6 @@ public class CompOverlap extends VariantEvaluator { return null; // we don't capture any interesting sites } + + } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java index 038097d93..7770533f0 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -48,10 +48,8 @@ public class MendelianViolationClassifier extends LocusWalker newAttributes; private HashMap homozygosityRegions; + private boolean filtered = false; public MendelianViolation(VariantContext context, MendelianViolationType violationType) { trio = context; @@ -250,6 +249,19 @@ public class MendelianViolationClassifier extends LocusWalker hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier")); + hInfo.add(new VCFHeaderLine("source", "MendelianViolationClassifier")); for ( MendelianInfoKey key : EnumSet.allOf(MendelianInfoKey.class) ) { hInfo.add( new VCFHeaderLine("INFO",key.toString())); } @@ -467,7 +468,8 @@ public class MendelianViolationClassifier extends LocusWalker= deNovoTriQ ) { - violation.type.filter(); + violation.filter(); } } } else { - violation.type.filter(); + violation.filter(); } return violation; @@ -501,29 +503,29 @@ public class MendelianViolationClassifier extends LocusWalker= opHomTriQ ) { - violation.type.filter(); + violation.filter(); } } - Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.child)); + Double childProp = getAlleleProportion(trio.getGenotype(trioStructure.child).getAllele(0),splitCon.get(trioStructure.child)); Double motherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.mom)); - Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.mom).getAllele(0),splitCon.get(trioStructure.dad)); + Double fatherProp = getAlleleProportion(trio.getGenotype(trioStructure.dad).getAllele(0),splitCon.get(trioStructure.dad)); if ( childProp != null ) { violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(),childProp); - if ( ! opHomChildRange.contains(childProp) ) { - violation.type.filter(); + if ( ! opHomRange.contains(childProp) ) { + violation.filter(); } } - if ( motherProp != null && ! opHomParentRange.contains(motherProp) ) { - violation.type.filter(); + if ( motherProp != null && ! opHomRange.contains(motherProp) ) { + violation.filter(); } - if ( fatherProp != null && ! opHomParentRange.contains(fatherProp) ) { - violation.type.filter(); + if ( fatherProp != null && ! opHomRange.contains(fatherProp) ) { + violation.filter(); } } else { - violation.type.filter(); + violation.filter(); } return violation; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/OppositeHomozygoteClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/OppositeHomozygoteClassifier.java deleted file mode 100644 index e92bf71f0..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/OppositeHomozygoteClassifier.java +++ /dev/null @@ -1,198 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers; - -import org.broad.tribble.vcf.VCFHeader; -import org.broad.tribble.vcf.VCFHeaderLine; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; -import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; - -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl - * - * @Author chartl - * @Date Jun 7, 2010 - */ -public class OppositeHomozygoteClassifier extends RodWalker { - @Argument(shortName="f",fullName="familyPattern",required=true,doc="Pattern for the family structure (usage: mom+dad=child)") - String familyStr = null; - - private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); - private TrioStructure trioStructure; /* holds sample names of mom dad child */ - private ArrayList contextBuffer; /* holds contexts until they're ready to be printed */ - private GenomeLoc homozygousRegionStartChild; /* start of the homozygous region for child */ - private int callsWithinHomozygousRegion; /* number of calls in the current homozygous child region */ - private int childHomozygousRegionCounter; /* holds number of child homozygous regions */ - - public void initialize() { - trioStructure = parseTrioDescription(familyStr); - homozygousRegionStartChild = null; - childHomozygousRegionCounter = 0; - callsWithinHomozygousRegion = 0; - contextBuffer = new ArrayList(500); - } - - public static class TrioStructure { - public String mom, dad, child; - } - - public static TrioStructure parseTrioDescription(String family) { - Matcher m = FAMILY_PATTERN.matcher(family); - if (m.matches()) { - TrioStructure trio = new TrioStructure(); - //System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE); - trio.mom = m.group(1); - trio.dad = m.group(2); - trio.child = m.group(3); - return trio; - } else { - throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child"); - } - } - - public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) { - return null; - } - - VariantContext trioVariants = tracker.getVariantContext(ref,"trio", EnumSet.allOf(VariantContext.Type.class), ref.getLocus(), true); - // for this to work we need mismatching parents, one a homozyote, and a child homozygote - - if ( trioVariants == null ) { - return null; - } - - //System.out.println(": "+trioVariants.getGenotype(trioStructure.mom)+" dad: "+trioVariants.getGenotype(trioStructure.dad)+" child: "+trioVariants.getGenotype(trioStructure.child)); - if ( isOppositeHomozygoteSite(trioVariants) && ! trioVariants.isFiltered()) { - // find out who the homozygote is in the parents - if ( trioVariants.getGenotype(trioStructure.mom).isHom() ) { - return assessVariant(trioStructure.mom,trioStructure.dad,trioStructure.child,trioVariants,ref,context); - } else if ( trioVariants.getGenotype(trioStructure.dad).isHom() ) { - return assessVariant(trioStructure.dad,trioStructure.mom,trioStructure.child,trioVariants,ref,context); - } - } - - return trioVariants; - } - - private boolean isOppositeHomozygoteSite(VariantContext trio) { - if ( trio.getGenotype(trioStructure.child).isHet() ) { // not valid at child het sites - return false; - } else if ( trio.getHetCount() > 1 ) { // child is not het, so if this is 2, mom and dad are both het, invalid - return false; - } else if ( trio.getGenotype(trioStructure.dad) == null || trio.getGenotype(trioStructure.mom) == null ) { - return false; - } - - return true; - } - - private VariantContext assessVariant(String homParent, String otherParent, String child, VariantContext variCon, ReferenceContext refCon, AlignmentContext aliCon) { - // see if the child matches the hom parent - HashMap attributes = new HashMap(variCon.getAttributes()); - //System.out.println(refCon.getLocus()+" homParent: "+variCon.getGenotype(homParent).getGenotypeString()+" otherParent: "+variCon.getGenotype(otherParent).getGenotypeString()+" child: "+variCon.getGenotype(child).getGenotypeString()); - //do child and hom parent NOT match genotypes? - if ( variCon.getGenotype(child).isHomRef() && variCon.getGenotype(homParent).isHomVar() || - variCon.getGenotype(child).isHomVar() && variCon.getGenotype(homParent).isHomRef() ) { - // check for genotyping error (other must be het, or opposite of first parent) - if ( variCon.getGenotype(otherParent).isHet() || variCon.getGenotype(otherParent).isHomRef() != variCon.getGenotype(homParent).isHomRef() ) { - attributes.put("opHom",homParent); - } else { - attributes.put("opHom","genotypeError"); - } - } else if ( variCon.getGenotype(otherParent).isHom() && variCon.getGenotype(otherParent).isHomRef() != variCon.getGenotype(homParent).isHomRef() ) { - // is other parent both homozygous and different? - attributes.put("opHom",otherParent); - } - // todo -- assessment of site based on alignment contest (tri allelic? etc) - return new VariantContext(variCon.getName(), variCon.getLocation(), variCon.getAlleles(), variCon.getGenotypes(), variCon.getNegLog10PError(), variCon.getFilters(), attributes); - } - - public VCFWriter reduceInit() { - VCFWriter writer = new VCFWriter(out); - Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("source", "OppositeHomozygoteClassifier")); - hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName())); - //hInfo.add(new VCFHeaderLine("opHom","Child tentatively inheritied the NULL ALLELE from this parent")); - // todo -- add info field annotation lines: "opHom", "CHR", "CHRS" - VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit())); - writer.writeHeader(vcfHeader); - - return writer; - } - - public VCFWriter reduce(VariantContext variCon, VCFWriter writer) { - if ( variCon == null ) { - return writer; - } - - if ( homozygosityRegionIsBroken(variCon) && contextBuffer.size() > 0 ) { - outputBufferedRecords(contextBuffer,variCon,writer); - } else if ( ! variCon.isFiltered() && ! variCon.getGenotype(trioStructure.child).isNoCall() ) { - callsWithinHomozygousRegion++; - } - - if ( variCon.hasAttribute("opHom") ) { - //writer.addRecord(VariantContextAdaptors.toVCF(variCon, variCon.getReference().getBases()[0])); - contextBuffer.add(variCon); - } - - if ( homozygousRegionStartChild == null ) { - if ( variCon.getGenotype(trioStructure.child).isHom() && ! variCon.isFiltered() ) { - homozygousRegionStartChild = variCon.getLocation(); - } - } - - return writer; - - } - - private boolean homozygosityRegionIsBroken(VariantContext context) { - // check to see if either the parent or child homozygosity regions have been broken - if ( homozygousRegionStartChild != null && context.getGenotype(trioStructure.child).isHet() && ! context.isFiltered() ) { - // NOTE: NO CALLS DO NOT BREAK REGIONS OF HOMOZYGOSITY - return true; - } - - return false; - } - - private void outputBufferedRecords(List bufCon, VariantContext varCon, VCFWriter writer) { - // the buffered contexts all share one feature -- come from the same child homozygosity region - String regionSize; - if ( varCon != null ) { - regionSize = Integer.toString(varCon.getLocation().distance(homozygousRegionStartChild)); - } else { - regionSize = "unknown"; - } - for ( VariantContext vc : bufCon ) { - HashMap attributes = new HashMap(vc.getAttributes()); - attributes.put("CHR",childHomozygousRegionCounter); - attributes.put("CHRS",regionSize); - attributes.put("CHRNCALL",callsWithinHomozygousRegion); - attributes.put("CHRNOPHOM",bufCon.size()); - VariantContext newVC = new VariantContext(vc.getName(),vc.getLocation(),vc.getAlleles(),vc.getGenotypes(),vc.getNegLog10PError(),vc.getFilters(),attributes); - writer.addRecord(VariantContextAdaptors.toVCF(newVC,vc.getReference().getBases()[0])); - } - childHomozygousRegionCounter++; - homozygousRegionStartChild = null; - callsWithinHomozygousRegion = 0; - bufCon.clear(); - } - - public void onTraversalDone(VCFWriter w) { - outputBufferedRecords(contextBuffer,null,w); - } -}