From c66861746a59340e9af37b04a40765f3b8275d91 Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 10 Feb 2010 16:12:29 +0000 Subject: [PATCH] improvements to ve2, including more meaningful mendelian violation counting. Support for VCF emitted interesting sites, annotated according to the evaluations themselves. Basic intergration test for VE2 started git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2819 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/refdata/RefMetaDataTracker.java | 19 ++- .../walkers/varianteval2/CountVariants.java | 6 +- .../walkers/varianteval2/DbSNPPercentage.java | 4 +- .../MendelianViolationEvaluator.java | 77 +++------- .../varianteval2/TiTvVariantEvaluator.java | 4 +- .../varianteval2/VariantEval2Walker.java | 144 +++++++++++++----- .../varianteval2/VariantEvaluator.java | 41 +++-- .../vcftools/BeagleTrioToVCFWalker.java | 12 +- .../VariantEval2IntegrationTest.java | 48 ++++++ 9 files changed, 238 insertions(+), 117 deletions(-) create mode 100755 java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index d67f8b23b..07e3c8a7d 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -247,11 +247,18 @@ public class RefMetaDataTracker { * @return */ public Collection getVariantContexts(String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - RODRecordList rodList = getTrackData(name, null); + return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly); + } + + public Collection getVariantContexts(Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { Collection contexts = new ArrayList(); - if ( rodList != null ) - addVariantContexts(contexts, rodList, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); + for ( String name : names ) { + RODRecordList rodList = getTrackData(name, null); + + if ( rodList != null ) + addVariantContexts(contexts, rodList, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); + } return contexts; } @@ -271,8 +278,10 @@ public class RefMetaDataTracker { if ( contexts.size() > 1 ) throw new StingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation); - - return contexts.iterator().next(); + else if ( contexts.size() == 0 ) + return null; + else + return contexts.iterator().next(); } private void addVariantContexts(Collection contexts, RODRecordList rodList, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java index 306532829..9503e04e1 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java @@ -43,11 +43,11 @@ public class CountVariants extends VariantEvaluator { return 1; // we only need to see each eval track } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nProcessedLoci += context.getSkippedBases() + 1; } - public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { //nProcessedLoci++; nCalledLoci++; @@ -70,6 +70,8 @@ public class CountVariants extends VariantEvaluator { default: throw new StingException("BUG: Unexpected genotype type: " + g); } } + + return null; // we don't capture any intersting sites } public String toString() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java index 10d0f23cb..d7ebe23cc 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java @@ -91,7 +91,7 @@ public class DbSNPPercentage extends VariantEvaluator { public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); } public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); } - public void update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered(); boolean evalIsGood = eval != null && eval.isSNP(); @@ -104,5 +104,7 @@ public class DbSNPPercentage extends VariantEvaluator { if ( ! discordantP(eval, dbsnp) ) // count whether we're concordant or not with the dbSNP value nConcordant++; } + + return null; // we don't capture any interesting sites } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java index 647a20826..1e9909ba3 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java @@ -40,10 +40,12 @@ import java.util.regex.Matcher; * Check that the child is one of these. */ public class MendelianViolationEvaluator extends VariantEvaluator { - long nVariants, nViolations, nOverCalls, nUnderCalls; + long nVariants, nViolations; TrioStructure trio; VariantEval2Walker parent; + long KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef; + private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); public static class TrioStructure { @@ -90,7 +92,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator { return 1; // we only need to see each eval track } - public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( vc.isBiallelic() && vc.hasGenotypes() ) { // todo -- currently limited to biallelic loci nVariants++; @@ -105,61 +107,34 @@ public class MendelianViolationEvaluator extends VariantEvaluator { nViolations++; String label = null; - switch ( getViolationType( vc, momG, dadG, childG ) ) { - case UNDER_CALL: - nUnderCalls++; - label = "under_called"; - break; - case OVER_CALL: - nOverCalls++; - label = "over_called"; - break; - default: - throw new StingException("BUG: unexpected violation type at " + vc); - + if ( childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar() )) { + label = "KidHomRef_ParentHomVar"; + KidHomRef_ParentHomVar++; + } else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef()) ) { + label = "KidHet_ParentsHomRef"; + KidHet_ParentsHomRef++; + } else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar()) ) { + label = "KidHet_ParentsHomVar"; + KidHet_ParentsHomVar++; + } else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) { + label = "KidHomVar_ParentHomRef"; + KidHomVar_ParentHomRef++; + } else { + throw new StingException("BUG: unexpected child genotype class " + childG); } - String why = String.format("Mendelian violation %s: at %s m=%s d=%s c=%s", label, vc.getLocation(), momG.toBriefString(), dadG.toBriefString(), childG.toBriefString()); - addInterestingSite(why , vc); + return label; } } } - } - /** - * Are we under or over calling? - * - * Assuming this is a bialleic locus, we then have 2 alleles A and B. There are really two types of violations: - * - * Undercall: where the child is A/A but parent genotypes imply that child must carry at least one B allele - * Overall: where the child carries a B allele but this B allele couldn't have been inherited from either parent - * - * The way to determine this is to look at mom and dad separately. If the child doesn't carry at least one - * allele from each parent, it's an under calls. Otherwise it's an overcall. - */ - public ViolationType getViolationType(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) { - switch ( childG.getType() ) { - case HOM_REF: - return ViolationType.UNDER_CALL; // if you have to undercalled as a hom ref child - case HET: - // the only two violations of a het is where both parents are hom. If they are hom ref, you overcalled, - // otherwise you undercalled - return momG.isHomRef() ? ViolationType.OVER_CALL : ViolationType.UNDER_CALL; - case HOM_VAR: - return ViolationType.OVER_CALL; // if you have to overcalled as a hom var child - default: - throw new StingException("BUG: unexpected child genotype class " + childG); - } + return null; // we don't capture any intersting sites } private boolean includeGenotype(Genotype g) { return g.getNegLog10PError() > getQThreshold() && g.isCalled(); } - private enum ViolationType { - UNDER_CALL, OVER_CALL - } - public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) { //VariantContext momVC = vc.subContextFromGenotypes(momG); //VariantContext dadVC = vc.subContextFromGenotypes(dadG); @@ -183,17 +158,11 @@ public class MendelianViolationEvaluator extends VariantEvaluator { } private String summaryLine() { - return String.format("%d %d %.2e %d %.2e %.2f %d %.2e %.2f", - nVariants, nViolations, rate(nViolations, nVariants), - nOverCalls, rate(nOverCalls, nVariants), ratio(nOverCalls, nViolations), - nUnderCalls, rate(nUnderCalls, nVariants), ratio(nUnderCalls, nViolations)); + return String.format("%d %d %d %d %d %d", nVariants, nViolations, KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef); } private static List HEADER = - Arrays.asList("nVariants", - "nViolations", "violationRate", - "nOverCalls", "overCallRate", "overCallFraction", - "nUnderCalls", "underCallRate", "underCallFraction"); + Arrays.asList("nVariants", "nViolations", "KidHomRef_ParentHomVar", "KidHet_ParentsHomRef", "KidHet_ParentsHomVar", "KidHomVar_ParentHomRef"); // making it a table public List getTableHeader() { @@ -203,4 +172,4 @@ public class MendelianViolationEvaluator extends VariantEvaluator { public List> getTableRows() { return Arrays.asList(Arrays.asList(summaryLine().split(" "))); } -} \ No newline at end of file +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java index a44280327..5a02baafd 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java @@ -36,9 +36,11 @@ public class TiTvVariantEvaluator extends VariantEvaluator { } } - public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( vc1 != null ) updateTiTv(vc1, false); if ( vc2 != null ) updateTiTv(vc2, true); + + return null; // we don't capture any intersting sites } public String toString() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java index 37c10b488..55174e0af 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java @@ -4,17 +4,25 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.apache.log4j.Logger; import java.util.*; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; +import java.io.File; +// todo -- evalations should support comment lines +// todo -- add Mendelian variable explanations (nDeNovo and nMissingTransmissions) + +// todo -- interesting sites should support VCF generation, so that FN, FP, DeNovo, etc calls get put into a single VCF and +// todo -- an explanation added to the INFO field as to why it showed up there. // // todo -- write a simple column table system and have the evaluators return this instead of the list> objects @@ -28,6 +36,12 @@ import java.lang.reflect.InvocationTargetException; // todo -- create JEXL context implementing object that simply looks up values for JEXL evaluations. Throws error for unknown fields // +// todo -- port over SNP density evaluator. + +// todo -- add subgroup of known variants as to those at hapmap sites [it's in the dbSNP record] + +// todo -- deal with performance issues with variant contexts + // // Todo -- should really include argument parsing @annotations from subclass in this walker. Very // todo -- useful general capability. Right now you need to add arguments to VariantEval2 to handle new @@ -36,7 +50,8 @@ import java.lang.reflect.InvocationTargetException; // // todo -- the whole organization only supports a single eval x comp evaluation. We need to instantiate -// todo -- new contexts for each comparison object too! +// todo -- new contexts for each comparison object too! The output table should be clear as to what the "comp" +// todo -- variable is in the analysis // // @@ -73,12 +88,9 @@ public class VariantEval2Walker extends RodWalker { @Argument(shortName="MVQ", fullName="MendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; - @Argument(shortName="PI", fullName="PrintInterestingSites", doc="If provided, interesting sites in the unselected, called set will be printed", required=false) - protected boolean PRINT_INTERESTING_SITES = false; + @Argument(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false) + protected String outputVCF = null; -// @Argument(shortName="PIA", fullName="PrintAllInterestingSites", doc="If provided, interesting sites will be printed for all sets. Verbose", required=false) -// protected boolean PRINT_ALL_INTERESTING_SITES = false; - // -------------------------------------------------------------------------------------------------------------- // // private walker data @@ -89,12 +101,14 @@ public class VariantEval2Walker extends RodWalker { private class EvaluationContext extends HashMap> { // useful for typing public String trackName, contextName; + public boolean enableInterestingSiteCaptures = false; VariantContextUtils.JexlVCMatchExp selectExp; - public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) { + public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp, boolean enableInterestingSiteCaptures) { this.trackName = trackName; this.contextName = contextName; this.selectExp = selectExp; + this.enableInterestingSiteCaptures = enableInterestingSiteCaptures; } } @@ -112,6 +126,10 @@ public class VariantEval2Walker extends RodWalker { private List variantEvaluationNames = new ArrayList(); private List> evaluationClasses = null; + /** output writer for interesting sites */ + private VCFWriter writer = null; + private boolean wroteHeader = false; + // -------------------------------------------------------------------------------------------------------------- // // initialize @@ -136,11 +154,14 @@ public class VariantEval2Walker extends RodWalker { } determineContextNamePartSizes(); + + if ( outputVCF != null ) + writer = new VCFWriter(new File(outputVCF)); } private void determineAllEvalations() { evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class); - for ( VariantEvaluator e : instantiateEvalationsSet(false, null) ) { + for ( VariantEvaluator e : instantiateEvalationsSet() ) { // for collecting purposes variantEvaluationNames.add(e.getName()); logger.debug("Including VariantEvaluator " + e.getName() + " of class " + e.getClass()); @@ -149,7 +170,7 @@ public class VariantEval2Walker extends RodWalker { Collections.sort(variantEvaluationNames); } - private Set instantiateEvalationsSet(boolean baseline, String name) { + private Set instantiateEvalationsSet() { Set evals = new HashSet(); Object[] args = new Object[]{this}; Class[] argTypes = new Class[]{this.getClass()}; @@ -158,7 +179,6 @@ public class VariantEval2Walker extends RodWalker { try { Constructor constructor = c.getConstructor(argTypes); VariantEvaluator eval = (VariantEvaluator)constructor.newInstance(args); - if ( baseline ) eval.printInterestingSites(name); evals.add(eval); } catch (InstantiationException e) { throw new StingException(String.format("Cannot instantiate class '%s': must be concrete class", c.getSimpleName())); @@ -175,18 +195,25 @@ public class VariantEval2Walker extends RodWalker { } private void addNewContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) { - EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp); + EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp, selectExp == null); for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) { for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) { String name = subname + "." + filteredName; - group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name)); + //System.out.printf("Creating group name: " + name); + group.put(name, instantiateEvalationsSet()); + //group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name)); } } contexts.put(contextName, group); } + private boolean captureInterestingSitesOfEvalSet(String name) { + //System.out.printf("checking %s%n", name); + return name.contains(ALL_SET_NAME + "." + RETAINED_SET_NAME); + } + // -------------------------------------------------------------------------------------------------------------- // // map @@ -199,17 +226,19 @@ public class VariantEval2Walker extends RodWalker { if ( ref == null ) return 0; - Map comps = getCompVariantContexts(tracker, context); + Collection comps = getCompVariantContexts(tracker, context); // to enable walking over pairs where eval or comps have no elements for ( EvaluationContext group : contexts.values() ) { - VariantContext vc = getVariantContext(group.trackName, tracker, context); + VariantContext vc = getEvalContext(group.trackName, tracker, context); //logger.debug(String.format("Updating %s of %s with variant", group.name, vc)); + for ( Map.Entry> namedEvaluations : group.entrySet() ) { String evaluationName = namedEvaluations.getKey(); Set evaluations = namedEvaluations.getValue(); boolean evalWantsVC = applyVCtoEvaluation(evaluationName, vc, comps, group); + List interestingReasons = new ArrayList(); for ( VariantEvaluator evaluation : evaluations ) { if ( evaluation.enabled() ) { @@ -219,12 +248,15 @@ public class VariantEval2Walker extends RodWalker { // now call the single or paired update function switch ( evaluation.getComparisonOrder() ) { case 1: - if ( evalWantsVC && vc != null ) - evaluation.update1(vc, tracker, ref, context); + if ( evalWantsVC && vc != null ) { + String interesting = evaluation.update1(vc, tracker, ref, context); + if ( interesting != null ) interestingReasons.add(interesting); + } break; case 2: - for ( VariantContext comp : comps.values() ) { - evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context); + for ( VariantContext comp : comps ) { + String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context); + if ( interesting != null ) interestingReasons.add(interesting); } break; default: @@ -232,16 +264,53 @@ public class VariantEval2Walker extends RodWalker { } } } + + if ( group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(evaluationName) ) + writeInterestingSite(interestingReasons, vc); } } return 0; } - private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Map comps, EvaluationContext group) { + private void writeInterestingSite(List interestingReasons, VariantContext vc) { + if ( writer != null && interestingReasons.size() > 0 ) { + MutableVariantContext mvc = new MutableVariantContext(vc); + + for ( String why : interestingReasons ) { + String key, value; + String[] parts = why.split("="); + + switch ( parts.length ) { + case 1: + key = parts[0]; + value = "1"; + break; + case 2: + key = parts[0]; + value = parts[1]; + break; + default: + throw new IllegalStateException("BUG: saw a interesting site reason sting with multiple = signs " + why); + } + + mvc.putAttribute(key, value); + } + + if ( ! wroteHeader ) { + writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc)); + wroteHeader = true; + } + + writer.addRecord(VariantContextAdaptors.toVCF(mvc)); + //interestingReasons.clear(); + } + } + + private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Collection comps, EvaluationContext group) { if ( vc == null ) return true; - + if ( evaluationName.contains(FILTERED_SET_NAME) && vc.isNotFiltered() ) return false; @@ -261,11 +330,15 @@ public class VariantEval2Walker extends RodWalker { return true; } - private boolean vcIsKnown(VariantContext vc, Map comps, String[] knownNames ) { - for ( String knownName : knownNames ) { - VariantContext known = comps.get(knownName); - if ( known != null && known.isNotFiltered() && known.getType() == vc.getType() ) - return true; + private boolean vcIsKnown(VariantContext vc, Collection comps, String[] knownNames ) { + for ( VariantContext comp : comps ) { + if ( comp.isNotFiltered() && comp.getType() == vc.getType() ) { + for ( String knownName : knownNames ) { + if ( comp.getName().equals(knownName) ) { + return true; + } + } + } } return false; @@ -278,20 +351,23 @@ public class VariantEval2Walker extends RodWalker { // //logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec)); - private Map getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) { - Map comps = new HashMap(); - - for ( String compName : compNames ) { - comps.put(compName, getVariantContext(compName, tracker, context)); - } + private Collection getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) { + // todo -- we need to deal with dbSNP where there can be multiple records at the same start site. A potential solution is to + // todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site + Collection comps = tracker.getVariantContexts(compNames, null, context.getLocation(), true, true); // todo -- remove me when the loop works correctly for comparisons of eval x comp for each comp if ( comps.size() > 1 ) throw new StingException("VariantEval2 currently only supports comparisons of N eval tracks vs. a single comparison track. Yes, I know..."); return comps; } - private VariantContext getVariantContext(String name, RefMetaDataTracker tracker, AlignmentContext context) { - return tracker.getVariantContext(name, null, context.getLocation(), true); + private VariantContext getEvalContext(String name, RefMetaDataTracker tracker, AlignmentContext context) { + Collection contexts = tracker.getVariantContexts(name, null, context.getLocation(), true, false); + + if ( context.size() > 1 ) + throw new StingException("Found multiple variant contexts at " + context.getLocation()); + + return contexts.size() == 1 ? contexts.iterator().next() : null; } // -------------------------------------------------------------------------------------------------------------- @@ -338,7 +414,7 @@ public class VariantEval2Walker extends RodWalker { } } } - + private String formatKeyword(String keyWord) { //System.out.printf("keyword %s%n", keyWord); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java index 2e9cfd41a..f12f05692 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java @@ -16,24 +16,25 @@ import java.util.ArrayList; * To change this template use File | Settings | File Templates. */ abstract class VariantEvaluator { - protected boolean accumulateInterestingSites = false, printInterestingSites = false; - protected String interestingSitePrefix = null; +// protected boolean accumulateInterestingSites = false, printInterestingSites = false; +// protected String interestingSitePrefix = null; protected boolean processedASite = false; - protected List interestingSites = new ArrayList(); +// protected List interestingSites = new ArrayList(); public abstract String getName(); // do we want to keep track of things that are interesting - public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; } - public void printInterestingSites(String prefix) { printInterestingSites = true; interestingSitePrefix = prefix; } - public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; } - public List getInterestingSites() { return interestingSites; } - protected void addInterestingSite(String why, VariantContext vc) { - if ( accumulateInterestingSites ) - interestingSites.add(vc); - if ( printInterestingSites ) - System.out.printf("%40s %s%n", interestingSitePrefix, why); - } +// public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; } +// public void printInterestingSites(String prefix) { printInterestingSites = true; interestingSitePrefix = prefix; } +// public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; } +// public List getInterestingSites() { return interestingSites; } + +// protected void addInterestingSite(String why, VariantContext vc) { +// if ( accumulateInterestingSites ) +// interestingSites.add(vc); +// if ( printInterestingSites ) +// System.out.printf("%40s %s%n", interestingSitePrefix, why); +// } public abstract boolean enabled(); //public boolean processedAnySites() { return processedASite; } @@ -43,9 +44,17 @@ abstract class VariantEvaluator { public abstract int getComparisonOrder(); /** called at all sites, regardless of eval context itself; useful for counting processed bases */ - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } - public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } - public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + + } + + public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return null; + } + + public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return null; + } public void finalize() {} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/BeagleTrioToVCFWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/BeagleTrioToVCFWalker.java index 15c590e71..4eb287e88 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/BeagleTrioToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/BeagleTrioToVCFWalker.java @@ -35,6 +35,8 @@ public class BeagleTrioToVCFWalker extends RodWalker { @Argument(shortName="eth", fullName="excludeTripleHets", doc="If provide, sites that are triple hets calls will not be phased, regardless of Beagle's value", required=false) protected boolean dontPhaseTripleHets = false; + int nTripletHets = 0; + private MendelianViolationEvaluator.TrioStructure trio = null; private VCFWriter writer; @@ -81,8 +83,10 @@ public class BeagleTrioToVCFWalker extends RodWalker { Genotype unphasedDad = unphased.getGenotype(trio.dad); Genotype unphasedKid = unphased.getGenotype(trio.child); - if ( dontPhaseTripleHets && unphasedMom.isHet() && unphasedDad.isHet() && unphasedKid.isHet() ) + if ( dontPhaseTripleHets && unphasedMom.isHet() && unphasedDad.isHet() && unphasedKid.isHet() ) { + nTripletHets++; return unphased; + } else { Allele momTrans = unphased.getAllele(momBgl.get(0)); Allele momUntrans = unphased.getAllele(momBgl.get(1)); @@ -111,8 +115,8 @@ public class BeagleTrioToVCFWalker extends RodWalker { return sum; } - public void onTraversalDone(Integer result) { - //logger.info(String.format("Saw %d raw SNPs", result.nVariants)); + public void onTraversalDone(Long result) { + logger.info(String.format("Ignored phasing of %d het/het/het genotypes", nTripletHets)); //logger.info(String.format("Converted %d (%.2f%%) of these sites", result.nConverted, (100.0 * result.nConverted) / result.nVariants)); } @@ -126,4 +130,4 @@ public class BeagleTrioToVCFWalker extends RodWalker { return prevReduce; } -} \ No newline at end of file +} diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java new file mode 100755 index 000000000..904357413 --- /dev/null +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java @@ -0,0 +1,48 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; +import java.util.Arrays; +import java.util.List; +import java.io.File; + +public class VariantEval2IntegrationTest extends WalkerTest { + private static String cmdRoot = "-T VariantEval2" + + " -R " + oneKGLocation + "reference/human_b36_both.fasta"; + + private static String root = cmdRoot + + " -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " -B eval,VCF,/humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; + + static HashMap expectations = new HashMap(); + static { + expectations.put("-L 1:1-10,000,000", "e7dba09b1856b9be86816939596a5062"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "c170c672ca2ef86068cc5dee9aaac022"); + } + + @Test + public void testVE2Simple() { + + for ( Map.Entry entry : expectations.entrySet() ) { + String extraArgs = entry.getKey(); + String md5 = entry.getValue(); + + WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testVE2Simple", spec); + } + } + + @Test + public void testVE2WriteVCF() { + String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; + WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", + 2, + Arrays.asList("c53d7638df2d7440dee1fd274d1f6384", "9ec81f7389c0971e44e4b8d2d4af3008")); + executeTest("testVE2WriteVCF", spec); + } +} \ No newline at end of file