diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 0f2d73c3a..f5092f5cd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -60,14 +60,16 @@ import java.util.*; @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) public class UnifiedGenotyper extends LocusWalker implements TreeReducible, AnnotatorCompatibleWalker { - @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); + @ArgumentCollection + private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); /** * A dbSNP VCF file from which to annotate. * * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. */ - @ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + @ArgumentCollection + protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); public RodBinding getDbsnpRodBinding() { return dbsnp.dbsnp; } public RodBinding getSnpEffRodBinding() { return null; } public List> getCompRodBindings() { return Collections.emptyList(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index c26729ed3..29dd38c0b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -27,7 +27,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -46,6 +45,15 @@ public class VariantEvalWalker extends RodWalker implements Tr @Output protected PrintStream out; + @Input(fullName="eval", shortName = "eval", doc="Input evaluation file(s)", required=true) + public List> evals; + + @Input(fullName="comp", shortName = "comp", doc="Input comparison file(s)", required=false) + public List> comps = Collections.emptyList(); + + @ArgumentCollection + protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + // Help arguments @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit") protected Boolean LIST = false; @@ -61,7 +69,7 @@ public class VariantEvalWalker extends RodWalker implements Tr protected Set SAMPLE_EXPRESSIONS; @Argument(shortName="knownName", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) - protected String[] KNOWN_NAMES = {DbSNPHelper.STANDARD_DBSNP_TRACK_NAME}; + protected String[] KNOWN_NAMES = {dbsnp.dbsnp.getName()}; // Stratification arguments @Argument(fullName="stratificationModule", shortName="ST", doc="One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)", required=false) @@ -115,6 +123,10 @@ public class VariantEvalWalker extends RodWalker implements Tr // The set of all possible evaluation contexts private HashMap evaluationContexts = null; + // important stratifications + private boolean byFilterIsEnabled = false; + private boolean perSampleIsEnabled = false; + // Output report private GATKReport report = null; @@ -134,27 +146,21 @@ public class VariantEvalWalker extends RodWalker implements Tr // Just list the modules, and exit quickly. if (LIST) { variantEvalUtils.listModulesAndExit(); } - // Categorize each rod as an eval or a comp rod. - for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { - if ( d.getName().startsWith("eval") ) { - evalNames.add(d.getName()); - } else if ( d.getName().startsWith("comp") || d.getName().startsWith(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { - compNames.add(d.getName()); - } else { - logger.info(String.format("Not evaluating ROD binding '%s' because the name did not start with %s, comp, or eval", d.getName(), Utils.join(", ", KNOWN_NAMES))); - } - } - - // Barf if we don't have any eval tracks. - if (evalNames.size() == 0) { - throw new UserException("No evaluation tracks were specified. Please bind one or more callsets to evaluate using the -B argument with a trackname that starts with the word 'eval'."); - } - // Add a dummy comp track if none exists - if (compNames.size() == 0) { - compNames.add("none"); + if ( comps.size() == 0 ) { + comps.add(new RodBinding(VariantContext.class, "none", "UNBOUND", "", new Tags())); } + // Cache the rod names + for ( RodBinding compRod : comps ) + compNames.add(compRod.getName()); + + for ( RodBinding evalRod : evals ) + evalNames.add(evalRod.getName()); + + if ( dbsnp.dbsnp.isBound() ) + compNames.add(dbsnp.dbsnp.getName()); + // Set up set of known names knownNames.addAll(Arrays.asList(KNOWN_NAMES)); @@ -190,6 +196,12 @@ public class VariantEvalWalker extends RodWalker implements Tr // Initialize the set of stratifications and evaluations to use stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); Set> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); + for ( VariantStratifier vs : getStratificationObjects() ) { + if ( vs.getClass().getSimpleName().equals("Filter") ) + byFilterIsEnabled = true; + else if ( vs.getClass().getSimpleName().equals("Sample") ) + perSampleIsEnabled = true; + } // Initialize the evaluation contexts evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); @@ -221,15 +233,68 @@ public class VariantEvalWalker extends RodWalker implements Tr if (tracker != null) { String aastr = (ancestralAlignments == null) ? null : new String(ancestralAlignments.getSubsequenceAt(ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()).getBases()); - // track sample vc - HashMap> vcs = variantEvalUtils.getVariantContexts(tracker, ref, compNames, evalNames, typesToUse != null); + // --------- track --------- sample - VariantContexts - + HashMap, HashMap>> evalVCs = variantEvalUtils.bindVariantContexts(tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled); + HashMap, HashMap>> compVCs = variantEvalUtils.bindVariantContexts(tracker, ref, comps, byFilterIsEnabled, false, false); - for ( String compName : compNames ) { - VariantContext comp = vcs.containsKey(compName) && vcs.get(compName) != null && vcs.get(compName).containsKey(ALL_SAMPLE_NAME) ? vcs.get(compName).get(ALL_SAMPLE_NAME) : null; + // for each eval track + for ( final RodBinding evalRod : evals ) { + final HashMap> evalSet = evalVCs.containsKey(evalRod) ? evalVCs.get(evalRod) : new HashMap>(0); - for ( String evalName : evalNames ) { - for ( String sampleName : sampleNamesForStratification ) { - VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null; + // for each sample stratifier + for ( final String sampleName : sampleNamesForStratification ) { + Set evalSetBySample = evalSet.get(sampleName); + if ( evalSetBySample == null ) { + evalSetBySample = new HashSet(1); + evalSetBySample.add(null); + } + + // for each eval in the track + for ( VariantContext eval : evalSetBySample ) { + // deal with ancestral alleles if requested + if ( eval != null && aastr != null ) { + HashMap newAts = new HashMap(eval.getAttributes()); + newAts.put("ANCESTRALALLELE", aastr); + eval = VariantContext.modifyAttributes(eval, newAts); + } + + // for each comp track + for ( final RodBinding compRod : comps ) { + // no sample stratification for comps + final Set compSet = compVCs.get(compRod) == null ? new HashSet(0) : compVCs.get(compRod).values().iterator().next(); + + // find the comp + final VariantContext comp = findMatchingComp(eval, compSet); + + HashMap> stateMap = new HashMap>(); + for ( VariantStratifier vs : stratificationObjects ) { + ArrayList states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); + stateMap.put(vs, states); + } + + ArrayList stateKeys = new ArrayList(); + variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys); + + HashSet stateKeysHash = new HashSet(stateKeys); + + for ( StateKey stateKey : stateKeysHash ) { + NewEvaluationContext nec = evaluationContexts.get(stateKey); + + // eval against the comp + synchronized (nec) { + nec.apply(tracker, ref, context, comp, eval); + } + + // eval=null against all comps of different type + for ( VariantContext otherComp : compSet ) { + if ( otherComp != comp ) { + synchronized (nec) { + nec.apply(tracker, ref, context, otherComp, null); + } + } + } + } + } // todo: Eric, this is really the problem. We select single eval and comp VCs independently // todo: discarding multiple eval tracks at the sites and not providing matched comps @@ -247,37 +312,6 @@ public class VariantEvalWalker extends RodWalker implements Tr // todo: like subset to sample, etc. So you probably will want a master map that maps // todo: from special eval bindings to the digested VC for efficiency. - if ( typesToUse != null ) { - if ( eval != null && ! typesToUse.contains(eval.getType()) ) eval = null; - if ( comp != null && ! typesToUse.contains(comp.getType()) ) comp = null; -// if ( eval != null ) logger.info("Keeping " + eval); - } - - if (eval != null && aastr != null) { - HashMap newAts = new HashMap(eval.getAttributes()); - newAts.put("ANCESTRALALLELE", aastr); - - eval = VariantContext.modifyAttributes(eval, newAts); - } - - HashMap> stateMap = new HashMap>(); - for ( VariantStratifier vs : stratificationObjects ) { - ArrayList states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName); - stateMap.put(vs, states); - } - - ArrayList stateKeys = new ArrayList(); - variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys); - - HashSet stateKeysHash = new HashSet(stateKeys); - - for ( StateKey stateKey : stateKeysHash ) { - NewEvaluationContext nec = evaluationContexts.get(stateKey); - - synchronized (nec) { - nec.apply(tracker, ref, context, comp, eval); - } - } } } } @@ -286,6 +320,25 @@ public class VariantEvalWalker extends RodWalker implements Tr return null; } + private VariantContext findMatchingComp(final VariantContext eval, final Set comps) { + // if no comps, return null + if ( comps == null || comps.isEmpty() ) + return null; + + // if no eval, return any comp + if ( eval == null ) + return comps.iterator().next(); + + // find a matching comp + for ( VariantContext comp : comps ) { + if ( comp.getType() == eval.getType() ) + return comp; + } + + // if no matching comp, return null + return null; + } + public Integer treeReduce(Integer lhs, Integer rhs) { return null; } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 61a959c99..17bb70c00 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; @@ -56,8 +57,9 @@ public class VariantEvalUtils { /** * Initialize required, standard and user-specified stratification objects * - * @param noStandardStrats don't use the standard stratifications - * @param modulesToUse the list of stratification modules to use + * @param variantEvalWalker the parent walker + * @param noStandardStrats don't use the standard stratifications + * @param modulesToUse the list of stratification modules to use * @return set of stratifications to use */ public TreeSet initializeStratificationObjects(VariantEvalWalker variantEvalWalker, boolean noStandardStrats, String[] modulesToUse) { @@ -256,23 +258,6 @@ public class VariantEvalUtils { return report; } - /** - * Figure out what the allowable variation types are based on the eval context - * - * @param tracker the reference metadata tracker - * @param ref the reference context - * @param compNames the comp track names - * @param evalNames the evaluation track names - * @return the set of allowable variation types - */ - public EnumSet getAllowableVariationTypes(RefMetaDataTracker tracker, - ReferenceContext ref, - Set compNames, - Set evalNames, - boolean dynamicSelectTypes ) { - return EnumSet.allOf(VariantContext.Type.class); - } - /** * Subset a VariantContext to a single sample * @@ -321,78 +306,59 @@ public class VariantEvalUtils { * * @param tracker the metadata tracker * @param ref the reference context - * @param trackNames the list of track names to process - * @param allowableTypes a set of allowable variation types + * @param tracks the list of tracks to process * @param byFilter if false, only accept PASSing VariantContexts. Otherwise, accept both PASSing and filtered * sites * @param subsetBySample if false, do not separate the track into per-sample VCs * @param trackPerSample if false, don't stratify per sample (and don't cut up the VariantContext like we would need * to do this) - * @return a mapping of track names to a list of VariantContext objects + * + * @return the mapping of track to VC list that should be populated */ - protected void bindVariantContexts(HashMap> bindings, RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, EnumSet allowableTypes, boolean byFilter, boolean subsetBySample, boolean trackPerSample) { - for (String trackName : trackNames) { - HashMap vcs = new HashMap(); + public HashMap, HashMap>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, List> tracks, boolean byFilter, boolean subsetBySample, boolean trackPerSample) { + if ( tracker == null ) + return null; - VariantContext vc = tracker == null ? null : tracker.getFirstValue(VariantContext.class, trackName, ref.getLocus()); + HashMap, HashMap>> bindings = new HashMap, HashMap>>(); - // First, filter the VariantContext to represent only the samples for evaluation - if (vc != null) { + for ( RodBinding track : tracks ) { + HashMap> mapping = new HashMap>(); + + for ( VariantContext vc : tracker.getValues(track, ref.getLocus()) ) { + + // First, filter the VariantContext to represent only the samples for evaluation VariantContext vcsub = vc; - if (subsetBySample && vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation())) { + if ( subsetBySample && vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation()) ) { vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation()); } - if ((byFilter || !vcsub.isFiltered())) { - vcs.put(VariantEvalWalker.getAllSampleName(), vcsub); + if ( (byFilter || !vcsub.isFiltered()) ) { + addMapping(mapping, VariantEvalWalker.getAllSampleName(), vcsub); } // Now, if stratifying, split the subsetted vc per sample and add each as a new context - if (vc.hasGenotypes() && trackPerSample) { - for (String sampleName : variantEvalWalker.getSampleNamesForEvaluation()) { + if ( vc.hasGenotypes() && trackPerSample ) { + for ( String sampleName : variantEvalWalker.getSampleNamesForEvaluation() ) { VariantContext samplevc = getSubsetOfVariantContext(vc, sampleName); - if ((byFilter || !samplevc.isFiltered())) { - vcs.put(sampleName, samplevc); + if ( byFilter || !samplevc.isFiltered() ) { + addMapping(mapping, sampleName, samplevc); } } } - bindings.put(trackName, vcs); + bindings.put(track, mapping); } } + + return bindings; } - /** - * Maps track names to sample name to VariantContext objects. For eval tracks, VariantContexts per specified sample - * are also included. - * - * @param tracker the metadata tracker - * @param ref the reference context - * @param compNames the list of comp names to process - * @param evalNames the list of eval names to process - * @return a mapping of track names to a list of VariantContext objects - */ - public HashMap> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set compNames, Set evalNames, boolean dynamicSelectTypes) { - HashMap> vcs = new HashMap>(); - - EnumSet allowableTypes = getAllowableVariationTypes(tracker, ref, compNames, evalNames, dynamicSelectTypes); - - boolean byFilter = false; - boolean perSampleIsEnabled = false; - for (VariantStratifier vs : variantEvalWalker.getStratificationObjects()) { - if (vs.getClass().getSimpleName().equals("Filter")) { - byFilter = true; - } else if (vs.getClass().getSimpleName().equals("Sample")) { - perSampleIsEnabled = true; - } - } - - bindVariantContexts(vcs, tracker, ref, evalNames, allowableTypes, byFilter, true, perSampleIsEnabled); - bindVariantContexts(vcs, tracker, ref, compNames, allowableTypes, byFilter, false, false); - - return vcs; + private void addMapping(HashMap> mappings, String sample, VariantContext vc) { + if ( !mappings.containsKey(sample) ) + mappings.put(sample, new HashSet()); + mappings.get(sample).add(vc); } /**