diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PrintMissingComp.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PrintMissingComp.java new file mode 100644 index 000000000..7d186c7a0 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PrintMissingComp.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.utils.report.tags.Analysis; +import org.broadinstitute.sting.utils.report.tags.DataPoint; + +@Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites") +public class PrintMissingComp extends VariantEvaluator { + @DataPoint(name = "evals not at comp", description = "number of eval sites outside of comp sites") + long nMissing = 0; + + public PrintMissingComp(VariantEvalWalker parent) { + super(parent); + } + + public String getName() { + return "PrintMissingComp"; + } + + public int getComparisonOrder() { + return 2; // we need to see each eval track and each comp track + } + + public boolean enabled() { + return true; + } + + + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP(); + boolean evalIsGood = eval != null && eval.isSNP(); + + if ( compIsGood & ! evalIsGood ) { + nMissing++; + return "MissingFrom" + comp.getSource(); + } else { + return null; + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 738483b25..5bc147725 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -218,6 +218,10 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName = "minPhaseQuality", shortName = "minPQ", doc = "The minimum phasing quality (PQ) score required to consider phasing; [default:0]", required = false) protected Double minPhaseQuality = 0.0; // accept any positive value of PQ + @Argument(shortName="min", fullName="minimalComparisons", doc="If passed, filters and raw site values won't be computed", required=false) + protected boolean MINIMAL = false; + + // -------------------------------------------------------------------------------------------------------------- // // private walker data @@ -451,11 +455,14 @@ public class VariantEvalWalker extends RodWalker implements Tr // honor specifications of just one or a few samples), and put an "all" in here so // that we don't lose multi-sample evaluations + List filterTypes = MINIMAL ? Arrays.asList(RETAINED_SET_NAME) : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME); + + selectExps = append(selectExps, null); for ( String evalName : evalNames ) { for ( String compName : compNames ) { for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) { - for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) { + for ( String filteredName : filterTypes ) { for ( String novelty : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) { EvaluationContext context = new EvaluationContext(evalName, compName, novelty, filteredName, e); contexts.add(context); @@ -516,6 +523,7 @@ public class VariantEvalWalker extends RodWalker implements Tr //logger.debug(String.format("Updating %s with variant", vc)); Set evaluations = group.evaluations; boolean evalWantsVC = applyVCtoEvaluation(vc, vcs, group); + VariantContext interestingVC = vc; List interestingReasons = new ArrayList(); for ( VariantEvaluator evaluation : evaluations ) { @@ -558,7 +566,10 @@ public class VariantEvalWalker extends RodWalker implements Tr **/ - if ( interesting != null ) interestingReasons.add(interesting); + if ( interesting != null ) { + interestingVC = interestingVC == null ? ( vc == null ? comp : vc ) : interestingVC; + interestingReasons.add(interesting); + } break; default: throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation); @@ -568,7 +579,7 @@ public class VariantEvalWalker extends RodWalker implements Tr } if ( tracker != null && group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) ) - writeInterestingSite(interestingReasons, vc, ref.getBase()); + writeInterestingSite(interestingReasons, interestingVC, ref.getBase()); } return 0; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index c4ecd3580..f36f390df 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -53,6 +53,9 @@ public class VariantsToTable extends RodWalker { @Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true) public String FIELDS; + @Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records") + public boolean showFiltered = false; + @Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false) public int MAX_RECORDS = -1; int nRecords = 0; @@ -115,7 +118,7 @@ public class VariantsToTable extends RodWalker { if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); for ( VariantContext vc : vcs) { - if ( ! ignoreMultiAllelic || vc.isBiallelic() ) { + if ( ! ignoreMultiAllelic || vc.isBiallelic() || ( !showFiltered || !vc.isFiltered() ) ) { List vals = new ArrayList(); for ( String field : fieldsToTake ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index f984741a9..b5b464e65 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -228,37 +228,39 @@ public class UnifiedGenotyperEngine { if ( !UAC.NO_SLOD ) { + final boolean DEBUG_SLOD = false; // the overall lod - double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double overallLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess]; - double lod = overallLog10PofF - overallLog10PofNull; - //System.out.println("overallLog10PofNull=" + overallLog10PofNull + ", overallLog10PofF=" + overallLog10PofF); + //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; + double overallLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1); + if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod GLs.clear(); glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD, genotypePriors, GLs); clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess); + //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double forwardLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess]; - //System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); + double forwardLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1); + if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod GLs.clear(); glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE, genotypePriors, GLs); clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess); + //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double reverseLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess]; - //System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); + double reverseLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1); + if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); - double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull; - double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull; - //System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; + double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF; + if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); // strand score is max bias between forward and reverse strands - double strandScore = Math.max(forwardLod - lod, reverseLod - lod); + double strandScore = Math.max(forwardLod, reverseLod); // rescale by a factor of 10 strandScore *= 10.0; //logger.debug(String.format("SLOD=%f", strandScore)); diff --git a/java/src/org/broadinstitute/sting/utils/MathUtils.java b/java/src/org/broadinstitute/sting/utils/MathUtils.java index ee6897de8..c00146762 100755 --- a/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -58,6 +58,17 @@ public class MathUtils { return s; } + public static double log10sum(double[] log10p, int start) { + double sum = 0.0; + + double maxValue = Utils.findMaxEntry(log10p); + for ( int i = start; i < log10p.length; i++ ) { + sum += Math.pow(10.0, log10p[i] - maxValue); + } + + return Math.log10(sum) + maxValue; + } + public static double sum(List values) { double s = 0.0; for ( double v : values) s += v;