Reasonable first pass at a correct SB calculation. Simple utilities to support it. VariantsToTable no longer prints filtered sites by default. New non-standard variant eval module to print comp sites not present in eval (FN finder)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4601 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-10-31 12:41:52 +00:00
parent 30fae5cf18
commit 23cb399a88
5 changed files with 110 additions and 16 deletions

View File

@ -0,0 +1,67 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.utils.report.tags.Analysis;
import org.broadinstitute.sting.utils.report.tags.DataPoint;
@Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites")
public class PrintMissingComp extends VariantEvaluator {
@DataPoint(name = "evals not at comp", description = "number of eval sites outside of comp sites")
long nMissing = 0;
public PrintMissingComp(VariantEvalWalker parent) {
super(parent);
}
public String getName() {
return "PrintMissingComp";
}
public int getComparisonOrder() {
return 2; // we need to see each eval track and each comp track
}
public boolean enabled() {
return true;
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP();
boolean evalIsGood = eval != null && eval.isSNP();
if ( compIsGood & ! evalIsGood ) {
nMissing++;
return "MissingFrom" + comp.getSource();
} else {
return null;
}
}
}

View File

@ -218,6 +218,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName = "minPhaseQuality", shortName = "minPQ", doc = "The minimum phasing quality (PQ) score required to consider phasing; [default:0]", required = false)
protected Double minPhaseQuality = 0.0; // accept any positive value of PQ
@Argument(shortName="min", fullName="minimalComparisons", doc="If passed, filters and raw site values won't be computed", required=false)
protected boolean MINIMAL = false;
// --------------------------------------------------------------------------------------------------------------
//
// private walker data
@ -451,11 +455,14 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// honor specifications of just one or a few samples), and put an "all" in here so
// that we don't lose multi-sample evaluations
List<String> filterTypes = MINIMAL ? Arrays.asList(RETAINED_SET_NAME) : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME);
selectExps = append(selectExps, null);
for ( String evalName : evalNames ) {
for ( String compName : compNames ) {
for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) {
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
for ( String filteredName : filterTypes ) {
for ( String novelty : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
EvaluationContext context = new EvaluationContext(evalName, compName, novelty, filteredName, e);
contexts.add(context);
@ -516,6 +523,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
//logger.debug(String.format("Updating %s with variant", vc));
Set<VariantEvaluator> evaluations = group.evaluations;
boolean evalWantsVC = applyVCtoEvaluation(vc, vcs, group);
VariantContext interestingVC = vc;
List<String> interestingReasons = new ArrayList<String>();
for ( VariantEvaluator evaluation : evaluations ) {
@ -558,7 +566,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
**/
if ( interesting != null ) interestingReasons.add(interesting);
if ( interesting != null ) {
interestingVC = interestingVC == null ? ( vc == null ? comp : vc ) : interestingVC;
interestingReasons.add(interesting);
}
break;
default:
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
@ -568,7 +579,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
if ( tracker != null && group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) )
writeInterestingSite(interestingReasons, vc, ref.getBase());
writeInterestingSite(interestingReasons, interestingVC, ref.getBase());
}
return 0;

View File

@ -53,6 +53,9 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
@Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
public String FIELDS;
@Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records")
public boolean showFiltered = false;
@Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false)
public int MAX_RECORDS = -1;
int nRecords = 0;
@ -115,7 +118,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) {
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
for ( VariantContext vc : vcs) {
if ( ! ignoreMultiAllelic || vc.isBiallelic() ) {
if ( ! ignoreMultiAllelic || vc.isBiallelic() || ( !showFiltered || !vc.isFiltered() ) ) {
List<String> vals = new ArrayList<String>();
for ( String field : fieldsToTake ) {

View File

@ -228,37 +228,39 @@ public class UnifiedGenotyperEngine {
if ( !UAC.NO_SLOD ) {
final boolean DEBUG_SLOD = false;
// the overall lod
double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
double lod = overallLog10PofF - overallLog10PofNull;
//System.out.println("overallLog10PofNull=" + overallLog10PofNull + ", overallLog10PofF=" + overallLog10PofF);
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod
GLs.clear();
glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD, genotypePriors, GLs);
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess);
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double forwardLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
//System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
double forwardLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
GLs.clear();
glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE, genotypePriors, GLs);
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess);
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double reverseLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
//System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double reverseLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull;
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull;
//System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
// strand score is max bias between forward and reverse strands
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
double strandScore = Math.max(forwardLod, reverseLod);
// rescale by a factor of 10
strandScore *= 10.0;
//logger.debug(String.format("SLOD=%f", strandScore));

View File

@ -58,6 +58,17 @@ public class MathUtils {
return s;
}
public static double log10sum(double[] log10p, int start) {
double sum = 0.0;
double maxValue = Utils.findMaxEntry(log10p);
for ( int i = start; i < log10p.length; i++ ) {
sum += Math.pow(10.0, log10p[i] - maxValue);
}
return Math.log10(sum) + maxValue;
}
public static double sum(List<Double> values) {
double s = 0.0;
for ( double v : values) s += v;