Reasonable first pass at a correct SB calculation. Simple utilities to support it. VariantsToTable no longer prints filtered sites by default. New non-standard variant eval module to print comp sites not present in eval (FN finder)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4601 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
30fae5cf18
commit
23cb399a88
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.utils.report.tags.DataPoint;
|
||||
|
||||
@Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites")
|
||||
public class PrintMissingComp extends VariantEvaluator {
|
||||
@DataPoint(name = "evals not at comp", description = "number of eval sites outside of comp sites")
|
||||
long nMissing = 0;
|
||||
|
||||
public PrintMissingComp(VariantEvalWalker parent) {
|
||||
super(parent);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "PrintMissingComp";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 2; // we need to see each eval track and each comp track
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP();
|
||||
boolean evalIsGood = eval != null && eval.isSNP();
|
||||
|
||||
if ( compIsGood & ! evalIsGood ) {
|
||||
nMissing++;
|
||||
return "MissingFrom" + comp.getSource();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -218,6 +218,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
@Argument(fullName = "minPhaseQuality", shortName = "minPQ", doc = "The minimum phasing quality (PQ) score required to consider phasing; [default:0]", required = false)
|
||||
protected Double minPhaseQuality = 0.0; // accept any positive value of PQ
|
||||
|
||||
@Argument(shortName="min", fullName="minimalComparisons", doc="If passed, filters and raw site values won't be computed", required=false)
|
||||
protected boolean MINIMAL = false;
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// private walker data
|
||||
|
|
@ -451,11 +455,14 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// honor specifications of just one or a few samples), and put an "all" in here so
|
||||
// that we don't lose multi-sample evaluations
|
||||
|
||||
List<String> filterTypes = MINIMAL ? Arrays.asList(RETAINED_SET_NAME) : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME);
|
||||
|
||||
|
||||
selectExps = append(selectExps, null);
|
||||
for ( String evalName : evalNames ) {
|
||||
for ( String compName : compNames ) {
|
||||
for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) {
|
||||
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
|
||||
for ( String filteredName : filterTypes ) {
|
||||
for ( String novelty : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
|
||||
EvaluationContext context = new EvaluationContext(evalName, compName, novelty, filteredName, e);
|
||||
contexts.add(context);
|
||||
|
|
@ -516,6 +523,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
//logger.debug(String.format("Updating %s with variant", vc));
|
||||
Set<VariantEvaluator> evaluations = group.evaluations;
|
||||
boolean evalWantsVC = applyVCtoEvaluation(vc, vcs, group);
|
||||
VariantContext interestingVC = vc;
|
||||
List<String> interestingReasons = new ArrayList<String>();
|
||||
|
||||
for ( VariantEvaluator evaluation : evaluations ) {
|
||||
|
|
@ -558,7 +566,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
**/
|
||||
|
||||
|
||||
if ( interesting != null ) interestingReasons.add(interesting);
|
||||
if ( interesting != null ) {
|
||||
interestingVC = interestingVC == null ? ( vc == null ? comp : vc ) : interestingVC;
|
||||
interestingReasons.add(interesting);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
|
||||
|
|
@ -568,7 +579,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
|
||||
if ( tracker != null && group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) )
|
||||
writeInterestingSite(interestingReasons, vc, ref.getBase());
|
||||
writeInterestingSite(interestingReasons, interestingVC, ref.getBase());
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -53,6 +53,9 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
|
||||
public String FIELDS;
|
||||
|
||||
@Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records")
|
||||
public boolean showFiltered = false;
|
||||
|
||||
@Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false)
|
||||
public int MAX_RECORDS = -1;
|
||||
int nRecords = 0;
|
||||
|
|
@ -115,7 +118,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) {
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||
for ( VariantContext vc : vcs) {
|
||||
if ( ! ignoreMultiAllelic || vc.isBiallelic() ) {
|
||||
if ( ! ignoreMultiAllelic || vc.isBiallelic() || ( !showFiltered || !vc.isFiltered() ) ) {
|
||||
List<String> vals = new ArrayList<String>();
|
||||
|
||||
for ( String field : fieldsToTake ) {
|
||||
|
|
|
|||
|
|
@ -228,37 +228,39 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
|
||||
if ( !UAC.NO_SLOD ) {
|
||||
final boolean DEBUG_SLOD = false;
|
||||
|
||||
// the overall lod
|
||||
double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double overallLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
|
||||
double lod = overallLog10PofF - overallLog10PofNull;
|
||||
//System.out.println("overallLog10PofNull=" + overallLog10PofNull + ", overallLog10PofF=" + overallLog10PofF);
|
||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double overallLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
// the forward lod
|
||||
GLs.clear();
|
||||
glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD, genotypePriors, GLs);
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double forwardLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
|
||||
//System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
double forwardLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
GLs.clear();
|
||||
glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE, genotypePriors, GLs);
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double reverseLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
|
||||
//System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
double reverseLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull;
|
||||
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull;
|
||||
//System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
|
||||
if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
|
||||
// strand score is max bias between forward and reverse strands
|
||||
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
||||
double strandScore = Math.max(forwardLod, reverseLod);
|
||||
// rescale by a factor of 10
|
||||
strandScore *= 10.0;
|
||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||
|
|
|
|||
|
|
@ -58,6 +58,17 @@ public class MathUtils {
|
|||
return s;
|
||||
}
|
||||
|
||||
public static double log10sum(double[] log10p, int start) {
|
||||
double sum = 0.0;
|
||||
|
||||
double maxValue = Utils.findMaxEntry(log10p);
|
||||
for ( int i = start; i < log10p.length; i++ ) {
|
||||
sum += Math.pow(10.0, log10p[i] - maxValue);
|
||||
}
|
||||
|
||||
return Math.log10(sum) + maxValue;
|
||||
}
|
||||
|
||||
public static double sum(List<Double> values) {
|
||||
double s = 0.0;
|
||||
for ( double v : values) s += v;
|
||||
|
|
|
|||
Loading…
Reference in New Issue