Reasonable first pass at a correct SB calculation. Simple utilities to support it. VariantsToTable no longer prints filtered sites by default. New non-standard variant eval module to print comp sites not present in eval (FN finder)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4601 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
30fae5cf18
commit
23cb399a88
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.utils.report.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.utils.report.tags.DataPoint;
|
||||||
|
|
||||||
|
@Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites")
|
||||||
|
public class PrintMissingComp extends VariantEvaluator {
|
||||||
|
@DataPoint(name = "evals not at comp", description = "number of eval sites outside of comp sites")
|
||||||
|
long nMissing = 0;
|
||||||
|
|
||||||
|
public PrintMissingComp(VariantEvalWalker parent) {
|
||||||
|
super(parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "PrintMissingComp";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 2; // we need to see each eval track and each comp track
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP();
|
||||||
|
boolean evalIsGood = eval != null && eval.isSNP();
|
||||||
|
|
||||||
|
if ( compIsGood & ! evalIsGood ) {
|
||||||
|
nMissing++;
|
||||||
|
return "MissingFrom" + comp.getSource();
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -218,6 +218,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
@Argument(fullName = "minPhaseQuality", shortName = "minPQ", doc = "The minimum phasing quality (PQ) score required to consider phasing; [default:0]", required = false)
|
@Argument(fullName = "minPhaseQuality", shortName = "minPQ", doc = "The minimum phasing quality (PQ) score required to consider phasing; [default:0]", required = false)
|
||||||
protected Double minPhaseQuality = 0.0; // accept any positive value of PQ
|
protected Double minPhaseQuality = 0.0; // accept any positive value of PQ
|
||||||
|
|
||||||
|
@Argument(shortName="min", fullName="minimalComparisons", doc="If passed, filters and raw site values won't be computed", required=false)
|
||||||
|
protected boolean MINIMAL = false;
|
||||||
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// private walker data
|
// private walker data
|
||||||
|
|
@ -451,11 +455,14 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
// honor specifications of just one or a few samples), and put an "all" in here so
|
// honor specifications of just one or a few samples), and put an "all" in here so
|
||||||
// that we don't lose multi-sample evaluations
|
// that we don't lose multi-sample evaluations
|
||||||
|
|
||||||
|
List<String> filterTypes = MINIMAL ? Arrays.asList(RETAINED_SET_NAME) : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME);
|
||||||
|
|
||||||
|
|
||||||
selectExps = append(selectExps, null);
|
selectExps = append(selectExps, null);
|
||||||
for ( String evalName : evalNames ) {
|
for ( String evalName : evalNames ) {
|
||||||
for ( String compName : compNames ) {
|
for ( String compName : compNames ) {
|
||||||
for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) {
|
for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) {
|
||||||
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
|
for ( String filteredName : filterTypes ) {
|
||||||
for ( String novelty : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
|
for ( String novelty : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
|
||||||
EvaluationContext context = new EvaluationContext(evalName, compName, novelty, filteredName, e);
|
EvaluationContext context = new EvaluationContext(evalName, compName, novelty, filteredName, e);
|
||||||
contexts.add(context);
|
contexts.add(context);
|
||||||
|
|
@ -516,6 +523,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
//logger.debug(String.format("Updating %s with variant", vc));
|
//logger.debug(String.format("Updating %s with variant", vc));
|
||||||
Set<VariantEvaluator> evaluations = group.evaluations;
|
Set<VariantEvaluator> evaluations = group.evaluations;
|
||||||
boolean evalWantsVC = applyVCtoEvaluation(vc, vcs, group);
|
boolean evalWantsVC = applyVCtoEvaluation(vc, vcs, group);
|
||||||
|
VariantContext interestingVC = vc;
|
||||||
List<String> interestingReasons = new ArrayList<String>();
|
List<String> interestingReasons = new ArrayList<String>();
|
||||||
|
|
||||||
for ( VariantEvaluator evaluation : evaluations ) {
|
for ( VariantEvaluator evaluation : evaluations ) {
|
||||||
|
|
@ -558,7 +566,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
**/
|
**/
|
||||||
|
|
||||||
|
|
||||||
if ( interesting != null ) interestingReasons.add(interesting);
|
if ( interesting != null ) {
|
||||||
|
interestingVC = interestingVC == null ? ( vc == null ? comp : vc ) : interestingVC;
|
||||||
|
interestingReasons.add(interesting);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
|
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
|
||||||
|
|
@ -568,7 +579,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( tracker != null && group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) )
|
if ( tracker != null && group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) )
|
||||||
writeInterestingSite(interestingReasons, vc, ref.getBase());
|
writeInterestingSite(interestingReasons, interestingVC, ref.getBase());
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,9 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
|
@Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
|
||||||
public String FIELDS;
|
public String FIELDS;
|
||||||
|
|
||||||
|
@Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records")
|
||||||
|
public boolean showFiltered = false;
|
||||||
|
|
||||||
@Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false)
|
@Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false)
|
||||||
public int MAX_RECORDS = -1;
|
public int MAX_RECORDS = -1;
|
||||||
int nRecords = 0;
|
int nRecords = 0;
|
||||||
|
|
@ -115,7 +118,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) {
|
if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) {
|
||||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||||
for ( VariantContext vc : vcs) {
|
for ( VariantContext vc : vcs) {
|
||||||
if ( ! ignoreMultiAllelic || vc.isBiallelic() ) {
|
if ( ! ignoreMultiAllelic || vc.isBiallelic() || ( !showFiltered || !vc.isFiltered() ) ) {
|
||||||
List<String> vals = new ArrayList<String>();
|
List<String> vals = new ArrayList<String>();
|
||||||
|
|
||||||
for ( String field : fieldsToTake ) {
|
for ( String field : fieldsToTake ) {
|
||||||
|
|
|
||||||
|
|
@ -228,37 +228,39 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
|
|
||||||
if ( !UAC.NO_SLOD ) {
|
if ( !UAC.NO_SLOD ) {
|
||||||
|
final boolean DEBUG_SLOD = false;
|
||||||
|
|
||||||
// the overall lod
|
// the overall lod
|
||||||
double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double overallLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
|
double overallLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
double lod = overallLog10PofF - overallLog10PofNull;
|
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||||
//System.out.println("overallLog10PofNull=" + overallLog10PofNull + ", overallLog10PofF=" + overallLog10PofF);
|
|
||||||
|
|
||||||
// the forward lod
|
// the forward lod
|
||||||
GLs.clear();
|
GLs.clear();
|
||||||
glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD, genotypePriors, GLs);
|
glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD, genotypePriors, GLs);
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||||
|
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double forwardLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
|
double forwardLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
//System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||||
|
|
||||||
// the reverse lod
|
// the reverse lod
|
||||||
GLs.clear();
|
GLs.clear();
|
||||||
glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE, genotypePriors, GLs);
|
glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE, genotypePriors, GLs);
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||||
|
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double reverseLog10PofF = log10AlleleFrequencyPosteriors.get()[bestAFguess];
|
double reverseLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
//System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||||
|
|
||||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull;
|
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||||
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull;
|
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
|
||||||
//System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||||
|
|
||||||
// strand score is max bias between forward and reverse strands
|
// strand score is max bias between forward and reverse strands
|
||||||
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
double strandScore = Math.max(forwardLod, reverseLod);
|
||||||
// rescale by a factor of 10
|
// rescale by a factor of 10
|
||||||
strandScore *= 10.0;
|
strandScore *= 10.0;
|
||||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,17 @@ public class MathUtils {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static double log10sum(double[] log10p, int start) {
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
double maxValue = Utils.findMaxEntry(log10p);
|
||||||
|
for ( int i = start; i < log10p.length; i++ ) {
|
||||||
|
sum += Math.pow(10.0, log10p[i] - maxValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Math.log10(sum) + maxValue;
|
||||||
|
}
|
||||||
|
|
||||||
public static double sum(List<Double> values) {
|
public static double sum(List<Double> values) {
|
||||||
double s = 0.0;
|
double s = 0.0;
|
||||||
for ( double v : values) s += v;
|
for ( double v : values) s += v;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue