From 55013eff78e8de92a3a3bb864d48055fdbfdcbae Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 2 Sep 2009 15:33:18 +0000 Subject: [PATCH] Re-revert back to point estimation for now. We need to do this right, just not yet. Also, it's safer to let colt do the log factorial calculations for us. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1503 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/variants/RatioFilter.java | 20 +++++++-- .../walkers/variants/VECAlleleBalance.java | 2 +- .../walkers/variants/VECFisherStrand.java | 45 +++++-------------- .../variants/VECOnOffGenotypeRatio.java | 2 +- 4 files changed, 28 insertions(+), 41 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java index 23dd48ab3..b3235dd73 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java @@ -4,10 +4,12 @@ import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.gatk.refdata.rodVariants; import org.broadinstitute.sting.utils.*; import org.apache.log4j.Logger; +import cern.jet.math.Arithmetic; public abstract class RatioFilter implements VariantExclusionCriterion { private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD + private static final int minDepthOfCoverage = 25; // TODO -- must be replaced with a proper probability calculation protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest; protected double pvalueLimit = -1; @@ -50,21 +52,31 @@ public abstract class RatioFilter implements VariantExclusionCriterion { boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest; boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant); - exclude = excludable && highGenotypeConfidence && integralExclude(counts); + exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts); // // for printing only // int n = counts.first + counts.second; double value = counts.first / (1.0 * counts.first + counts.second); - logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, bases=%s", + logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, location=%s, bases=%s", name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n, - value, exclude, pileup.getBases())); + value, exclude, variant.getLocation(), pileup.getBases())); } private final static double SEARCH_INCREMENT = 0.01; private final static double integralPValueThreshold = 0.05; - private boolean integralExclude(Pair counts ) { + // TODO - this whole calculation needs to be redone correctly + private boolean pointEstimateExclude(Pair counts) { + if ( counts.first + counts.second < minDepthOfCoverage ) + return false; + + int n = counts.first + counts.second; + double ratio = counts.first.doubleValue() / (double)n; + return !passesThreshold(ratio); + } + + private boolean integralExclude(Pair counts) { double sumExclude = 0.0, sumP = 0.0; int n = counts.first + counts.second; for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java index 5ddc92562..c7024dd02 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java @@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.Pair; public class VECAlleleBalance extends RatioFilter { - private double lowThreshold = 0.10, highThreshold = 0.85; + private double lowThreshold = 0.25, highThreshold = 0.75; private double ratio; public VECAlleleBalance() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java index a9a2dfc71..66e4642dd 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java @@ -5,8 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.gatk.refdata.rodVariants; import org.broadinstitute.sting.utils.BaseUtils; import net.sf.samtools.SAMRecord; +import cern.jet.math.Arithmetic; -import java.util.ArrayList; import java.util.List; @@ -14,13 +14,11 @@ public class VECFisherStrand implements VariantExclusionCriterion { private double pvalueLimit = 0.00001; private double pValue; private boolean exclude; - private ArrayList factorials = new ArrayList(); public void initialize(String arguments) { if (arguments != null && !arguments.isEmpty()) { pvalueLimit = Double.valueOf(arguments); } - factorials.add(0.0); // add fact(0) } public void compute(VariantContextWindow contextWindow) { @@ -56,8 +54,6 @@ public class VECFisherStrand implements VariantExclusionCriterion { if ( !variantIsHet(table) ) return false; - updateFactorials(table); - double pCutoff = computePValue(table); //printTable(table, pCutoff); @@ -135,30 +131,16 @@ public class VECFisherStrand implements VariantExclusionCriterion { int N = rowSums[0] + rowSums[1]; // calculate in log space so we don't die with high numbers - double pCutoff = factorials.get(rowSums[0]) - + factorials.get(rowSums[1]) - + factorials.get(colSums[0]) - + factorials.get(colSums[1]) - - factorials.get(table[0][0]) - - factorials.get(table[0][1]) - - factorials.get(table[1][0]) - - factorials.get(table[1][1]) - - factorials.get(N); + double pCutoff = Arithmetic.logFactorial(rowSums[0]) + + Arithmetic.logFactorial(rowSums[1]) + + Arithmetic.logFactorial(colSums[0]) + + Arithmetic.logFactorial(colSums[1]) + - Arithmetic.logFactorial(table[0][0]) + - Arithmetic.logFactorial(table[0][1]) + - Arithmetic.logFactorial(table[1][0]) + - Arithmetic.logFactorial(table[1][1]) + - Arithmetic.logFactorial(N); return Math.exp(pCutoff); - - //pCutoff *= (double) Arithmetic.factorial(rowSums[0]); - //pCutoff *= (double) Arithmetic.factorial(rowSums[1]); - //pCutoff *= (double) Arithmetic.factorial(colSums[0]); - //pCutoff *= (double) Arithmetic.factorial(colSums[1]); - //pCutoff /= (double) Arithmetic.factorial(N); - // - //for (int i = 0; i < 2; i++) { - // for (int j = 0; j < 2; j++) { - // pCutoff /= (double) Arithmetic.factorial(table[i][j]); - // } - //} - // - //return pCutoff; } private static int sumRow(int[][] table, int column) { @@ -213,11 +195,4 @@ public class VECFisherStrand implements VariantExclusionCriterion { return table; } - - private void updateFactorials(int[][] table) { - // calculate in log space so we don't die with high numbers - int max = table[0][0] + table[0][1] + table[1][0] + table[1][1]; - for (int i = factorials.size(); i <= max; i++) - factorials.add(factorials.get(i - 1) + Math.log(i)); - } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java index 44a8accab..54d0e1bec 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants; import org.broadinstitute.sting.utils.*; public class VECOnOffGenotypeRatio extends RatioFilter { - private double threshold = 0.0; + private double threshold = 0.8; private double ratio; public VECOnOffGenotypeRatio() {