Re-revert back to point estimation for now. We need to do this right, just not yet.

Also, it's safer to let colt do the log factorial calculations for us. 


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1503 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-09-02 15:33:18 +00:00
parent eb664ae287
commit 55013eff78
4 changed files with 28 additions and 41 deletions

View File

@ -4,10 +4,12 @@ import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants; import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import cern.jet.math.Arithmetic;
public abstract class RatioFilter implements VariantExclusionCriterion { public abstract class RatioFilter implements VariantExclusionCriterion {
private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD
private static final int minDepthOfCoverage = 25; // TODO -- must be replaced with a proper probability calculation
protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest; protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest;
protected double pvalueLimit = -1; protected double pvalueLimit = -1;
@ -50,21 +52,31 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest; boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant); boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
exclude = excludable && highGenotypeConfidence && integralExclude(counts); exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
// //
// for printing only // for printing only
// //
int n = counts.first + counts.second; int n = counts.first + counts.second;
double value = counts.first / (1.0 * counts.first + counts.second); double value = counts.first / (1.0 * counts.first + counts.second);
logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, bases=%s", logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, location=%s, bases=%s",
name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n, name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n,
value, exclude, pileup.getBases())); value, exclude, variant.getLocation(), pileup.getBases()));
} }
private final static double SEARCH_INCREMENT = 0.01; private final static double SEARCH_INCREMENT = 0.01;
private final static double integralPValueThreshold = 0.05; private final static double integralPValueThreshold = 0.05;
private boolean integralExclude(Pair<Integer, Integer> counts ) { // TODO - this whole calculation needs to be redone correctly
private boolean pointEstimateExclude(Pair<Integer, Integer> counts) {
if ( counts.first + counts.second < minDepthOfCoverage )
return false;
int n = counts.first + counts.second;
double ratio = counts.first.doubleValue() / (double)n;
return !passesThreshold(ratio);
}
private boolean integralExclude(Pair<Integer, Integer> counts) {
double sumExclude = 0.0, sumP = 0.0; double sumExclude = 0.0, sumP = 0.0;
int n = counts.first + counts.second; int n = counts.first + counts.second;
for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) { for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) {

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.Pair;
public class VECAlleleBalance extends RatioFilter { public class VECAlleleBalance extends RatioFilter {
private double lowThreshold = 0.10, highThreshold = 0.85; private double lowThreshold = 0.25, highThreshold = 0.75;
private double ratio; private double ratio;
public VECAlleleBalance() { public VECAlleleBalance() {

View File

@ -5,8 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants; import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import cern.jet.math.Arithmetic;
import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -14,13 +14,11 @@ public class VECFisherStrand implements VariantExclusionCriterion {
private double pvalueLimit = 0.00001; private double pvalueLimit = 0.00001;
private double pValue; private double pValue;
private boolean exclude; private boolean exclude;
private ArrayList<Double> factorials = new ArrayList<Double>();
public void initialize(String arguments) { public void initialize(String arguments) {
if (arguments != null && !arguments.isEmpty()) { if (arguments != null && !arguments.isEmpty()) {
pvalueLimit = Double.valueOf(arguments); pvalueLimit = Double.valueOf(arguments);
} }
factorials.add(0.0); // add fact(0)
} }
public void compute(VariantContextWindow contextWindow) { public void compute(VariantContextWindow contextWindow) {
@ -56,8 +54,6 @@ public class VECFisherStrand implements VariantExclusionCriterion {
if ( !variantIsHet(table) ) if ( !variantIsHet(table) )
return false; return false;
updateFactorials(table);
double pCutoff = computePValue(table); double pCutoff = computePValue(table);
//printTable(table, pCutoff); //printTable(table, pCutoff);
@ -135,30 +131,16 @@ public class VECFisherStrand implements VariantExclusionCriterion {
int N = rowSums[0] + rowSums[1]; int N = rowSums[0] + rowSums[1];
// calculate in log space so we don't die with high numbers // calculate in log space so we don't die with high numbers
double pCutoff = factorials.get(rowSums[0]) double pCutoff = Arithmetic.logFactorial(rowSums[0])
+ factorials.get(rowSums[1]) + Arithmetic.logFactorial(rowSums[1])
+ factorials.get(colSums[0]) + Arithmetic.logFactorial(colSums[0])
+ factorials.get(colSums[1]) + Arithmetic.logFactorial(colSums[1])
- factorials.get(table[0][0]) - Arithmetic.logFactorial(table[0][0])
- factorials.get(table[0][1]) - Arithmetic.logFactorial(table[0][1])
- factorials.get(table[1][0]) - Arithmetic.logFactorial(table[1][0])
- factorials.get(table[1][1]) - Arithmetic.logFactorial(table[1][1])
- factorials.get(N); - Arithmetic.logFactorial(N);
return Math.exp(pCutoff); return Math.exp(pCutoff);
//pCutoff *= (double) Arithmetic.factorial(rowSums[0]);
//pCutoff *= (double) Arithmetic.factorial(rowSums[1]);
//pCutoff *= (double) Arithmetic.factorial(colSums[0]);
//pCutoff *= (double) Arithmetic.factorial(colSums[1]);
//pCutoff /= (double) Arithmetic.factorial(N);
//
//for (int i = 0; i < 2; i++) {
// for (int j = 0; j < 2; j++) {
// pCutoff /= (double) Arithmetic.factorial(table[i][j]);
// }
//}
//
//return pCutoff;
} }
private static int sumRow(int[][] table, int column) { private static int sumRow(int[][] table, int column) {
@ -213,11 +195,4 @@ public class VECFisherStrand implements VariantExclusionCriterion {
return table; return table;
} }
private void updateFactorials(int[][] table) {
// calculate in log space so we don't die with high numbers
int max = table[0][0] + table[0][1] + table[1][0] + table[1][1];
for (int i = factorials.size(); i <= max; i++)
factorials.add(factorials.get(i - 1) + Math.log(i));
}
} }

View File

@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
public class VECOnOffGenotypeRatio extends RatioFilter { public class VECOnOffGenotypeRatio extends RatioFilter {
private double threshold = 0.0; private double threshold = 0.8;
private double ratio; private double ratio;
public VECOnOffGenotypeRatio() { public VECOnOffGenotypeRatio() {