Re-revert back to point estimation for now. We need to do this right, just not yet.
Also, it's safer to let colt do the log factorial calculations for us. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1503 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
eb664ae287
commit
55013eff78
|
|
@ -4,10 +4,12 @@ import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import cern.jet.math.Arithmetic;
|
||||
|
||||
|
||||
public abstract class RatioFilter implements VariantExclusionCriterion {
|
||||
private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD
|
||||
private static final int minDepthOfCoverage = 25; // TODO -- must be replaced with a proper probability calculation
|
||||
protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest;
|
||||
|
||||
protected double pvalueLimit = -1;
|
||||
|
|
@ -50,21 +52,31 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
|||
|
||||
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
||||
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
|
||||
exclude = excludable && highGenotypeConfidence && integralExclude(counts);
|
||||
exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
|
||||
//
|
||||
// for printing only
|
||||
//
|
||||
int n = counts.first + counts.second;
|
||||
double value = counts.first / (1.0 * counts.first + counts.second);
|
||||
logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, bases=%s",
|
||||
logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, location=%s, bases=%s",
|
||||
name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n,
|
||||
value, exclude, pileup.getBases()));
|
||||
value, exclude, variant.getLocation(), pileup.getBases()));
|
||||
}
|
||||
|
||||
private final static double SEARCH_INCREMENT = 0.01;
|
||||
private final static double integralPValueThreshold = 0.05;
|
||||
|
||||
private boolean integralExclude(Pair<Integer, Integer> counts ) {
|
||||
// TODO - this whole calculation needs to be redone correctly
|
||||
private boolean pointEstimateExclude(Pair<Integer, Integer> counts) {
|
||||
if ( counts.first + counts.second < minDepthOfCoverage )
|
||||
return false;
|
||||
|
||||
int n = counts.first + counts.second;
|
||||
double ratio = counts.first.doubleValue() / (double)n;
|
||||
return !passesThreshold(ratio);
|
||||
}
|
||||
|
||||
private boolean integralExclude(Pair<Integer, Integer> counts) {
|
||||
double sumExclude = 0.0, sumP = 0.0;
|
||||
int n = counts.first + counts.second;
|
||||
for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.Pair;
|
|||
|
||||
public class VECAlleleBalance extends RatioFilter {
|
||||
|
||||
private double lowThreshold = 0.10, highThreshold = 0.85;
|
||||
private double lowThreshold = 0.25, highThreshold = 0.75;
|
||||
private double ratio;
|
||||
|
||||
public VECAlleleBalance() {
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import cern.jet.math.Arithmetic;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
|
|
@ -14,13 +14,11 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
|||
private double pvalueLimit = 0.00001;
|
||||
private double pValue;
|
||||
private boolean exclude;
|
||||
private ArrayList<Double> factorials = new ArrayList<Double>();
|
||||
|
||||
public void initialize(String arguments) {
|
||||
if (arguments != null && !arguments.isEmpty()) {
|
||||
pvalueLimit = Double.valueOf(arguments);
|
||||
}
|
||||
factorials.add(0.0); // add fact(0)
|
||||
}
|
||||
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
|
|
@ -56,8 +54,6 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
|||
if ( !variantIsHet(table) )
|
||||
return false;
|
||||
|
||||
updateFactorials(table);
|
||||
|
||||
double pCutoff = computePValue(table);
|
||||
//printTable(table, pCutoff);
|
||||
|
||||
|
|
@ -135,30 +131,16 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
|||
int N = rowSums[0] + rowSums[1];
|
||||
|
||||
// calculate in log space so we don't die with high numbers
|
||||
double pCutoff = factorials.get(rowSums[0])
|
||||
+ factorials.get(rowSums[1])
|
||||
+ factorials.get(colSums[0])
|
||||
+ factorials.get(colSums[1])
|
||||
- factorials.get(table[0][0])
|
||||
- factorials.get(table[0][1])
|
||||
- factorials.get(table[1][0])
|
||||
- factorials.get(table[1][1])
|
||||
- factorials.get(N);
|
||||
double pCutoff = Arithmetic.logFactorial(rowSums[0])
|
||||
+ Arithmetic.logFactorial(rowSums[1])
|
||||
+ Arithmetic.logFactorial(colSums[0])
|
||||
+ Arithmetic.logFactorial(colSums[1])
|
||||
- Arithmetic.logFactorial(table[0][0])
|
||||
- Arithmetic.logFactorial(table[0][1])
|
||||
- Arithmetic.logFactorial(table[1][0])
|
||||
- Arithmetic.logFactorial(table[1][1])
|
||||
- Arithmetic.logFactorial(N);
|
||||
return Math.exp(pCutoff);
|
||||
|
||||
//pCutoff *= (double) Arithmetic.factorial(rowSums[0]);
|
||||
//pCutoff *= (double) Arithmetic.factorial(rowSums[1]);
|
||||
//pCutoff *= (double) Arithmetic.factorial(colSums[0]);
|
||||
//pCutoff *= (double) Arithmetic.factorial(colSums[1]);
|
||||
//pCutoff /= (double) Arithmetic.factorial(N);
|
||||
//
|
||||
//for (int i = 0; i < 2; i++) {
|
||||
// for (int j = 0; j < 2; j++) {
|
||||
// pCutoff /= (double) Arithmetic.factorial(table[i][j]);
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//return pCutoff;
|
||||
}
|
||||
|
||||
private static int sumRow(int[][] table, int column) {
|
||||
|
|
@ -213,11 +195,4 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
|||
|
||||
return table;
|
||||
}
|
||||
|
||||
private void updateFactorials(int[][] table) {
|
||||
// calculate in log space so we don't die with high numbers
|
||||
int max = table[0][0] + table[0][1] + table[1][0] + table[1][1];
|
||||
for (int i = factorials.size(); i <= max; i++)
|
||||
factorials.add(factorials.get(i - 1) + Math.log(i));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
public class VECOnOffGenotypeRatio extends RatioFilter {
|
||||
private double threshold = 0.0;
|
||||
private double threshold = 0.8;
|
||||
private double ratio;
|
||||
|
||||
public VECOnOffGenotypeRatio() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue