Re-revert back to point estimation for now. We need to do this right, just not yet.
Also, it's safer to let colt do the log factorial calculations for us. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1503 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
eb664ae287
commit
55013eff78
|
|
@ -4,10 +4,12 @@ import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import cern.jet.math.Arithmetic;
|
||||||
|
|
||||||
|
|
||||||
public abstract class RatioFilter implements VariantExclusionCriterion {
|
public abstract class RatioFilter implements VariantExclusionCriterion {
|
||||||
private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD
|
private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD
|
||||||
|
private static final int minDepthOfCoverage = 25; // TODO -- must be replaced with a proper probability calculation
|
||||||
protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest;
|
protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest;
|
||||||
|
|
||||||
protected double pvalueLimit = -1;
|
protected double pvalueLimit = -1;
|
||||||
|
|
@ -50,21 +52,31 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
||||||
|
|
||||||
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
||||||
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
|
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
|
||||||
exclude = excludable && highGenotypeConfidence && integralExclude(counts);
|
exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
|
||||||
//
|
//
|
||||||
// for printing only
|
// for printing only
|
||||||
//
|
//
|
||||||
int n = counts.first + counts.second;
|
int n = counts.first + counts.second;
|
||||||
double value = counts.first / (1.0 * counts.first + counts.second);
|
double value = counts.first / (1.0 * counts.first + counts.second);
|
||||||
logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, bases=%s",
|
logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, location=%s, bases=%s",
|
||||||
name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n,
|
name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n,
|
||||||
value, exclude, pileup.getBases()));
|
value, exclude, variant.getLocation(), pileup.getBases()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static double SEARCH_INCREMENT = 0.01;
|
private final static double SEARCH_INCREMENT = 0.01;
|
||||||
private final static double integralPValueThreshold = 0.05;
|
private final static double integralPValueThreshold = 0.05;
|
||||||
|
|
||||||
private boolean integralExclude(Pair<Integer, Integer> counts ) {
|
// TODO - this whole calculation needs to be redone correctly
|
||||||
|
private boolean pointEstimateExclude(Pair<Integer, Integer> counts) {
|
||||||
|
if ( counts.first + counts.second < minDepthOfCoverage )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int n = counts.first + counts.second;
|
||||||
|
double ratio = counts.first.doubleValue() / (double)n;
|
||||||
|
return !passesThreshold(ratio);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean integralExclude(Pair<Integer, Integer> counts) {
|
||||||
double sumExclude = 0.0, sumP = 0.0;
|
double sumExclude = 0.0, sumP = 0.0;
|
||||||
int n = counts.first + counts.second;
|
int n = counts.first + counts.second;
|
||||||
for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) {
|
for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) {
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.Pair;
|
||||||
|
|
||||||
public class VECAlleleBalance extends RatioFilter {
|
public class VECAlleleBalance extends RatioFilter {
|
||||||
|
|
||||||
private double lowThreshold = 0.10, highThreshold = 0.85;
|
private double lowThreshold = 0.25, highThreshold = 0.75;
|
||||||
private double ratio;
|
private double ratio;
|
||||||
|
|
||||||
public VECAlleleBalance() {
|
public VECAlleleBalance() {
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import cern.jet.math.Arithmetic;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -14,13 +14,11 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
private double pvalueLimit = 0.00001;
|
private double pvalueLimit = 0.00001;
|
||||||
private double pValue;
|
private double pValue;
|
||||||
private boolean exclude;
|
private boolean exclude;
|
||||||
private ArrayList<Double> factorials = new ArrayList<Double>();
|
|
||||||
|
|
||||||
public void initialize(String arguments) {
|
public void initialize(String arguments) {
|
||||||
if (arguments != null && !arguments.isEmpty()) {
|
if (arguments != null && !arguments.isEmpty()) {
|
||||||
pvalueLimit = Double.valueOf(arguments);
|
pvalueLimit = Double.valueOf(arguments);
|
||||||
}
|
}
|
||||||
factorials.add(0.0); // add fact(0)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
public void compute(VariantContextWindow contextWindow) {
|
||||||
|
|
@ -56,8 +54,6 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
if ( !variantIsHet(table) )
|
if ( !variantIsHet(table) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
updateFactorials(table);
|
|
||||||
|
|
||||||
double pCutoff = computePValue(table);
|
double pCutoff = computePValue(table);
|
||||||
//printTable(table, pCutoff);
|
//printTable(table, pCutoff);
|
||||||
|
|
||||||
|
|
@ -135,30 +131,16 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
int N = rowSums[0] + rowSums[1];
|
int N = rowSums[0] + rowSums[1];
|
||||||
|
|
||||||
// calculate in log space so we don't die with high numbers
|
// calculate in log space so we don't die with high numbers
|
||||||
double pCutoff = factorials.get(rowSums[0])
|
double pCutoff = Arithmetic.logFactorial(rowSums[0])
|
||||||
+ factorials.get(rowSums[1])
|
+ Arithmetic.logFactorial(rowSums[1])
|
||||||
+ factorials.get(colSums[0])
|
+ Arithmetic.logFactorial(colSums[0])
|
||||||
+ factorials.get(colSums[1])
|
+ Arithmetic.logFactorial(colSums[1])
|
||||||
- factorials.get(table[0][0])
|
- Arithmetic.logFactorial(table[0][0])
|
||||||
- factorials.get(table[0][1])
|
- Arithmetic.logFactorial(table[0][1])
|
||||||
- factorials.get(table[1][0])
|
- Arithmetic.logFactorial(table[1][0])
|
||||||
- factorials.get(table[1][1])
|
- Arithmetic.logFactorial(table[1][1])
|
||||||
- factorials.get(N);
|
- Arithmetic.logFactorial(N);
|
||||||
return Math.exp(pCutoff);
|
return Math.exp(pCutoff);
|
||||||
|
|
||||||
//pCutoff *= (double) Arithmetic.factorial(rowSums[0]);
|
|
||||||
//pCutoff *= (double) Arithmetic.factorial(rowSums[1]);
|
|
||||||
//pCutoff *= (double) Arithmetic.factorial(colSums[0]);
|
|
||||||
//pCutoff *= (double) Arithmetic.factorial(colSums[1]);
|
|
||||||
//pCutoff /= (double) Arithmetic.factorial(N);
|
|
||||||
//
|
|
||||||
//for (int i = 0; i < 2; i++) {
|
|
||||||
// for (int j = 0; j < 2; j++) {
|
|
||||||
// pCutoff /= (double) Arithmetic.factorial(table[i][j]);
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
//
|
|
||||||
//return pCutoff;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int sumRow(int[][] table, int column) {
|
private static int sumRow(int[][] table, int column) {
|
||||||
|
|
@ -213,11 +195,4 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
|
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateFactorials(int[][] table) {
|
|
||||||
// calculate in log space so we don't die with high numbers
|
|
||||||
int max = table[0][0] + table[0][1] + table[1][0] + table[1][1];
|
|
||||||
for (int i = factorials.size(); i <= max; i++)
|
|
||||||
factorials.add(factorials.get(i - 1) + Math.log(i));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
||||||
public class VECOnOffGenotypeRatio extends RatioFilter {
|
public class VECOnOffGenotypeRatio extends RatioFilter {
|
||||||
private double threshold = 0.0;
|
private double threshold = 0.8;
|
||||||
private double ratio;
|
private double ratio;
|
||||||
|
|
||||||
public VECOnOffGenotypeRatio() {
|
public VECOnOffGenotypeRatio() {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue