Revert to having these filters use integration over binomial probs
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1499 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
05c164ec69
commit
91ccb0f8c5
|
|
@ -2,329 +2,95 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
class GenotypeFeatureData extends ArrayList<Double> {
|
|
||||||
public enum Tail { LeftTailed, RightTailed, TwoTailed }
|
|
||||||
|
|
||||||
private String genotype = null;
|
|
||||||
private Integer[] permutation = null;
|
|
||||||
private Logger logger = null;
|
|
||||||
|
|
||||||
private Tail tail = null;
|
|
||||||
|
|
||||||
double lowThreshold = -1;
|
|
||||||
double highThreshold = -1;
|
|
||||||
|
|
||||||
public GenotypeFeatureData(final String genotype, Logger logger, Tail tail ) {
|
|
||||||
super();
|
|
||||||
this.genotype = genotype;
|
|
||||||
this.logger = logger;
|
|
||||||
this.tail = tail;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Tail getTail() {
|
|
||||||
return tail;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getLowThreshold() {
|
|
||||||
return lowThreshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setLowThreshold(double lowThreshold) {
|
|
||||||
this.lowThreshold = lowThreshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getHighThreshold() {
|
|
||||||
return highThreshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setHighThreshold(double highThreshold) {
|
|
||||||
this.highThreshold = highThreshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void finalizeData() {
|
|
||||||
permutation = Utils.SortPermutation(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void determineThresholds(final double pvalueLimit) {
|
|
||||||
if ( pvalueLimit <= 0.0 || pvalueLimit >= 1.0 )
|
|
||||||
throw new RuntimeException(String.format("Invalid pValue limit = %f", pvalueLimit));
|
|
||||||
|
|
||||||
switch ( tail ) {
|
|
||||||
case LeftTailed:
|
|
||||||
lowThreshold = highThreshold = determineOneTailThreshold(pvalueLimit);
|
|
||||||
break;
|
|
||||||
case RightTailed:
|
|
||||||
lowThreshold = highThreshold = determineOneTailThreshold(1 - pvalueLimit);
|
|
||||||
break;
|
|
||||||
case TwoTailed:
|
|
||||||
lowThreshold = determineOneTailThreshold(pvalueLimit/2);
|
|
||||||
highThreshold = determineOneTailThreshold(1-pvalueLimit/2);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
//logger.info(String.format(" %d of %d elements are >= threshold = %f (%f percent)",
|
|
||||||
// nElementsAboveThreshold(), nElements(), getHighThreshold(), nElementsAboveThreshold() / ( 1.0 * nElements())));
|
|
||||||
//logger.info(String.format(" %d of %d elements are <= threshold = %f (%f percent)",
|
|
||||||
// nElementsBelowThreshold(), nElements(), getLowThreshold(), nElementsBelowThreshold() / ( 1.0 * nElements())));
|
|
||||||
}
|
|
||||||
|
|
||||||
private double determineOneTailThreshold(final double pvalueLimit) {
|
|
||||||
double trueSortedIndexLimit = pvalueLimit * nElements();
|
|
||||||
int sortedIndexLimit = (int)(pvalueLimit > 0.5 ? Math.ceil(trueSortedIndexLimit) : Math.floor(trueSortedIndexLimit));
|
|
||||||
double threshold = get(itemIndex(sortedIndexLimit));
|
|
||||||
|
|
||||||
logger.debug(String.format("determineTailThreshold(%s, %f) => %f => %d => %f", genotype, pvalueLimit, trueSortedIndexLimit, sortedIndexLimit, threshold));
|
|
||||||
|
|
||||||
return threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int nElementsBelowThreshold() { return nElementsPassingThreshold(true, false); }
|
|
||||||
public int nElementsAboveThreshold() { return nElementsPassingThreshold(false, true); }
|
|
||||||
|
|
||||||
public int nElementsPassingThreshold(boolean keepBelow, boolean keepAbove) {
|
|
||||||
int count = 0;
|
|
||||||
|
|
||||||
for ( double stat : this ) {
|
|
||||||
if ( passesThreshold(stat, keepBelow, keepAbove) ) count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean passesThreshold(double value, boolean keepBelow, boolean keepAbove) {
|
|
||||||
//System.out.printf("passThreshold %s, %b, %b = %b%n", value, keepBelow, keepAbove, value < threshold && keepBelow || value >= threshold && keepAbove);
|
|
||||||
return value <= getHighThreshold() && keepBelow || value >= getLowThreshold() && keepAbove;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean passesThreshold(double value) {
|
|
||||||
switch ( tail ) {
|
|
||||||
case LeftTailed:
|
|
||||||
return value >= getLowThreshold();
|
|
||||||
case RightTailed:
|
|
||||||
return value <= getHighThreshold();
|
|
||||||
case TwoTailed:
|
|
||||||
return value >= getLowThreshold() && value < getHighThreshold();
|
|
||||||
default:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public double pValue(double value) {
|
|
||||||
return 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int itemIndex(int sortedIndex) {
|
|
||||||
return permutation[sortedIndex];
|
|
||||||
}
|
|
||||||
|
|
||||||
public int nElements() { return size(); }
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return String.format("[GenotypeFeatureData: genotype=%s, tail=%s, lowThreshold=%f, highThreshold=%f]", genotype, tail, lowThreshold, highThreshold);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class ObservationTable extends HashMap<String, GenotypeFeatureData> {
|
|
||||||
String statField = null;
|
|
||||||
File observationsFile = null;
|
|
||||||
Logger logger = null;
|
|
||||||
GenotypeFeatureData.Tail tail;
|
|
||||||
|
|
||||||
public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, IOException {
|
|
||||||
this.observationsFile = observationsFile;
|
|
||||||
this.statField = statField;
|
|
||||||
this.logger = logger;
|
|
||||||
this.tail = tail;
|
|
||||||
readAllData(observationsFile, statField);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Set<String> genotypes() {
|
|
||||||
return keySet();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void readAllData(final File observationsFile, final String statField) throws NoSuchFieldException, FileNotFoundException, IOException {
|
|
||||||
ArrayList<String> header = TabularROD.readHeader(observationsFile);
|
|
||||||
|
|
||||||
//logger.info(String.format("Starting to read table %s", observationsFile));
|
|
||||||
|
|
||||||
for ( String line : new xReadLines(observationsFile) ) {
|
|
||||||
TabularROD d = new TabularROD("ignoreMe", header);
|
|
||||||
String[] parts = line.split("\\s+");
|
|
||||||
if ( d.parseLine(header, parts) ) {
|
|
||||||
if (! d.containsKey(statField)) {
|
|
||||||
throw new NoSuchFieldException(String.format("Could not find field %s in line %s", statField, line));
|
|
||||||
}
|
|
||||||
double stat = Double.valueOf(d.get(statField));
|
|
||||||
|
|
||||||
final String genotype = d.get("genotype");
|
|
||||||
GenotypeFeatureData gfd = getData(genotype);
|
|
||||||
gfd.add(stat);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( GenotypeFeatureData gfd : this.values() ) {
|
|
||||||
gfd.finalizeData();
|
|
||||||
}
|
|
||||||
|
|
||||||
//logger.info(String.format("Read table %s, found %d genotypes/data pairs in %s field",
|
|
||||||
// observationsFile, size(), statField));
|
|
||||||
}
|
|
||||||
|
|
||||||
public GenotypeFeatureData getData(final String genotype) {
|
|
||||||
if ( ! containsKey(genotype) ) {
|
|
||||||
GenotypeFeatureData gfd = new GenotypeFeatureData(genotype, logger, tail);
|
|
||||||
put(genotype, gfd);
|
|
||||||
}
|
|
||||||
|
|
||||||
return get(genotype);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return String.format("[ObservationTable: file=%s, field=%s, nGenotypes=%d]", observationsFile, statField, size());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public abstract class RatioFilter implements VariantExclusionCriterion {
|
public abstract class RatioFilter implements VariantExclusionCriterion {
|
||||||
|
private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD
|
||||||
|
protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest;
|
||||||
|
|
||||||
protected double pvalueLimit = -1;
|
protected double pvalueLimit = -1;
|
||||||
protected File observationsFile = null;
|
|
||||||
protected String statField = null; // "AlleleRatio";
|
|
||||||
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
|
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
|
||||||
protected ObservationTable dataTable = null;
|
|
||||||
protected String name = null;
|
protected String name = null;
|
||||||
protected GenotypeFeatureData.Tail tail = null;
|
|
||||||
|
protected enum Tail { LeftTailed, RightTailed, TwoTailed }
|
||||||
|
protected Tail tail = null;
|
||||||
|
protected double lowThreshold = -1;
|
||||||
|
protected double highThreshold = -1;
|
||||||
|
|
||||||
protected boolean exclude = false;
|
protected boolean exclude = false;
|
||||||
|
|
||||||
/**
|
public RatioFilter(final String name, Class myClass, Tail tail ) {
|
||||||
* A short-term hack to stop the systme from rejecting poorly covered sites, under the assumption
|
|
||||||
* that the ratio test is poorly determined without at least MIN_COUNTS_TO_APPLY_TEST. To be
|
|
||||||
* replaced by the more robust sampling approach outlined below
|
|
||||||
*/
|
|
||||||
final private static int MIN_COUNTS_TO_APPLY_TEST = 20;
|
|
||||||
|
|
||||||
final static boolean integrateOverSamplingProbabilities = true;
|
|
||||||
|
|
||||||
public RatioFilter(final String name, final String statField, Class myClass, GenotypeFeatureData.Tail tail ) {
|
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.statField = statField;
|
|
||||||
this.tail = tail;
|
this.tail = tail;
|
||||||
logger = Logger.getLogger(myClass);
|
logger = Logger.getLogger(myClass);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initialize(String arguments) {
|
protected void setLowThreshold(double threshold) {
|
||||||
if (arguments != null && !arguments.isEmpty()) {
|
lowThreshold = threshold;
|
||||||
String[] argPieces = arguments.split(",");
|
}
|
||||||
try {
|
|
||||||
pvalueLimit = Double.valueOf(argPieces[0]);
|
protected void setHighThreshold(double threshold) {
|
||||||
observationsFile = new File(argPieces[1]);
|
highThreshold = threshold;
|
||||||
|
|
||||||
dataTable = new ObservationTable(observationsFile, statField, logger, tail);
|
|
||||||
|
|
||||||
logger.info(String.format("Initialized data for ratio filter %s: %s", name, dataTable));
|
|
||||||
for ( String genotype : dataTable.genotypes() ) {
|
|
||||||
GenotypeFeatureData gfd = dataTable.get(genotype);
|
|
||||||
gfd.determineThresholds(pvalueLimit);
|
|
||||||
logger.info(gfd.toString());
|
|
||||||
}
|
|
||||||
} catch ( NumberFormatException e ) {
|
|
||||||
throw new RuntimeException(String.format("Couldn't parse pValue limit from %s", argPieces[0]), e);
|
|
||||||
} catch ( FileNotFoundException e ) {
|
|
||||||
throw new RuntimeException("Couldn't open ObservationTable " + observationsFile, e);
|
|
||||||
} catch ( NoSuchFieldException e ) {
|
|
||||||
throw new RuntimeException("Couldn't parse ObservationTable " + observationsFile, e);
|
|
||||||
} catch ( IOException e ) {
|
|
||||||
throw new RuntimeException("Couldn't parse ObservationTable " + observationsFile,e );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract boolean applyToVariant(rodVariants variant);
|
|
||||||
protected abstract Pair<Integer, Integer> scoreVariant(char ref, ReadBackedPileup pileup, rodVariants variant);
|
protected abstract Pair<Integer, Integer> scoreVariant(char ref, ReadBackedPileup pileup, rodVariants variant);
|
||||||
|
protected abstract boolean excludeHetsOnly();
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
public void compute(VariantContextWindow contextWindow) {
|
||||||
boolean exclude = false;
|
|
||||||
VariantContext context = contextWindow.getContext();
|
VariantContext context = contextWindow.getContext();
|
||||||
rodVariants variant = context.getVariant();
|
rodVariants variant = context.getVariant();
|
||||||
char ref = context.getReferenceContext().getBase();
|
char ref = context.getReferenceContext().getBase();
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext(useZeroQualityReads()));
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext(useZeroQualityReads()));
|
||||||
if ( applyToVariant(variant) ) {
|
|
||||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||||
GenotypeFeatureData gfd = dataTable.get(orderedBases(variant.getBestGenotype()));
|
|
||||||
|
|
||||||
if (integrateOverSamplingProbabilities)
|
|
||||||
exclude = integralExclude(gfd, counts);
|
|
||||||
else
|
|
||||||
exclude = pointEstimateExclude(gfd, counts);
|
|
||||||
|
|
||||||
|
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
||||||
|
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
|
||||||
|
exclude = excludable && highGenotypeConfidence && integralExclude(counts);
|
||||||
//
|
//
|
||||||
// for printing only
|
// for printing only
|
||||||
//
|
//
|
||||||
int n = counts.first + counts.second;
|
int n = counts.first + counts.second;
|
||||||
double value = counts.first / (1.0 * counts.first + counts.second);
|
double value = counts.first / (1.0 * counts.first + counts.second);
|
||||||
logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, %s, exclude=%b, bases=%s",
|
logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, bases=%s",
|
||||||
name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n,
|
name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n,
|
||||||
value, gfd, exclude, pileup.getBases()));
|
value, exclude, pileup.getBases()));
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isExcludable() {
|
|
||||||
return exclude;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static double SEARCH_INCREMENT = 0.01;
|
private final static double SEARCH_INCREMENT = 0.01;
|
||||||
private final static double integralPValueThreshold = 0.05;
|
private final static double integralPValueThreshold = 0.05;
|
||||||
|
|
||||||
private boolean integralExclude(GenotypeFeatureData gfd, Pair<Integer, Integer> counts ) {
|
private boolean integralExclude(Pair<Integer, Integer> counts ) {
|
||||||
double sumExclude = 0.0, sumP = 0.0;
|
double sumExclude = 0.0, sumP = 0.0;
|
||||||
int n = counts.first + counts.second;
|
int n = counts.first + counts.second;
|
||||||
for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) {
|
for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) {
|
||||||
double p = MathUtils.binomialProbability(counts.first, n, r);
|
double p = MathUtils.binomialProbability(counts.first, n, r);
|
||||||
sumP += p;
|
sumP += p;
|
||||||
boolean exclude = ! gfd.passesThreshold(r);
|
boolean exclude = ! passesThreshold(r);
|
||||||
sumExclude += p * (exclude ? 1.0 : 0.0);
|
sumExclude += p * (exclude ? 1.0 : 0.0);
|
||||||
|
|
||||||
//System.out.printf("integral: k=%d, n=%d, r=%f, p=%f, sumP = %f, exclude=%b | sum=%f, percentExcluded=%f%n",
|
System.out.printf("integral: k=%d, n=%d, r=%f, p=%f, sumP = %f, exclude=%b | sum=%f, percentExcluded=%f%n",
|
||||||
// counts.first, n, r, p, sumP, exclude, sumExclude, sumExclude / sumP);
|
counts.first, n, r, p, sumP, exclude, sumExclude, sumExclude / sumP);
|
||||||
}
|
}
|
||||||
|
|
||||||
double percentExcluded = sumExclude / sumP;
|
double percentExcluded = sumExclude / sumP;
|
||||||
return 1 - percentExcluded <= integralPValueThreshold ;
|
return 1 - percentExcluded <= integralPValueThreshold ;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean pointEstimateExclude(GenotypeFeatureData gfd, Pair<Integer, Integer> counts ) {
|
private boolean passesThreshold(double value) {
|
||||||
int n = counts.first + counts.second;
|
switch ( tail ) {
|
||||||
if ( n < MIN_COUNTS_TO_APPLY_TEST ) return false;
|
case LeftTailed:
|
||||||
double value = counts.first / (1.0 * counts.first + counts.second);
|
return value >= lowThreshold;
|
||||||
//double value = counts.first / (1.0 * Math.max(counts.second, 1.0));
|
case RightTailed:
|
||||||
return ! gfd.passesThreshold(value);
|
return value <= highThreshold;
|
||||||
|
case TwoTailed:
|
||||||
|
return value >= lowThreshold && value < highThreshold;
|
||||||
|
default:
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String orderedBases(String s) {
|
|
||||||
char[] charArray = s.toCharArray();
|
|
||||||
java.util.Arrays.sort(charArray);
|
|
||||||
return new String(charArray);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,20 +1,19 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.GenotypeUtils;
|
|
||||||
|
|
||||||
public class VECAlleleBalance implements VariantExclusionCriterion { //extends RatioFilter {
|
public class VECAlleleBalance extends RatioFilter {
|
||||||
private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD
|
|
||||||
|
|
||||||
private boolean exclude;
|
private double lowThreshold = 0.10, highThreshold = 0.85;
|
||||||
private double lowThreshold = 0.1, highThreshold = 0.85;
|
|
||||||
private double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest;
|
|
||||||
private double ratio;
|
private double ratio;
|
||||||
|
|
||||||
|
public VECAlleleBalance() {
|
||||||
|
super("Allele Balance Ratio", VECAlleleBalance.class, Tail.TwoTailed);
|
||||||
|
}
|
||||||
|
|
||||||
public void initialize(String arguments) {
|
public void initialize(String arguments) {
|
||||||
if (arguments != null && !arguments.isEmpty()) {
|
if (arguments != null && !arguments.isEmpty()) {
|
||||||
String[] argPieces = arguments.split(",");
|
String[] argPieces = arguments.split(",");
|
||||||
|
|
@ -23,15 +22,13 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
|
||||||
if ( argPieces.length > 2 )
|
if ( argPieces.length > 2 )
|
||||||
minGenotypeConfidenceToTest = Double.valueOf(argPieces[2]);
|
minGenotypeConfidenceToTest = Double.valueOf(argPieces[2]);
|
||||||
}
|
}
|
||||||
|
setLowThreshold(lowThreshold);
|
||||||
|
setHighThreshold(highThreshold);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the count of bases matching the major (first) and minor (second) alleles as a pair.
|
* Return the count of bases matching the major (first) and minor (second) alleles as a pair.
|
||||||
*
|
*
|
||||||
* @param ref
|
|
||||||
* @param pileup
|
|
||||||
* @param variant
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
protected Pair<Integer, Integer> scoreVariant(char ref, ReadBackedPileup pileup, rodVariants variant) {
|
protected Pair<Integer, Integer> scoreVariant(char ref, ReadBackedPileup pileup, rodVariants variant) {
|
||||||
final String genotype = variant.getBestGenotype();
|
final String genotype = variant.getBestGenotype();
|
||||||
|
|
@ -49,30 +46,11 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
|
||||||
int refCount = a == ref ? aCount : bCount;
|
int refCount = a == ref ? aCount : bCount;
|
||||||
int altCount = a == ref ? bCount : aCount;
|
int altCount = a == ref ? bCount : aCount;
|
||||||
|
|
||||||
return new Pair(refCount, altCount);
|
ratio = (double)refCount / (double)altCount;
|
||||||
|
return new Pair<Integer, Integer>(refCount, altCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
protected boolean excludeHetsOnly() { return true; }
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
char ref = context.getReferenceContext().getBase();
|
|
||||||
rodVariants variant = context.getVariant();
|
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext(useZeroQualityReads()));
|
|
||||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
|
||||||
int n = counts.first + counts.second;
|
|
||||||
ratio = counts.first.doubleValue() / (double)n;
|
|
||||||
|
|
||||||
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
|
||||||
boolean failsHetExpectation = GenotypeUtils.isHet(variant) && (ratio < lowThreshold || ratio > highThreshold);
|
|
||||||
exclude = failsHetExpectation && highGenotypeConfidence;
|
|
||||||
|
|
||||||
// if ( failsHetExpectation ) {
|
|
||||||
// String header = highGenotypeConfidence ? "FILTER-HIGH-CONFIDENCE" : "PASS-LOW-CONFIDENCE";
|
|
||||||
// System.out.printf("[%s] %s getConsensusConfidence() = %f > minGenotypeConfidenceToTest = %f exclude=%b %s%n",
|
|
||||||
// header, variant.getBestGenotype(), variant.getConsensusConfidence(), minGenotypeConfidenceToTest, exclude,
|
|
||||||
// ! highGenotypeConfidence ? variant : "" );
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,29 +1,30 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
||||||
public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter {
|
public class VECOnOffGenotypeRatio extends RatioFilter {
|
||||||
//final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed;
|
|
||||||
private boolean exclude;
|
|
||||||
private double threshold = 0.0;
|
private double threshold = 0.0;
|
||||||
private double ratio;
|
private double ratio;
|
||||||
|
|
||||||
|
public VECOnOffGenotypeRatio() {
|
||||||
|
super("On/Off Genotype Ratio", VECOnOffGenotypeRatio.class, Tail.LeftTailed);
|
||||||
|
}
|
||||||
|
|
||||||
public void initialize(String arguments) {
|
public void initialize(String arguments) {
|
||||||
if (arguments != null && !arguments.isEmpty()) {
|
if (arguments != null && !arguments.isEmpty()) {
|
||||||
threshold = Double.valueOf(arguments);
|
String[] argPieces = arguments.split(",");
|
||||||
|
threshold = Double.valueOf(argPieces[0]);
|
||||||
|
if ( argPieces.length > 1 )
|
||||||
|
minGenotypeConfidenceToTest = Double.valueOf(argPieces[1]);
|
||||||
}
|
}
|
||||||
|
setLowThreshold(threshold);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the counts of bases that are on (matching the bestGenotype) and off (not matching the
|
* Return the counts of bases that are on (matching the bestGenotype) and off (not matching the
|
||||||
* best genotype). On are in the first field, off in the second.
|
* best genotype). On are in the first field, off in the second.
|
||||||
*
|
*
|
||||||
* @param ref
|
|
||||||
* @param pileup
|
|
||||||
* @param variant
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
protected Pair<Integer, Integer> scoreVariant(char ref, ReadBackedPileup pileup, rodVariants variant) {
|
protected Pair<Integer, Integer> scoreVariant(char ref, ReadBackedPileup pileup, rodVariants variant) {
|
||||||
final String genotype = variant.getBestGenotype().toUpperCase();
|
final String genotype = variant.getBestGenotype().toUpperCase();
|
||||||
|
|
@ -43,19 +44,11 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext
|
||||||
//System.out.printf("count = %d, on=%d, off=%d for %c in %s%n", count, on, off, base, genotype);
|
//System.out.printf("count = %d, on=%d, off=%d for %c in %s%n", count, on, off, base, genotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ratio = (double)on / (double)off;
|
||||||
return new Pair<Integer, Integer>(on, off);
|
return new Pair<Integer, Integer>(on, off);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
protected boolean excludeHetsOnly() { return false; }
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
char ref = context.getReferenceContext().getBase();
|
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext(useZeroQualityReads()));
|
|
||||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, context.getVariant());
|
|
||||||
int n = counts.first + counts.second;
|
|
||||||
ratio = counts.first.doubleValue() / (double)n;
|
|
||||||
exclude = ratio < threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
public double inclusionProbability() {
|
||||||
return exclude ? 0.0 : 1.0;
|
return exclude ? 0.0 : 1.0;
|
||||||
|
|
|
||||||
|
|
@ -294,6 +294,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
*/
|
*/
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
// move the window over so that we can filter the last few variants
|
// move the window over so that we can filter the last few variants
|
||||||
|
if ( windowInitializer != null ) {
|
||||||
|
while ( windowInitializer.size() < windowSize )
|
||||||
|
windowInitializer.add(null);
|
||||||
|
variantContextWindow = new VariantContextWindow(windowInitializer);
|
||||||
|
}
|
||||||
for (int i=0; i < windowSize; i++) {
|
for (int i=0; i < windowSize; i++) {
|
||||||
variantContextWindow.moveWindow(null);
|
variantContextWindow.moveWindow(null);
|
||||||
compute();
|
compute();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue