- added mapping quality filter

- make the filters brainless in that they strictly have thresholds and filter based on them; require user to calculate and input these thresholds.
- update filters in preparation for migration to new output format



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1363 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-08-03 16:46:51 +00:00
parent ba07f057ac
commit e495b836d3
7 changed files with 120 additions and 33 deletions

View File

@ -144,7 +144,7 @@ class ObservationTable extends HashMap<String, GenotypeFeatureData> {
Logger logger = null;
GenotypeFeatureData.Tail tail;
public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, FileNotFoundException, IOException {
public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, IOException {
this.observationsFile = observationsFile;
this.statField = statField;
this.logger = logger;
@ -202,7 +202,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
protected double pvalueLimit = -1;
protected File observationsFile = null;
protected String statField = null; // "AlleleRatio";
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
protected ObservationTable dataTable = null;
protected String name = null;
protected GenotypeFeatureData.Tail tail = null;
@ -260,13 +260,6 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
public void compute(char ref, LocusContext context, rodVariants variant) {
boolean exclude = false;
//
// todo -- need to calculate a significance value for the chance that the
// todo -- observed first and second counts are reliably not passing the threshold
// todo -- basically, we need to sample with replacement from all first / second pairs
// todo -- and only conclude that the variant fails the test if the probability of failing
// todo -- across all samples is greater than some P value, like 0.05
//
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
if ( applyToVariant(variant) ) {
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);

View File

@ -1,17 +1,22 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.Pair;
public class VECAlleleBalance extends RatioFilter {
final private static String statField = "AlleleRatio";
public class VECAlleleBalance implements VariantExclusionCriterion { //extends RatioFilter {
final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.TwoTailed;
private boolean exclude;
private double lowThreshold, highThreshold, ratio;
public VECAlleleBalance() {
super("AlleleBalance", statField, VECAlleleBalance.class, tail);
public void initialize(String arguments) {
if (arguments != null && !arguments.isEmpty()) {
String[] argPieces = arguments.split(",");
lowThreshold = Double.valueOf(argPieces[0]);
highThreshold = Double.valueOf(argPieces[1]);
}
}
/**
@ -51,4 +56,25 @@ public class VECAlleleBalance extends RatioFilter {
return new Pair(major, minor);
}
public void compute(char ref, LocusContext context, rodVariants variant) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
ratio = (double)counts.first / (double)counts.second;
exclude = ratio < lowThreshold || ratio > highThreshold;
}
public boolean useZeroQualityReads() { return false; }
public boolean isExcludable() {
return exclude;
}
public String getStudyHeader() {
return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
}
public String getStudyInfo() {
return (exclude ? "fail" : "pass") + "\t" + ratio;
}
}

View File

@ -24,7 +24,6 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
public void compute(char ref, LocusContext context, rodVariants variant) {
exclude = context.getReads().size() > maximum;
depth = context.getReads().size();
}
@ -33,11 +32,11 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
}
public String getStudyHeader() {
return "#depth";
return "DepthOfCoverage("+maximum+")\tdepth";
}
public String getStudyInfo() {
return Integer.toString(depth);
return (exclude ? "fail" : "pass") + "\t" + depth;
}
public boolean useZeroQualityReads() { return false; }

View File

@ -11,6 +11,7 @@ import java.util.List;
public class VECFisherStrand implements VariantExclusionCriterion {
private double pvalueLimit = 0.00001;
private double pValue;
private boolean exclude;
private ArrayList<Double> factorials = new ArrayList<Double>();
@ -37,11 +38,11 @@ public class VECFisherStrand implements VariantExclusionCriterion {
}
public String getStudyHeader() {
return "";
return "FisherStrand("+pvalueLimit+")\tpvalue";
}
public String getStudyInfo() {
return "";
return (exclude ? "fail" : "pass") + "\t" + pValue;
}
public boolean useZeroQualityReads() { return false; }
@ -56,7 +57,7 @@ public class VECFisherStrand implements VariantExclusionCriterion {
double pCutoff = computePValue(table);
//printTable(table, pCutoff);
double pValue = pCutoff;
pValue = pCutoff;
while (rotateTable(table)) {
double pValuePiece = computePValue(table);

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.LocusContext;
public class VECLodThreshold implements VariantExclusionCriterion {
private double lodThreshold = 5.0;
private double lod;
private boolean exclude;
public void initialize(String arguments) {
@ -15,25 +15,22 @@ public class VECLodThreshold implements VariantExclusionCriterion {
}
public void compute(char ref, LocusContext context, rodVariants variant) {
exclude = (variant.getLodBtr() < lodThreshold);
lod = variant.getLodBtr();
exclude = lod < lodThreshold;
}
public boolean useZeroQualityReads() { return false; }
public boolean exclude(char ref, LocusContext context, rodVariants variant) {
return (variant.getLodBtr() < lodThreshold);
}
public boolean isExcludable() {
return exclude;
}
public String getStudyHeader() {
return "";
return "LodThreshold("+lod+")\tlod";
}
public String getStudyInfo() {
return "";
return (exclude ? "fail" : "pass") + "\t" + lod;
}
}

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import net.sf.samtools.SAMRecord;
import java.util.List;
public class VECMappingQuality implements VariantExclusionCriterion {
private double minQuality = 5.0;
private double rms;
private boolean exclude;
public void initialize(String arguments) {
if (arguments != null && !arguments.isEmpty()) {
minQuality = Double.valueOf(arguments);
}
}
public void compute(char ref, LocusContext context, rodVariants variant) {
List<SAMRecord> reads = context.getReads();
rms = 0.0;
for (int readIndex = 0; readIndex < reads.size(); readIndex++) {
int qual = reads.get(readIndex).getMappingQuality();
rms += qual * qual;
}
rms /= reads.size();
rms = Math.sqrt(rms);
}
public boolean isExcludable() {
return exclude;
}
public String getStudyHeader() {
return "MappingQuality("+minQuality+")\trms";
}
public String getStudyInfo() {
return (exclude ? "fail" : "pass") + "\t" + rms;
}
public boolean useZeroQualityReads() { return true; }
}

View File

@ -4,12 +4,17 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.*;
public class VECOnOffGenotypeRatio extends RatioFilter {
final private static String statField = "onOffRatio";
public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter {
final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed;
private boolean exclude;
private double lowThreshold, highThreshold, ratio;
public VECOnOffGenotypeRatio() {
super("On/Off Genotype Ratio", statField, VECOnOffGenotypeRatio.class, tail);
public void initialize(String arguments) {
if (arguments != null && !arguments.isEmpty()) {
String[] argPieces = arguments.split(",");
lowThreshold = Double.valueOf(argPieces[0]);
highThreshold = Double.valueOf(argPieces[1]);
}
}
/**
@ -38,7 +43,6 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
if ( genotype.length() > 2 )
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
int on = 0, off = 0;
for ( char base : BaseUtils.BASES ) {
@ -52,4 +56,25 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
return new Pair<Integer, Integer>(on, off);
}
public void compute(char ref, LocusContext context, rodVariants variant) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
ratio = (double)counts.first / (double)counts.second;
exclude = ratio < lowThreshold || ratio > highThreshold;
}
public boolean useZeroQualityReads() { return false; }
public boolean isExcludable() {
return exclude;
}
public String getStudyHeader() {
return "OnOffGenotype("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
}
public String getStudyInfo() {
return (exclude ? "fail" : "pass") + "\t" + ratio;
}
}