- added mapping quality filter
- make the filters brainless in that they strictly have thresholds and filter based on them; require user to calculate and input these thresholds. - update filters in preparation for migration to new output format git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1363 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ba07f057ac
commit
e495b836d3
|
|
@ -144,7 +144,7 @@ class ObservationTable extends HashMap<String, GenotypeFeatureData> {
|
||||||
Logger logger = null;
|
Logger logger = null;
|
||||||
GenotypeFeatureData.Tail tail;
|
GenotypeFeatureData.Tail tail;
|
||||||
|
|
||||||
public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, FileNotFoundException, IOException {
|
public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, IOException {
|
||||||
this.observationsFile = observationsFile;
|
this.observationsFile = observationsFile;
|
||||||
this.statField = statField;
|
this.statField = statField;
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
|
|
@ -202,7 +202,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
||||||
protected double pvalueLimit = -1;
|
protected double pvalueLimit = -1;
|
||||||
protected File observationsFile = null;
|
protected File observationsFile = null;
|
||||||
protected String statField = null; // "AlleleRatio";
|
protected String statField = null; // "AlleleRatio";
|
||||||
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
|
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
|
||||||
protected ObservationTable dataTable = null;
|
protected ObservationTable dataTable = null;
|
||||||
protected String name = null;
|
protected String name = null;
|
||||||
protected GenotypeFeatureData.Tail tail = null;
|
protected GenotypeFeatureData.Tail tail = null;
|
||||||
|
|
@ -260,13 +260,6 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
||||||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||||
boolean exclude = false;
|
boolean exclude = false;
|
||||||
|
|
||||||
//
|
|
||||||
// todo -- need to calculate a significance value for the chance that the
|
|
||||||
// todo -- observed first and second counts are reliably not passing the threshold
|
|
||||||
// todo -- basically, we need to sample with replacement from all first / second pairs
|
|
||||||
// todo -- and only conclude that the variant fails the test if the probability of failing
|
|
||||||
// todo -- across all samples is greater than some P value, like 0.05
|
|
||||||
//
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
if ( applyToVariant(variant) ) {
|
if ( applyToVariant(variant) ) {
|
||||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,22 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
|
||||||
public class VECAlleleBalance extends RatioFilter {
|
public class VECAlleleBalance implements VariantExclusionCriterion { //extends RatioFilter {
|
||||||
final private static String statField = "AlleleRatio";
|
|
||||||
final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.TwoTailed;
|
final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.TwoTailed;
|
||||||
|
private boolean exclude;
|
||||||
|
private double lowThreshold, highThreshold, ratio;
|
||||||
|
|
||||||
|
public void initialize(String arguments) {
|
||||||
public VECAlleleBalance() {
|
if (arguments != null && !arguments.isEmpty()) {
|
||||||
super("AlleleBalance", statField, VECAlleleBalance.class, tail);
|
String[] argPieces = arguments.split(",");
|
||||||
|
lowThreshold = Double.valueOf(argPieces[0]);
|
||||||
|
highThreshold = Double.valueOf(argPieces[1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -51,4 +56,25 @@ public class VECAlleleBalance extends RatioFilter {
|
||||||
|
|
||||||
return new Pair(major, minor);
|
return new Pair(major, minor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||||
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
|
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||||
|
ratio = (double)counts.first / (double)counts.second;
|
||||||
|
exclude = ratio < lowThreshold || ratio > highThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
|
public boolean isExcludable() {
|
||||||
|
return exclude;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStudyHeader() {
|
||||||
|
return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStudyInfo() {
|
||||||
|
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
||||||
|
|
||||||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||||
exclude = context.getReads().size() > maximum;
|
exclude = context.getReads().size() > maximum;
|
||||||
|
|
||||||
depth = context.getReads().size();
|
depth = context.getReads().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -33,11 +32,11 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
return "#depth";
|
return "DepthOfCoverage("+maximum+")\tdepth";
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyInfo() {
|
public String getStudyInfo() {
|
||||||
return Integer.toString(depth);
|
return (exclude ? "fail" : "pass") + "\t" + depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ import java.util.List;
|
||||||
|
|
||||||
public class VECFisherStrand implements VariantExclusionCriterion {
|
public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
private double pvalueLimit = 0.00001;
|
private double pvalueLimit = 0.00001;
|
||||||
|
private double pValue;
|
||||||
private boolean exclude;
|
private boolean exclude;
|
||||||
private ArrayList<Double> factorials = new ArrayList<Double>();
|
private ArrayList<Double> factorials = new ArrayList<Double>();
|
||||||
|
|
||||||
|
|
@ -37,11 +38,11 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
return "";
|
return "FisherStrand("+pvalueLimit+")\tpvalue";
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyInfo() {
|
public String getStudyInfo() {
|
||||||
return "";
|
return (exclude ? "fail" : "pass") + "\t" + pValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
@ -56,7 +57,7 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
double pCutoff = computePValue(table);
|
double pCutoff = computePValue(table);
|
||||||
//printTable(table, pCutoff);
|
//printTable(table, pCutoff);
|
||||||
|
|
||||||
double pValue = pCutoff;
|
pValue = pCutoff;
|
||||||
while (rotateTable(table)) {
|
while (rotateTable(table)) {
|
||||||
double pValuePiece = computePValue(table);
|
double pValuePiece = computePValue(table);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
|
||||||
public class VECLodThreshold implements VariantExclusionCriterion {
|
public class VECLodThreshold implements VariantExclusionCriterion {
|
||||||
private double lodThreshold = 5.0;
|
private double lodThreshold = 5.0;
|
||||||
|
private double lod;
|
||||||
private boolean exclude;
|
private boolean exclude;
|
||||||
|
|
||||||
public void initialize(String arguments) {
|
public void initialize(String arguments) {
|
||||||
|
|
@ -15,25 +15,22 @@ public class VECLodThreshold implements VariantExclusionCriterion {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||||
exclude = (variant.getLodBtr() < lodThreshold);
|
lod = variant.getLodBtr();
|
||||||
|
exclude = lod < lodThreshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
public boolean exclude(char ref, LocusContext context, rodVariants variant) {
|
|
||||||
return (variant.getLodBtr() < lodThreshold);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isExcludable() {
|
public boolean isExcludable() {
|
||||||
return exclude;
|
return exclude;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
return "";
|
return "LodThreshold("+lod+")\tlod";
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyInfo() {
|
public String getStudyInfo() {
|
||||||
return "";
|
return (exclude ? "fail" : "pass") + "\t" + lod;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
public class VECMappingQuality implements VariantExclusionCriterion {
|
||||||
|
private double minQuality = 5.0;
|
||||||
|
private double rms;
|
||||||
|
private boolean exclude;
|
||||||
|
|
||||||
|
public void initialize(String arguments) {
|
||||||
|
if (arguments != null && !arguments.isEmpty()) {
|
||||||
|
minQuality = Double.valueOf(arguments);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||||
|
List<SAMRecord> reads = context.getReads();
|
||||||
|
|
||||||
|
rms = 0.0;
|
||||||
|
for (int readIndex = 0; readIndex < reads.size(); readIndex++) {
|
||||||
|
int qual = reads.get(readIndex).getMappingQuality();
|
||||||
|
rms += qual * qual;
|
||||||
|
}
|
||||||
|
rms /= reads.size();
|
||||||
|
rms = Math.sqrt(rms);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isExcludable() {
|
||||||
|
return exclude;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStudyHeader() {
|
||||||
|
return "MappingQuality("+minQuality+")\trms";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStudyInfo() {
|
||||||
|
return (exclude ? "fail" : "pass") + "\t" + rms;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean useZeroQualityReads() { return true; }
|
||||||
|
}
|
||||||
|
|
@ -4,12 +4,17 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
||||||
public class VECOnOffGenotypeRatio extends RatioFilter {
|
public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter {
|
||||||
final private static String statField = "onOffRatio";
|
|
||||||
final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed;
|
final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed;
|
||||||
|
private boolean exclude;
|
||||||
|
private double lowThreshold, highThreshold, ratio;
|
||||||
|
|
||||||
public VECOnOffGenotypeRatio() {
|
public void initialize(String arguments) {
|
||||||
super("On/Off Genotype Ratio", statField, VECOnOffGenotypeRatio.class, tail);
|
if (arguments != null && !arguments.isEmpty()) {
|
||||||
|
String[] argPieces = arguments.split(",");
|
||||||
|
lowThreshold = Double.valueOf(argPieces[0]);
|
||||||
|
highThreshold = Double.valueOf(argPieces[1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -38,7 +43,6 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
|
||||||
if ( genotype.length() > 2 )
|
if ( genotype.length() > 2 )
|
||||||
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
|
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
|
||||||
|
|
||||||
|
|
||||||
int on = 0, off = 0;
|
int on = 0, off = 0;
|
||||||
|
|
||||||
for ( char base : BaseUtils.BASES ) {
|
for ( char base : BaseUtils.BASES ) {
|
||||||
|
|
@ -52,4 +56,25 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
|
||||||
|
|
||||||
return new Pair<Integer, Integer>(on, off);
|
return new Pair<Integer, Integer>(on, off);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||||
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
|
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||||
|
ratio = (double)counts.first / (double)counts.second;
|
||||||
|
exclude = ratio < lowThreshold || ratio > highThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
|
public boolean isExcludable() {
|
||||||
|
return exclude;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStudyHeader() {
|
||||||
|
return "OnOffGenotype("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStudyInfo() {
|
||||||
|
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue