- added mapping quality filter
- make the filters brainless in that they strictly have thresholds and filter based on them; require user to calculate and input these thresholds. - update filters in preparation for migration to new output format git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1363 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ba07f057ac
commit
e495b836d3
|
|
@ -144,7 +144,7 @@ class ObservationTable extends HashMap<String, GenotypeFeatureData> {
|
|||
Logger logger = null;
|
||||
GenotypeFeatureData.Tail tail;
|
||||
|
||||
public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, FileNotFoundException, IOException {
|
||||
public ObservationTable(final File observationsFile, final String statField, Logger logger, GenotypeFeatureData.Tail tail ) throws NoSuchFieldException, IOException {
|
||||
this.observationsFile = observationsFile;
|
||||
this.statField = statField;
|
||||
this.logger = logger;
|
||||
|
|
@ -202,7 +202,7 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
|||
protected double pvalueLimit = -1;
|
||||
protected File observationsFile = null;
|
||||
protected String statField = null; // "AlleleRatio";
|
||||
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
|
||||
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
|
||||
protected ObservationTable dataTable = null;
|
||||
protected String name = null;
|
||||
protected GenotypeFeatureData.Tail tail = null;
|
||||
|
|
@ -260,13 +260,6 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
|||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||
boolean exclude = false;
|
||||
|
||||
//
|
||||
// todo -- need to calculate a significance value for the chance that the
|
||||
// todo -- observed first and second counts are reliably not passing the threshold
|
||||
// todo -- basically, we need to sample with replacement from all first / second pairs
|
||||
// todo -- and only conclude that the variant fails the test if the probability of failing
|
||||
// todo -- across all samples is greater than some P value, like 0.05
|
||||
//
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
if ( applyToVariant(variant) ) {
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
|
|
|
|||
|
|
@ -1,17 +1,22 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
public class VECAlleleBalance extends RatioFilter {
|
||||
final private static String statField = "AlleleRatio";
|
||||
public class VECAlleleBalance implements VariantExclusionCriterion { //extends RatioFilter {
|
||||
final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.TwoTailed;
|
||||
private boolean exclude;
|
||||
private double lowThreshold, highThreshold, ratio;
|
||||
|
||||
|
||||
public VECAlleleBalance() {
|
||||
super("AlleleBalance", statField, VECAlleleBalance.class, tail);
|
||||
public void initialize(String arguments) {
|
||||
if (arguments != null && !arguments.isEmpty()) {
|
||||
String[] argPieces = arguments.split(",");
|
||||
lowThreshold = Double.valueOf(argPieces[0]);
|
||||
highThreshold = Double.valueOf(argPieces[1]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -51,4 +56,25 @@ public class VECAlleleBalance extends RatioFilter {
|
|||
|
||||
return new Pair(major, minor);
|
||||
}
|
||||
|
||||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
ratio = (double)counts.first / (double)counts.second;
|
||||
exclude = ratio < lowThreshold || ratio > highThreshold;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
public boolean isExcludable() {
|
||||
return exclude;
|
||||
}
|
||||
|
||||
public String getStudyHeader() {
|
||||
return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
|
||||
}
|
||||
|
||||
public String getStudyInfo() {
|
||||
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
|||
|
||||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||
exclude = context.getReads().size() > maximum;
|
||||
|
||||
depth = context.getReads().size();
|
||||
}
|
||||
|
||||
|
|
@ -33,11 +32,11 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
|||
}
|
||||
|
||||
public String getStudyHeader() {
|
||||
return "#depth";
|
||||
return "DepthOfCoverage("+maximum+")\tdepth";
|
||||
}
|
||||
|
||||
public String getStudyInfo() {
|
||||
return Integer.toString(depth);
|
||||
return (exclude ? "fail" : "pass") + "\t" + depth;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import java.util.List;
|
|||
|
||||
public class VECFisherStrand implements VariantExclusionCriterion {
|
||||
private double pvalueLimit = 0.00001;
|
||||
private double pValue;
|
||||
private boolean exclude;
|
||||
private ArrayList<Double> factorials = new ArrayList<Double>();
|
||||
|
||||
|
|
@ -37,11 +38,11 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
|||
}
|
||||
|
||||
public String getStudyHeader() {
|
||||
return "";
|
||||
return "FisherStrand("+pvalueLimit+")\tpvalue";
|
||||
}
|
||||
|
||||
public String getStudyInfo() {
|
||||
return "";
|
||||
return (exclude ? "fail" : "pass") + "\t" + pValue;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
|
@ -56,7 +57,7 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
|||
double pCutoff = computePValue(table);
|
||||
//printTable(table, pCutoff);
|
||||
|
||||
double pValue = pCutoff;
|
||||
pValue = pCutoff;
|
||||
while (rotateTable(table)) {
|
||||
double pValuePiece = computePValue(table);
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.LocusContext;
|
|||
|
||||
public class VECLodThreshold implements VariantExclusionCriterion {
|
||||
private double lodThreshold = 5.0;
|
||||
|
||||
private double lod;
|
||||
private boolean exclude;
|
||||
|
||||
public void initialize(String arguments) {
|
||||
|
|
@ -15,25 +15,22 @@ public class VECLodThreshold implements VariantExclusionCriterion {
|
|||
}
|
||||
|
||||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||
exclude = (variant.getLodBtr() < lodThreshold);
|
||||
lod = variant.getLodBtr();
|
||||
exclude = lod < lodThreshold;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
public boolean exclude(char ref, LocusContext context, rodVariants variant) {
|
||||
return (variant.getLodBtr() < lodThreshold);
|
||||
}
|
||||
|
||||
public boolean isExcludable() {
|
||||
return exclude;
|
||||
}
|
||||
|
||||
public String getStudyHeader() {
|
||||
return "";
|
||||
return "LodThreshold("+lod+")\tlod";
|
||||
}
|
||||
|
||||
public String getStudyInfo() {
|
||||
return "";
|
||||
return (exclude ? "fail" : "pass") + "\t" + lod;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class VECMappingQuality implements VariantExclusionCriterion {
|
||||
private double minQuality = 5.0;
|
||||
private double rms;
|
||||
private boolean exclude;
|
||||
|
||||
public void initialize(String arguments) {
|
||||
if (arguments != null && !arguments.isEmpty()) {
|
||||
minQuality = Double.valueOf(arguments);
|
||||
}
|
||||
}
|
||||
|
||||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
|
||||
rms = 0.0;
|
||||
for (int readIndex = 0; readIndex < reads.size(); readIndex++) {
|
||||
int qual = reads.get(readIndex).getMappingQuality();
|
||||
rms += qual * qual;
|
||||
}
|
||||
rms /= reads.size();
|
||||
rms = Math.sqrt(rms);
|
||||
}
|
||||
|
||||
public boolean isExcludable() {
|
||||
return exclude;
|
||||
}
|
||||
|
||||
public String getStudyHeader() {
|
||||
return "MappingQuality("+minQuality+")\trms";
|
||||
}
|
||||
|
||||
public String getStudyInfo() {
|
||||
return (exclude ? "fail" : "pass") + "\t" + rms;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
@ -4,12 +4,17 @@ import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
|||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
public class VECOnOffGenotypeRatio extends RatioFilter {
|
||||
final private static String statField = "onOffRatio";
|
||||
public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter {
|
||||
final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed;
|
||||
private boolean exclude;
|
||||
private double lowThreshold, highThreshold, ratio;
|
||||
|
||||
public VECOnOffGenotypeRatio() {
|
||||
super("On/Off Genotype Ratio", statField, VECOnOffGenotypeRatio.class, tail);
|
||||
public void initialize(String arguments) {
|
||||
if (arguments != null && !arguments.isEmpty()) {
|
||||
String[] argPieces = arguments.split(",");
|
||||
lowThreshold = Double.valueOf(argPieces[0]);
|
||||
highThreshold = Double.valueOf(argPieces[1]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -38,7 +43,6 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
|
|||
if ( genotype.length() > 2 )
|
||||
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
|
||||
|
||||
|
||||
int on = 0, off = 0;
|
||||
|
||||
for ( char base : BaseUtils.BASES ) {
|
||||
|
|
@ -52,4 +56,25 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
|
|||
|
||||
return new Pair<Integer, Integer>(on, off);
|
||||
}
|
||||
|
||||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
ratio = (double)counts.first / (double)counts.second;
|
||||
exclude = ratio < lowThreshold || ratio > highThreshold;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
public boolean isExcludable() {
|
||||
return exclude;
|
||||
}
|
||||
|
||||
public String getStudyHeader() {
|
||||
return "OnOffGenotype("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
|
||||
}
|
||||
|
||||
public String getStudyInfo() {
|
||||
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue