Added ability for all filters to return a probability for a given variant - interpreted as the probability that the given variant should be included in the final set. The joint probability of all the filters is computed to determine whether a variant should stay or go. At the moment, this is only visible in verbose mode (specify -V). Also removed 'learning mode'; now, filters emit important stats no matter what. Various code cleanups.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1458 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-08-25 21:17:56 +00:00
parent e5115409fa
commit f12ea3a27e
9 changed files with 86 additions and 93 deletions

View File

@@ -69,8 +69,9 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
public boolean useZeroQualityReads() { return false; }
public boolean isExcludable() {
return exclude;
public double inclusionProbability() {
// A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
}
public String getStudyHeader() {

View File

@@ -27,10 +27,15 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
depth = context.getReads().size();
}
public boolean isExcludable() {
return exclude;
public double inclusionProbability() {
// A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
}
// public boolean isExcludable() {
// return exclude;
// }
public String getStudyHeader() {
return "DepthOfCoverage("+maximum+")\tdepth";
}

View File

@@ -33,8 +33,9 @@ public class VECFisherStrand implements VariantExclusionCriterion {
}
}
public boolean isExcludable() {
return exclude;
public double inclusionProbability() {
// A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
}
public String getStudyHeader() {

View File

@@ -21,8 +21,9 @@ public class VECLodThreshold implements VariantExclusionCriterion {
public boolean useZeroQualityReads() { return false; }
public boolean isExcludable() {
return exclude;
public double inclusionProbability() {
// A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
}
public String getStudyHeader() {

View File

@@ -28,8 +28,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
exclude = rms < minQuality;
}
public boolean isExcludable() {
return exclude;
public double inclusionProbability() {
// A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
}
public String getStudyHeader() {

View File

@@ -29,8 +29,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
exclude = mq0Count > maximum;
}
public boolean isExcludable() {
return exclude;
public double inclusionProbability() {
// A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
}
public String getStudyHeader() {

View File

@@ -10,8 +10,9 @@ public class VECNull implements VariantExclusionCriterion {
public void compute(char ref, AlignmentContext context, rodVariants variant) {
}
public boolean isExcludable() {
return false;
public double inclusionProbability() {
// A hack for now until this filter is actually converted to an empirical filter
return 1.0;
}
public String getStudyHeader() {

View File

@@ -8,7 +8,8 @@ public interface VariantExclusionCriterion {
public void compute(char ref, AlignmentContext context, rodVariants variant);
public boolean isExcludable();
//public boolean isExcludable();
public double inclusionProbability();
public String getStudyHeader();

View File

@@ -24,19 +24,16 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD;
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'", required=false) public String[] EXCLUSIONS;
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
@Argument(fullName="learning_mode", shortName="LM", doc="Output parseable information on each filter that can then be fed back to the filter as a training set") public Boolean LEARNING = false;
@Argument(fullName="truth", shortName="truth", doc="Operate on truth set only") public Boolean TRUTH = false;
private List<Class<? extends IndependentVariantFeature>> featureClasses;
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
private PrintWriter vwriter;
private HashMap<String, PrintWriter> ewriters;
private HashMap<String, PrintWriter> swriters;
private final String STUDY_NAME = "study";
private final String knownSNPDBName = "dbSNP";
private PrintWriter variantsWriter;
private PrintWriter paramsWriter;
private HashMap<String, PrintWriter> exclusionWriters;
private ArrayList<IndependentVariantFeature> requestedFeatures;
private ArrayList<VariantExclusionCriterion> requestedExclusions;
@@ -55,16 +52,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
}
try {
vwriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
vwriter.println(GeliTextWriter.headerLine);
variantsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
variantsWriter.println(GeliTextWriter.headerLine);
swriters = new HashMap<String, PrintWriter>();
if (LEARNING) {
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + STUDY_NAME);
swriters.put(STUDY_NAME, studyWriter);
studyWriter.print("Chr\tPosition\t");
}
paramsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".params.out");
paramsWriter.print("Chr\tPosition\t");
requestedFeatures = new ArrayList<IndependentVariantFeature>();
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
@@ -85,8 +77,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
ivf.initialize(requestedFeatureArgs);
requestedFeatures.add(ivf);
if (LEARNING)
swriters.get(STUDY_NAME).print(ivf.getStudyHeader() + "\t");
paramsWriter.print(ivf.getStudyHeader() + "\t");
} catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
} catch (IllegalAccessException e) {
@@ -98,7 +89,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
}
// Initialize requested exclusion criteria
ewriters = new HashMap<String, PrintWriter>();
exclusionWriters = new HashMap<String, PrintWriter>();
if (EXCLUSIONS != null) {
for (String requestedExclusionString : EXCLUSIONS) {
@@ -115,13 +106,12 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
vec.initialize(requestedExclusionArgs);
requestedExclusions.add(vec);
if (LEARNING)
swriters.get(STUDY_NAME).print(vec.getStudyHeader() + "\t");
paramsWriter.print(vec.getStudyHeader() + "\t");
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
writer.println(GeliTextWriter.headerLine);
ewriters.put(exclusionClassName, writer);
exclusionWriters.put(exclusionClassName, writer);
} catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
} catch (IllegalAccessException e) {
@@ -132,8 +122,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
}
}
if (LEARNING)
swriters.get(STUDY_NAME).print("inDbSNP\tinHapMap\tisHet\n");
paramsWriter.print("inDbSNP\tinHapMap\tisHet\n");
} catch (FileNotFoundException e) {
throw new StingException(String.format("Could not open file(s) for writing"));
}
@@ -186,21 +175,13 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
rodGFF hapmapSite = null;
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
if ( datum != null && datum instanceof rodGFF ) {
hapmapSite = (rodGFF) datum;
}
}
// Ignore places where we don't have a variant or where the reference base is ambiguous.
if (variant != null && (!TRUTH || hapmapSite != null) && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
if (VERBOSE) { out.println("Original:\n " + variant); }
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
if (LEARNING) {
swriters.get(STUDY_NAME).print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
}
if (VERBOSE) { out.println("Original:\n" + variant); }
paramsWriter.print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
// Apply features that modify the likelihoods and LOD scores
for ( IndependentVariantFeature ivf : requestedFeatures ) {
@@ -212,59 +193,62 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
if (LEARNING) {
swriters.get(STUDY_NAME).print(ivf.getStudyInfo() + "\t");
}
paramsWriter.print(ivf.getStudyInfo() + "\t");
}
// Apply exclusion tests that accept or reject the variant call
ArrayList<String> exclusionResults = new ArrayList<String>();
// we need to provide an alternative context without mapping quality 0 reads
// We need to provide an alternative context without mapping quality 0 reads
// for those exclusion criterion that don't want them
AlignmentContext Q0freeContext = removeQ0reads(context);
// Apply exclusion tests that score the variant call
if (VERBOSE) {
out.print("InclusionProbabilities:[");
}
// Use the filters to score the variant
double jointInclusionProbability = 1.0;
for ( VariantExclusionCriterion vec : requestedExclusions ) {
vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant);
String exclusionClassName = rationalizeClassName(vec.getClass());
if (vec.isExcludable()) {
exclusionResults.add(exclusionClassName);
}
Double inclusionProbability = vec.inclusionProbability();
jointInclusionProbability *= inclusionProbability;
exclusionResults.put(exclusionClassName, inclusionProbability);
if (LEARNING) {
swriters.get(STUDY_NAME).print(vec.getStudyInfo() + "\t");
}
}
if (exclusionResults.size() > 0) {
String exclusions = "";
for (int i = 0; i < exclusionResults.size(); i++) {
exclusions += exclusionResults.get(i) + (i == exclusionResults.size() - 1 ? "" : ",");
PrintWriter writer = ewriters.get(exclusionResults.get(i));
if (writer != null) {
writer.println(variant);
if (inclusionProbability < INCLUSION_THRESHOLD) {
PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
if (ewriter != null) {
ewriter.println(variant);
ewriter.flush();
}
}
if (VERBOSE) { out.printf("Exclusions: %s\n", exclusions); }
if (VERBOSE) {
out.print(exclusionClassName + "=" + inclusionProbability + ";");
}
paramsWriter.print(vec.getStudyInfo() + "\t");
}
// Decide whether we should keep the call or not
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
variantsWriter.println(variant);
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
} else {
vwriter.println(variant);
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); }
}
if (VERBOSE) { out.println(); }
if (LEARNING) {
rodDbSNP dbsnp = (rodDbSNP)tracker.lookup(knownSNPDBName, null);
if ( dbsnp == null )
swriters.get(STUDY_NAME).print("false\tfalse\t");
else
swriters.get(STUDY_NAME).print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
swriters.get(STUDY_NAME).println(GenotypeUtils.isHet(variant));
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbSNP", null);
if ( dbsnp == null ) {
paramsWriter.print("false\tfalse\t");
} else {
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
}
paramsWriter.println(GenotypeUtils.isHet(variant));
return 1;
}
@@ -312,14 +296,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
public void onTraversalDone(Integer result) {
out.printf("Processed %d loci.\n", result);
vwriter.close();
variantsWriter.close();
paramsWriter.close();
for (PrintWriter ewriter : ewriters.values()) {
for (PrintWriter ewriter : exclusionWriters.values()) {
ewriter.close();
}
for (PrintWriter swriter : swriters.values()) {
swriter.close();
}
}
}