Added ability for all filters to return a probability for a given variant - interpreted as the probability that the given variant should be included in the final set. The joint probability of all the filters is computed to determine whether a variant should stay or go. At the moment, this is only visible in verbose mode (specify -V). Also removed 'learning mode'; now, filters emit important stats no matter what. Various code cleanups.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1458 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-08-25 21:17:56 +00:00
parent e5115409fa
commit f12ea3a27e
9 changed files with 86 additions and 93 deletions

View File

@ -69,8 +69,9 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
public boolean useZeroQualityReads() { return false; } public boolean useZeroQualityReads() { return false; }
public boolean isExcludable() { public double inclusionProbability() {
return exclude; // A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
} }
public String getStudyHeader() { public String getStudyHeader() {

View File

@ -27,10 +27,15 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
depth = context.getReads().size(); depth = context.getReads().size();
} }
public boolean isExcludable() { public double inclusionProbability() {
return exclude; // A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
} }
// public boolean isExcludable() {
// return exclude;
// }
public String getStudyHeader() { public String getStudyHeader() {
return "DepthOfCoverage("+maximum+")\tdepth"; return "DepthOfCoverage("+maximum+")\tdepth";
} }

View File

@ -33,8 +33,9 @@ public class VECFisherStrand implements VariantExclusionCriterion {
} }
} }
public boolean isExcludable() { public double inclusionProbability() {
return exclude; // A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
} }
public String getStudyHeader() { public String getStudyHeader() {

View File

@ -21,8 +21,9 @@ public class VECLodThreshold implements VariantExclusionCriterion {
public boolean useZeroQualityReads() { return false; } public boolean useZeroQualityReads() { return false; }
public boolean isExcludable() { public double inclusionProbability() {
return exclude; // A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
} }
public String getStudyHeader() { public String getStudyHeader() {

View File

@ -28,8 +28,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
exclude = rms < minQuality; exclude = rms < minQuality;
} }
public boolean isExcludable() { public double inclusionProbability() {
return exclude; // A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
} }
public String getStudyHeader() { public String getStudyHeader() {

View File

@ -29,8 +29,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
exclude = mq0Count > maximum; exclude = mq0Count > maximum;
} }
public boolean isExcludable() { public double inclusionProbability() {
return exclude; // A hack for now until this filter is actually converted to an empirical filter
return exclude ? 0.0 : 1.0;
} }
public String getStudyHeader() { public String getStudyHeader() {

View File

@ -10,8 +10,9 @@ public class VECNull implements VariantExclusionCriterion {
public void compute(char ref, AlignmentContext context, rodVariants variant) { public void compute(char ref, AlignmentContext context, rodVariants variant) {
} }
public boolean isExcludable() { public double inclusionProbability() {
return false; // A hack for now until this filter is actually converted to an empirical filter
return 1.0;
} }
public String getStudyHeader() { public String getStudyHeader() {

View File

@ -8,7 +8,8 @@ public interface VariantExclusionCriterion {
public void compute(char ref, AlignmentContext context, rodVariants variant); public void compute(char ref, AlignmentContext context, rodVariants variant);
public boolean isExcludable(); //public boolean isExcludable();
public double inclusionProbability();
public String getStudyHeader(); public String getStudyHeader();

View File

@ -24,19 +24,16 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD; @Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD;
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES; @Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'", required=false) public String[] EXCLUSIONS; @Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'", required=false) public String[] EXCLUSIONS;
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false; @Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false; @Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
@Argument(fullName="learning_mode", shortName="LM", doc="Output parseable information on each filter that can then be fed back to the filter as a training set") public Boolean LEARNING = false;
@Argument(fullName="truth", shortName="truth", doc="Operate on truth set only") public Boolean TRUTH = false;
private List<Class<? extends IndependentVariantFeature>> featureClasses; private List<Class<? extends IndependentVariantFeature>> featureClasses;
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses; private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
private PrintWriter vwriter; private PrintWriter variantsWriter;
private HashMap<String, PrintWriter> ewriters; private PrintWriter paramsWriter;
private HashMap<String, PrintWriter> swriters; private HashMap<String, PrintWriter> exclusionWriters;
private final String STUDY_NAME = "study";
private final String knownSNPDBName = "dbSNP";
private ArrayList<IndependentVariantFeature> requestedFeatures; private ArrayList<IndependentVariantFeature> requestedFeatures;
private ArrayList<VariantExclusionCriterion> requestedExclusions; private ArrayList<VariantExclusionCriterion> requestedExclusions;
@ -55,16 +52,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
} }
try { try {
vwriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls"); variantsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
vwriter.println(GeliTextWriter.headerLine); variantsWriter.println(GeliTextWriter.headerLine);
swriters = new HashMap<String, PrintWriter>(); paramsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".params.out");
paramsWriter.print("Chr\tPosition\t");
if (LEARNING) {
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + STUDY_NAME);
swriters.put(STUDY_NAME, studyWriter);
studyWriter.print("Chr\tPosition\t");
}
requestedFeatures = new ArrayList<IndependentVariantFeature>(); requestedFeatures = new ArrayList<IndependentVariantFeature>();
requestedExclusions = new ArrayList<VariantExclusionCriterion>(); requestedExclusions = new ArrayList<VariantExclusionCriterion>();
@ -85,8 +77,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
ivf.initialize(requestedFeatureArgs); ivf.initialize(requestedFeatureArgs);
requestedFeatures.add(ivf); requestedFeatures.add(ivf);
if (LEARNING) paramsWriter.print(ivf.getStudyHeader() + "\t");
swriters.get(STUDY_NAME).print(ivf.getStudyHeader() + "\t");
} catch (InstantiationException e) { } catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName())); throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
} catch (IllegalAccessException e) { } catch (IllegalAccessException e) {
@ -98,7 +89,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
} }
// Initialize requested exclusion criteria // Initialize requested exclusion criteria
ewriters = new HashMap<String, PrintWriter>(); exclusionWriters = new HashMap<String, PrintWriter>();
if (EXCLUSIONS != null) { if (EXCLUSIONS != null) {
for (String requestedExclusionString : EXCLUSIONS) { for (String requestedExclusionString : EXCLUSIONS) {
@ -115,13 +106,12 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
vec.initialize(requestedExclusionArgs); vec.initialize(requestedExclusionArgs);
requestedExclusions.add(vec); requestedExclusions.add(vec);
if (LEARNING) paramsWriter.print(vec.getStudyHeader() + "\t");
swriters.get(STUDY_NAME).print(vec.getStudyHeader() + "\t");
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls"); PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
writer.println(GeliTextWriter.headerLine); writer.println(GeliTextWriter.headerLine);
ewriters.put(exclusionClassName, writer); exclusionWriters.put(exclusionClassName, writer);
} catch (InstantiationException e) { } catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName())); throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
} catch (IllegalAccessException e) { } catch (IllegalAccessException e) {
@ -132,8 +122,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
} }
} }
if (LEARNING) paramsWriter.print("inDbSNP\tinHapMap\tisHet\n");
swriters.get(STUDY_NAME).print("inDbSNP\tinHapMap\tisHet\n");
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException(String.format("Could not open file(s) for writing")); throw new StingException(String.format("Could not open file(s) for writing"));
} }
@ -186,21 +175,13 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
rodVariants variant = (rodVariants) tracker.lookup("variant", null); rodVariants variant = (rodVariants) tracker.lookup("variant", null);
rodGFF hapmapSite = null;
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
if ( datum != null && datum instanceof rodGFF ) {
hapmapSite = (rodGFF) datum;
}
}
// Ignore places where we don't have a variant or where the reference base is ambiguous. // Ignore places where we don't have a variant or where the reference base is ambiguous.
if (variant != null && (!TRUTH || hapmapSite != null) && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) { if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
if (VERBOSE) { out.println("Original:\n " + variant); } HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
if (LEARNING) { if (VERBOSE) { out.println("Original:\n" + variant); }
swriters.get(STUDY_NAME).print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
} paramsWriter.print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
// Apply features that modify the likelihoods and LOD scores // Apply features that modify the likelihoods and LOD scores
for ( IndependentVariantFeature ivf : requestedFeatures ) { for ( IndependentVariantFeature ivf : requestedFeatures ) {
@ -212,59 +193,62 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); } if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
if (LEARNING) { paramsWriter.print(ivf.getStudyInfo() + "\t");
swriters.get(STUDY_NAME).print(ivf.getStudyInfo() + "\t");
}
} }
// Apply exclusion tests that accept or reject the variant call // We need to provide an alternative context without mapping quality 0 reads
ArrayList<String> exclusionResults = new ArrayList<String>();
// we need to provide an alternative context without mapping quality 0 reads
// for those exclusion criterion that don't want them // for those exclusion criterion that don't want them
AlignmentContext Q0freeContext = removeQ0reads(context); AlignmentContext Q0freeContext = removeQ0reads(context);
// Apply exclusion tests that score the variant call
if (VERBOSE) {
out.print("InclusionProbabilities:[");
}
// Use the filters to score the variant
double jointInclusionProbability = 1.0;
for ( VariantExclusionCriterion vec : requestedExclusions ) { for ( VariantExclusionCriterion vec : requestedExclusions ) {
vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant); vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant);
String exclusionClassName = rationalizeClassName(vec.getClass()); String exclusionClassName = rationalizeClassName(vec.getClass());
if (vec.isExcludable()) { Double inclusionProbability = vec.inclusionProbability();
exclusionResults.add(exclusionClassName); jointInclusionProbability *= inclusionProbability;
} exclusionResults.put(exclusionClassName, inclusionProbability);
if (LEARNING) { if (inclusionProbability < INCLUSION_THRESHOLD) {
swriters.get(STUDY_NAME).print(vec.getStudyInfo() + "\t"); PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
if (ewriter != null) {
ewriter.println(variant);
ewriter.flush();
} }
} }
if (exclusionResults.size() > 0) { if (VERBOSE) {
String exclusions = ""; out.print(exclusionClassName + "=" + inclusionProbability + ";");
for (int i = 0; i < exclusionResults.size(); i++) {
exclusions += exclusionResults.get(i) + (i == exclusionResults.size() - 1 ? "" : ",");
PrintWriter writer = ewriters.get(exclusionResults.get(i));
if (writer != null) {
writer.println(variant);
}
} }
if (VERBOSE) { out.printf("Exclusions: %s\n", exclusions); } paramsWriter.print(vec.getStudyInfo() + "\t");
}
// Decide whether we should keep the call or not
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
variantsWriter.println(variant);
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
} else { } else {
vwriter.println(variant); if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); }
} }
if (VERBOSE) { out.println(); } rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbSNP", null);
if ( dbsnp == null ) {
if (LEARNING) { paramsWriter.print("false\tfalse\t");
rodDbSNP dbsnp = (rodDbSNP)tracker.lookup(knownSNPDBName, null); } else {
if ( dbsnp == null ) paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
swriters.get(STUDY_NAME).print("false\tfalse\t");
else
swriters.get(STUDY_NAME).print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
swriters.get(STUDY_NAME).println(GenotypeUtils.isHet(variant));
} }
paramsWriter.println(GenotypeUtils.isHet(variant));
return 1; return 1;
} }
@ -312,14 +296,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
public void onTraversalDone(Integer result) { public void onTraversalDone(Integer result) {
out.printf("Processed %d loci.\n", result); out.printf("Processed %d loci.\n", result);
vwriter.close(); variantsWriter.close();
paramsWriter.close();
for (PrintWriter ewriter : ewriters.values()) { for (PrintWriter ewriter : exclusionWriters.values()) {
ewriter.close(); ewriter.close();
} }
for (PrintWriter swriter : swriters.values()) {
swriter.close();
}
} }
} }