Added ability for all filters to return a probability for a given variant - interpreted as the probability that the given variant should be included in the final set. The joint probability of all the filters is computed to determine whether a variant should stay or go. At the moment, this is only visible in verbose mode (specify -V). Also removed 'learning mode'; now, filters emit important stats no matter what. Various code cleanups.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1458 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e5115409fa
commit
f12ea3a27e
|
|
@ -69,8 +69,9 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
public boolean isExcludable() {
|
public double inclusionProbability() {
|
||||||
return exclude;
|
// A hack for now until this filter is actually converted to an empirical filter
|
||||||
|
return exclude ? 0.0 : 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
|
|
|
||||||
|
|
@ -27,10 +27,15 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
||||||
depth = context.getReads().size();
|
depth = context.getReads().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isExcludable() {
|
public double inclusionProbability() {
|
||||||
return exclude;
|
// A hack for now until this filter is actually converted to an empirical filter
|
||||||
|
return exclude ? 0.0 : 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// public boolean isExcludable() {
|
||||||
|
// return exclude;
|
||||||
|
// }
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
return "DepthOfCoverage("+maximum+")\tdepth";
|
return "DepthOfCoverage("+maximum+")\tdepth";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -33,8 +33,9 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isExcludable() {
|
public double inclusionProbability() {
|
||||||
return exclude;
|
// A hack for now until this filter is actually converted to an empirical filter
|
||||||
|
return exclude ? 0.0 : 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
|
|
|
||||||
|
|
@ -21,8 +21,9 @@ public class VECLodThreshold implements VariantExclusionCriterion {
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
public boolean isExcludable() {
|
public double inclusionProbability() {
|
||||||
return exclude;
|
// A hack for now until this filter is actually converted to an empirical filter
|
||||||
|
return exclude ? 0.0 : 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
|
|
|
||||||
|
|
@ -28,8 +28,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
|
||||||
exclude = rms < minQuality;
|
exclude = rms < minQuality;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isExcludable() {
|
public double inclusionProbability() {
|
||||||
return exclude;
|
// A hack for now until this filter is actually converted to an empirical filter
|
||||||
|
return exclude ? 0.0 : 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
|
|
|
||||||
|
|
@ -29,8 +29,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
|
||||||
exclude = mq0Count > maximum;
|
exclude = mq0Count > maximum;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isExcludable() {
|
public double inclusionProbability() {
|
||||||
return exclude;
|
// A hack for now until this filter is actually converted to an empirical filter
|
||||||
|
return exclude ? 0.0 : 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,9 @@ public class VECNull implements VariantExclusionCriterion {
|
||||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isExcludable() {
|
public double inclusionProbability() {
|
||||||
return false;
|
// A hack for now until this filter is actually converted to an empirical filter
|
||||||
|
return 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getStudyHeader() {
|
public String getStudyHeader() {
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,8 @@ public interface VariantExclusionCriterion {
|
||||||
|
|
||||||
public void compute(char ref, AlignmentContext context, rodVariants variant);
|
public void compute(char ref, AlignmentContext context, rodVariants variant);
|
||||||
|
|
||||||
public boolean isExcludable();
|
//public boolean isExcludable();
|
||||||
|
public double inclusionProbability();
|
||||||
|
|
||||||
public String getStudyHeader();
|
public String getStudyHeader();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,19 +24,16 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
@Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD;
|
@Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD;
|
||||||
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
|
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
|
||||||
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'", required=false) public String[] EXCLUSIONS;
|
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'", required=false) public String[] EXCLUSIONS;
|
||||||
|
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
|
||||||
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
||||||
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
|
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
|
||||||
@Argument(fullName="learning_mode", shortName="LM", doc="Output parseable information on each filter that can then be fed back to the filter as a training set") public Boolean LEARNING = false;
|
|
||||||
@Argument(fullName="truth", shortName="truth", doc="Operate on truth set only") public Boolean TRUTH = false;
|
|
||||||
|
|
||||||
private List<Class<? extends IndependentVariantFeature>> featureClasses;
|
private List<Class<? extends IndependentVariantFeature>> featureClasses;
|
||||||
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
|
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
|
||||||
|
|
||||||
private PrintWriter vwriter;
|
private PrintWriter variantsWriter;
|
||||||
private HashMap<String, PrintWriter> ewriters;
|
private PrintWriter paramsWriter;
|
||||||
private HashMap<String, PrintWriter> swriters;
|
private HashMap<String, PrintWriter> exclusionWriters;
|
||||||
private final String STUDY_NAME = "study";
|
|
||||||
private final String knownSNPDBName = "dbSNP";
|
|
||||||
|
|
||||||
private ArrayList<IndependentVariantFeature> requestedFeatures;
|
private ArrayList<IndependentVariantFeature> requestedFeatures;
|
||||||
private ArrayList<VariantExclusionCriterion> requestedExclusions;
|
private ArrayList<VariantExclusionCriterion> requestedExclusions;
|
||||||
|
|
@ -55,16 +52,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
vwriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
|
variantsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
|
||||||
vwriter.println(GeliTextWriter.headerLine);
|
variantsWriter.println(GeliTextWriter.headerLine);
|
||||||
|
|
||||||
swriters = new HashMap<String, PrintWriter>();
|
paramsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".params.out");
|
||||||
|
paramsWriter.print("Chr\tPosition\t");
|
||||||
if (LEARNING) {
|
|
||||||
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + STUDY_NAME);
|
|
||||||
swriters.put(STUDY_NAME, studyWriter);
|
|
||||||
studyWriter.print("Chr\tPosition\t");
|
|
||||||
}
|
|
||||||
|
|
||||||
requestedFeatures = new ArrayList<IndependentVariantFeature>();
|
requestedFeatures = new ArrayList<IndependentVariantFeature>();
|
||||||
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
|
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
|
||||||
|
|
@ -85,8 +77,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
ivf.initialize(requestedFeatureArgs);
|
ivf.initialize(requestedFeatureArgs);
|
||||||
requestedFeatures.add(ivf);
|
requestedFeatures.add(ivf);
|
||||||
|
|
||||||
if (LEARNING)
|
paramsWriter.print(ivf.getStudyHeader() + "\t");
|
||||||
swriters.get(STUDY_NAME).print(ivf.getStudyHeader() + "\t");
|
|
||||||
} catch (InstantiationException e) {
|
} catch (InstantiationException e) {
|
||||||
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
||||||
} catch (IllegalAccessException e) {
|
} catch (IllegalAccessException e) {
|
||||||
|
|
@ -98,7 +89,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize requested exclusion criteria
|
// Initialize requested exclusion criteria
|
||||||
ewriters = new HashMap<String, PrintWriter>();
|
exclusionWriters = new HashMap<String, PrintWriter>();
|
||||||
|
|
||||||
if (EXCLUSIONS != null) {
|
if (EXCLUSIONS != null) {
|
||||||
for (String requestedExclusionString : EXCLUSIONS) {
|
for (String requestedExclusionString : EXCLUSIONS) {
|
||||||
|
|
@ -115,13 +106,12 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
vec.initialize(requestedExclusionArgs);
|
vec.initialize(requestedExclusionArgs);
|
||||||
requestedExclusions.add(vec);
|
requestedExclusions.add(vec);
|
||||||
|
|
||||||
if (LEARNING)
|
paramsWriter.print(vec.getStudyHeader() + "\t");
|
||||||
swriters.get(STUDY_NAME).print(vec.getStudyHeader() + "\t");
|
|
||||||
|
|
||||||
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
|
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
|
||||||
writer.println(GeliTextWriter.headerLine);
|
writer.println(GeliTextWriter.headerLine);
|
||||||
|
|
||||||
ewriters.put(exclusionClassName, writer);
|
exclusionWriters.put(exclusionClassName, writer);
|
||||||
} catch (InstantiationException e) {
|
} catch (InstantiationException e) {
|
||||||
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
|
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
|
||||||
} catch (IllegalAccessException e) {
|
} catch (IllegalAccessException e) {
|
||||||
|
|
@ -132,8 +122,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (LEARNING)
|
paramsWriter.print("inDbSNP\tinHapMap\tisHet\n");
|
||||||
swriters.get(STUDY_NAME).print("inDbSNP\tinHapMap\tisHet\n");
|
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
throw new StingException(String.format("Could not open file(s) for writing"));
|
||||||
}
|
}
|
||||||
|
|
@ -186,21 +175,13 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
|
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
|
||||||
|
|
||||||
rodGFF hapmapSite = null;
|
|
||||||
|
|
||||||
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
|
|
||||||
if ( datum != null && datum instanceof rodGFF ) {
|
|
||||||
hapmapSite = (rodGFF) datum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ignore places where we don't have a variant or where the reference base is ambiguous.
|
// Ignore places where we don't have a variant or where the reference base is ambiguous.
|
||||||
if (variant != null && (!TRUTH || hapmapSite != null) && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
|
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
|
||||||
if (VERBOSE) { out.println("Original:\n " + variant); }
|
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
|
||||||
|
|
||||||
if (LEARNING) {
|
if (VERBOSE) { out.println("Original:\n" + variant); }
|
||||||
swriters.get(STUDY_NAME).print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
|
|
||||||
}
|
paramsWriter.print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
|
||||||
|
|
||||||
// Apply features that modify the likelihoods and LOD scores
|
// Apply features that modify the likelihoods and LOD scores
|
||||||
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
||||||
|
|
@ -212,59 +193,62 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
||||||
|
|
||||||
if (LEARNING) {
|
paramsWriter.print(ivf.getStudyInfo() + "\t");
|
||||||
swriters.get(STUDY_NAME).print(ivf.getStudyInfo() + "\t");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply exclusion tests that accept or reject the variant call
|
// We need to provide an alternative context without mapping quality 0 reads
|
||||||
ArrayList<String> exclusionResults = new ArrayList<String>();
|
|
||||||
|
|
||||||
// we need to provide an alternative context without mapping quality 0 reads
|
|
||||||
// for those exclusion criterion that don't want them
|
// for those exclusion criterion that don't want them
|
||||||
AlignmentContext Q0freeContext = removeQ0reads(context);
|
AlignmentContext Q0freeContext = removeQ0reads(context);
|
||||||
|
|
||||||
|
// Apply exclusion tests that score the variant call
|
||||||
|
if (VERBOSE) {
|
||||||
|
out.print("InclusionProbabilities:[");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the filters to score the variant
|
||||||
|
double jointInclusionProbability = 1.0;
|
||||||
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
||||||
vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant);
|
vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant);
|
||||||
|
|
||||||
String exclusionClassName = rationalizeClassName(vec.getClass());
|
String exclusionClassName = rationalizeClassName(vec.getClass());
|
||||||
|
|
||||||
if (vec.isExcludable()) {
|
Double inclusionProbability = vec.inclusionProbability();
|
||||||
exclusionResults.add(exclusionClassName);
|
jointInclusionProbability *= inclusionProbability;
|
||||||
}
|
exclusionResults.put(exclusionClassName, inclusionProbability);
|
||||||
|
|
||||||
if (LEARNING) {
|
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
||||||
swriters.get(STUDY_NAME).print(vec.getStudyInfo() + "\t");
|
PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
|
||||||
|
if (ewriter != null) {
|
||||||
|
ewriter.println(variant);
|
||||||
|
ewriter.flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (exclusionResults.size() > 0) {
|
if (VERBOSE) {
|
||||||
String exclusions = "";
|
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
||||||
for (int i = 0; i < exclusionResults.size(); i++) {
|
|
||||||
exclusions += exclusionResults.get(i) + (i == exclusionResults.size() - 1 ? "" : ",");
|
|
||||||
|
|
||||||
PrintWriter writer = ewriters.get(exclusionResults.get(i));
|
|
||||||
if (writer != null) {
|
|
||||||
writer.println(variant);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VERBOSE) { out.printf("Exclusions: %s\n", exclusions); }
|
paramsWriter.print(vec.getStudyInfo() + "\t");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decide whether we should keep the call or not
|
||||||
|
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
||||||
|
variantsWriter.println(variant);
|
||||||
|
|
||||||
|
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
||||||
} else {
|
} else {
|
||||||
vwriter.println(variant);
|
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); }
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VERBOSE) { out.println(); }
|
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbSNP", null);
|
||||||
|
if ( dbsnp == null ) {
|
||||||
if (LEARNING) {
|
paramsWriter.print("false\tfalse\t");
|
||||||
rodDbSNP dbsnp = (rodDbSNP)tracker.lookup(knownSNPDBName, null);
|
} else {
|
||||||
if ( dbsnp == null )
|
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
||||||
swriters.get(STUDY_NAME).print("false\tfalse\t");
|
|
||||||
else
|
|
||||||
swriters.get(STUDY_NAME).print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
|
||||||
swriters.get(STUDY_NAME).println(GenotypeUtils.isHet(variant));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
paramsWriter.println(GenotypeUtils.isHet(variant));
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -312,14 +296,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
out.printf("Processed %d loci.\n", result);
|
out.printf("Processed %d loci.\n", result);
|
||||||
|
|
||||||
vwriter.close();
|
variantsWriter.close();
|
||||||
|
paramsWriter.close();
|
||||||
|
|
||||||
for (PrintWriter ewriter : ewriters.values()) {
|
for (PrintWriter ewriter : exclusionWriters.values()) {
|
||||||
ewriter.close();
|
ewriter.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (PrintWriter swriter : swriters.values()) {
|
|
||||||
swriter.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue