VariantFiltrationWalker can now apply specified exclusion tests after the feature tests. For a given variant, all reasons for exclusions are printed to screen.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1061 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-06-19 21:12:01 +00:00
parent 8ac40e8e2d
commit a0a3cf2f9f
4 changed files with 124 additions and 36 deletions

View File

@ -15,13 +15,14 @@ public interface IndependentVariantFeature {
* Method so that features can initialize themselves based on a short argument string.
* At the moment, each feature is responsible for interpreting their own argument string.
*
* @param arguments
* @param arguments the arguments!
*/
public void initialize(String arguments);
/**
* Method to compute the result of this feature for each of the ten genotypes. The return value must
* be a double array of length 10 (one for each genotype) and the value must be in log10-space.
*
* @param ref the reference base
* @param context the context for the given locus
* @return a ten-element array of log-likelihood result of the feature applied to each genotype

View File

@ -0,0 +1,18 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.LocusContext;
public class VECLodThreshold implements VariantExclusionCriterion {
private double lodThreshold = 5.0;
public void initialize(String arguments) {
if (arguments != null && !arguments.isEmpty()) {
lodThreshold = Double.valueOf(arguments);
}
}
public boolean exclude(char ref, LocusContext context, rodVariants variant) {
return (variant.getLodBtr() < lodThreshold);
}
}

View File

@ -0,0 +1,10 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
public interface VariantExclusionCriterion {
public void initialize(String arguments);
public boolean exclude(char ref, LocusContext context, rodVariants variant);
}

View File

@ -12,11 +12,14 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.PackageUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.playground.gatk.walkers.variants.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
/**
* VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying
@ -25,47 +28,25 @@ import java.util.ArrayList;
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class))
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'") public String[] FEATURES;
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'") public String[] EXCLUSIONS;
@Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT;
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
@Argument(fullName="list_features", shortName="list", doc="List the available features and exit") public Boolean LIST_FEATURES = false;
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
private ArrayList<Class> featureClasses;
private ArrayList<Class> exclusionClasses;
private PrintWriter vwriter;
/**
* Trim the 'IVF' off the feature name so the user needn't specify that on the command-line.
*
* @param featureClass the feature class whose name we should rationalize
* @return the class name, minus 'IVF'
*/
private String rationalizeFeatureClassName(Class featureClass) {
String featureClassName = featureClass.getSimpleName();
return featureClassName.replaceFirst("IVF", "");
}
/**
* Returns a comma-separated list of available features the user may specify at the command-line.
*
* @return String of available features
*/
private String getAvailableFeatureClasses() {
String featureString = "";
for (int featureClassIndex = 0; featureClassIndex < featureClasses.size(); featureClassIndex++) {
featureString += rationalizeFeatureClassName(featureClasses.get(featureClassIndex)) + (featureClassIndex == featureClasses.size() - 1 ? "" : ",");
}
return featureString;
}
/**
* Prepare the output file and the list of available features.
*/
public void initialize() {
featureClasses = PackageUtils.getClassesImplementingInterface(IndependentVariantFeature.class);
if (LIST_FEATURES) {
out.println("\nAvailable features: " + getAvailableFeatureClasses() + "\n");
exclusionClasses = PackageUtils.getClassesImplementingInterface(VariantExclusionCriterion.class);
if (LIST) {
out.println("\nAvailable features: " + getAvailableClasses(featureClasses) + "\n");
out.println("\nAvailable exclusion criteria: " + getAvailableClasses(exclusionClasses) + "\n");
System.exit(0);
}
@ -78,6 +59,35 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
}
}
/**
* Trim the 'IVF' or 'VEC' off the feature/exclusion name so the user needn't specify that on the command-line.
*
* @param featureClass the feature class whose name we should rationalize
* @return the class name, minus 'IVF'
*/
private String rationalizeClassName(Class featureClass) {
String featureClassName = featureClass.getSimpleName();
String newName = featureClassName.replaceFirst("IVF", "");
newName = newName.replaceFirst("VEC", "");
return newName;
}
/**
* Returns a comma-separated list of available classes the user may specify at the command-line.
*
* @param classes an ArrayList of classes
* @return String of available classes
*/
private String getAvailableClasses(ArrayList<Class> classes) {
String availableString = "";
for (int classIndex = 0; classIndex < classes.size(); classIndex++) {
availableString += rationalizeClassName(classes.get(classIndex)) + (classIndex == classes.size() - 1 ? "" : ",");
}
return availableString;
}
/**
* Initialize the number of loci processed to zero.
*
@ -100,6 +110,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref) != -1) {
if (VERBOSE) { out.println("Original:\n " + variant); }
// Apply features that modify the likelihoods and LOD scores
for (String requestedFeatureString : FEATURES) {
String[] requestedFeaturePieces = requestedFeatureString.split(":");
String requestedFeatureName = requestedFeaturePieces[0];
@ -107,7 +118,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
int notYetSeenFeature = 0;
for ( Class featureClass : featureClasses ) {
String featureClassName = rationalizeFeatureClassName(featureClass);
String featureClassName = rationalizeClassName(featureClass);
if (requestedFeatureName.equalsIgnoreCase(featureClassName)) {
try {
@ -128,13 +139,61 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
}
if (notYetSeenFeature == featureClasses.size()) {
throw new StingException(String.format("Unknown feature '%s'. Valid features are '%s'", requestedFeatureName, getAvailableFeatureClasses()));
throw new StingException(String.format("Unknown feature '%s'. Valid features are '%s'", requestedFeatureName, getAvailableClasses(featureClasses)));
}
if (VERBOSE) { System.out.println(); }
}
vwriter.println(variant);
// Apply exclusion tests that accept or reject the variant call
ArrayList<String> exclusionResults = new ArrayList<String>();
for (String requestedExclusionString : EXCLUSIONS) {
String[] requestedExclusionPieces = requestedExclusionString.split(":");
String requestedExclusionName = requestedExclusionPieces[0];
String requestedExclusionArgs = (requestedExclusionPieces.length == 2) ? requestedExclusionPieces[1] : "";
int notYetSeenExclusion = 0;
for ( Class exclusionClass : exclusionClasses ) {
String exclusionClassName = rationalizeClassName(exclusionClass);
if (requestedExclusionName.equalsIgnoreCase(exclusionClassName)) {
try {
VariantExclusionCriterion vec = (VariantExclusionCriterion) exclusionClass.newInstance();
vec.initialize(requestedExclusionArgs);
boolean excludeResult = vec.exclude(ref, context, variant);
if (excludeResult) {
exclusionResults.add(exclusionClassName);
}
} catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
} catch (IllegalAccessException e) {
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must have no-arg constructor", exclusionClass.getSimpleName()));
}
} else {
notYetSeenExclusion++;
}
}
if (notYetSeenExclusion == exclusionClasses.size()) {
throw new StingException(String.format("Unknown exclusion '%s'. Valid features are '%s'", requestedExclusionName, getAvailableClasses(exclusionClasses)));
}
}
if (exclusionResults.size() > 0) {
if (VERBOSE) {
String exclusions = "";
for (int i = 0; i < exclusionResults.size(); i++) {
exclusions += exclusionResults.get(i) + (i == exclusionResults.size() - 1 ? "" : ",");
}
out.printf("Exclusions: %s\n", exclusions);
}
} else {
vwriter.println(variant);
}
if (VERBOSE) { out.println(); }
return 1;
}