VariantFiltrationWalker can now apply specified exclusion tests after the feature tests. For a given variant, all reasons for exclusions are printed to screen.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1061 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8ac40e8e2d
commit
a0a3cf2f9f
|
|
@ -15,13 +15,14 @@ public interface IndependentVariantFeature {
|
||||||
* Method so that features can initialize themselves based on a short argument string.
|
* Method so that features can initialize themselves based on a short argument string.
|
||||||
* At the moment, each feature is responsible for interpreting their own argument string.
|
* At the moment, each feature is responsible for interpreting their own argument string.
|
||||||
*
|
*
|
||||||
* @param arguments
|
* @param arguments the arguments!
|
||||||
*/
|
*/
|
||||||
public void initialize(String arguments);
|
public void initialize(String arguments);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method to compute the result of this feature for each of the ten genotypes. The return value must
|
* Method to compute the result of this feature for each of the ten genotypes. The return value must
|
||||||
* be a double array of length 10 (one for each genotype) and the value must be in log10-space.
|
* be a double array of length 10 (one for each genotype) and the value must be in log10-space.
|
||||||
|
*
|
||||||
* @param ref the reference base
|
* @param ref the reference base
|
||||||
* @param context the context for the given locus
|
* @param context the context for the given locus
|
||||||
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
|
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
|
||||||
|
public class VECLodThreshold implements VariantExclusionCriterion {
|
||||||
|
private double lodThreshold = 5.0;
|
||||||
|
|
||||||
|
public void initialize(String arguments) {
|
||||||
|
if (arguments != null && !arguments.isEmpty()) {
|
||||||
|
lodThreshold = Double.valueOf(arguments);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean exclude(char ref, LocusContext context, rodVariants variant) {
|
||||||
|
return (variant.getLodBtr() < lodThreshold);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
|
|
||||||
|
public interface VariantExclusionCriterion {
|
||||||
|
public void initialize(String arguments);
|
||||||
|
|
||||||
|
public boolean exclude(char ref, LocusContext context, rodVariants variant);
|
||||||
|
}
|
||||||
|
|
@ -12,11 +12,14 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.PackageUtils;
|
import org.broadinstitute.sting.utils.PackageUtils;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.variants.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying
|
* VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying
|
||||||
|
|
@ -25,47 +28,25 @@ import java.util.ArrayList;
|
||||||
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class))
|
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class))
|
||||||
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'") public String[] FEATURES;
|
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'") public String[] FEATURES;
|
||||||
|
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:arguments]'") public String[] EXCLUSIONS;
|
||||||
@Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT;
|
@Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT;
|
||||||
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
||||||
@Argument(fullName="list_features", shortName="list", doc="List the available features and exit") public Boolean LIST_FEATURES = false;
|
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
|
||||||
|
|
||||||
private ArrayList<Class> featureClasses;
|
private ArrayList<Class> featureClasses;
|
||||||
|
private ArrayList<Class> exclusionClasses;
|
||||||
private PrintWriter vwriter;
|
private PrintWriter vwriter;
|
||||||
|
|
||||||
/**
|
|
||||||
* Trim the 'IVF' off the feature name so the user needn't specify that on the command-line.
|
|
||||||
*
|
|
||||||
* @param featureClass the feature class whose name we should rationalize
|
|
||||||
* @return the class name, minus 'IVF'
|
|
||||||
*/
|
|
||||||
private String rationalizeFeatureClassName(Class featureClass) {
|
|
||||||
String featureClassName = featureClass.getSimpleName();
|
|
||||||
return featureClassName.replaceFirst("IVF", "");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a comma-separated list of available features the user may specify at the command-line.
|
|
||||||
*
|
|
||||||
* @return String of available features
|
|
||||||
*/
|
|
||||||
private String getAvailableFeatureClasses() {
|
|
||||||
String featureString = "";
|
|
||||||
|
|
||||||
for (int featureClassIndex = 0; featureClassIndex < featureClasses.size(); featureClassIndex++) {
|
|
||||||
featureString += rationalizeFeatureClassName(featureClasses.get(featureClassIndex)) + (featureClassIndex == featureClasses.size() - 1 ? "" : ",");
|
|
||||||
}
|
|
||||||
|
|
||||||
return featureString;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prepare the output file and the list of available features.
|
* Prepare the output file and the list of available features.
|
||||||
*/
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
featureClasses = PackageUtils.getClassesImplementingInterface(IndependentVariantFeature.class);
|
featureClasses = PackageUtils.getClassesImplementingInterface(IndependentVariantFeature.class);
|
||||||
|
exclusionClasses = PackageUtils.getClassesImplementingInterface(VariantExclusionCriterion.class);
|
||||||
if (LIST_FEATURES) {
|
|
||||||
out.println("\nAvailable features: " + getAvailableFeatureClasses() + "\n");
|
if (LIST) {
|
||||||
|
out.println("\nAvailable features: " + getAvailableClasses(featureClasses) + "\n");
|
||||||
|
out.println("\nAvailable exclusion criteria: " + getAvailableClasses(exclusionClasses) + "\n");
|
||||||
System.exit(0);
|
System.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -78,6 +59,35 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trim the 'IVF' or 'VEC' off the feature/exclusion name so the user needn't specify that on the command-line.
|
||||||
|
*
|
||||||
|
* @param featureClass the feature class whose name we should rationalize
|
||||||
|
* @return the class name, minus 'IVF'
|
||||||
|
*/
|
||||||
|
private String rationalizeClassName(Class featureClass) {
|
||||||
|
String featureClassName = featureClass.getSimpleName();
|
||||||
|
String newName = featureClassName.replaceFirst("IVF", "");
|
||||||
|
newName = newName.replaceFirst("VEC", "");
|
||||||
|
return newName;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a comma-separated list of available classes the user may specify at the command-line.
|
||||||
|
*
|
||||||
|
* @param classes an ArrayList of classes
|
||||||
|
* @return String of available classes
|
||||||
|
*/
|
||||||
|
private String getAvailableClasses(ArrayList<Class> classes) {
|
||||||
|
String availableString = "";
|
||||||
|
|
||||||
|
for (int classIndex = 0; classIndex < classes.size(); classIndex++) {
|
||||||
|
availableString += rationalizeClassName(classes.get(classIndex)) + (classIndex == classes.size() - 1 ? "" : ",");
|
||||||
|
}
|
||||||
|
|
||||||
|
return availableString;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize the number of loci processed to zero.
|
* Initialize the number of loci processed to zero.
|
||||||
*
|
*
|
||||||
|
|
@ -100,6 +110,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref) != -1) {
|
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref) != -1) {
|
||||||
if (VERBOSE) { out.println("Original:\n " + variant); }
|
if (VERBOSE) { out.println("Original:\n " + variant); }
|
||||||
|
|
||||||
|
// Apply features that modify the likelihoods and LOD scores
|
||||||
for (String requestedFeatureString : FEATURES) {
|
for (String requestedFeatureString : FEATURES) {
|
||||||
String[] requestedFeaturePieces = requestedFeatureString.split(":");
|
String[] requestedFeaturePieces = requestedFeatureString.split(":");
|
||||||
String requestedFeatureName = requestedFeaturePieces[0];
|
String requestedFeatureName = requestedFeaturePieces[0];
|
||||||
|
|
@ -107,7 +118,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
int notYetSeenFeature = 0;
|
int notYetSeenFeature = 0;
|
||||||
for ( Class featureClass : featureClasses ) {
|
for ( Class featureClass : featureClasses ) {
|
||||||
String featureClassName = rationalizeFeatureClassName(featureClass);
|
String featureClassName = rationalizeClassName(featureClass);
|
||||||
|
|
||||||
if (requestedFeatureName.equalsIgnoreCase(featureClassName)) {
|
if (requestedFeatureName.equalsIgnoreCase(featureClassName)) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -128,13 +139,61 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (notYetSeenFeature == featureClasses.size()) {
|
if (notYetSeenFeature == featureClasses.size()) {
|
||||||
throw new StingException(String.format("Unknown feature '%s'. Valid features are '%s'", requestedFeatureName, getAvailableFeatureClasses()));
|
throw new StingException(String.format("Unknown feature '%s'. Valid features are '%s'", requestedFeatureName, getAvailableClasses(featureClasses)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VERBOSE) { System.out.println(); }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vwriter.println(variant);
|
// Apply exclusion tests that accept or reject the variant call
|
||||||
|
ArrayList<String> exclusionResults = new ArrayList<String>();
|
||||||
|
|
||||||
|
for (String requestedExclusionString : EXCLUSIONS) {
|
||||||
|
String[] requestedExclusionPieces = requestedExclusionString.split(":");
|
||||||
|
String requestedExclusionName = requestedExclusionPieces[0];
|
||||||
|
String requestedExclusionArgs = (requestedExclusionPieces.length == 2) ? requestedExclusionPieces[1] : "";
|
||||||
|
|
||||||
|
int notYetSeenExclusion = 0;
|
||||||
|
for ( Class exclusionClass : exclusionClasses ) {
|
||||||
|
String exclusionClassName = rationalizeClassName(exclusionClass);
|
||||||
|
|
||||||
|
if (requestedExclusionName.equalsIgnoreCase(exclusionClassName)) {
|
||||||
|
try {
|
||||||
|
VariantExclusionCriterion vec = (VariantExclusionCriterion) exclusionClass.newInstance();
|
||||||
|
vec.initialize(requestedExclusionArgs);
|
||||||
|
|
||||||
|
boolean excludeResult = vec.exclude(ref, context, variant);
|
||||||
|
|
||||||
|
if (excludeResult) {
|
||||||
|
exclusionResults.add(exclusionClassName);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (InstantiationException e) {
|
||||||
|
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
|
||||||
|
} catch (IllegalAccessException e) {
|
||||||
|
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must have no-arg constructor", exclusionClass.getSimpleName()));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
notYetSeenExclusion++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (notYetSeenExclusion == exclusionClasses.size()) {
|
||||||
|
throw new StingException(String.format("Unknown exclusion '%s'. Valid features are '%s'", requestedExclusionName, getAvailableClasses(exclusionClasses)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (exclusionResults.size() > 0) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
String exclusions = "";
|
||||||
|
for (int i = 0; i < exclusionResults.size(); i++) {
|
||||||
|
exclusions += exclusionResults.get(i) + (i == exclusionResults.size() - 1 ? "" : ",");
|
||||||
|
}
|
||||||
|
out.printf("Exclusions: %s\n", exclusions);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
vwriter.println(variant);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (VERBOSE) { out.println(); }
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue