Added javadocs. Now throws an exception if an unknown feature is specified. General cleanup.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1055 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-06-18 23:28:38 +00:00
parent 284fd6a5fb
commit ed7afd8b70
3 changed files with 126 additions and 50 deletions

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.MathUtils;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import java.util.List;
@ -16,8 +16,6 @@ public class IVFBinomialStrand implements IndependentVariantFeature {
}
}
public String getFeatureName() { return "binomial"; }
public double[] compute(char ref, LocusContext context) {
double[] likelihoods = new double[10];

View File

@ -2,10 +2,29 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
/**
 * Contract for variant features that are conditionally independent of one another.
 */
public interface IndependentVariantFeature {
    /**
     * The ten genotypes, enumerated for convenience.
     */
    enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }

    /**
     * Lets a feature configure itself from a short argument string. For now, every
     * feature is responsible for interpreting its own argument string.
     *
     * @param arguments the argument string for this feature
     */
    void initialize(String arguments);

    /**
     * Evaluates this feature for each of the ten genotypes. The result must be a double
     * array of length 10 (one entry per genotype) and its values must be in log10-space.
     *
     * @param ref     the reference base
     * @param context the context for the given locus
     * @return a ten-element array of log-likelihood results of the feature applied to each genotype
     */
    double[] compute(char ref, LocusContext context);
}
}

View File

@ -1,34 +1,65 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.PackageUtils;
import org.broadinstitute.sting.utils.JVMUtils;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.PackageUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.io.File;
import java.io.PrintWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
/**
* VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying
* the likelihoods of each genotype. At the moment, the variants are expected to be in gelitext format.
*/
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class))
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname:arguments'") public String[] FEATURES;
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'") public String[] FEATURES;
@Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT;
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
private PrintWriter vwriter;
private ArrayList<Class> featureClasses;
private PrintWriter vwriter;
/**
 * Derives the user-facing feature name from a feature class by removing the first
 * occurrence of 'IVF' from its simple name, so the user needn't type it on the command-line.
 *
 * @param featureClass the feature class whose name should be shortened
 * @return the simple class name with the first 'IVF' removed
 */
private String rationalizeFeatureClassName(Class featureClass) {
    return featureClass.getSimpleName().replaceFirst("IVF", "");
}
/**
 * Builds a comma-separated list of the feature names the user may specify at the command-line.
 * Each name is the rationalized (IVF-stripped) simple name of an entry in featureClasses,
 * in list order, with no trailing comma.
 *
 * @return comma-separated feature names; the empty string if there are no feature classes
 */
private String getAvailableFeatureClasses() {
    // A StringBuilder avoids re-copying the accumulated text on every iteration.
    StringBuilder featureString = new StringBuilder();

    // The separator is empty before the first name and a comma afterwards, so the output
    // matches "a,b,c" exactly, even if a rationalized name happens to be empty.
    String separator = "";
    for (Class featureClass : featureClasses) {
        featureString.append(separator).append(rationalizeFeatureClassName(featureClass));
        separator = ",";
    }

    return featureString.toString();
}
/**
* Prepare the output file and the list of available features.
*/
public void initialize() {
try {
vwriter = new PrintWriter(VARIANTS_OUT);
@ -40,58 +71,86 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
}
}
/**
 * Supplies the starting value for the reduce step: no loci have been processed yet.
 *
 * @return 0, the initial count of processed loci
 */
public Integer reduceInit() {
    return Integer.valueOf(0);
}
/**
* For each site of interest, rescore the genotype likelihoods by applying the specified feature set.
*
* The "variant" record is looked up from the tracker. Each entry of FEATURES is split on ':' into a
* feature name and an optional argument string; the name is compared case-insensitively against the
* IVF-stripped simple names of the classes in featureClasses. A matching class is instantiated,
* initialized with the argument string, and the result of its compute() is passed to
* variant.adjustLikelihoods(). The variant is then printed to the variants output writer.
*
* NOTE(review): the body below appears to contain both the pre-commit and post-commit versions of
* this method shown together (duplicated loop headers, try/catch blocks and return statements) —
* confirm against the repository before relying on the exact statement order.
*
* @param tracker the meta-data tracker
* @param ref the reference base
* @param context the context for the given locus
* @return 1 if the locus was successfully processed, 0 if otherwise
* @throws StingException if a requested feature matches none of the available feature classes, or if
*         a matching feature class cannot be instantiated
*/
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
for (String feature : FEATURES) {
String[] featurePieces = feature.split(":");
String featureName = featurePieces[0];
String featureArgs = featurePieces[1];
// Ignore places where we don't have a variant or where the reference base is ambiguous.
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref) != -1) {
if (VERBOSE) { out.println("Original:\n " + variant); }
IndependentVariantFeature ivf;
for (String requestedFeatureString : FEATURES) {
String[] requestedFeaturePieces = requestedFeatureString.split(":");
String requestedFeatureName = requestedFeaturePieces[0];
// The part after ':' is optional; fall back to an empty argument string when it is absent.
String requestedFeatureArgs = (requestedFeaturePieces.length == 2) ? requestedFeaturePieces[1] : "";
if (VERBOSE) {
out.println("Original:");
out.println(" " + variant);
}
// Counts the feature classes whose name did not match the requested feature.
int notYetSeenFeature = 0;
for ( Class featureClass : featureClasses ) {
String featureClassName = rationalizeFeatureClassName(featureClass);
for ( Class featureClass : featureClasses ) {
String featureClassName = featureClass.getSimpleName();
featureClassName = featureClassName.replaceFirst("IVF", "");
if (requestedFeatureName.equalsIgnoreCase(featureClassName)) {
try {
IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance();
ivf.initialize(requestedFeatureArgs);
if (featureName.equalsIgnoreCase(featureClassName)) {
try {
ivf = (IndependentVariantFeature) featureClass.newInstance();
ivf.initialize(featureArgs);
variant.adjustLikelihoods(ivf.compute(ref, context));
variant.adjustLikelihoods(ivf.compute(ref, context));
if (VERBOSE) {
out.println(featureClassName + ":");
out.println(" " + variant);
if (VERBOSE) { out.println(featureClassName + ":\n " + variant); }
} catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
} catch (IllegalAccessException e) {
throw new StingException(String.format("Cannot instantiate feature class '%s': must have no-arg constructor", featureClass.getSimpleName()));
}
} catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
} catch (IllegalAccessException e) {
throw new StingException(String.format("Cannot instantiate feature class '%s': must have no-arg constructor", featureClass.getSimpleName()));
} else {
notYetSeenFeature++;
}
}
// Every available class failed to match, so the requested feature name is unknown.
if (notYetSeenFeature == featureClasses.size()) {
throw new StingException(String.format("Unknown feature '%s'. Valid features are '%s'", requestedFeatureName, getAvailableFeatureClasses()));
}
if (VERBOSE) { System.out.println(); }
}
if (VERBOSE) { System.out.println(); }
vwriter.println(variant);
return 1;
}
vwriter.println(variant);
return 1;
return 0;
}
/**
 * Adds the result of one map call to the running count of loci processed.
 *
 * @param value result of the map (documented there as 1 if the locus was processed, 0 otherwise)
 * @param sum   accumulator for the reduce
 * @return the new number of loci processed
 */
public Integer reduce(Integer value, Integer sum) {
    // Add the map result rather than a constant 1, so loci that map reported as
    // not processed (0) do not inflate the count.
    return sum + value;
}
/**
* Tell the user the number of loci processed and close out the new variants file.
*
* @param result the number of loci seen.
*/
public void onTraversalDone(Integer result) {
out.printf("Processed %d loci.\n", result);