Added javadocs. Now throws an exception if an unknown feature is specified. General cleanup.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1055 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
284fd6a5fb
commit
ed7afd8b70
|
|
@ -1,9 +1,9 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -16,8 +16,6 @@ public class IVFBinomialStrand implements IndependentVariantFeature {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getFeatureName() { return "binomial"; }
|
|
||||||
|
|
||||||
public double[] compute(char ref, LocusContext context) {
|
public double[] compute(char ref, LocusContext context) {
|
||||||
double[] likelihoods = new double[10];
|
double[] likelihoods = new double[10];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,29 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface for conditionally independent variant features.
|
||||||
|
*/
|
||||||
public interface IndependentVariantFeature {
|
public interface IndependentVariantFeature {
|
||||||
|
/**
|
||||||
|
* A convenient enumeration for each of the ten genotypes.
|
||||||
|
*/
|
||||||
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method so that features can initialize themselves based on a short argument string.
|
||||||
|
* At the moment, each feature is responsible for interpreting their own argument string.
|
||||||
|
*
|
||||||
|
* @param arguments
|
||||||
|
*/
|
||||||
public void initialize(String arguments);
|
public void initialize(String arguments);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method to compute the result of this feature for each of the ten genotypes. The return value must
|
||||||
|
* be a double array of length 10 (one for each genotype) and the value must be in log10-space.
|
||||||
|
* @param ref the reference base
|
||||||
|
* @param context the context for the given locus
|
||||||
|
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
|
||||||
|
*/
|
||||||
public double[] compute(char ref, LocusContext context);
|
public double[] compute(char ref, LocusContext context);
|
||||||
}
|
}
|
||||||
|
|
@ -1,34 +1,65 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.PackageUtils;
|
|
||||||
import org.broadinstitute.sting.utils.JVMUtils;
|
|
||||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.PackageUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.PrintWriter;
|
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.PrintWriter;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying
|
||||||
|
* the likelihoods of each genotype. At the moment, the variants are expected to be in gelitext format.
|
||||||
|
*/
|
||||||
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class))
|
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=rodVariants.class))
|
||||||
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname:arguments'") public String[] FEATURES;
|
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'") public String[] FEATURES;
|
||||||
@Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT;
|
@Argument(fullName="variants_out", shortName="VO", doc="File to which modified variants should be written") public File VARIANTS_OUT;
|
||||||
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
||||||
|
|
||||||
private PrintWriter vwriter;
|
|
||||||
private ArrayList<Class> featureClasses;
|
private ArrayList<Class> featureClasses;
|
||||||
|
private PrintWriter vwriter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trim the 'IVF' off the feature name so the user needn't specify that on the command-line.
|
||||||
|
*
|
||||||
|
* @param featureClass the feature class whose name we should rationalize
|
||||||
|
* @return the class name, minus 'IVF'
|
||||||
|
*/
|
||||||
|
private String rationalizeFeatureClassName(Class featureClass) {
|
||||||
|
String featureClassName = featureClass.getSimpleName();
|
||||||
|
return featureClassName.replaceFirst("IVF", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a comma-separated list of available features the user may specify at the command-line.
|
||||||
|
*
|
||||||
|
* @return String of available features
|
||||||
|
*/
|
||||||
|
private String getAvailableFeatureClasses() {
|
||||||
|
String featureString = "";
|
||||||
|
|
||||||
|
for (int featureClassIndex = 0; featureClassIndex < featureClasses.size(); featureClassIndex++) {
|
||||||
|
featureString += rationalizeFeatureClassName(featureClasses.get(featureClassIndex)) + (featureClassIndex == featureClasses.size() - 1 ? "" : ",");
|
||||||
|
}
|
||||||
|
|
||||||
|
return featureString;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prepare the output file and the list of available features.
|
||||||
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
try {
|
try {
|
||||||
vwriter = new PrintWriter(VARIANTS_OUT);
|
vwriter = new PrintWriter(VARIANTS_OUT);
|
||||||
|
|
@ -40,58 +71,86 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the number of loci processed to zero.
|
||||||
|
*
|
||||||
|
* @return 0
|
||||||
|
*/
|
||||||
public Integer reduceInit() { return 0; }
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For each site of interest, rescore the genotype likelihoods by applying the specified feature set.
|
||||||
|
*
|
||||||
|
* @param tracker the meta-data tracker
|
||||||
|
* @param ref the reference base
|
||||||
|
* @param context the context for the given locus
|
||||||
|
* @return 1 if the locus was successfully processed, 0 if otherwise
|
||||||
|
*/
|
||||||
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
|
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
|
||||||
|
|
||||||
for (String feature : FEATURES) {
|
// Ignore places where we don't have a variant or where the reference base is ambiguous.
|
||||||
String[] featurePieces = feature.split(":");
|
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref) != -1) {
|
||||||
String featureName = featurePieces[0];
|
if (VERBOSE) { out.println("Original:\n " + variant); }
|
||||||
String featureArgs = featurePieces[1];
|
|
||||||
|
|
||||||
IndependentVariantFeature ivf;
|
for (String requestedFeatureString : FEATURES) {
|
||||||
|
String[] requestedFeaturePieces = requestedFeatureString.split(":");
|
||||||
|
String requestedFeatureName = requestedFeaturePieces[0];
|
||||||
|
String requestedFeatureArgs = (requestedFeaturePieces.length == 2) ? requestedFeaturePieces[1] : "";
|
||||||
|
|
||||||
if (VERBOSE) {
|
int notYetSeenFeature = 0;
|
||||||
out.println("Original:");
|
for ( Class featureClass : featureClasses ) {
|
||||||
out.println(" " + variant);
|
String featureClassName = rationalizeFeatureClassName(featureClass);
|
||||||
}
|
|
||||||
|
|
||||||
for ( Class featureClass : featureClasses ) {
|
if (requestedFeatureName.equalsIgnoreCase(featureClassName)) {
|
||||||
String featureClassName = featureClass.getSimpleName();
|
try {
|
||||||
featureClassName = featureClassName.replaceFirst("IVF", "");
|
IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance();
|
||||||
|
ivf.initialize(requestedFeatureArgs);
|
||||||
|
|
||||||
if (featureName.equalsIgnoreCase(featureClassName)) {
|
variant.adjustLikelihoods(ivf.compute(ref, context));
|
||||||
try {
|
|
||||||
ivf = (IndependentVariantFeature) featureClass.newInstance();
|
|
||||||
ivf.initialize(featureArgs);
|
|
||||||
|
|
||||||
variant.adjustLikelihoods(ivf.compute(ref, context));
|
if (VERBOSE) { out.println(featureClassName + ":\n " + variant); }
|
||||||
|
} catch (InstantiationException e) {
|
||||||
if (VERBOSE) {
|
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
||||||
out.println(featureClassName + ":");
|
} catch (IllegalAccessException e) {
|
||||||
out.println(" " + variant);
|
throw new StingException(String.format("Cannot instantiate feature class '%s': must have no-arg constructor", featureClass.getSimpleName()));
|
||||||
}
|
}
|
||||||
} catch (InstantiationException e) {
|
} else {
|
||||||
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
notYetSeenFeature++;
|
||||||
} catch (IllegalAccessException e) {
|
|
||||||
throw new StingException(String.format("Cannot instantiate feature class '%s': must have no-arg constructor", featureClass.getSimpleName()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (notYetSeenFeature == featureClasses.size()) {
|
||||||
|
throw new StingException(String.format("Unknown feature '%s'. Valid features are '%s'", requestedFeatureName, getAvailableFeatureClasses()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (VERBOSE) { System.out.println(); }
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VERBOSE) { System.out.println(); }
|
vwriter.println(variant);
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
vwriter.println(variant);
|
|
||||||
|
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the number of loci processed.
|
||||||
|
*
|
||||||
|
* @param value result of the map.
|
||||||
|
* @param sum accumulator for the reduce.
|
||||||
|
* @return the new number of loci processed.
|
||||||
|
*/
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
return sum + 1;
|
return sum + value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tell the user the number of loci processed and close out the new variants file.
|
||||||
|
*
|
||||||
|
* @param result the number of loci seen.
|
||||||
|
*/
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
out.printf("Processed %d loci.\n", result);
|
out.printf("Processed %d loci.\n", result);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue