Prototype of RODBinding @Arguments instead of -B syntax

Initial version of RodBinding class.
Flow from walker Rodbinding @Arguments -> RMDTriplet (old system) -> GATK engine (standard).  Will need refactoring.
This commit is contained in:
Mark DePristo 2011-07-26 11:09:06 -04:00
parent 2a51543693
commit 38969b9783
6 changed files with 386 additions and 5 deletions

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.lang.annotation.Annotation;
import java.lang.reflect.*;
import java.util.*;
@ -275,6 +276,35 @@ public abstract class ArgumentTypeDescriptor {
}
}
/**
* Parser for RodBinding objects
*/
class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
* We only want RodBinding class objects
* @param type The type to check.
* @return true if the provided class is a RodBinding.class
*/
@Override
public boolean supports( Class type ) {
return isRodBinding(type);
}
public static boolean isRodBinding( Class type ) {
return type.isAssignableFrom(RodBinding.class);
}
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
RodBinding<Object> result = new RodBinding<Object>(source.field.getName(), new File(value));
Tags tags = getArgumentTags(matches);
parsingEngine.addTags(result,tags);
return result;
}
}
/**
* Parse simple argument types: java primitives, wrapper classes, and anything that has
* a simple String constructor.
@ -282,9 +312,10 @@ public abstract class ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
if( type.isPrimitive() ) return true;
if( type.isEnum() ) return true;
if( primitiveToWrapperMap.containsValue(type) ) return true;
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
if ( type.isPrimitive() ) return true;
if ( type.isEnum() ) return true;
if ( primitiveToWrapperMap.containsValue(type) ) return true;
try {
type.getConstructor(String.class);
@ -385,7 +416,6 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) {
Class componentType;
Object result;
Tags tags;
if( Collection.class.isAssignableFrom(type) ) {

View File

@ -64,6 +64,7 @@ public class ParsingEngine {
* The type of set used must be ordered (but not necessarily sorted).
*/
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
new RodBindingArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor(),
new MultiplexArgumentTypeDescriptor()) );

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.File;
import java.util.List;
/**
*
*/
// TODO -- should have a derived class called VariantContentRodBinding with simple accessors
public class RodBinding<T> {
final String variableName;
final File sourceFile;
public RodBinding(final String variableName, final File sourceFile) {
this.variableName = variableName;
this.sourceFile = sourceFile;
}
public String getVariableName() {
return variableName;
}
public File getSourceFile() {
return sourceFile;
}
public List<T> getAll(RefMetaDataTracker tracker) {
return (List<T>)tracker.getReferenceMetaData(variableName);
}
public T getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) {
return (T)tracker.getVariantContext(ref, variableName, loc);
}
}

View File

@ -25,8 +25,10 @@
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.commandline.ArgumentSource;
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
@ -34,12 +36,15 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescripto
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
/**
* @author aaron
@ -81,7 +86,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser));
engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser));
engine.setWalker(walker);
walker.setToolkit(engine);
@ -96,6 +100,11 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
loadArgumentsIntoObject(walker);
argumentSources.add(walker);
Collection<RMDTriplet> newStyle = ListFileUtils.unpackRODBindings(getRodBindingsInWalker(walker), parser);
Collection<RMDTriplet> oldStyle = ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings, getArgumentCollection().DBSNPFile, parser);
oldStyle.addAll(newStyle);
engine.setReferenceMetaDataFiles(oldStyle);
for (ReadFilter filter: filters) {
loadArgumentsIntoObject(filter);
argumentSources.add(filter);
@ -112,6 +121,20 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
return 0;
}
private List<RodBinding> getRodBindingsInWalker(Walker<?,?> walker) {
List<RodBinding> rods = new ArrayList<RodBinding>();
for ( ArgumentSource source : parser.extractArgumentSources(walker.getClass()) ) {
Object obj = JVMUtils.getFieldValue(source.field, walker);
if ( obj instanceof RodBinding ) {
System.out.printf("Found rod binding for field %s of %s%n", obj, source.field);
rods.add((RodBinding)obj);
}
}
return rods;
}
/**
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
* This report will be written to either STDOUT or to the run repository, depending on the options

View File

@ -0,0 +1,225 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.io.PrintStream;
import java.util.*;
/**
* Emits specific fields as dictated by the user from one or more VCF files.
*/
public class VariantsToTableNewRodStyle extends RodWalker<Integer, Integer> {
@Output(doc="File to which results should be written",required=true)
protected PrintStream out;
@Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
public ArrayList<String> fieldsToTake = new ArrayList<String>();
@Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records")
public boolean showFiltered = false;
@Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false)
public int MAX_RECORDS = -1;
int nRecords = 0;
@Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
public boolean keepMultiAllelic = false;
@Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false)
public boolean ALLOW_MISSING_DATA = false;
@Input(fullName="variants", shortName="V", doc="The variant file we will convert to a table", required=true)
public RodBinding<VariantContext> variants;
public void initialize() {
out.println(Utils.join("\t", fieldsToTake));
}
public static abstract class Getter { public abstract String get(VariantContext vc); }
public static Map<String, Getter> getters = new HashMap<String, Getter>();
static {
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } });
getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
getters.put("REF", new Getter() {
public String get(VariantContext vc) {
String x = "";
if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
x=x+new String(new byte[]{refByte});
}
return x+vc.getReference().getDisplayString();
}
});
getters.put("ALT", new Getter() {
public String get(VariantContext vc) {
StringBuilder x = new StringBuilder();
int n = vc.getAlternateAlleles().size();
if ( n == 0 ) return ".";
if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) {
Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY));
x.append(new String(new byte[]{refByte}));
}
for ( int i = 0; i < n; i++ ) {
if ( i != 0 ) x.append(",");
x.append(vc.getAlternateAllele(i).getDisplayString());
}
return x.toString();
}
});
getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } });
getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) {
if ( vc.isSNP() && vc.isBiallelic() )
return VariantContextUtils.isTransition(vc) ? "1" : "0";
else
return "-1";
}});
getters.put("FILTER", new Getter() { public String get(VariantContext vc) {
return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); }
});
getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } });
getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } });
getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } });
getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } });
getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } });
getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } });
getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } });
getters.put("GQ", new Getter() { public String get(VariantContext vc) {
if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF");
return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError());
}});
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null ) // RodWalkers can make funky map calls
return 0;
if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) {
VariantContext vc = variants.getVariantContext(tracker, ref, context.getLocation());
if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) {
List<String> vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA);
out.println(Utils.join("\t", vals));
}
return 1;
} else {
if ( nRecords >= MAX_RECORDS ) {
logger.warn("Calling sys exit to leave after " + nRecords + " records");
System.exit(0); // todo -- what's the recommend way to abort like this?
}
return 0;
}
}
private static final boolean isWildCard(String s) {
return s.endsWith("*");
}
public static List<String> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData) {
List<String> vals = new ArrayList<String>();
for ( String field : fields ) {
String val = "NA";
if ( getters.containsKey(field) ) {
val = getters.get(field).get(vc);
} else if ( vc.hasAttribute(field) ) {
val = vc.getAttributeAsString(field);
} else if ( isWildCard(field) ) {
Set<String> wildVals = new HashSet<String>();
for ( Map.Entry<String,Object> elt : vc.getAttributes().entrySet()) {
if ( elt.getKey().startsWith(field.substring(0, field.length() - 1)) ) {
wildVals.add(elt.getValue().toString());
}
}
if ( wildVals.size() > 0 ) {
List<String> toVal = new ArrayList<String>(wildVals);
Collections.sort(toVal);
val = Utils.join(",", toVal);
}
} else if ( ! allowMissingData ) {
throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc));
}
if (field.equals("AF") || field.equals("AC")) {
String afo = val;
double af=0;
if (afo.contains(",")) {
String[] afs = afo.split(",");
afs[0] = afs[0].substring(1,afs[0].length());
afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
double[] afd = new double[afs.length];
for (int k=0; k < afd.length; k++)
afd[k] = Double.valueOf(afs[k]);
af = MathUtils.arrayMax(afd);
//af = Double.valueOf(afs[0]);
}
else
if (!afo.equals("NA"))
af = Double.valueOf(afo);
val = Double.toString(af);
}
vals.add(val);
}
return vals;
}
public Integer reduceInit() {
return 0;
}
public Integer reduce(Integer counter, Integer sum) {
return counter + sum;
}
public void onTraversalDone(Integer sum) {}
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.utils.text;
import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
@ -93,6 +94,7 @@ public class ListFileUtils {
* @return a list of expanded, bound RODs.
*/
public static Collection<RMDTriplet> unpackRODBindings(final List<String> RODBindings, final String dbSNPFile, final ParsingEngine parser) {
// todo -- this is a strange home for this code. Move into ROD system
Collection<RMDTriplet> rodBindings = new ArrayList<RMDTriplet>();
for (String fileName: RODBindings) {
@ -134,6 +136,43 @@ public class ListFileUtils {
return rodBindings;
}
/**
* Convert command-line argument representation of ROD bindings to something more easily understandable by the engine.
* @param RODBindings a text equivale
* @return a list of expanded, bound RODs.
*/
public static Collection<RMDTriplet> unpackRODBindings(final List<RodBinding> RODBindings, final ParsingEngine parser) {
// todo -- this is a strange home for this code. Move into ROD system
Collection<RMDTriplet> rodBindings = new ArrayList<RMDTriplet>();
for (RodBinding rodBinding: RODBindings) {
String argValue = rodBinding.getSourceFile().getPath();
String fileName = expandFileName(argValue);
final Tags tags = parser.getTags(rodBinding);
List<String> positionalTags = tags.getPositionalTags();
if(positionalTags.size() != 1)
throw new UserException("Invalid syntax for RODBinding (reference-ordered data) input . " +
"Please use the following syntax when providing reference-ordered " +
"data: -<arg-name>:<type> <filename>.");
// Assume that if tags are present, those tags are name and type.
// Name is always first, followed by type.
String name = rodBinding.getVariableName();
String type = positionalTags.get(0);
RMDTriplet.RMDStorageType storageType = null;
if(tags.getValue("storage") != null)
storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,tags.getValue("storage"));
else if(fileName.toLowerCase().endsWith("stdin"))
storageType = RMDTriplet.RMDStorageType.STREAM;
else
storageType = RMDTriplet.RMDStorageType.FILE;
rodBindings.add(new RMDTriplet(name,type,fileName,storageType,tags));
}
return rodBindings;
}
/**
* Expand any special characters that appear in the filename. Right now, '-' is expanded to