Refactored a FeatureManager class out of RMDTrackBuilder

New class handles (vastly more cleanly) the database of Tribble codecs, features, and names for use throughout the GATK.
Added SelfScopingFeatureCodec interface that allows a FeatureCodec to examine a file and determine if the file can be parsed.  This is the first step towards allowing the GATK to dynamically determine the type of a RodBinding.
This commit is contained in:
Mark DePristo 2011-08-08 14:04:46 -04:00
parent e5fde0d16b
commit e36994e36b
15 changed files with 540 additions and 174 deletions

View File

@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource {
}
public Class getType() {
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass();
}
public Class getRecordType() {
return builder.createCodec(getType(),getName()).getFeatureType();
return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass();
}
public File getFile() {

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import java.io.File;
/**
 * An interface marking that a given Tribble codec can look at a file and determine whether the
 * codec can specifically parse the contents of that file.
 */
public interface SelfScopingFeatureCodec {
/**
 * This function returns true iff the File potentialInput can be parsed by this
 * codec.
 *
 * The GATK assumes that there's never a situation where two SelfScopingFeatureCodecs
 * return true for the same file.  If this occurs the GATK spits out an error.
 *
 * Note this function must never throw an error.  All errors should be trapped
 * and false returned.
 *
 * @param potentialInput the file to test for parsability with this codec
 * @return true if potentialInput can be parsed, false otherwise
 */
public boolean canDecode(final File potentialInput);
}

View File

@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileOutputStream;
import java.util.Map;
/**
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram {
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
// find the types available to the track builders
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes();
FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
// check that the type is valid
if (!typeMapping.containsKey(inputFileType))
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet()));
if (descriptor == null)
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());
// create the codec
FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType));
FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
// check if it's a reference dependent feature codec
if (codec instanceof ReferenceDependentFeatureCodec)

View File

@ -0,0 +1,216 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.util.*;
/**
 * Class for managing Tribble Feature readers available to the GATK.  The features
 * are dynamically determined via a PluginManager.  This class provides convenient
 * getter methods for obtaining FeatureDescriptor objects that collect all of the
 * useful information about the Tribble Codec, Feature, and name in one place.
 *
 * @author depristo
 */
public class FeatureManager {
    /**
     * Value object binding together a codec's GATK name and a prototype instance
     * of the codec itself, used to answer class/type queries.
     */
    public static class FeatureDescriptor {
        final String name;        // upper-cased plugin name, e.g. "VCF"
        final FeatureCodec codec; // prototype codec instance used for type queries

        public FeatureDescriptor(final String name, final FeatureCodec codec) {
            this.name = name;
            this.codec = codec;
        }

        /** @return the upper-cased GATK name of this feature type */
        public String getName() {
            return name;
        }

        /** @return the prototype codec instance for this feature type */
        public FeatureCodec getCodec() {
            return codec;
        }

        /** @return the concrete FeatureCodec class */
        public Class getCodecClass() { return codec.getClass(); }

        /** @return the Feature class this codec emits */
        public Class getFeatureClass() { return codec.getFeatureType(); }

        @Override
        public String toString() {
            return String.format("FeatureDescriptor name=%s codec=%s feature=%s", getName(), getCodecClass().getName(), getFeatureClass().getName());
        }
    }

    // plugin manager is used both for codec discovery and to instantiate fresh codecs
    private final PluginManager<FeatureCodec> pluginManager;
    private final Collection<FeatureDescriptor> featureDescriptors = new HashSet<FeatureDescriptor>();

    /**
     * Construct a FeatureManager, discovering every FeatureCodec plugin on the
     * classpath and building one FeatureDescriptor per codec.
     */
    public FeatureManager() {
        pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");

        for (final String rawName: pluginManager.getPluginsByName().keySet()) {
            FeatureCodec codec = pluginManager.createByName(rawName);
            // names are stored upper-cased so lookups can be case-insensitive
            String name = rawName.toUpperCase();
            FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec);
            featureDescriptors.add(featureDescriptor);
        }
    }

    /**
     * Return the FeatureDescriptor whose getCodecClass().equals(codecClass).
     *
     * @param codecClass the FeatureCodec class to look up; must not be null
     * @return A FeatureDescriptor or null if none is found
     */
    @Requires("codecClass != null")
    public FeatureDescriptor getByCodec(Class codecClass) {
        for ( FeatureDescriptor descriptor : featureDescriptors )
            if ( descriptor.getCodecClass().equals(codecClass) )
                return descriptor;
        return null;
    }

    /**
     * Returns a collection of FeatureDescriptors that emit records of type featureClass
     * (i.e., whose feature class is featureClass or a subtype of it).
     *
     * @param featureClass the Feature type of interest; must not be null
     * @return the (possibly empty) set of matching FeatureDescriptors; never null
     */
    @Requires("featureClass != null")
    public <T extends Feature> Collection<FeatureDescriptor> getByFeature(Class<T> featureClass) {
        // check the argument before doing any work; the old message referenced the
        // unrelated name "trackRecordType" left over from RMDTrackBuilder
        if (featureClass == null)
            throw new IllegalArgumentException("featureClass value is null, please pass in an actual class object");

        Set<FeatureDescriptor> consistentDescriptors = new HashSet<FeatureDescriptor>();
        for ( FeatureDescriptor descriptor : featureDescriptors ) {
            if ( featureClass.isAssignableFrom(descriptor.getFeatureClass()))
                consistentDescriptors.add(descriptor);
        }
        return consistentDescriptors;
    }

    /**
     * Return the FeatureDescriptor with getName().equals(name), compared
     * case-insensitively.
     *
     * @param name the feature type name (e.g. "vcf"); must not be null
     * @return A FeatureDescriptor or null if none is found
     */
    @Requires("name != null")
    public FeatureDescriptor getByName(String name) {
        for ( FeatureDescriptor descriptor : featureDescriptors )
            if ( descriptor.getName().equalsIgnoreCase(name) )
                return descriptor;
        return null;
    }

    /**
     * Returns the FeatureDescriptor that can read the contents of File file, if one
     * can be determined.  Only codecs implementing SelfScopingFeatureCodec are
     * consulted, and at most one of them may claim the file.
     *
     * @param file a readable file whose type should be determined
     * @return A FeatureDescriptor or null if none is found
     * @throws ReviewedStingException if more than one codec claims the file
     */
    @Requires({"file != null", "file.isFile()", "file.canRead()"})
    public FeatureDescriptor getByFiletype(File file) {
        List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>();

        for ( FeatureDescriptor descriptor : featureDescriptors )
            if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) {
                if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) {
                    canParse.add(descriptor);
                }
            }

        if ( canParse.size() == 0 )
            return null;
        else if ( canParse.size() > 1 )
            // the SelfScopingFeatureCodec contract forbids two codecs claiming the same file
            throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse);
        else
            return canParse.get(0);
    }

    /**
     * Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found
     *
     * @param triplet the ROD binding (name, type, file) whose type field is looked up
     * @return A FeatureDescriptor or null if none is found
     */
    @Requires("triplet != null")
    public FeatureDescriptor getByTriplet(RMDTriplet triplet) {
        return getByName(triplet.getType());
    }

    /**
     * @return all of the FeatureDescriptors available to the GATK, as an
     *         unmodifiable view.  Never null
     */
    @Ensures("result != null")
    public Collection<FeatureDescriptor> getFeatureDescriptors() {
        return Collections.unmodifiableCollection(featureDescriptors);
    }

    /**
     * Returns a comma-separated list of the available tribble track names (vcf,dbsnp,etc)
     * that we can load, suitable for user-facing error messages.
     *
     * @return the joined list of names; never null
     */
    @Ensures("result != null")
    public String userFriendlyListOfAvailableFeatures() {
        List<String> names = new ArrayList<String>();
        for ( final FeatureDescriptor descriptor : featureDescriptors )
            names.add(descriptor.getName());
        return Utils.join(",", names);
    }

    /**
     * Create a new FeatureCodec of the type described in descriptor, assigning it the
     * name (if possible) and providing it the genomeLocParser (where necessary)
     *
     * @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create
     * @param name the name to assign this codec
     * @param genomeLocParser parser handed to codecs that are reference dependent
     * @return the feature codec itself
     */
    @Requires({"descriptor != null", "name != null", "genomeLocParser != null"})
    @Ensures("result != null")
    public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) {
        FeatureCodec codec = pluginManager.createByType(descriptor.getCodecClass());
        if ( codec instanceof NameAwareCodec )
            ((NameAwareCodec)codec).setName(name);
        if ( codec instanceof ReferenceDependentFeatureCodec )
            ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser);
        return codec;
    }
}

View File

@ -65,7 +65,7 @@ import java.util.*;
* that gets iterators from the FeatureReader using Tribble.
*
*/
public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
/**
* our log, which we use to capture anything from this class
*/
@ -74,8 +74,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
// a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
private Map<String, Class> classes = null;
// private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null;
@ -89,6 +87,8 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
*/
private ValidationExclusion.TYPE validationExclusionType;
FeatureManager featureManager;
/**
* Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally
* used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor,
@ -100,66 +100,14 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
public RMDTrackBuilder(SAMSequenceDictionary dict,
GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
this();
this.dict = dict;
this.genomeLocParser = genomeLocParser;
this.validationExclusionType = validationExclusionType;
classes = new HashMap<String, Class>();
for (String name: this.getPluginsByName().keySet()) {
classes.put(name.toUpperCase(), getPluginsByName().get(name));
}
this.genomeLocParser = genomeLocParser;
featureManager = new FeatureManager();
}
/**
* Limited constructor that produces a builder capable for validating types, but not building tracks
*/
public RMDTrackBuilder() {
super(FeatureCodec.class, "Codecs", "Codec");
classes = new HashMap<String, Class>();
for (String name: this.getPluginsByName().keySet()) {
classes.put(name.toUpperCase(), getPluginsByName().get(name));
}
}
/** @return a list of all available track types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes() {
return Collections.unmodifiableMap(classes);
}
/** @return a list of all available track record types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
HashMap classToRecord = new HashMap<String, Class>();
for (String name: this.getPluginsByName().keySet()) {
FeatureCodec codec = this.createByName(name);
classToRecord.put(name.toUpperCase(), codec.getFeatureType());
}
return classToRecord;
}
public Class getFeatureCodecClass(RMDTriplet fileDescriptor) {
return getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
}
/**
* Returns the FeatureClass (BeagleFeature) produced by an RMDTriplet, or null
* if no such binding is found
*
* @param fileDescriptor
* @return
*/
public Class getFeatureClass(RMDTriplet fileDescriptor) {
return getAvailableTrackNamesAndRecordTypes().get(fileDescriptor.getType().toUpperCase());
}
/**
* Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load
* @return
*/
public String getAvailableTribbleFeatureNames() {
return Utils.join(",", getAvailableTrackNamesAndRecordTypes().keySet());
public FeatureManager getFeatureManager() {
return featureManager;
}
/**
@ -173,38 +121,33 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
String name = fileDescriptor.getName();
File inputFile = new File(fileDescriptor.getFile());
Class featureCodecClass = getFeatureCodecClass(fileDescriptor);
if (featureCodecClass == null)
FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor);
if (descriptor == null)
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
// return a feature reader track
Pair<FeatureSource, SAMSequenceDictionary> pair;
if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile);
pair = createTabixIndexedFeatureSource(descriptor, name, inputFile);
else
pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType());
pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name));
return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name));
}
/**
* Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream.
* @param targetClass Type of Tribble class to build.
* @param codecClass Type of Tribble codec class to build.
* @param inputFile Input file type to use.
* @return An RMDTrack, suitable for accessing reference metadata.
*/
public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) {
// TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics.
String typeName = null;
for(Map.Entry<String,Class> trackType: getAvailableTrackNamesAndTypes().entrySet()) {
if(trackType.getValue().equals(targetClass))
typeName = trackType.getKey();
}
public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) {
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
if(typeName == null)
throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName());
if (descriptor == null)
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
}
/**
@ -212,16 +155,16 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
* reader of the appropriate type will figure out what the right index type is, and determine if it
* exists.
*
* @param targetClass the codec class type
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
* @param name the name of the track
* @param inputFile the file to load
* @return a feature reader implementation
*/
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) {
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) {
// we might not know the index type, try loading with the default reader constructor
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
try {
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null);
} catch (TribbleException e) {
throw new UserException(e.getMessage(), e);
}
@ -229,28 +172,26 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
/**
* add a name to the codec, if it takes one
* @param targetClass the class to create a codec for
* @param descriptor the class to create a codec for
* @param name the name to assign this codec
* @return the feature codec itself
*/
public FeatureCodec createCodec(Class targetClass, String name) {
FeatureCodec codex = this.createByType(targetClass);
if ( codex instanceof NameAwareCodec )
((NameAwareCodec)codex).setName(name);
if(codex instanceof ReferenceDependentFeatureCodec)
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
return codex;
private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) {
return featureManager.createCodec(descriptor, name, genomeLocParser);
}
/**
* create a feature source object given:
* @param targetClass the target class
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
* @param name the name of the codec
* @param inputFile the tribble file to parse
* @param storageType How the RMD is streamed into the input file.
* @return the input file as a FeatureReader
*/
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) {
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(FeatureManager.FeatureDescriptor descriptor,
String name,
File inputFile,
RMDStorageType storageType) {
// Feature source and sequence dictionary to use as the ultimate reference
FeatureSource featureSource = null;
SAMSequenceDictionary sequenceDictionary = null;
@ -260,7 +201,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
if(canBeIndexed) {
try {
Index index = loadIndex(inputFile, createCodec(targetClass, name));
Index index = loadIndex(inputFile, createCodec(descriptor, name));
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
catch (ReviewedStingException e) { }
@ -273,7 +214,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
sequenceDictionary = getSequenceDictionaryFromProperties(index);
}
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name));
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
}
catch (TribbleException e) {
throw new UserException(e.getMessage());
@ -283,7 +224,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
}
}
else {
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false);
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false);
}
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
@ -418,22 +359,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
return idx;
}
/**
* Returns a collection of track names that match the record type.
* @param trackRecordType the record type specified in the @RMD annotation
* @return a collection of available track record type names that match the record type
*/
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
Set<String> names = new TreeSet<String>();
if (trackRecordType == null)
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
for (Map.Entry<String, Class> availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) {
if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
names.add(availableTrackRecordType.getKey());
}
return names;
}
// ---------------------------------------------------------------------------------------------------------
// static functions to work with the sequence dictionaries of indexes

View File

@ -129,14 +129,6 @@ public class VCFDiffableReader implements DiffableReader {
@Override
public boolean canRead(File file) {
try {
final String VCF4_HEADER = "##fileformat=VCFv4";
char[] buff = new char[VCF4_HEADER.length()];
new FileReader(file).read(buff, 0, VCF4_HEADER.length());
String firstLine = new String(buff);
return firstLine.startsWith(VCF4_HEADER);
} catch ( IOException e ) {
return false;
}
return AbstractVCFCodec.canDecodeFile(file, VCFCodec.VCF4_MAGIC_HEADER);
}
}

View File

@ -157,7 +157,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(parser,walkerType));
argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder));
//argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder));
argumentFields.addAll(ReadFilterField.getFilterArguments(parser,walkerType));
String constructor = String.format("analysisName = \"%1$s\"%nanalysis_type = \"%1$s\"%n", walkerName);

View File

@ -91,39 +91,39 @@ public class RodBindField extends ArgumentField {
}
return exclusiveOf.toString();
}
public static List<ArgumentField> getRodArguments(Class<? extends Walker> walkerClass, RMDTrackBuilder trackBuilder) {
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
List<RMD> requires = WalkerManager.getRequiredMetaData(walkerClass);
List<RMD> allows = WalkerManager.getAllowsMetaData(walkerClass);
for (RMD required: requires) {
List<RodBindField> fields = new ArrayList<RodBindField>();
String trackName = required.name();
if ("*".equals(trackName)) {
// TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
//fields.add(new RodBindArgumentField(argumentDefinition, true));
} else {
for (String typeName: trackBuilder.getTrackRecordTypeNames(required.type()))
fields.add(new RodBindField(trackName, typeName, fields, true));
}
argumentFields.addAll(fields);
}
for (RMD allowed: allows) {
List<RodBindField> fields = new ArrayList<RodBindField>();
String trackName = allowed.name();
if ("*".equals(trackName)) {
// TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
//fields.add(new RodBindArgumentField(argumentDefinition, false));
} else {
for (String typeName: trackBuilder.getTrackRecordTypeNames(allowed.type()))
fields.add(new RodBindField(trackName, typeName, fields, true));
}
argumentFields.addAll(fields);
}
return argumentFields;
}
//
// public static List<ArgumentField> getRodArguments(Class<? extends Walker> walkerClass, RMDTrackBuilder trackBuilder) {
// List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
//
// List<RMD> requires = WalkerManager.getRequiredMetaData(walkerClass);
// List<RMD> allows = WalkerManager.getAllowsMetaData(walkerClass);
//
// for (RMD required: requires) {
// List<RodBindField> fields = new ArrayList<RodBindField>();
// String trackName = required.name();
// if ("*".equals(trackName)) {
// // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
// //fields.add(new RodBindArgumentField(argumentDefinition, true));
// } else {
// for (String typeName: trackBuilder.getFeatureManager().getTrackRecordTypeNames(required.type()))
// fields.add(new RodBindField(trackName, typeName, fields, true));
// }
// argumentFields.addAll(fields);
// }
//
// for (RMD allowed: allows) {
// List<RodBindField> fields = new ArrayList<RodBindField>();
// String trackName = allowed.name();
// if ("*".equals(trackName)) {
// // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
// //fields.add(new RodBindArgumentField(argumentDefinition, false));
// } else {
// for (String typeName: trackBuilder.getFeatureManager().getTrackRecordTypeNames(allowed.type()))
// fields.add(new RodBindField(trackName, typeName, fields, true));
// }
// argumentFields.addAll(fields);
// }
//
// return argumentFields;
// }
}

View File

@ -172,7 +172,7 @@ public class PluginManager<PluginType> {
}
}
protected Map<String, Class<? extends PluginType>> getPluginsByName() {
public Map<String, Class<? extends PluginType>> getPluginsByName() {
return Collections.unmodifiableMap(pluginsByName);
}

View File

@ -7,16 +7,20 @@ import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser {
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser, SelfScopingFeatureCodec {
protected final static Logger log = Logger.getLogger(VCFCodec.class);
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
@ -616,4 +620,15 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
return inputVC;
}
/**
 * Returns true iff the file potentialInput begins with the exact text
 * MAGIC_HEADER_LINE (e.g. "##fileformat=VCFv4").
 *
 * All I/O errors are trapped and reported as false, per the
 * SelfScopingFeatureCodec contract that canDecode must never throw.
 *
 * Fixes over the original: the FileReader is now always closed (the original
 * leaked a file descriptor per call), and a short read() no longer causes a
 * false negative (read may legally return fewer chars than requested).
 *
 * @param potentialInput the file whose header should be examined
 * @param MAGIC_HEADER_LINE the exact prefix identifying the format
 * @return true if the file starts with MAGIC_HEADER_LINE, false otherwise
 */
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
    FileReader reader = null;
    try {
        reader = new FileReader(potentialInput);
        final char[] buff = new char[MAGIC_HEADER_LINE.length()];
        // loop until the buffer is full or EOF; a single read() may be short
        int nRead = 0;
        while (nRead < buff.length) {
            final int n = reader.read(buff, nRead, buff.length - nRead);
            if (n == -1)
                return false; // file is shorter than the magic header
            nRead += n;
        }
        return new String(buff).startsWith(MAGIC_HEADER_LINE);
    } catch (IOException e) {
        return false;
    } finally {
        if (reader != null) {
            try { reader.close(); } catch (IOException e) { /* best-effort close */ }
        }
    }
}
}

View File

@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
@ -16,6 +18,8 @@ import java.util.*;
* quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
*/
public class VCF3Codec extends AbstractVCFCodec {
public final static String VCF3_MAGIC_HEADER = "##fileformat=VCFv3";
/**
* @param reader the line reader to take header lines from
@ -178,4 +182,8 @@ public class VCF3Codec extends AbstractVCFCodec {
return genotypes;
}
/**
 * Returns true iff potentialInput begins with the VCF3 magic header line
 * ("##fileformat=VCFv3"), i.e. this codec can parse the file.
 *
 * Never throws: canDecodeFile traps IOExceptions and returns false, as
 * required by the SelfScopingFeatureCodec contract.
 *
 * @param potentialInput the file to test
 * @return true if potentialInput looks like a VCF3 file
 */
@Override
public boolean canDecode(final File potentialInput) {
return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER);
}
}

View File

@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
@ -16,6 +18,7 @@ import java.util.*;
* quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
*/
public class VCFCodec extends AbstractVCFCodec {
public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
/**
* @param reader the line reader to take header lines from
@ -184,5 +187,8 @@ public class VCFCodec extends AbstractVCFCodec {
return genotypes;
}
/**
 * Returns true iff potentialInput begins with the VCF4 magic header line
 * ("##fileformat=VCFv4"), i.e. this codec can parse the file.
 *
 * Never throws: canDecodeFile traps IOExceptions and returns false, as
 * required by the SelfScopingFeatureCodec contract.
 *
 * @param potentialInput the file to test
 * @return true if potentialInput looks like a VCF4 file
 */
@Override
public boolean canDecode(final File potentialInput) {
return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER);
}
}

View File

@ -28,7 +28,7 @@ import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -134,7 +134,7 @@ public class ListFileUtils {
public static Collection<RMDTriplet> unpackRODBindings(final Collection<RodBinding> RODBindings, final ParsingEngine parser) {
// todo -- this is a strange home for this code. Move into ROD system
Collection<RMDTriplet> rodBindings = new ArrayList<RMDTriplet>();
RMDTrackBuilder builderForValidation = new RMDTrackBuilder();
FeatureManager builderForValidation = new FeatureManager();
for (RodBinding rodBinding: RODBindings) {
String argValue = rodBinding.getSource();
@ -153,15 +153,15 @@ public class ListFileUtils {
RMDTriplet triplet = new RMDTriplet(name,type,fileName,storageType,rodBinding.getTags());
// validate triplet type
Class typeFromTribble = builderForValidation.getFeatureClass(triplet);
if ( typeFromTribble == null )
FeatureManager.FeatureDescriptor descriptor = builderForValidation.getByTriplet(triplet);
if ( descriptor == null )
throw new UserException.UnknownTribbleType(rodBinding.getTribbleType(),
String.format("Field %s had provided type %s but there's no such Tribble type. Available types are %s",
rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.getAvailableTribbleFeatureNames()));
if ( ! rodBinding.getType().isAssignableFrom(typeFromTribble) )
rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures()));
if ( ! rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) )
throw new UserException.BadArgumentValue(rodBinding.getName(),
String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s",
rodBinding.getName(), rodBinding.getType(), typeFromTribble));
rodBinding.getName(), rodBinding.getType(), descriptor.getName()));
rodBindings.add(triplet);

View File

@ -0,0 +1,157 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.features.table.BedTableCodec;
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.*;
import java.util.*;
/**
* @author depristo
*
* UnitTests for RMD FeatureManager
*/
/**
 * Unit tests for the RMD {@link FeatureManager}: verifies that the manager discovers
 * feature descriptors, that lookups by file, codec class, name (case-insensitive), and
 * feature class all resolve to the expected descriptor, and that codecs can be created.
 *
 * @author depristo
 */
public class FeatureManagerUnitTest extends BaseTest {
    // A deliberately non-Tribble file, used to verify that no codec claims it.
    private static final File RANDOM_FILE = new File(validationDataLocation + "exampleGATKReport.eval");
    // Example inputs in VCF3 and VCF4 formats for file-type detection tests.
    private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
    private static final File VCF4_FILE = new File(validationDataLocation + "vcf4.1.example.vcf");

    private FeatureManager manager;
    private GenomeLocParser genomeLocParser;

    /**
     * Builds a fresh FeatureManager and a GenomeLocParser over the b36 reference
     * before each test method.
     */
    @BeforeMethod
    public void setup() {
        File referenceFile = new File(b36KGReference);
        try {
            IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
            genomeLocParser = new GenomeLocParser(seq);
            manager = new FeatureManager();
        }
        catch(FileNotFoundException ex) {
            throw new UserException.CouldNotReadInputFile(referenceFile,ex);
        }
    }

    @Test
    public void testManagerCreation() {
        Assert.assertTrue(manager.getFeatureDescriptors().size() > 0);
    }

    /**
     * One expected (feature class, codec class, name, optional example file) tuple.
     * Instances self-register with the TestDataProvider machinery via the super()
     * call — presumably collected later by getTests(); see BaseTest.
     */
    private class FMTest extends BaseTest.TestDataProvider {
        public Class codec;
        public Class<? extends Feature> feature;
        public String name;
        public File associatedFile;

        private FMTest(final Class feature, final Class codec, final String name, final File file) {
            super(FMTest.class);
            this.codec = codec;
            this.feature = feature;
            this.name = name;
            this.associatedFile = file;
        }

        /** Asserts the descriptor's codec, feature class, and (case-insensitive) name match this tuple. */
        public void assertExpected(FeatureManager.FeatureDescriptor featureDescriptor) {
            Assert.assertEquals(featureDescriptor.getCodecClass(), codec);
            Assert.assertEquals(featureDescriptor.getFeatureClass(), feature);
            Assert.assertEquals(featureDescriptor.getName().toLowerCase(), name.toLowerCase());
        }

        public String toString() {
            return String.format("FMTest name=%s codec=%s feature=%s file=%s", name, codec, feature, associatedFile);
        }
    }

    @DataProvider(name = "tests")
    public Object[][] createTests() {
        // Constructors register each case as a side effect; getTests() harvests them.
        new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
        new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
        new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
        return FMTest.getTests(FMTest.class);
    }

    @Test(dataProvider = "tests")
    public void testGetByFile(FMTest params) {
        if ( params.associatedFile != null ) {
            FeatureManager.FeatureDescriptor byFile = manager.getByFiletype(params.associatedFile);
            Assert.assertNotNull(byFile, "Couldn't find any type associated with file " + params.associatedFile);
            params.assertExpected(byFile);
        }
    }

    @Test
    public void testGetByFileNoMatch() {
        FeatureManager.FeatureDescriptor byFile = manager.getByFiletype(RANDOM_FILE);
        Assert.assertNull(byFile, "Found type " + byFile + " associated with RANDOM, non-Tribble file " + RANDOM_FILE);
    }

    @Test(dataProvider = "tests")
    public void testGetters(FMTest params) {
        params.assertExpected(manager.getByCodec(params.codec));
        // Name lookups must be case-insensitive.
        params.assertExpected(manager.getByName(params.name));
        params.assertExpected(manager.getByName(params.name.toLowerCase()));
        params.assertExpected(manager.getByName(params.name.toUpperCase()));

        Collection<FeatureManager.FeatureDescriptor> descriptors = manager.getByFeature(params.feature);
        Assert.assertTrue(descriptors.size() > 0, "Look up by FeatureClass failed");
    }

    @Test
    public void testUserFriendlyList() {
        final String listing = manager.userFriendlyListOfAvailableFeatures();
        Assert.assertTrue(listing.length() > 0, "Expected at least one codec to be listed");
        // BUGFIX: String.split always yields >= 1 element, so the original "> 0" check
        // was vacuous; "> 1" actually enforces the two-codec expectation in the message.
        Assert.assertTrue(listing.split(",").length > 1, "Expected at least two codecs, but only saw one");
    }

    @Test
    public void testCodecCreation() {
        FeatureManager.FeatureDescriptor descriptor = manager.getByName("vcf");
        Assert.assertNotNull(descriptor, "Couldn't find VCF feature descriptor!");

        FeatureCodec c = manager.createCodec(descriptor, "foo", genomeLocParser);
        Assert.assertNotNull(c, "Couldn't create codec");
        Assert.assertEquals(c.getClass(), descriptor.getCodecClass());
        Assert.assertEquals(c.getFeatureType(), descriptor.getFeatureClass());
    }
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2010. The Broad Institute
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
@ -11,7 +12,7 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
@ -74,8 +75,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
@Test
public void testBuilder() {
Map<String, Class> classes = builder.getAvailableTrackNamesAndTypes();
Assert.assertTrue(classes.size() > 0);
Assert.assertTrue(builder.getFeatureManager().getFeatureDescriptors().size() > 0);
}
@Test