From e36994e36bd9028601f75dff4939809fe8747c76 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Aug 2011 14:04:46 -0400 Subject: [PATCH] Refactored a FeatureManager class from RMDTrackBuilder New class handles (vastly more cleanly) the db of tribble codecs, features, and names for use throughout the GATK. Added SelfScopingFeatureCodec interface that allows a FeatureCodec to examine a file and determine if the file can be parsed. This is the first step towards allowing the GATK to dynamically determine the type of a RodBinding. --- .../rmd/ReferenceOrderedDataSource.java | 4 +- .../gatk/refdata/SelfScopingFeatureCodec.java | 48 ++++ .../gatk/refdata/indexer/RMDIndexer.java | 11 +- .../gatk/refdata/tracks/FeatureManager.java | 216 ++++++++++++++++++ .../gatk/refdata/tracks/RMDTrackBuilder.java | 139 +++-------- .../walkers/diffengine/VCFDiffableReader.java | 10 +- .../gatk/GATKExtensionsGenerator.java | 2 +- .../queue/extensions/gatk/RodBindField.java | 70 +++--- .../utils/classloader/PluginManager.java | 2 +- .../utils/codecs/vcf/AbstractVCFCodec.java | 17 +- .../sting/utils/codecs/vcf/VCF3Codec.java | 8 + .../sting/utils/codecs/vcf/VCFCodec.java | 8 +- .../sting/utils/text/ListFileUtils.java | 14 +- .../tracks/FeatureManagerUnitTest.java | 157 +++++++++++++ .../tracks/RMDTrackBuilderUnitTest.java | 8 +- 15 files changed, 540 insertions(+), 174 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java index 6992fc1ff..18679dd77 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java @@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource { } public Class getType() { - return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass(); } public Class getRecordType() { - return builder.createCodec(getType(),getName()).getFeatureType(); + return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass(); } public File getFile() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java new file mode 100644 index 000000000..de781b839 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata; + +import java.io.File; + +/** + * An interface marking that a given Tribble codec can look at the file and determine whether the + * codec is specifically capable of parsing the contents of the file. + */ +public interface SelfScopingFeatureCodec { + /** + * This function returns true iff the File potentialInput can be parsed by this + * codec. + * + * The GATK assumes that there's never a situation where two SelfScopingFeatureCodecs + * return true for the same file. If this occurs the GATK spits out an error. + * + * Note this function must never throw an error. All errors should be trapped + * and false returned. + * + * @param potentialInput the file to test for parsability with this codec + * @return true if potentialInput can be parsed, false otherwise + */ + public boolean canDecode(final File potentialInput); +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index 85374757d..029800aea 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.utils.GenomeLocParser; 
-import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import java.io.File; import java.io.FileOutputStream; -import java.util.Map; /** * a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory @@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram { RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL); // find the types available to the track builders - Map typeMapping = builder.getAvailableTrackNamesAndTypes(); + FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType); // check that the type is valid - if (!typeMapping.containsKey(inputFileType)) - throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet())); + if (descriptor == null) + throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. 
Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures()); // create the codec - FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType)); + FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser); // check if it's a reference dependent feature codec if (codec instanceof ReferenceDependentFeatureCodec) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java new file mode 100644 index 000000000..26a400071 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broad.tribble.NameAwareCodec; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.util.*; + + +/** + * Class for managing Tribble Feature readers available to the GATK. The features + * are dynamically determined via a PluginManager. This class provides convenient + * getter methods for obtaining FeatureDescriptor objects that collect all of the + * useful information about the Tribble Codec, Feature, and name in one place. 
+ * + * @author depristo + */ +public class FeatureManager { + public static class FeatureDescriptor { + final String name; + final FeatureCodec codec; + + public FeatureDescriptor(final String name, final FeatureCodec codec) { + this.name = name; + this.codec = codec; + } + + public String getName() { + return name; + } + public FeatureCodec getCodec() { + return codec; + } + public Class getCodecClass() { return codec.getClass(); } + public Class getFeatureClass() { return codec.getFeatureType(); } + + @Override + public String toString() { + return String.format("FeatureDescriptor name=%s codec=%s feature=%s", getName(), getCodecClass().getName(), getFeatureClass().getName()); + } + } + + private final PluginManager pluginManager; + private final Collection featureDescriptors = new HashSet(); + + + /** + * Construct a FeatureManager holding one FeatureDescriptor, with an upper-cased name, per codec plugin found by the PluginManager + */ + public FeatureManager() { + pluginManager = new PluginManager(FeatureCodec.class, "Codecs", "Codec"); + + for (final String rawName: pluginManager.getPluginsByName().keySet()) { + FeatureCodec codec = pluginManager.createByName(rawName); + String name = rawName.toUpperCase(); + FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec); + featureDescriptors.add(featureDescriptor); + } + } + + /** + * Return the FeatureDescriptor whose getCodecClass().equals(codecClass). 
+ * + * @param codecClass the codec class to look up + * @return A FeatureDescriptor or null if none is found + */ + @Requires("codecClass != null") + public FeatureDescriptor getByCodec(Class codecClass) { + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getCodecClass().equals(codecClass) ) + return descriptor; + return null; + } + + /** + * Returns a collection of FeatureDescriptors whose feature class is featureClass or a subtype of it + * + * @param featureClass the feature (record) type to match + * @return the matching FeatureDescriptors; an empty collection (never null) if none is found + */ + @Requires("featureClass != null") + public Collection getByFeature(Class featureClass) { + Set consistentDescriptors = new HashSet(); + + if (featureClass == null) + throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object"); + + for ( FeatureDescriptor descriptor : featureDescriptors ) { + if ( featureClass.isAssignableFrom(descriptor.getFeatureClass())) + consistentDescriptors.add(descriptor); + } + return consistentDescriptors; + } + + /** + * Return the FeatureDescriptor with getName().equalsIgnoreCase(name) + * + * @param name the feature name to look up, compared case-insensitively + * @return A FeatureDescriptor or null if none is found + */ + @Requires("name != null") + public FeatureDescriptor getByName(String name) { + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getName().equalsIgnoreCase(name) ) + return descriptor; + return null; + } + + /** + * Returns the FeatureDescriptor that can read the contents of File file, if one can be determined; throws a ReviewedStingException if more than one codec claims the file + * + * @param file the file to test against every SelfScopingFeatureCodec + * @return A FeatureDescriptor or null if none is found + */ + @Requires({"file != null", "file.isFile()", "file.canRead()"}) + public FeatureDescriptor getByFiletype(File file) { + List canParse = new ArrayList(); + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) { + if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) { + canParse.add(descriptor); + } + } + + if ( 
canParse.size() == 0 ) + return null; + else if ( canParse.size() > 1 ) + throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse); + else + return canParse.get(0); + } + + /** + * Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found + * @param triplet the RMDTriplet whose getType() is looked up by name + * @return the matching FeatureDescriptor, or null if none is found + */ + @Requires("triplet != null") + public FeatureDescriptor getByTriplet(RMDTriplet triplet) { + return getByName(triplet.getType()); + } + + /** + * @return all of the FeatureDescriptors available to the GATK. Never null + */ + @Ensures("result != null") + public Collection getFeatureDescriptors() { + return Collections.unmodifiableCollection(featureDescriptors); + } + + + /** + * Returns a comma-separated list of the available tribble track names (vcf,dbsnp,etc) that we can load + * @return the descriptor names joined with commas + */ + @Ensures("result != null") + public String userFriendlyListOfAvailableFeatures() { + List names = new ArrayList(); + for ( final FeatureDescriptor descriptor : featureDescriptors ) + names.add(descriptor.getName()); + return Utils.join(",", names); + } + + /** + * Create a new FeatureCodec of the type described in descriptor, assigning it the + * name (if possible) and providing it the genomeLocParser (where necessary) + * + * @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create + * @param name the name to assign this codec + * @return the feature codec itself + */ + @Requires({"descriptor != null", "name != null", "genomeLocParser != null"}) + @Ensures("result != null") + public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) { + FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass()); + if ( codex instanceof NameAwareCodec ) + ((NameAwareCodec)codex).setName(name); + if ( codex instanceof ReferenceDependentFeatureCodec ) + ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); + return codex; + } +} 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index f285f1263..d352894e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -65,7 +65,7 @@ import java.util.*; * that gets iterators from the FeatureReader using Tribble. * */ -public class RMDTrackBuilder extends PluginManager { +public class RMDTrackBuilder { // extends PluginManager { /** * our log, which we use to capture anything from this class */ @@ -74,8 +74,6 @@ public class RMDTrackBuilder extends PluginManager { // a constant we use for marking sequence dictionary entries in the Tribble index property list public static final String SequenceDictionaryPropertyPredicate = "DICT:"; - private Map classes = null; - // private sequence dictionary we use to set our tracks with private SAMSequenceDictionary dict = null; @@ -89,6 +87,8 @@ public class RMDTrackBuilder extends PluginManager { */ private ValidationExclusion.TYPE validationExclusionType; + FeatureManager featureManager; + /** * Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally * used when walkers want to directly manage the ROD system for whatever reason. 
Before using this constructor, @@ -100,66 +100,14 @@ public class RMDTrackBuilder extends PluginManager { public RMDTrackBuilder(SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { - this(); this.dict = dict; - this.genomeLocParser = genomeLocParser; this.validationExclusionType = validationExclusionType; - - classes = new HashMap(); - for (String name: this.getPluginsByName().keySet()) { - classes.put(name.toUpperCase(), getPluginsByName().get(name)); - } + this.genomeLocParser = genomeLocParser; + featureManager = new FeatureManager(); } - /** - * Limited constructor that produces a builder capable for validating types, but not building tracks - */ - public RMDTrackBuilder() { - super(FeatureCodec.class, "Codecs", "Codec"); - - classes = new HashMap(); - for (String name: this.getPluginsByName().keySet()) { - classes.put(name.toUpperCase(), getPluginsByName().get(name)); - } - } - - - /** @return a list of all available track types we currently have access to create */ - public Map getAvailableTrackNamesAndTypes() { - return Collections.unmodifiableMap(classes); - } - - /** @return a list of all available track record types we currently have access to create */ - public Map getAvailableTrackNamesAndRecordTypes() { - HashMap classToRecord = new HashMap(); - for (String name: this.getPluginsByName().keySet()) { - FeatureCodec codec = this.createByName(name); - classToRecord.put(name.toUpperCase(), codec.getFeatureType()); - } - return classToRecord; - } - - public Class getFeatureCodecClass(RMDTriplet fileDescriptor) { - return getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); - } - - /** - * Returns the FeatureClass (BeagleFeature) produced by an RMDTriplet, or null - * if no such binding is found - * - * @param fileDescriptor - * @return - */ - public Class getFeatureClass(RMDTriplet fileDescriptor) { - return 
getAvailableTrackNamesAndRecordTypes().get(fileDescriptor.getType().toUpperCase()); - } - - /** - * Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load - * @return - */ - public String getAvailableTribbleFeatureNames() { - return Utils.join(",", getAvailableTrackNamesAndRecordTypes().keySet()); + public FeatureManager getFeatureManager() { + return featureManager; } /** @@ -173,38 +121,33 @@ public class RMDTrackBuilder extends PluginManager { String name = fileDescriptor.getName(); File inputFile = new File(fileDescriptor.getFile()); - Class featureCodecClass = getFeatureCodecClass(fileDescriptor); - if (featureCodecClass == null) + FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor); + if (descriptor == null) throw new UserException.BadArgumentValue("-B",fileDescriptor.getType()); // return a feature reader track Pair pair; if (inputFile.getAbsolutePath().endsWith(".gz")) - pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile); + pair = createTabixIndexedFeatureSource(descriptor, name, inputFile); else - pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType()); + pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType()); if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); - return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name)); + return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name)); } /** * Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream. - * @param targetClass Type of Tribble class to build. + * @param codecClass Type of Tribble codec class to build. * @param inputFile Input file type to use. 
* @return An RMDTrack, suitable for accessing reference metadata. */ - public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) { - // TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics. - String typeName = null; - for(Map.Entry trackType: getAvailableTrackNamesAndTypes().entrySet()) { - if(trackType.getValue().equals(targetClass)) - typeName = trackType.getKey(); - } + public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) { + final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass); - if(typeName == null) - throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName()); + if (descriptor == null) + throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName()); - return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); + return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); } /** @@ -212,16 +155,16 @@ public class RMDTrackBuilder extends PluginManager { * reader of the appropriate type will figure out what the right index type is, and determine if it * exists. 
* - * @param targetClass the codec class type + * @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create * @param name the name of the track * @param inputFile the file to load * @return a feature reader implementation */ - private Pair createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) { + private Pair createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) { // we might not know the index type, try loading with the default reader constructor logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file"); try { - return new Pair(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null); + return new Pair(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null); } catch (TribbleException e) { throw new UserException(e.getMessage(), e); } @@ -229,28 +172,26 @@ public class RMDTrackBuilder extends PluginManager { /** * add a name to the codec, if it takes one - * @param targetClass the class to create a codec for + * @param descriptor the class to create a codec for * @param name the name to assign this codec * @return the feature codec itself */ - public FeatureCodec createCodec(Class targetClass, String name) { - FeatureCodec codex = this.createByType(targetClass); - if ( codex instanceof NameAwareCodec ) - ((NameAwareCodec)codex).setName(name); - if(codex instanceof ReferenceDependentFeatureCodec) - ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); - return codex; + private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) { + return featureManager.createCodec(descriptor, name, genomeLocParser); } /** * create a feature source object given: - * @param targetClass the target class + * @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create * @param name the 
name of the codec * @param inputFile the tribble file to parse * @param storageType How the RMD is streamed into the input file. * @return the input file as a FeatureReader */ - private Pair getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) { + private Pair getFeatureSource(FeatureManager.FeatureDescriptor descriptor, + String name, + File inputFile, + RMDStorageType storageType) { // Feature source and sequence dictionary to use as the ultimate reference FeatureSource featureSource = null; SAMSequenceDictionary sequenceDictionary = null; @@ -260,7 +201,7 @@ public class RMDTrackBuilder extends PluginManager { if(canBeIndexed) { try { - Index index = loadIndex(inputFile, createCodec(targetClass, name)); + Index index = loadIndex(inputFile, createCodec(descriptor, name)); try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } catch (ReviewedStingException e) { } @@ -273,7 +214,7 @@ public class RMDTrackBuilder extends PluginManager { sequenceDictionary = getSequenceDictionaryFromProperties(index); } - featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name)); + featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name)); } catch (TribbleException e) { throw new UserException(e.getMessage()); @@ -283,7 +224,7 @@ public class RMDTrackBuilder extends PluginManager { } } else { - featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false); + featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false); } return new Pair(featureSource,sequenceDictionary); @@ -418,22 +359,6 @@ public class RMDTrackBuilder extends PluginManager { return idx; } - /** - * Returns a collection of track names that match the record type. 
- * @param trackRecordType the record type specified in the @RMD annotation - * @return a collection of available track record type names that match the record type - */ - public Collection getTrackRecordTypeNames(Class trackRecordType) { - Set names = new TreeSet(); - if (trackRecordType == null) - throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object"); - - for (Map.Entry availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) { - if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue())) - names.add(availableTrackRecordType.getKey()); - } - return names; - } // --------------------------------------------------------------------------------------------------------- // static functions to work with the sequence dictionaries of indexes diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 77a992ce0..a447d17af 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -129,14 +129,6 @@ public class VCFDiffableReader implements DiffableReader { @Override public boolean canRead(File file) { - try { - final String VCF4_HEADER = "##fileformat=VCFv4"; - char[] buff = new char[VCF4_HEADER.length()]; - new FileReader(file).read(buff, 0, VCF4_HEADER.length()); - String firstLine = new String(buff); - return firstLine.startsWith(VCF4_HEADER); - } catch ( IOException e ) { - return false; - } + return AbstractVCFCodec.canDecodeFile(file, VCFCodec.VCF4_MAGIC_HEADER); } } diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java 
index e5974e165..9578eda84 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -157,7 +157,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { List argumentFields = new ArrayList(); argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(parser,walkerType)); - argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder)); + //argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder)); argumentFields.addAll(ReadFilterField.getFilterArguments(parser,walkerType)); String constructor = String.format("analysisName = \"%1$s\"%nanalysis_type = \"%1$s\"%n", walkerName); diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java index 02d2fd0a8..baf083575 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java @@ -91,39 +91,39 @@ public class RodBindField extends ArgumentField { } return exclusiveOf.toString(); } - - public static List getRodArguments(Class walkerClass, RMDTrackBuilder trackBuilder) { - List argumentFields = new ArrayList(); - - List requires = WalkerManager.getRequiredMetaData(walkerClass); - List allows = WalkerManager.getAllowsMetaData(walkerClass); - - for (RMD required: requires) { - List fields = new ArrayList(); - String trackName = required.name(); - if ("*".equals(trackName)) { - // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers - //fields.add(new RodBindArgumentField(argumentDefinition, true)); - } else { - for (String typeName: trackBuilder.getTrackRecordTypeNames(required.type())) - fields.add(new RodBindField(trackName, typeName, fields, true)); - } - 
argumentFields.addAll(fields); - } - - for (RMD allowed: allows) { - List fields = new ArrayList(); - String trackName = allowed.name(); - if ("*".equals(trackName)) { - // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers - //fields.add(new RodBindArgumentField(argumentDefinition, false)); - } else { - for (String typeName: trackBuilder.getTrackRecordTypeNames(allowed.type())) - fields.add(new RodBindField(trackName, typeName, fields, true)); - } - argumentFields.addAll(fields); - } - - return argumentFields; - } +// +// public static List getRodArguments(Class walkerClass, RMDTrackBuilder trackBuilder) { +// List argumentFields = new ArrayList(); +// +// List requires = WalkerManager.getRequiredMetaData(walkerClass); +// List allows = WalkerManager.getAllowsMetaData(walkerClass); +// +// for (RMD required: requires) { +// List fields = new ArrayList(); +// String trackName = required.name(); +// if ("*".equals(trackName)) { +// // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers +// //fields.add(new RodBindArgumentField(argumentDefinition, true)); +// } else { +// for (String typeName: trackBuilder.getFeatureManager().getTrackRecordTypeNames(required.type())) +// fields.add(new RodBindField(trackName, typeName, fields, true)); +// } +// argumentFields.addAll(fields); +// } +// +// for (RMD allowed: allows) { +// List fields = new ArrayList(); +// String trackName = allowed.name(); +// if ("*".equals(trackName)) { +// // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers +// //fields.add(new RodBindArgumentField(argumentDefinition, false)); +// } else { +// for (String typeName: trackBuilder.getFeatureManager().getTrackRecordTypeNames(allowed.type())) +// fields.add(new RodBindField(trackName, typeName, fields, true)); +// } +// argumentFields.addAll(fields); +// } +// +// return argumentFields; +// } } diff --git 
a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index 8d37ff573..04cbef0c3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -172,7 +172,7 @@ public class PluginManager { } } - protected Map> getPluginsByName() { + public Map> getPluginsByName() { return Collections.unmodifiableMap(pluginsByName); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 9788f8654..cb505c717 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -7,16 +7,20 @@ import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; import java.util.*; -public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser { +public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser, SelfScopingFeatureCodec { protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column @@ -616,4 +620,32 @@ public 
abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, return inputVC; } + + /** + * Returns true iff the first characters of potentialInput are exactly MAGIC_HEADER_LINE. + * + * Never throws on I/O problems: a missing, unreadable, or too-short file yields false. + * The reader is closed before returning so that probing candidate files does not leak + * file handles. + * + * @param potentialInput the file to examine + * @param MAGIC_HEADER_LINE the prefix identifying the file format, e.g. VCFCodec.VCF4_MAGIC_HEADER + * @return true if potentialInput starts with MAGIC_HEADER_LINE, false otherwise + */ + public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) { + try { + final FileReader reader = new FileReader(potentialInput); + try { + for ( int i = 0; i < MAGIC_HEADER_LINE.length(); i++ ) { + if ( reader.read() != MAGIC_HEADER_LINE.charAt(i) ) + return false; + } + return true; + } finally { + reader.close(); + } + } catch ( IOException e ) { + return false; + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index c29f2ba8b..ea16595bb 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.util.*; @@ -16,6 +18,8 @@ import java.util.*; * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. 
 */ public class VCF3Codec extends AbstractVCFCodec { + public final static String VCF3_MAGIC_HEADER = "##fileformat=VCFv3"; /* text canDecode() requires at the very start of a candidate file */ + /** * @param reader the line reader to take header lines from @@ -178,4 +182,8 @@ public class VCF3Codec extends AbstractVCFCodec { return genotypes; } + @Override + public boolean canDecode(final File potentialInput) { /* delegates to canDecodeFile: true only when the file starts with VCF3_MAGIC_HEADER */ + return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 05fff5d9e..55a0eb3f9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.util.*; @@ -16,6 +18,7 @@ import java.util.*; * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
 */ public class VCFCodec extends AbstractVCFCodec { + public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4"; /* text canDecode() requires at the very start of a candidate file */ /** * @param reader the line reader to take header lines from @@ -184,5 +187,8 @@ public class VCFCodec extends AbstractVCFCodec { return genotypes; } - + @Override + public boolean canDecode(final File potentialInput) { /* delegates to canDecodeFile: true only when the file starts with VCF4_MAGIC_HEADER */ + return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 82a8f86d9..79271464b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -28,7 +28,7 @@ import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -134,7 +134,7 @@ public class ListFileUtils { public static Collection unpackRODBindings(final Collection RODBindings, final ParsingEngine parser) { // todo -- this is a strange home for this code.
Move into ROD system Collection rodBindings = new ArrayList(); - RMDTrackBuilder builderForValidation = new RMDTrackBuilder(); + FeatureManager builderForValidation = new FeatureManager(); for (RodBinding rodBinding: RODBindings) { String argValue = rodBinding.getSource(); @@ -153,15 +153,15 @@ public class ListFileUtils { RMDTriplet triplet = new RMDTriplet(name,type,fileName,storageType,rodBinding.getTags()); // validate triplet type - Class typeFromTribble = builderForValidation.getFeatureClass(triplet); - if ( typeFromTribble == null ) + FeatureManager.FeatureDescriptor descriptor = builderForValidation.getByTriplet(triplet); + if ( descriptor == null ) throw new UserException.UnknownTribbleType(rodBinding.getTribbleType(), String.format("Field %s had provided type %s but there's no such Tribble type. Available types are %s", - rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.getAvailableTribbleFeatureNames())); - if ( ! rodBinding.getType().isAssignableFrom(typeFromTribble) ) + rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures())); + if ( ! 
rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) ) throw new UserException.BadArgumentValue(rodBinding.getName(), String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s", - rodBinding.getName(), rodBinding.getType(), typeFromTribble)); + rodBinding.getName(), rodBinding.getType(), descriptor.getName())); rodBindings.add(triplet); diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java new file mode 100644 index 000000000..5d662ffed --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.refdata.features.table.BedTableCodec; +import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.*; +import java.util.*; + + +/** + * @author depristo + * + * UnitTests for RMD FeatureManager + */ +public class FeatureManagerUnitTest extends BaseTest { + private static final File RANDOM_FILE = new File(validationDataLocation + "exampleGATKReport.eval"); /* used by testGetByFileNoMatch as a file that no codec should claim */ + private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf"); + private static final File VCF4_FILE = new File(validationDataLocation + "vcf4.1.example.vcf"); + + private FeatureManager manager; + private GenomeLocParser genomeLocParser; + + @BeforeMethod + public void setup() { /* runs before each test method: builds a GenomeLocParser from the reference at b36KGReference and a fresh FeatureManager */ + File referenceFile = new File(b36KGReference); + try { + IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile); + genomeLocParser = new GenomeLocParser(seq); + manager = new FeatureManager(); + } + catch(FileNotFoundException ex) { + throw new UserException.CouldNotReadInputFile(referenceFile,ex); + } + } + + @Test + public void testManagerCreation() { /* a freshly built manager must report at least one feature descriptor */ + Assert.assertTrue(manager.getFeatureDescriptors().size() > 0); + } + + private class 
FMTest extends BaseTest.TestDataProvider { /* one expected pairing of feature class, codec class, codec name and optional example file */ + public Class codec; + public Class feature; + public String name; + public File associatedFile; + + private FMTest(final Class feature, final Class codec, final String name, final File file) { + super(FMTest.class); + this.codec = codec; + this.feature = feature; + this.name = name; + this.associatedFile = file; + } + + public void assertExpected(FeatureManager.FeatureDescriptor featureDescriptor) { /* codec and feature classes must match exactly; names are compared ignoring case */ + Assert.assertEquals(featureDescriptor.getCodecClass(), codec); + Assert.assertEquals(featureDescriptor.getFeatureClass(), feature); + Assert.assertEquals(featureDescriptor.getName().toLowerCase(), name.toLowerCase()); + } + + public String toString() { + return String.format("FMTest name=%s codec=%s feature=%s file=%s", name, codec, feature, associatedFile); + } + } + + @DataProvider(name = "tests") + public Object[][] createTests() { /* NOTE(review): the FMTest instances are not stored here; presumably the TestDataProvider constructor registers them for getTests -- confirm in BaseTest */ + new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE); + new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE); + new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null); + return FMTest.getTests(FMTest.class); + } + + @Test(dataProvider = "tests") + public void testGetByFile(FMTest params) { /* cases without an associated file (the bedtable entry) are silently skipped */ + if ( params.associatedFile != null ) { + FeatureManager.FeatureDescriptor byFile = manager.getByFiletype(params.associatedFile); + Assert.assertNotNull(byFile, "Couldn't find any type associated with file " + params.associatedFile); + params.assertExpected(byFile); + } + } + + @Test + public void testGetByFileNoMatch() { /* getByFiletype is expected to return null for a file no codec recognizes */ + FeatureManager.FeatureDescriptor byFile = manager.getByFiletype(RANDOM_FILE); + Assert.assertNull(byFile, "Found type " + byFile + " associated with RANDOM, non-Tribble file " + RANDOM_FILE); + } + + @Test(dataProvider = "tests") + public void testGetters(FMTest params) { /* the same descriptor must be reachable by codec class and by name in any letter case */ + params.assertExpected(manager.getByCodec(params.codec)); + params.assertExpected(manager.getByName(params.name)); +
params.assertExpected(manager.getByName(params.name.toLowerCase())); + params.assertExpected(manager.getByName(params.name.toUpperCase())); + + Collection descriptors = manager.getByFeature(params.feature); + Assert.assertTrue(descriptors.size() > 0, "Look up by FeatureClass failed"); + } + + @Test + public void testUserFriendlyList() { /* the list must be non-empty and, split on commas, must name more than one codec */ + Assert.assertTrue(manager.userFriendlyListOfAvailableFeatures().length() > 0, "Expected at least one codec to be listed"); + Assert.assertTrue(manager.userFriendlyListOfAvailableFeatures().split(",").length > 1, "Expected at least two codecs, but only saw one"); /* "> 0" could never fail: split() yields at least one element here */ + } + + @Test + public void testCodecCreation() { + FeatureManager.FeatureDescriptor descriptor = manager.getByName("vcf"); + Assert.assertNotNull(descriptor, "Couldn't find VCF feature descriptor!"); + + FeatureCodec c = manager.createCodec(descriptor, "foo", genomeLocParser); + Assert.assertNotNull(c, "Couldn't create codec"); + Assert.assertEquals(c.getClass(), descriptor.getCodecClass()); + Assert.assertEquals(c.getFeatureType(), descriptor.getFeatureClass()); + } + +} + diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java index 70d2e7a85..ae218e898 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java @@ -1,5 +1,6 @@ /* - * Copyright (c) 2010. The Broad Institute + * Copyright (c) 2011, The Broad Institute + * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without @@ -11,7 +12,7 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT @@ -74,8 +75,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest { @Test public void testBuilder() { - Map classes = builder.getAvailableTrackNamesAndTypes(); - Assert.assertTrue(classes.size() > 0); + Assert.assertTrue(builder.getFeatureManager().getFeatureDescriptors().size() > 0); /* the builder's FeatureManager must expose at least one feature descriptor */ } @Test