From 33ae25618674ee8741ffba523d7e3ec9b32471bf Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 18 Feb 2010 18:43:52 +0000 Subject: [PATCH] a start to some of the infrastructure for Tribble, including dynamic detection of new RMD; not nearly wired in or complete yet. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2855 348d0f76-0448-11de-a6fe-93d51630548a --- .../refdata/tracks/FeatureReaderTrack.java | 94 ++++++++++++ .../gatk/refdata/tracks/QueryableTrack.java | 44 ++++++ .../sting/gatk/refdata/tracks/RMDTrack.java | 88 +++++++++++ .../tracks/RMDTrackCreationException.java | 45 ++++++ .../gatk/refdata/tracks/RMDTrackManager.java | 126 ++++++++++++++++ .../gatk/refdata/tracks/RODRMDTrack.java | 69 +++++++++ .../tracks/builders/RMDTrackBuilder.java | 58 ++++++++ .../builders/TribbleRMDTrackBuilder.java | 134 +++++++++++++++++ .../utils/FeatureToGATKFeatureIterator.java | 62 ++++++++ .../sting/gatk/refdata/utils/GATKFeature.java | 139 ++++++++++++++++++ .../sting/gatk/refdata/utils/RMDTriplet.java | 50 +++++++ .../sting/utils/PluginManager.java | 24 ++- .../refdata/tracks/RMDTrackManagerTest.java | 98 ++++++++++++ .../builders/TribbleRMDTrackBuilderTest.java | 57 +++++++ 14 files changed, 1084 insertions(+), 4 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureReaderTrack.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/tracks/RODRMDTrack.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java create mode 100644 
java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java create mode 100644 java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerTest.java create mode 100644 java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureReaderTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureReaderTrack.java new file mode 100644 index 000000000..71df8bb61 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureReaderTrack.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import org.broad.tribble.FeatureReader; +import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + + +/** + * Class FeatureReaderTrack + * + * A feature reader track: the RMDTrack implementation (also a QueryableTrack) for tracks that are generated out of Tribble. + * Every iterator it returns wraps the FeatureReader's result in a FeatureToGATKFeatureIterator tagged with this track's name. + * + * @author aaron + */ +public class FeatureReaderTrack extends RMDTrack implements QueryableTrack { + // the Tribble feature reader that backs this track; getIterator() and every query() overload delegate to it + private FeatureReader reader; + + /** + * Create a track + * + * @param type the type of track, used for track lookup + * @param name the name of this specific track + * @param file the associated file, for reference or recreating the reader + * @param reader the feature reader to use as the underlying data source + */ + public FeatureReaderTrack(Class type, String name, File file, FeatureReader reader) { + super(type, name, file); + this.reader = reader; + } + + /** + * @return an iterator over the underlying data, taken from reader.iterator(). 
This is all a track has to support, + * but other more advanced tracks support the query interface. An IOException from the reader is rethrown as a StingException. + */ + @Override + public Iterator getIterator() { + try { + return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName()); + } catch (IOException e) { + throw new StingException("Unable to read from file",e); + } + } + /** Query by GenomeLoc: the interval's contig, start and stop are handed to reader.query, with start and stop cast to int. */ + @Override + public Iterator query(GenomeLoc interval) throws IOException { + return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); + } + /** Same as query(GenomeLoc), additionally forwarding the 'contained' flag to reader.query. */ + @Override + public Iterator query(GenomeLoc interval, boolean contained) throws IOException { + return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop(), contained),this.getName()); + } + /** Query by explicit contig, start and stop, passed to reader.query unchanged. */ + @Override + public Iterator query(String contig, int start, int stop) throws IOException { + return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName()); + } + /** Query by explicit contig, start and stop, additionally forwarding the 'contained' flag to reader.query. */ + @Override + public Iterator query(String contig, int start, int stop, boolean contained) throws IOException { + return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop, contained),this.getName()); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java new file mode 100644 index 000000000..ee32e03d1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010. 
The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.io.IOException; +import java.util.Iterator; + +/** + * @author aaron + *

+ * Interface QueryableTrack + *

+ * a decorator interface for tracks that are queryable: lookups are offered by GenomeLoc or by contig/start/stop, each with or without a 'contained' flag, and every overload may throw IOException (the meaning of 'contained' is not defined here - presumably it limits results to features fully inside the interval; confirm against the implementation) + */ +public interface QueryableTrack { + public Iterator query(final GenomeLoc interval) throws IOException; + public Iterator query(final GenomeLoc interval, final boolean contained) throws IOException; + public Iterator query(final String contig, final int start, final int stop) throws IOException; + public Iterator query(final String contig, final int start, final int stop, final boolean contained) throws IOException; +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java new file mode 100644 index 000000000..1d6d64429 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; + +import java.io.File; +import java.util.Iterator; + + +/** + * @author aaron + *

+ * Class RMDTrack + *

+ * the basics of what a reference metadata track must contain: a type, a name, the file it is read from, and a way to iterate over its data. + */ +public abstract class RMDTrack { + + // the basics of a track: + private final Class type; // our type; matches() compares against its simple class name + private final String name; // the name + private final File file; // the associated file we create the reader from + + /** + * Create a track + * + * @param type the type of track, used for track lookup + * @param name the name of this specific track + * @param file the associated file, for reference or recreating the reader + */ + protected RMDTrack(Class type, String name, File file) { + this.type = type; + this.name = name; + this.file = file; + } + /** @return the type (class) this track was created with */ + public Class getType() { + return type; + } + /** @return the name this track was created with */ + public String getName() { + return name; + } + /** @return the file this track was created with */ + public File getFile() { + return file; + } + + /** + * @return how to get an iterator of the underlying data. This is all a track has to support, + * but other more advanced tracks support the query interface + */ + public abstract Iterator getIterator(); + + /** + * helper function for determining if we are the same track + * both comparisons use String.equals, so they are case-sensitive and neither argument may be null + * @param name the name to match + * @param type the type to match, given as the simple class name of the track type (compared to getSimpleName()) + * + * @return true on a match, false if the name or type is different + */ + public boolean matches(String name, String type) { + return (name.equals(this.name) && type.equals(this.type.getSimpleName())); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java new file mode 100644 index 000000000..8bab261a7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2010. 
The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import org.broadinstitute.sting.utils.StingException; + + +/** + * Class RMDTrackCreationException + * + * if we fail for some reason to make a track, throw this exception. It is a StingException subtype, so + * handlers written for StingException also see it; both constructors simply forward to the superclass. + * + * @author aaron + */ +public class RMDTrackCreationException extends StingException { + public RMDTrackCreationException(String msg) { + super(msg); + } + /** Create the exception from a message plus the underlying cause; both are handed to StingException. */ + public RMDTrackCreationException(String message, Throwable throwable) { + super(message, throwable); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java new file mode 100644 index 000000000..5dfbf92a6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2010. 
The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.utils.PluginManager; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * @author aaron + *

+ * Class RMDTrackManager + *

+ * Find the available track builders, and create the requisite tracks from the command line. Track types are matched case-insensitively: builders are registered and looked up by upper-cased name. + */ +public class RMDTrackManager extends PluginManager { + // the parsed RMDTriplets (one per -B option) we use to create RODs from + List inputs = new ArrayList(); + + // an active mapping of upper-cased track name -> builder instance, and of upper-cased track name -> class for convenience + Map availableTracks; + Map availableTrackClasses; + + /** Create a new track plugin manager. */ + protected RMDTrackManager() { + super(RMDTrackBuilder.class, "TrackBuilders", null); + } + + /** + * find the associated reference meta data + * note: the triplets are only parsed on the first call (while the builder maps are still null); later calls reuse those inputs + * @param triplets the triplets of strings from the -B command line option + * + * @return a list of RMDTracks, one for each -B option + */ + public List getReferenceMetaDataSources(List triplets) { + if (availableTracks == null || availableTrackClasses == null) initialize(triplets); + // try and make the tracks given their requests + return createTracksRequestedTrackObjects(availableTracks, availableTrackClasses); + } + + /** + * initialize our lists of tracks and builders; throws a StingException when the number of strings is not a multiple of three + * @param triplets the input to the GATK, as a list of strings passed in through the -B options + */ + private void initialize(List triplets) { + if (triplets.size() % 3 != 0) throw new StingException("Incorrect ROD line " + Utils.join(" ", triplets)); + for (int x = 0; x < triplets.size(); x = x + 3) { + inputs.add(new RMDTriplet(triplets.get(x), triplets.get(x + 1), triplets.get(x + 2))); + } + + // create an active mapping of builder instances, and a map of the name -> class for convenience + availableTracks = new HashMap(); + availableTrackClasses = new HashMap(); + createBuilderObjects(); + + + } + + /** + * create the builder objects from the retrieved list, registering every track they offer under its upper-cased name + */ + private void createBuilderObjects() { + // create a track builder instance for each track builder, and find out what tracks we can make + for (String builderName : this.pluginsByName.keySet()) { + RMDTrackBuilder builder = this.createByName(builderName); + 
for (String name : builder.getAvailableTrackNamesAndTypes().keySet()) { + availableTracks.put(name.toUpperCase(), builder); + availableTrackClasses.put(name.toUpperCase(), builder.getAvailableTrackNamesAndTypes().get(name)); + System.err.println("Adding track " + name.toUpperCase()); + } + } + } + + /** + * create the requested track objects + * the requested type is upper-cased before each lookup, because createBuilderObjects() registers every track under its upper-cased name + * @param availableTracks the tracks that are available to us, associated with their builder + * @param availableTrackClasses the classes names, with their class description + * + * @return a list of the tracks, one for each of the requested input tracks + */ + private List createTracksRequestedTrackObjects(Map availableTracks, Map availableTrackClasses) { + // create live instances of the tracks + List tracks = new ArrayList(); + + // create instances of each of the requested types + for (RMDTriplet trip : inputs) { + RMDTrackBuilder b = availableTracks.get(trip.getType().toUpperCase()); + if (b == null) throw new StingException("Unable to find track for " + trip.getType()); + tracks.add(b.createInstanceOfTrack(availableTrackClasses.get(trip.getType().toUpperCase()), trip.getName(), new File(trip.getFile()))); + } + return tracks; + } +} + diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RODRMDTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RODRMDTrack.java new file mode 100644 index 000000000..0b7e67fbf --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RODRMDTrack.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2010. 
The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; + +import java.io.File; +import java.util.Iterator; + + + +/** + * Class RODRMDTrack + * + * wrap a reference ordered data object in the new track style; getIterator() simply hands back + * the iterator of the wrapped ReferenceOrderedData. + * This will hopefully be phased-out as we move to a FeatureReader based system. + * + * @author aaron 
+ */ +public class RODRMDTrack extends RMDTrack { + + // the wrapped ROD; the only data source of this track + private ReferenceOrderedData data; + + /** + * Create a track + * + * @param type the type of track, used for track lookup + * @param name the name of this specific track + * @param file the associated file, for reference or recreating the reader + * @param data the ROD to use as the underlying data source for this track + */ + public RODRMDTrack(Class type, String name, File file, ReferenceOrderedData data) { + super(type, name, file); + this.data = data; + } + + /** + * @return the iterator obtained from the wrapped ROD (data.iterator()). This is all a track has to support, + * but other more advanced tracks support the query interface + */ + @Override + public Iterator getIterator() { + return data.iterator(); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java new file mode 100644 index 000000000..ce479446e --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks.builders; + +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; + +import java.io.File; +import java.util.Map; + + + +/** + * @author aaron + *

+ * Interface RMDTrackBuilder + *

+ * The basic interface for finding and parsing RMDTracks. Track builders present an interface that allows + * the track manager to find and create tracks of the specified type. + */ +public interface RMDTrackBuilder { + + /** @return a map of all available track names to the track types (classes) we currently have access to create */ + public Map getAvailableTrackNamesAndTypes(); + + /** + * create a RMDTrack of the specified type + * + * @param targetClass the target class of track, as found in the values of getAvailableTrackNamesAndTypes() + * @param name what to call the track + * @param inputFile the input file + * + * @return an instance of the track + * @throws RMDTrackCreationException if we don't know of the target class or we couldn't create it + */ + public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException; +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java new file mode 100644 index 000000000..ead6b9be0 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks.builders; + +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broad.tribble.FeatureReader; +import org.broad.tribble.index.LinearIndex; +import org.broad.tribble.index.LinearIndexCreator; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; +import org.broadinstitute.sting.utils.PluginManager; +import org.broadinstitute.sting.utils.StingException; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + + +/** + * Class TribbleRMDTrackBuilder + * + * This class keeps track of the available codecs, and knows how to put together a track + * that gets its iterators from a Tribble FeatureReader. + * Failures while opening the reader or building its index surface as RMDTrackCreationException. + * + * @author aaron + */ +public class TribbleRMDTrackBuilder extends PluginManager implements RMDTrackBuilder { + + // the linear index extension + private static final String linearIndexExtension = ".idx"; + + /** Create a new plugin manager. 
*/ + public TribbleRMDTrackBuilder() { + super(FeatureCodec.class, "Codecs", "Codec"); + } + + /** @return a map of all available track names to the codec classes we currently have access to create (a fresh copy of pluginsByName) */ + @Override + public Map getAvailableTrackNamesAndTypes() { + Map classes = new HashMap(); + for (String c : this.pluginsByName.keySet()) + classes.put(c,this.pluginsByName.get(c)); + return classes; + } + + /** + * create a RMDTrack of the specified type + * when no readable index file (the input path plus ".idx") exists, a linear index is built via createIndex() before the reader is opened + * @param targetClass the target class of track + * @param name what to call the track + * @param inputFile the input file + * + * @return an instance of the track + * @throws RMDTrackCreationException + * if we don't know of the target class or we couldn't create it (input file not found, or an I/O failure while indexing or opening the reader) + */ + @Override + public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException { + // make a feature reader + FeatureReader reader; + try { + FeatureCodec codec = this.createByType(targetClass); + + // check to see if the input file has an index + if (!(new File(inputFile.getAbsolutePath() + linearIndexExtension).canRead())) { + LinearIndex index = createIndex(inputFile, codec); + reader = new FeatureReader(inputFile,index, codec); + } + else { + reader = new FeatureReader(inputFile,codec); + } + } catch (FileNotFoundException e) { + throw new RMDTrackCreationException("Unable to create reader with file " + inputFile, e); + } catch (IOException e) { + throw new RMDTrackCreationException("Unable to make the index file for " + inputFile, e); + } + // return a feature reader track + return new FeatureReaderTrack(targetClass, name, inputFile, reader); + } + + /** + * create an index for the input file + * @param inputFile the input file + * @param codec the codec to use + * @return a linear index for the specified type + * @throws IOException if we cannot write the index file + */ + private LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException { + LinearIndexCreator create = new LinearIndexCreator(inputFile, codec); 
+ return create.createIndex(); + } +} + +/** + * a fake Tribble track, used to test out the Tribble interface and feature codec detection; it decodes every line to null and reports zero header lines + */ +class FakeTribbleTrack implements FeatureCodec { + + @Override + public Feature decode(String s) { + return null; + } + + @Override + public int headerLineCount(File file) { + return 0; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java new file mode 100644 index 000000000..0d31fed75 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.refdata.utils; + +import org.broad.tribble.Feature; + +import java.util.Iterator; + + +/** + * Class FeatureToGATKFeatureIterator + * + * a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs) + * Each element is wrapped lazily, on next(), in a TribbleGATKFeature that carries the track name given at construction. + * + * @author aaron + */ +public class FeatureToGATKFeatureIterator implements Iterator { + private final Iterator iterator; + private final String name; + /** Wrap the given Tribble feature iterator; 'name' is attached to every GATKFeature this iterator produces. */ + public FeatureToGATKFeatureIterator(Iterator iter, String name) { + this.name = name; + this.iterator = iter; + } + /** @return whether the wrapped iterator has another feature */ + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + /** @return the wrapped iterator's next feature, wrapped in a new TribbleGATKFeature together with the track name */ + @Override + public GATKFeature next() { + return new TribbleGATKFeature(iterator.next(),name); + } + /** Removal is not supported: this method always throws UnsupportedOperationException. */ + @Override + public void remove() { + throw new UnsupportedOperationException("Why does Iterator have this method? We always throw an exception here"); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java new file mode 100644 index 000000000..c3b3322d7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.utils; + +import org.broad.tribble.Feature; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + + +/** + * + * @author aaron + * + * Class GATKFeature + * + * This wraps a Tribble feature or a RODatum so that both present the same interface: a genome loc for position and a + * way of retrieving the track name. + */ +public abstract class GATKFeature implements Feature { + + public GATKFeature(String name) { + this.name = name; + } + + private String name; + + protected void setName(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public abstract GenomeLoc getLocation(); + + public abstract Object getUnderlyingObject(); +} + +/** + * wrapping a Tribble feature in a GATK friendly interface + */ +class TribbleGATKFeature extends GATKFeature { + private final Feature feature; + + public TribbleGATKFeature(Feature f, String name) { + super(name); + feature = f; + } + public GenomeLoc getLocation() { + return GenomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd()); + } + + /** Return the features reference sequence name, e.g chromosome or contig */ + @Override + public String getChr() { + return feature.getChr(); + } + + /** Return the start position in 1-based coordinates (first base is 1) */ + @Override + public int getStart() { + 
return feature.getStart(); + } + + /** + * Return the end position following 1-based fully closed conventions. The length of a feature is + * end - start + 1; + */ + @Override + public int getEnd() { + return feature.getEnd(); + } + + public Object getUnderlyingObject() { + return feature; + } +} + +/** + * wrapping a old style rod into the new GATK feature style + */ +class RODGATKFeature extends GATKFeature { + + // our data + private ReferenceOrderedDatum datum; + + public RODGATKFeature(ReferenceOrderedDatum datum) { + super(datum.getName()); + this.datum = datum; + } + + @Override + public GenomeLoc getLocation() { + return datum.getLocation(); + } + + @Override + public Object getUnderlyingObject() { + return datum; + } + + @Override + public String getChr() { + return datum.getLocation().getContig(); + } + + @Override + public int getStart() { + return (int)datum.getLocation().getStart(); + } + + @Override + public int getEnd() { + return (int)datum.getLocation().getStop(); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java new file mode 100644 index 000000000..5a6d6add6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
/**
 * a helper class to manage our triplets of data for the -B command line option (name, type, file).
 *
 * Instances are immutable: the three values are fixed at construction. No validation is
 * performed, so any of the values may be null if the caller passes null.
 */
public class RMDTriplet {
    /** the name given to the track */
    private final String name;

    /** the type of the track, as a string */
    private final String type;

    /** the file backing the track, as a string */
    private final String file;

    /**
     * @param name the track name
     * @param type the track type
     * @param file the track file
     */
    public RMDTriplet(String name, String type, String file) {
        this.name = name;
        this.type = type;
        this.file = file;
    }

    /** @return the track name */
    public String getName() {
        return name;
    }

    /** @return the track type */
    public String getType() {
        return type;
    }

    /** @return the track file */
    public String getFile() {
        return file;
    }
}
+ * @return The plugin object if created; null otherwise. + */ + public PluginType createByType(Class pluginType) { + try { + return ((Class) pluginType).newInstance(); + } + catch( InstantiationException ex ) { + throw new StingException(String.format("Unable to instantiate %s",pluginCategory), ex); + } + catch( IllegalAccessException ex ) { + throw new StingException(String.format("Unable to access %s",pluginCategory), ex); + } + } /** * Create the list of available plugins and add them to the database. diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerTest.java new file mode 100644 index 000000000..1ee417b52 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerTest.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.utils.StingException; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * class RMDTrackManagerTest + * tests out the ability of the RMDTrackManager to correctly create RMDtracks based on the requested types. + */ +public class RMDTrackManagerTest extends BaseTest { + List triplets; + List tracks; + + @Before + public void setup() { + RMDTrackManager manager = new RMDTrackManager(); + triplets = new ArrayList(); + + // add our db snp data + triplets.add("MyDbSNP"); + triplets.add("DBSNP"); + triplets.add("testdata/small.dbsnp.rod"); + tracks = manager.getReferenceMetaDataSources(triplets); + } + + @Test + public void testBuilderQuery() { + + + for (RMDTrack t : tracks) { + + System.err.println("name = " + t.getName() + " type = " + t.getType().getSimpleName() + " file = " + t.getFile()); + int count = 0; + Iterator fIter; + try { + fIter = ((FeatureReaderTrack) t).query("1", 1, 5000); + } catch (IOException e) { + throw new StingException("blah I/O exception"); + } + while (fIter.hasNext()) { + fIter.next(); + count++; + } + Assert.assertEquals(100, count); + } + + } + + @Test + public void testBuilderIterator() { + for (RMDTrack t : tracks) { + + System.err.println("name = " + t.getName() + " type = " + t.getType().getSimpleName() + " file = " + t.getFile()); + int count = 0; + Iterator fIter = null; + fIter = t.getIterator(); + while (fIter.hasNext()) { + fIter.next(); + count++; + } + Assert.assertEquals(100, count); + } + + } +} + diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderTest.java 
b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderTest.java new file mode 100644 index 000000000..dc8cd7369 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderTest.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE.
+ */ + +package org.broadinstitute.sting.gatk.refdata.tracks.builders; + +import org.broadinstitute.sting.BaseTest; +import org.junit.Before; +import org.junit.Test; + +import java.util.Map; + + +/** + * + * @author aaron + * + * Class TribbleRMDTrackBuilderTest + * + * Testing out the builder for tribble Tracks (not really a functional test right now) + */ +public class TribbleRMDTrackBuilderTest extends BaseTest { + private TribbleRMDTrackBuilder builder; + + @Before + public void setup() { + builder = new TribbleRMDTrackBuilder(); + } + + @Test + public void testBuilder() { + Map classes = builder.getAvailableTrackNamesAndTypes(); + for (String c: classes.keySet()) { + System.err.println("class = " + c); + } + //Assert.fail("Fail"); + } +}