2011-10-27 02:11:49 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2011, The Broad Institute
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
package org.broadinstitute.sting.commandline;
|
|
|
|
|
|
|
|
|
|
import com.google.java.contract.Requires;
|
Rev Tribble to r97, adding binary feature support
From tribble logs:
Binary feature support in tribble
-- Massive refactoring and cleanup
-- Many bug fixes throughout
-- FeatureCodec is now general, with decode etc. taking a PositionBufferedStream
as an argument not a String
-- See ExampleBinaryCodec for an example binary codec
-- AbstractAsciiFeatureCodec provides to its subclass the same String decode,
readHeader functionality before. Old ASCII codecs should inherit from this base
class, and will work without additional modifications
-- Split AsciiLineReader into a position tracking stream
(PositionalBufferedStream). The new AsciiLineReader takes as an argument a
PositionalBufferedStream and provides the readLine() functionality of before.
Could potentially use optimizations (it's a TODO in the code)
-- The Positional interface includes some more functionality that's now
necessary to support the more general decoding of binary features
-- FeatureReaders now work using the general FeatureCodec interface, so they can
index binary features
-- Bugfixes to LinearIndexCreator off by 1 error in setting the end block
position
-- Deleted VariantType, since this wasn't used anywhere and it's a particularly
clean way of thinking about the problem
-- Moved DiploidGenotype, which is specific to Gelitext, to the gelitext package
-- TabixReader requires an AsciiFeatureCodec as it's currently only implemented
to handle line oriented records
-- Renamed AsciiFeatureReader to TribbleIndexedFeatureReader now that it handles
Ascii and binary features
-- Removed unused functions here and there as encountered
-- Fixed build.xml to be truly headless
-- FeatureCodec readHeader returns a FeatureCodecHeader object that contains a
value and the position in the file where the header ends (not inclusive).
TribbleReaders now skip the header if the position is set, so it's no longer
necessary, if one implements the general readHeader(PositionalBufferedStream)
version to see header lines in the decode functions. Necessary for binary
codecs but a nice side benefit for ascii codecs as well
-- Cleaned up the IndexFactory interface so there's a truly general createIndex
function that takes the enumerated index type. Added a writeIndex() function
that writes an index to disk.
-- Vastly expanded the index unit tests and reader tests to really test linear,
interval, and tabix indexed files. Updated test.bed, and created a tabix
version of it as well.
-- Significant BinaryFeaturesTest suite.
-- Some test files have indent changes
2012-05-03 19:02:28 +08:00
|
|
|
import org.broad.tribble.AbstractFeatureReader;
|
2011-10-27 02:11:49 +08:00
|
|
|
import org.broad.tribble.Feature;
|
2011-10-27 03:42:53 +08:00
|
|
|
import org.broad.tribble.FeatureCodec;
|
Rev Tribble to r97, adding binary feature support
From tribble logs:
Binary feature support in tribble
-- Massive refactoring and cleanup
-- Many bug fixes throughout
-- FeatureCodec is now general, with decode etc. taking a PositionBufferedStream
as an argument not a String
-- See ExampleBinaryCodec for an example binary codec
-- AbstractAsciiFeatureCodec provides to its subclass the same String decode,
readHeader functionality before. Old ASCII codecs should inherit from this base
class, and will work without additional modifications
-- Split AsciiLineReader into a position tracking stream
(PositionalBufferedStream). The new AsciiLineReader takes as an argument a
PositionalBufferedStream and provides the readLine() functionality of before.
Could potentially use optimizations (it's a TODO in the code)
-- The Positional interface includes some more functionality that's now
necessary to support the more general decoding of binary features
-- FeatureReaders now work using the general FeatureCodec interface, so they can
index binary features
-- Bugfixes to LinearIndexCreator off by 1 error in setting the end block
position
-- Deleted VariantType, since this wasn't used anywhere and it's a particularly
clean way of thinking about the problem
-- Moved DiploidGenotype, which is specific to Gelitext, to the gelitext package
-- TabixReader requires an AsciiFeatureCodec as it's currently only implemented
to handle line oriented records
-- Renamed AsciiFeatureReader to TribbleIndexedFeatureReader now that it handles
Ascii and binary features
-- Removed unused functions here and there as encountered
-- Fixed build.xml to be truly headless
-- FeatureCodec readHeader returns a FeatureCodecHeader object that contains a
value and the position in the file where the header ends (not inclusive).
TribbleReaders now skip the header if the position is set, so it's no longer
necessary, if one implements the general readHeader(PositionalBufferedStream)
version to see header lines in the decode functions. Necessary for binary
codecs but a nice side benefit for ascii codecs as well
-- Cleaned up the IndexFactory interface so there's a truly general createIndex
function that takes the enumerated index type. Added a writeIndex() function
that writes an index to disk.
-- Vastly expanded the index unit tests and reader tests to really test linear,
interval, and tabix indexed files. Updated test.bed, and created a tabix
version of it as well.
-- Significant BinaryFeaturesTest suite.
-- Some test files have indent changes
2012-05-03 19:02:28 +08:00
|
|
|
import org.broad.tribble.FeatureReader;
|
2011-10-27 03:42:53 +08:00
|
|
|
import org.broad.tribble.readers.AsciiLineReader;
|
2011-10-27 02:11:49 +08:00
|
|
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
2011-10-28 11:54:28 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
2011-10-27 02:11:49 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
2011-10-27 03:42:53 +08:00
|
|
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
2011-10-27 02:11:49 +08:00
|
|
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
2011-10-27 03:42:53 +08:00
|
|
|
import java.io.FileInputStream;
|
|
|
|
|
import java.io.IOException;
|
2011-10-27 02:11:49 +08:00
|
|
|
import java.util.*;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string.
|
|
|
|
|
*
|
|
|
|
|
* The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
|
|
|
|
|
* the engine to construct intervals for traversal at runtime. The IntervalBinding can
|
2011-11-22 00:52:39 +08:00
|
|
|
* either be a RodBinding<T>, a string of one interval, or a file with interval strings.
|
2011-10-27 02:11:49 +08:00
|
|
|
* The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
|
|
|
|
|
*
|
|
|
|
|
* Note that this class is immutable.
|
|
|
|
|
*/
|
|
|
|
|
public final class IntervalBinding<T extends Feature> {
|
|
|
|
|
|
|
|
|
|
private RodBinding<T> featureIntervals;
|
|
|
|
|
private String stringIntervals;
|
|
|
|
|
|
|
|
|
|
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
|
|
|
|
|
public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
|
|
|
|
|
featureIntervals = new RodBinding<T>(type, rawName, source, tribbleType, tags);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Requires({"intervalArgument != null"})
|
|
|
|
|
public IntervalBinding(String intervalArgument) {
|
|
|
|
|
stringIntervals = intervalArgument;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public String getSource() {
|
|
|
|
|
if ( featureIntervals != null )
|
|
|
|
|
return featureIntervals.getSource();
|
|
|
|
|
return stringIntervals;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
|
|
|
|
|
List<GenomeLoc> intervals;
|
|
|
|
|
|
|
|
|
|
if ( featureIntervals != null ) {
|
|
|
|
|
intervals = new ArrayList<GenomeLoc>();
|
|
|
|
|
|
2011-10-27 03:42:53 +08:00
|
|
|
// TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
|
|
|
|
|
|
2012-04-04 22:57:05 +08:00
|
|
|
final FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
|
2011-10-28 11:54:28 +08:00
|
|
|
if ( codec instanceof ReferenceDependentFeatureCodec )
|
|
|
|
|
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());
|
2011-10-27 03:42:53 +08:00
|
|
|
try {
|
Rev Tribble to r97, adding binary feature support
From tribble logs:
Binary feature support in tribble
-- Massive refactoring and cleanup
-- Many bug fixes throughout
-- FeatureCodec is now general, with decode etc. taking a PositionBufferedStream
as an argument not a String
-- See ExampleBinaryCodec for an example binary codec
-- AbstractAsciiFeatureCodec provides to its subclass the same String decode,
readHeader functionality before. Old ASCII codecs should inherit from this base
class, and will work without additional modifications
-- Split AsciiLineReader into a position tracking stream
(PositionalBufferedStream). The new AsciiLineReader takes as an argument a
PositionalBufferedStream and provides the readLine() functionality of before.
Could potentially use optimizations (its a TODO in the code)
-- The Positional interface includes some more functionality that's now
necessary to support the more general decoding of binary features
-- FeatureReaders now work using the general FeatureCodec interface, so they can
index binary features
-- Bugfixes to LinearIndexCreator off by 1 error in setting the end block
position
-- Deleted VariantType, since this wasn't used anywhere and it's a particularly
clean why of thinking about the problem
-- Moved DiploidGenotype, which is specific to Gelitext, to the gelitext package
-- TabixReader requires an AsciiFeatureCodec as it's currently only implemented
to handle line oriented records
-- Renamed AsciiFeatureReader to TribbleIndexedFeatureReader now that it handles
Ascii and binary features
-- Removed unused functions here and there as encountered
-- Fixed build.xml to be truly headless
-- FeatureCodec readHeader returns a FeatureCodecHeader obtain that contains a
value and the position in the file where the header ends (not inclusive).
TribbleReaders now skip the header if the position is set, so its no longer
necessary, if one implements the general readHeader(PositionalBufferedStream)
version to see header lines in the decode functions. Necessary for binary
codecs but a nice side benefit for ascii codecs as well
-- Cleaned up the IndexFactory interface so there's a truly general createIndex
function that takes the enumerated index type. Added a writeIndex() function
that writes an index to disk.
-- Vastly expanded the index unit tests and reader tests to really test linear,
interval, and tabix indexed files. Updated test.bed, and created a tabix
version of it as well.
-- Significant BinaryFeaturesTest suite.
-- Some test files have indent changes
2012-05-03 19:02:28 +08:00
|
|
|
FeatureReader<Feature> reader = AbstractFeatureReader.getFeatureReader(featureIntervals.getSource(), codec, false);
|
|
|
|
|
for ( Feature feature : reader.iterator() )
|
2011-11-11 06:10:26 +08:00
|
|
|
intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature));
|
2012-02-28 03:02:26 +08:00
|
|
|
} catch (Exception e) {
|
|
|
|
|
throw new UserException.MalformedFile(featureIntervals.getSource(), "Problem reading the interval file", e);
|
2011-10-27 02:11:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return intervals;
|
|
|
|
|
}
|
2011-11-24 00:45:57 +08:00
|
|
|
|
|
|
|
|
public String toString() {
|
|
|
|
|
return getSource();
|
|
|
|
|
}
|
2011-10-27 02:11:49 +08:00
|
|
|
}
|