The first step in integrating Jim's tree-based index scheme:

- changed to a better method for getting headers from Codecs
- removed some old commented-out code in GATKArgumentCollection
- changes for the rename of FeatureReader to FeatureSource
- removed the old Beagle ROD
- cleaned up some of the code in SampleUtils

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3826 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-07-19 04:49:27 +00:00
parent 40a963541d
commit f4cfb0f990
30 changed files with 118 additions and 312 deletions

View File

@ -3,18 +3,17 @@ package org.broad.tribble.vcf;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.LineReader;
import org.broad.tribble.readers.LineReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
*
* User: delangel
* Date: Jul 13, 2010
* Time: 3:57:01 PM
* To change this template use File | Settings | File Templates.
*
* The reader for VCF 3 files
*/
public class VCF3Codec implements FeatureCodec {
@ -49,6 +48,8 @@ public class VCF3Codec implements FeatureCodec {
}
private Feature reallyDecode(String line, boolean justLocationPlease ) {
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith("#")) return null;
// transform the line, if we have a transform to do
if (transformer != null) line = transformer.lineTransform(line);
if (line.startsWith("#"))
@ -65,7 +66,7 @@ public class VCF3Codec implements FeatureCodec {
*
* @return 0
*/
public int readHeader(LineReader reader) {
public Object readHeader(LineReader reader) {
String line = "";
try {
while ((line = reader.readLine()) != null) {
@ -75,7 +76,7 @@ public class VCF3Codec implements FeatureCodec {
else if (line.startsWith("#")) {
headerStrings.add(line);
header = VCFReaderUtils.createHeader(headerStrings,version);
return headerStrings.size();
return header;
}
else {
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
@ -95,12 +96,6 @@ public class VCF3Codec implements FeatureCodec {
return VCFRecord.class;
}
public VCFHeader getHeader(Class clazz) throws ClassCastException {
if (!clazz.equals(VCFHeader.class))
throw new ClassCastException("Unable to cast to expected type " + clazz + " from type " + VCFHeader.class);
return header;
}
public static interface LineTransform {
public String lineTransform(String line);
}

View File

@ -1,9 +1,5 @@
package org.broad.tribble.vcf;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.LineReader;
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
import java.io.IOException;

View File

@ -167,11 +167,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
/** Should we enable rodWalkers? This is currently unsafe */
// @Element(required = false)
// @Argument(fullName = "enableRodWalkers", shortName = "erw", doc = "Enable experimental rodWalker support. TEMPORARY HACK TO ALLOW EXPERIMENTATION WITH ROD WALKERS. [default is false]}.", required = false)
// public boolean enableRodWalkers = false;
@ElementList(required = false)
@Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
public List<String> readGroupBlackList = null;

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
@ -177,7 +177,7 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
/**
* a data pool for the new query based RODs
*/
class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, LocationAwareSeekableRODIterator> {
class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureSource, LocationAwareSeekableRODIterator> {
// the reference-ordered data itself.
private final RMDTrack rod;
@ -193,19 +193,19 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, Location
}
@Override
protected FeatureReader createNewResource() {
protected FeatureSource createNewResource() {
return builder.createFeatureReader(rod.getType(),rod.getFile()).first;
}
@Override
protected FeatureReader selectBestExistingResource(DataStreamSegment segment, List<FeatureReader> availableResources) {
for (FeatureReader reader : availableResources)
protected FeatureSource selectBestExistingResource(DataStreamSegment segment, List<FeatureSource> availableResources) {
for (FeatureSource reader : availableResources)
if (reader != null) return reader;
return null;
}
@Override
protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, FeatureReader resource) {
protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, FeatureSource resource) {
try {
if (position instanceof MappedStreamSegment) {
GenomeLoc pos = ((MappedStreamSegment) position).locus;
@ -219,7 +219,7 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, Location
}
@Override
protected void closeResource(FeatureReader resource) {
protected void closeResource(FeatureSource resource) {
try {
resource.close();
} catch (IOException e) {

View File

@ -1,112 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.IOException;
import java.io.File;
import java.io.FileNotFoundException;
/**
 * Reference-ordered datum for space-delimited Beagle files.
 *
 * The file is expected to start with a header line whose first token is "I"
 * (e.g. <code>I id NA12891 NA12891 NA12892 NA12892</code>); the tokens from the
 * third onward are taken as sample names.  Data lines whose first token is "M"
 * carry a marker name of the form <code>c&lt;contig&gt;_p&lt;position&gt;</code>
 * followed by one genotype token per header sample column.
 */
public class BeagleROD extends BasicReferenceOrderedDatum {
    /** Marker names look like c&lt;contig&gt;_p&lt;position&gt;; group 1 is the contig, group 2 the position. */
    private static final Pattern MARKER_PATTERN = Pattern.compile("c(.+)_p([0-9]+)");

    /** Location of the most recently parsed marker line. */
    GenomeLoc loc;

    /** Sample names read by {@link #initialize(File)}; null until that method has been called. */
    List<String> sampleNames = null;

    /** Genotype tokens seen so far, keyed by sample name, in the order they were parsed. */
    Map<String, List<String>> sampleGenotypes = new HashMap<String, List<String>>();

    public BeagleROD(String name) {
        super(name);
    }

    public String toString() { return "BeagleRod"; }

    /** @return the delimiter used to split lines: a single space */
    public String delimiterRegex() {
        return " ";
    }

    /** @return the location parsed from the last marker line, or null if none has been parsed */
    public GenomeLoc getLocation() {
        return loc;
    }

    /** @return the sample names read by {@link #initialize(File)}, or null if it has not been called */
    public List<String> getSampleNames() {
        return sampleNames;
    }

    /** @return the live map of sample name to the genotype tokens accumulated by {@link #parseLine} */
    public Map<String, List<String>> getGenotypes() {
        return sampleGenotypes;
    }

    /**
     * Reads the first line of the file and extracts the sample names from it.
     *
     * @param source the Beagle file
     * @return the list of sample names (tokens 2..end of the header line)
     * @throws FileNotFoundException declared for the XReadLines constructor
     * @throws IllegalStateException if the first line does not start with "I"
     */
    public Object initialize(final File source) throws FileNotFoundException {
        // NOTE(review): the XReadLines instance is never closed here; confirm whether it
        // holds an open file handle and, if so, close it once the first line has been read.
        String firstLine = new XReadLines(source).next();
        String[] parts = firstLine.split(" ");

        if ( parts[0].equals("I") ) {
            // I id NA12891 NA12891 NA12892 NA12892
            sampleNames = Arrays.asList(parts).subList(2, parts.length);
            return sampleNames;
        } else {
            throw new IllegalStateException("Beagle file " + source + " doesn't have required header line I");
        }
    }

    /**
     * Converts a marker name of the form c&lt;contig&gt;_p&lt;position&gt; into a single-base location.
     *
     * @param markerName the marker name to parse
     * @return a GenomeLoc whose start and stop are both the parsed position
     * @throws IllegalArgumentException if the name does not match the expected format
     */
    public static GenomeLoc parseMarkerName(String markerName) {
        Matcher m = MARKER_PATTERN.matcher(markerName);
        if ( m.matches() ) {
            String contig = m.group(1);
            long start = Long.parseLong(m.group(2));
            return GenomeLocParser.createGenomeLoc(contig, start, start);
        } else {
            throw new IllegalArgumentException("Malformatted marker name: " + markerName + " required format is c<contig>_p<position>");
        }
    }

    /**
     * Parses one already-split line of the file.
     *
     * @param header the object returned by {@link #initialize(File)} (the list of sample names)
     * @param parts  the tokens of the line
     * @return false if the line is empty or is not a marker ("M") line; true once a marker line
     *         has been parsed into the location and per-sample genotype lists
     * @throws IllegalArgumentException if a marker line has no marker name, has more genotype
     *         columns than the header has samples, or has a malformed marker name
     */
    public boolean parseLine(final Object header, final String[] parts) throws IOException {
        @SuppressWarnings("unchecked")
        final List<String> headerSamples = (List<String>) header;

        if ( parts.length == 0 || ! parts[0].equals("M") )
            return false;

        // validate the shape of the line up front so a malformed line fails with a useful
        // message instead of a bare index-out-of-bounds, and before any state is modified
        if ( parts.length < 2 )
            throw new IllegalArgumentException("Beagle marker line is missing its marker name");

        final int genotypeCount = parts.length - 2;
        if ( genotypeCount > 0 && genotypeCount > headerSamples.size() )
            throw new IllegalArgumentException("Beagle marker line " + parts[1] + " has " + genotypeCount
                    + " genotype columns but the header only declares " + headerSamples.size() + " sample columns");

        loc = parseMarkerName(parts[1]);

        for ( int i = 0; i < genotypeCount; i++ ) {
            final String sampleName = headerSamples.get(i);
            List<String> genotypes = sampleGenotypes.get(sampleName);
            if ( genotypes == null ) {
                // first genotype seen for this sample name
                genotypes = new ArrayList<String>();
                sampleGenotypes.put(sampleName, genotypes);
            }
            genotypes.add(parts[i + 2]);
        }

        return true;
    }
}

View File

@ -34,8 +34,8 @@ import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.util.LineReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
@ -55,7 +55,7 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
*
* @return The # of header lines for this file.
*/
public int readHeader(LineReader reader)
public Object readHeader(LineReader reader)
{
int[] lineCounter = new int[1];
try {
@ -63,18 +63,13 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
} catch(IOException e) {
throw new IllegalArgumentException("Unable to read from file.", e);
}
return lineCounter[0];
return header;
}
public Class<AnnotatorInputTableFeature> getFeatureType() {
return AnnotatorInputTableFeature.class;
}
@Override
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null; // TODO: do we want the header to be a concrete type?
}
@Override
public Feature decodeLoc(String line) {
int tabIndex = line.indexOf(DELIMITER);

View File

@ -1,8 +1,4 @@
package org.broadinstitute.sting.gatk.refdata.features.beagle;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature;
/*
* Copyright (c) 2010 The Broad Institute
*
@ -29,22 +25,20 @@ import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTa
*/
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.util.LineReader;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
public class BeagleCodec implements FeatureCodec<BeagleFeature> {
private String[] header;
@ -71,7 +65,7 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
}
}
public int readHeader(LineReader reader)
public Object readHeader(LineReader reader)
{
int[] lineCounter = new int[1];
try {
@ -118,7 +112,7 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
} catch(IOException e) {
throw new IllegalArgumentException("Unable to read from file.", e);
}
return lineCounter[0];
return header;
}
private static String[] readHeader(final LineReader source, int[] lineCounter) throws IOException {
@ -169,11 +163,6 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
return BeagleFeature.class;
}
@Override
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null; // we haven't stored the header
}
public BeagleFeature decode(String line) {
String[] tokens;

View File

@ -2,11 +2,7 @@ package org.broadinstitute.sting.gatk.refdata.features.refseq;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.util.LineReader;
import org.broadinstitute.sting.gatk.refdata.BasicReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.Transcript;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
@ -64,17 +60,12 @@ public class RefSeqCodec implements FeatureCodec {
}
@Override
public int readHeader(LineReader reader) {
return 0; //To change body of implemented methods use File | Settings | File Templates.
public Object readHeader(LineReader reader) {
return null;
}
@Override
public Class getFeatureType() {
return RefSeqCodec.class;
}
@Override
public Object getHeader(Class clazz) throws ClassCastException {
return null; // we don't have a header
}
}

View File

@ -28,8 +28,8 @@ package org.broadinstitute.sting.gatk.refdata.features.sampileup;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils;
import org.broad.tribble.util.LineReader;
import java.util.ArrayList;
import java.util.regex.Pattern;
@ -61,9 +61,9 @@ public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
* @param reader the line reader
* @return 0 in this case, we assume no header lines.
*/
public int readHeader(LineReader reader) {
public Object readHeader(LineReader reader) {
// we don't require a header line, but it may exist. We'll deal with that above.
return 0;
return null;
}
@Override
@ -71,11 +71,6 @@ public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
return SAMPileupFeature.class;
}
@Override
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null; // we don't have a header
}
public Feature decodeLoc(String line) {
return decode(line);
}

View File

@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.refdata.features.samread;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils;
import org.broad.tribble.util.LineReader;
import net.sf.samtools.util.StringUtil;
import net.sf.samtools.TextCigarCodec;
import net.sf.samtools.Cigar;
@ -52,9 +52,9 @@ public class SAMReadCodec implements FeatureCodec<SAMReadFeature> {
* @return 0 in this case, we assume no header lines. The reads file may have a
* header line beginning with '@', but we can ignore that in the decode function.
*/
public int readHeader(LineReader reader) {
public Object readHeader(LineReader reader) {
// we don't require a header line, but it may exist. We'll deal with that above.
return 0;
return null;
}
@Override
@ -62,11 +62,6 @@ public class SAMReadCodec implements FeatureCodec<SAMReadFeature> {
return SAMReadFeature.class;
}
@Override
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null; // we haven't stored the header
}
public Feature decodeLoc(String line) {
return decode(line);
}

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata.features.vcf4;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.LineReader;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
@ -74,7 +74,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
* @return the number of header lines
*/
@Override
public int readHeader(LineReader reader) {
public Object readHeader(LineReader reader) {
List<String> headerStrings = new ArrayList<String>();
String line = "";
@ -115,7 +115,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
* @param line the single # line (column names)
* @return the count of header lines
*/
public int createHeader(List<String> headerStrings, String line) {
public Object createHeader(List<String> headerStrings, String line) {
headerStrings.add(line);
header = VCFReaderUtils.createHeader(headerStrings, this.version);
@ -134,7 +134,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
// sort the lists so we can binary search them later on
Collections.sort(filterFields);
return headerStrings.size();
return header;
}
/**
@ -156,6 +156,9 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
}
private Feature reallyDecode(String line, boolean parseGenotypes) {
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith("#")) return null;
if (parts == null)
parts = new String[header.getColumnCount()];
@ -598,18 +601,6 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
public Class getFeatureType() {
return VariantContext.class;
}
/**
* get the header
* @param clazz the class were expecting
* @return our VCFHeader
* @throws ClassCastException
*/
@Override
public VCFHeader getHeader(Class clazz) throws ClassCastException {
if (clazz != VCFHeader.class) throw new ClassCastException("expecting class " + clazz + " but VCF4Codec provides " + VCFHeader.class);
return this.header;
}
/**
* get the name of this codec

View File

@ -110,17 +110,6 @@ public abstract class RMDTrack {
return null; // default, others can override this
}
/**
* ask for the header, supplying the expected type. Overridden in track types
* @param clazz the class of the expected type
* @param <HeaderType> the expected type
* @return a object of type HeaderType
* @throws ClassCastException if the class provided doesn't match our header type
*/
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null;
}
public Object getHeader() {
return null;
}

View File

@ -25,7 +25,8 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.source.BasicFeatureSource;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -45,7 +46,7 @@ import java.io.IOException;
*/
public class TribbleTrack extends RMDTrack implements QueryableTrack {
// our feature reader - allows queries
private FeatureReader reader;
private BasicFeatureSource reader;
// our sequence dictionary, which can be null
private final SAMSequenceDictionary dictionary;
@ -60,7 +61,7 @@ public class TribbleTrack extends RMDTrack implements QueryableTrack {
* @param reader the feature reader to use as the underlying data source
* @param dict the sam sequence dictionary
*/
public TribbleTrack(Class type, Class recordType, String name, File file, FeatureReader reader, SAMSequenceDictionary dict) {
public TribbleTrack(Class type, Class recordType, String name, File file, BasicFeatureSource reader, SAMSequenceDictionary dict) {
super(type, recordType, name, file);
this.reader = reader;
this.dictionary = dict;
@ -119,7 +120,7 @@ public class TribbleTrack extends RMDTrack implements QueryableTrack {
reader = null;
}
public FeatureReader getReader() {
public FeatureSource getReader() {
return reader;
}
@ -131,14 +132,7 @@ public class TribbleTrack extends RMDTrack implements QueryableTrack {
return dictionary;
}
/**
* ask for the header, supplying the expected type. Overridden in track types
* @param clazz the class of the expected type
* @param <HeaderType> the expected type
* @return a object of type HeaderType
* @throws ClassCastException if the class provided doesn't match our header type
*/
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return (HeaderType) (reader).getHeader(clazz);
public Object getHeader() {
return reader.getHeader();
}
}

View File

@ -57,7 +57,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
Types.put("Table", TabularROD.class);
Types.put("HapMap", HapMapROD.class);
Types.put("Intervals", IntervalRod.class);
Types.put("Beagle", BeagleROD.class);
Types.put("Plink", PlinkRod.class);
}

View File

@ -29,9 +29,10 @@ import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broad.tribble.*;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.index.linear.LinearIndexCreator;
import org.broad.tribble.readers.BasicFeatureReader;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.vcf.NameAwareCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
@ -96,12 +97,12 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
@Override
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
// return a feature reader track
Pair<BasicFeatureReader, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
Pair<BasicFeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
if (pair == null) throw new StingException("Unable to make the feature reader for input file " + inputFile);
return new TribbleTrack(targetClass, createCodec(targetClass, name).getFeatureType(), name, inputFile, pair.first, pair.second);
}
public Pair<BasicFeatureReader, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
return createFeatureReader(targetClass, "anonymous", inputFile);
}
@ -111,10 +112,10 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @param inputFile the input file to create the track from (of the codec type)
* @return the FeatureReader instance
*/
public Pair<BasicFeatureReader, SAMSequenceDictionary> createFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureReader, SAMSequenceDictionary> pair = null;
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureSource, SAMSequenceDictionary> pair = null;
if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createBasicFeatureReaderNoAssumedIndex(targetClass, name, inputFile);
pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile);
else
pair = getLinearFeatureReader(targetClass, name, inputFile);
return pair;
@ -129,11 +130,11 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @param inputFile the file to load
* @return a feature reader implementation
*/
private Pair<BasicFeatureReader, SAMSequenceDictionary> createBasicFeatureReaderNoAssumedIndex(Class targetClass, String name, File inputFile) {
private Pair<BasicFeatureSource, SAMSequenceDictionary> createBasicFeatureSourceNoAssumedIndex(Class targetClass, String name, File inputFile) {
// we might not know the index type, try loading with the default reader constructor
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
try {
return new Pair<BasicFeatureReader, SAMSequenceDictionary>(new BasicFeatureReader(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
return new Pair<BasicFeatureSource, SAMSequenceDictionary>(new BasicFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
} catch (IOException e) {
throw new StingException("Unable to create feature reader from file " + inputFile);
}
@ -152,11 +153,11 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @param inputFile the tribble file to parse
* @return the input file as a FeatureReader
*/
private Pair<BasicFeatureReader, SAMSequenceDictionary> getLinearFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureReader, SAMSequenceDictionary> reader;
private Pair<BasicFeatureSource, SAMSequenceDictionary> getLinearFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureSource, SAMSequenceDictionary> reader;
try {
Index index = loadIndex(inputFile, createCodec(targetClass, name), true);
reader = new Pair<BasicFeatureReader, SAMSequenceDictionary>(new BasicFeatureReader(inputFile.getAbsolutePath(), index, createCodec(targetClass, name)),index.getSequenceDictionary());
reader = new Pair<BasicFeatureSource, SAMSequenceDictionary>(new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name)),index.getSequenceDictionary());
} catch (FileNotFoundException e) {
throw new StingException("Unable to create reader with file " + inputFile, e);
} catch (IOException e) {
@ -190,7 +191,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
// if the file exists, and we can read it, load the index from disk (i.e. wasn't deleted in the last step).
if (indexFile.exists() && indexFile.canRead() && obtainedLock) {
logger.info("Loading Tribble index from disk for file " + inputFile);
Index index = LinearIndex.createIndex(indexFile);
Index index = IndexFactory.loadIndex(indexFile.getAbsolutePath());
if (index.isCurrentVersion())
return index;
@ -220,7 +221,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
// this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile);
LinearIndex index = create.createIndex(null); // we don't want to write initially, so we pass in null
LinearIndex index = (LinearIndex)create.createIndex(); // we don't want to write initially, so we pass in null
// if the index doesn't exist, and we can write to the directory, and we got a lock: write to the disk
if (indexFile.getParentFile().canWrite() &&

View File

@ -26,7 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.coverage;
import net.sf.samtools.SAMReadGroupRecord;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -424,7 +424,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
private LocationAwareSeekableRODIterator initializeRefSeq() {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
FeatureReader refseq = builder.createFeatureReader(RefSeqCodec.class,refSeqGeneList).first;
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,refSeqGeneList).first;
try {
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
} catch (IOException e) {

View File

@ -29,7 +29,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.dbsnp.DbSNPCodec;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
@ -151,7 +151,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
if ( RefseqFileName != null ) {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
FeatureReader refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
try {
refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));

View File

@ -2,10 +2,10 @@ package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.FeatureIterator;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.dbsnp.DbSNPCodec;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.util.CloseableTribbleIterator;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -35,7 +35,7 @@ import java.io.IOException;
public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
// what we read in new tracks with
private FeatureReader reader;
private FeatureSource reader;
@Argument(fullName = "dbSNPFile", shortName = "db", doc="The dbsnp file to search upstream and downstream for nearby snps", required = true)
private File myDbSNPFile;

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -63,7 +63,7 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
public void initialize() {
if ( RefseqFileName != null ) {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
FeatureReader refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
try {
refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));

View File

@ -24,9 +24,9 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.vcf.*;
import org.broad.tribble.util.ParsingUtils;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -101,9 +101,11 @@ public class VCF4ReaderTestWalker extends RodWalker<VCFRecord,Long> {
try {
AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(vcfFile));
int lineNumber = codec.readHeader(lineReader);
out.printf("Read %d header lines%n", lineNumber);
VCFHeader header = (VCFHeader)codec.readHeader(lineReader);
out.printf("Read %d header lines%n", header.getMetaData().size()+1);
// a counter of the number of lines we've read
int lineNumber = header.getMetaData().size()+1;
while (true) {
String line = lineReader.readLine();

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -89,10 +89,8 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
try {
AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath()));
int lineNumber = vcf4codec.readHeader(lineReader);
out.printf("Read %d header lines%n", lineNumber);
header = vcf4codec.getHeader(VCFHeader.class);
header = (VCFHeader)vcf4codec.readHeader(lineReader);
out.printf("Read %d header lines%n", header.getMetaData().size());
}
catch (FileNotFoundException e ) {
throw new StingException(e.getMessage());

View File

@ -32,7 +32,7 @@ import net.sf.picard.util.IntervalList;
import net.sf.picard.util.OverlapDetector;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -110,7 +110,7 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
public void initialize() {
if ( REFSEQ_FILE != null ) {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
FeatureReader refseq = builder.createFeatureReader(RefSeqCodec.class, new File(REFSEQ_FILE)).first;
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class, new File(REFSEQ_FILE)).first;
try {
refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(), "refseq"));

View File

@ -108,22 +108,13 @@ public class SampleUtils {
continue;
RMDTrack rod = source.getReferenceOrderedData();
if ( containsVCFHeader(rod) )
data.put(rod.getName(), rod.getHeader(VCFHeader.class));
if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader )
data.put(rod.getName(), (VCFHeader)rod.getHeader());
}
return data;
}
// todo -- remove when we can actually just get the header itself from tribble
private static boolean containsVCFHeader(RMDTrack rod) {
try {
return rod.getHeader(VCFHeader.class) != null;
} catch ( ClassCastException e ) {
return false;
}
}
public static Set<String> getSampleListWithVCFHeader(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
return getSampleList(SampleUtils.getVCFHeadersFromRods(toolkit, rodNames));
}

View File

@ -5,9 +5,9 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.index.Index;
import org.broad.tribble.readers.BasicFeatureReader;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException;
@ -22,7 +22,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
private Iterator<VCFRecord> iterator;
// our feature reader; so we can close it
private FeatureReader<VCFRecord> vcfReader = null;
private FeatureSource<VCFRecord> vcfReader = null;
/**
* Create a VCF reader, given a VCF file
@ -62,14 +62,14 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
Index index = createIndex(vcfFile, createIndexOnDisk);
if (transform != null) codec.setTransformer(transform);
try {
vcfReader = new BasicFeatureReader(vcfFile.getAbsolutePath(),index,codec);
vcfReader = new BasicFeatureSource(vcfFile.getAbsolutePath(),index,codec);
iterator= vcfReader.iterator();
} catch (FileNotFoundException e) {
throw new StingException("Unable to read VCF File from " + vcfFile, e);
} catch (IOException e) {
throw new StingException("Unable to read VCF File from " + vcfFile, e);
}
mHeader = codec.getHeader(VCFHeader.class);
mHeader = (VCFHeader)vcfReader.getHeader();
}
/**

View File

@ -62,8 +62,8 @@ public class VCFUtils {
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getRecordType().equals(VariantContext.class) ) {
fields.addAll(rod.getHeader(VCFHeader.class).getMetaData());
if ( rod.getRecordType().equals(VariantContext.class)) {
fields.addAll(((VCFHeader)rod.getHeader()).getMetaData());
}
}

View File

@ -7,7 +7,7 @@ import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.gelitext.GeliTextCodec;
import org.broad.tribble.gelitext.GeliTextFeature;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.GenomeLocParser;

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.refdata.features.vcf4;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
@ -503,8 +503,7 @@ public class VCF4UnitTest extends BaseTest {
Assert.fail("Unable to parse out VCF file " + vcfFile);
}
codec = new VCF4Codec();
codec.readHeader(reader);
header = codec.getHeader(VCFHeader.class);
header = (VCFHeader)codec.readHeader(reader);
return this;
}
}

View File

@ -59,19 +59,20 @@ public class CombineVariantsUnitTest {
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
};
private VCF4Codec createHeader(String[] headerStr) {
private VCFHeader createHeader(String[] headerStr) {
VCF4Codec codec = new VCF4Codec();
List<String> headerFields = new ArrayList<String>();
for (String str : headerStr)
headerFields.add(str);
Assert.assertEquals(headerStr.length+1 /* for the # line */,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
return codec;
VCFHeader head = (VCFHeader)codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO");
Assert.assertEquals(headerStr.length /* for the # line */,head.getMetaData().size());
return head;
}
@Test
public void testHeadersWhereOneIsAStrictSubsetOfTheOther() {
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings).getHeader(VCFHeader.class);
VCFHeader two = createHeader(VCF4headerStringsSmallSubset).getHeader(VCFHeader.class);
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings);
VCFHeader two = createHeader(VCF4headerStringsSmallSubset);
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);
@ -81,8 +82,8 @@ public class CombineVariantsUnitTest {
@Test(expected=IllegalStateException.class)
public void testHeadersInfoDifferentValues() {
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings).getHeader(VCFHeader.class);
VCFHeader two = createHeader(VCF4headerStringsBrokenInfo).getHeader(VCFHeader.class);
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings);
VCFHeader two = createHeader(VCF4headerStringsBrokenInfo);
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);
@ -92,8 +93,8 @@ public class CombineVariantsUnitTest {
@Test
public void testHeadersFormatDifferentValues() {
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings).getHeader(VCFHeader.class);
VCFHeader two = createHeader(VCF4headerStringsBrokenFormat).getHeader(VCFHeader.class);
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings);
VCFHeader two = createHeader(VCF4headerStringsBrokenFormat);
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);

View File

@ -21,28 +21,29 @@ import java.util.List;
*/
public class VCFHeaderUnitTest extends BaseTest {
private VCF4Codec createHeader(String[] headerStr) {
private VCFHeader createHeader(String[] headerStr) {
VCF4Codec codec = new VCF4Codec();
List<String> headerFields = new ArrayList<String>();
for (String str : headerStr)
headerFields.add(str);
Assert.assertEquals(headerStr.length+1 /* for the # line */,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
return codec;
VCFHeader header = (VCFHeader)codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO");
Assert.assertEquals(headerStr.length /* for the # line */,header.getMetaData().size());
return header;
}
@Test
public void testVCF4ToVCF4() {
VCF4Codec codec = createHeader(VCF4headerStrings);
checkMD5ofHeaderFile(codec, "4648aa1169257e0a8a9d30131adb5f35");
VCFHeader header = createHeader(VCF4headerStrings);
checkMD5ofHeaderFile(header, "4648aa1169257e0a8a9d30131adb5f35");
}
@Test
public void testVCF4ToVCF4_alternate() {
VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
checkMD5ofHeaderFile(codec, "ad8c4cf85e868b0261ab49ee2c613088");
VCFHeader header = createHeader(VCF4headerStrings_with_negitiveOne);
checkMD5ofHeaderFile(header, "ad8c4cf85e868b0261ab49ee2c613088");
}
private void checkMD5ofHeaderFile(VCF4Codec codec, String md5sum) {
private void checkMD5ofHeaderFile(VCFHeader header, String md5sum) {
File myTempFile = null;
PrintWriter pw = null;
try {
@ -52,7 +53,7 @@ public class VCFHeaderUnitTest extends BaseTest {
} catch (IOException e) {
Assert.fail("Unable to make a temp file!");
}
for (VCFHeaderLine line : codec.getHeader(VCFHeader.class).getMetaData())
for (VCFHeaderLine line : header.getMetaData())
pw.println(line);
pw.close();
Assert.assertTrue(md5sum.equals(md5SumFile(myTempFile)));

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
@ -52,10 +52,10 @@ public class VCFWriterUnitTest extends BaseTest {
writer.close();
VCFCodec reader = new VCFCodec();
AsciiLineReader lineReader;
VCFHeader headerFromFile = null;
try {
lineReader = new AsciiLineReader(new FileInputStream(fakeVCFFile));
int lineNumber = reader.readHeader(lineReader);
headerFromFile = (VCFHeader)reader.readHeader(lineReader);
}
catch (FileNotFoundException e ) {
throw new StingException(e.getMessage());
@ -64,7 +64,8 @@ public class VCFWriterUnitTest extends BaseTest {
int counter = 0;
// validate what we're reading in
validateHeader(reader.getHeader(VCFHeader.class));
validateHeader(headerFromFile);
try {
while(true) {
String line = lineReader.readLine();