Rev Tribble to r97, adding binary feature support

From tribble logs:

Binary feature support in tribble

-- Massive refactoring and cleanup
-- Many bug fixes throughout
-- FeatureCodec is now general, with decode etc. taking a PositionalBufferedStream
as an argument, not a String
-- See ExampleBinaryCodec for an example binary codec
-- AbstractAsciiFeatureCodec provides to its subclasses the same String decode,
readHeader functionality as before.  Old ASCII codecs should inherit from this base
class, and will work without additional modifications
-- Split AsciiLineReader into a position tracking stream
(PositionalBufferedStream).  The new AsciiLineReader takes as an argument a
PositionalBufferedStream and provides the readLine() functionality of before.
Could potentially use optimizations (it's a TODO in the code)
-- The Positional interface includes some more functionality that's now
necessary to support the more general decoding of binary features
-- FeatureReaders now work using the general FeatureCodec interface, so they can
index binary features
-- Bugfixes to LinearIndexCreator off by 1 error in setting the end block
position
-- Deleted VariantType, since this wasn't used anywhere and it's not a particularly
clean way of thinking about the problem
-- Moved DiploidGenotype, which is specific to Gelitext, to the gelitext package
-- TabixReader requires an AsciiFeatureCodec as it's currently only implemented
to handle line oriented records
-- Renamed AsciiFeatureReader to TribbleIndexedFeatureReader now that it handles
Ascii and binary features
-- Removed unused functions here and there as encountered
-- Fixed build.xml to be truly headless
-- FeatureCodec readHeader returns a FeatureCodecHeader object that contains a
value and the position in the file where the header ends (not inclusive).
TribbleReaders now skip the header if the position is set, so it's no longer
necessary, if one implements the general readHeader(PositionalBufferedStream)
version, to see header lines in the decode functions.  Necessary for binary
codecs but a nice side benefit for ascii codecs as well
-- Cleaned up the IndexFactory interface so there's a truly general createIndex
function that takes the enumerated index type.  Added a writeIndex() function
that writes an index to disk.
-- Vastly expanded the index unit tests and reader tests to really test linear,
interval, and tabix indexed files.  Updated test.bed, and created a tabix
version of it as well.
-- Significant BinaryFeaturesTest suite.
-- Some test files have indent changes
This commit is contained in:
Mark DePristo 2012-05-03 07:02:28 -04:00
parent 58c470a6c5
commit 43d97c2e00
19 changed files with 131 additions and 221 deletions

View File

@ -25,8 +25,10 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
@ -77,27 +79,15 @@ public final class IntervalBinding<T extends Feature> {
if ( featureIntervals != null ) { if ( featureIntervals != null ) {
intervals = new ArrayList<GenomeLoc>(); intervals = new ArrayList<GenomeLoc>();
//RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(),
// toolkit.getGenomeLocParser(),
// toolkit.getArguments().unsafe);
// TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files // TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
final FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec(); final FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
if ( codec instanceof ReferenceDependentFeatureCodec ) if ( codec instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser()); ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());
try { try {
final FileInputStream fis = new FileInputStream(new File(featureIntervals.getSource())); FeatureReader<Feature> reader = AbstractFeatureReader.getFeatureReader(featureIntervals.getSource(), codec, false);
final AsciiLineReader lineReader = new AsciiLineReader(fis); for ( Feature feature : reader.iterator() )
codec.readHeader(lineReader);
String line = lineReader.readLine();
while ( line != null ) {
final Feature feature = codec.decodeLoc(line);
if ( feature == null )
throw new UserException.MalformedFile(featureIntervals.getSource(), "Couldn't parse line '" + line + "'");
intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature)); intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature));
line = lineReader.readLine();
}
} catch (Exception e) { } catch (Exception e) {
throw new UserException.MalformedFile(featureIntervals.getSource(), "Problem reading the interval file", e); throw new UserException.MalformedFile(featureIntervals.getSource(), "Problem reading the interval file", e);
} }

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
* A HACK. Tribble should contain all the information in needs to decode the unqualified position of * A HACK. Tribble should contain all the information in needs to decode the unqualified position of
* a feature. * a feature.
*/ */
public interface ReferenceDependentFeatureCodec<T extends org.broad.tribble.Feature> extends FeatureCodec<T> { public interface ReferenceDependentFeatureCodec {
/** /**
* Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features. * Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features.
* @param genomeLocParser The parser to supply. * @param genomeLocParser The parser to supply.

View File

@ -25,15 +25,16 @@
package org.broadinstitute.sting.gatk.walkers.diffengine; package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File; import java.io.*;
import java.io.FileInputStream; import java.util.Iterator;
import java.io.IOException;
import java.util.Map; import java.util.Map;
@ -56,16 +57,14 @@ public class VCFDiffableReader implements DiffableReader {
DiffNode root = DiffNode.rooted(file.getName()); DiffNode root = DiffNode.rooted(file.getName());
try { try {
// read the version line from the file // read the version line from the file
LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); BufferedReader br = new BufferedReader(new FileReader(file));
final String version = lineReader.readLine(); final String version = br.readLine();
root.add("VERSION", version); root.add("VERSION", version);
lineReader.close(); br.close();
lineReader = new AsciiLineReader(new FileInputStream(file));
VCFCodec vcfCodec = new VCFCodec();
// must be read as state is stored in reader itself // must be read as state is stored in reader itself
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
VCFHeader header = (VCFHeader)reader.getHeader();
for ( VCFHeaderLine headerLine : header.getMetaData() ) { for ( VCFHeaderLine headerLine : header.getMetaData() ) {
String key = headerLine.getKey(); String key = headerLine.getKey();
if ( headerLine instanceof VCFIDHeaderLine) if ( headerLine instanceof VCFIDHeaderLine)
@ -76,14 +75,14 @@ public class VCFDiffableReader implements DiffableReader {
root.add(key, headerLine.toString()); root.add(key, headerLine.toString());
} }
String line = lineReader.readLine();
int count = 0, nRecordsAtPos = 1; int count = 0, nRecordsAtPos = 1;
String prevName = ""; String prevName = "";
while ( line != null ) { Iterator<VariantContext> it = reader.iterator();
while ( it.hasNext() ) {
if ( count++ > maxElementsToRead && maxElementsToRead != -1) if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break; break;
VariantContext vc = (VariantContext)vcfCodec.decode(line); VariantContext vc = it.next();
String name = vc.getChr() + ":" + vc.getStart(); String name = vc.getChr() + ":" + vc.getStart();
if ( name.equals(prevName) ) { if ( name.equals(prevName) ) {
name += "_" + ++nRecordsAtPos; name += "_" + ++nRecordsAtPos;
@ -121,10 +120,9 @@ public class VCFDiffableReader implements DiffableReader {
} }
root.add(vcRoot); root.add(vcRoot);
line = lineReader.readLine();
} }
lineReader.close(); reader.close();
} catch ( IOException e ) { } catch ( IOException e ) {
return null; return null;
} }

View File

@ -25,16 +25,14 @@ package org.broadinstitute.sting.utils.codecs.beagle;
*/ */
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
@ -63,7 +61,7 @@ import java.util.regex.Pattern;
* @author Mark DePristo * @author Mark DePristo
* @since 2010 * @since 2010
*/ */
public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature> { public class BeagleCodec extends AsciiFeatureCodec<BeagleFeature> implements ReferenceDependentFeatureCodec {
private String[] header; private String[] header;
public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2}; public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2};
private BeagleReaderType readerType; private BeagleReaderType readerType;
@ -80,25 +78,16 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
*/ */
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
public BeagleCodec() {
super(BeagleFeature.class);
}
/** /**
* Set the parser to use when resolving genetic data. * Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser. * @param genomeLocParser The supplied parser.
*/ */
public void setGenomeLocParser(GenomeLocParser genomeLocParser) { public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
}
public Feature decodeLoc(String line) {
return decode(line);
}
public static String[] readHeader(final File source) throws IOException {
FileInputStream is = new FileInputStream(source);
try {
return readHeader(new AsciiLineReader(is), null);
} finally {
is.close();
}
} }
public Object readHeader(LineReader reader) public Object readHeader(LineReader reader)
@ -183,11 +172,6 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)"); private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)");
@Override
public Class<BeagleFeature> getFeatureType() {
return BeagleFeature.class;
}
public BeagleFeature decode(String line) { public BeagleFeature decode(String line) {
String[] tokens; String[] tokens;

View File

@ -24,8 +24,7 @@
package org.broadinstitute.sting.utils.codecs.hapmap; package org.broadinstitute.sting.utils.codecs.hapmap;
import org.broad.tribble.AbstractFeatureCodec; import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.annotation.Strand; import org.broad.tribble.annotation.Strand;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
@ -71,18 +70,14 @@ import java.util.Arrays;
* @author Mark DePristo * @author Mark DePristo
* @since 2010 * @since 2010
*/ */
public class RawHapMapCodec extends AbstractFeatureCodec { public class RawHapMapCodec extends AsciiFeatureCodec<RawHapMapFeature> {
// the minimum number of features in the HapMap file line // the minimum number of features in the HapMap file line
private static final int minimumFeatureCount = 11; private static final int minimumFeatureCount = 11;
private String headerLine; private String headerLine;
/**
* decode the location only public RawHapMapCodec() {
* @param line the input line to decode super(RawHapMapFeature.class);
* @return a HapMapFeature
*/
public Feature decodeLoc(String line) {
return decode(line);
} }
/** /**
@ -90,7 +85,7 @@ public class RawHapMapCodec extends AbstractFeatureCodec {
* @param line the input line to decode * @param line the input line to decode
* @return a HapMapFeature, with the given fields * @return a HapMapFeature, with the given fields
*/ */
public Feature decode(String line) { public RawHapMapFeature decode(String line) {
String[] array = line.split("\\s+"); String[] array = line.split("\\s+");
// make sure the split was successful - that we got an appropriate number of fields // make sure the split was successful - that we got an appropriate number of fields
@ -113,10 +108,6 @@ public class RawHapMapCodec extends AbstractFeatureCodec {
headerLine); headerLine);
} }
public Class<RawHapMapFeature> getFeatureType() {
return RawHapMapFeature.class;
}
public Object readHeader(LineReader reader) { public Object readHeader(LineReader reader) {
try { try {
headerLine = reader.readLine(); headerLine = reader.readLine();

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.codecs.refseq; package org.broadinstitute.sting.utils.codecs.refseq;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException; import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -46,13 +46,18 @@ import java.util.ArrayList;
* @author Mark DePristo * @author Mark DePristo
* @since 2010 * @since 2010
*/ */
public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature> { public class RefSeqCodec extends AsciiFeatureCodec<RefSeqFeature> implements ReferenceDependentFeatureCodec {
/** /**
* The parser to use when resolving genome-wide locations. * The parser to use when resolving genome-wide locations.
*/ */
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
private boolean zero_coding_length_user_warned = false; private boolean zero_coding_length_user_warned = false;
public RefSeqCodec() {
super(RefSeqFeature.class);
}
/** /**
* Set the parser to use when resolving genetic data. * Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser. * @param genomeLocParser The supplied parser.
@ -130,17 +135,4 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
feature.setExon_frames(exon_frames); feature.setExon_frames(exon_frames);
return feature; return feature;
} }
@Override
public Object readHeader(LineReader reader) {
return null;
}
@Override
public Class<RefSeqFeature> getFeatureType() {
return RefSeqFeature.class;
}
public boolean canDecode(final String potentialInput) { return false; }
} }

View File

@ -25,10 +25,9 @@
package org.broadinstitute.sting.utils.codecs.sampileup; package org.broadinstitute.sting.utils.codecs.sampileup;
import org.broad.tribble.AbstractFeatureCodec; import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.ParsingUtils;
import java.util.ArrayList; import java.util.ArrayList;
@ -76,7 +75,7 @@ import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.V
* @author Matt Hanna * @author Matt Hanna
* @since 2009 * @since 2009
*/ */
public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> { public class SAMPileupCodec extends AsciiFeatureCodec<SAMPileupFeature> {
// the number of tokens we expect to parse from a pileup line // the number of tokens we expect to parse from a pileup line
private static final int expectedTokenCount = 10; private static final int expectedTokenCount = 10;
private static final char fldDelim = '\t'; private static final char fldDelim = '\t';
@ -88,24 +87,8 @@ public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
private static final String baseT = "T"; private static final String baseT = "T";
private static final String emptyStr = ""; // we will use this for "reference" allele in insertions private static final String emptyStr = ""; // we will use this for "reference" allele in insertions
/** public SAMPileupCodec() {
* Return the # of header lines for this file. super(SAMPileupFeature.class);
*
* @param reader the line reader
* @return 0 in this case, we assume no header lines.
*/
public Object readHeader(LineReader reader) {
// we don't require a header line, but it may exist. We'll deal with that above.
return null;
}
@Override
public Class<SAMPileupFeature> getFeatureType() {
return SAMPileupFeature.class;
}
public Feature decodeLoc(String line) {
return decode(line);
} }
public SAMPileupFeature decode(String line) { public SAMPileupFeature decode(String line) {
@ -285,5 +268,4 @@ public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
feature.setPileupBases(baseBuilder.toString()); feature.setPileupBases(baseBuilder.toString());
feature.setPileupQuals(qualBuilder.toString()); feature.setPileupQuals(qualBuilder.toString());
} }
} }

View File

@ -27,10 +27,9 @@ package org.broadinstitute.sting.utils.codecs.samread;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;
import net.sf.samtools.TextCigarCodec; import net.sf.samtools.TextCigarCodec;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import org.broad.tribble.AbstractFeatureCodec; import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.ParsingUtils;
/** /**
@ -52,31 +51,14 @@ import org.broad.tribble.util.ParsingUtils;
* @author Matt Hanna * @author Matt Hanna
* @since 2009 * @since 2009
*/ */
public class SAMReadCodec extends AbstractFeatureCodec<SAMReadFeature> { public class SAMReadCodec extends AsciiFeatureCodec<SAMReadFeature> {
/* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */ /* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */
// the number of tokens we expect to parse from a read line // the number of tokens we expect to parse from a read line
private static final int expectedTokenCount = 11; private static final int expectedTokenCount = 11;
/** public SAMReadCodec() {
* Return the # of header lines for this file. super(SAMReadFeature.class);
*
* @param reader the line reader
* @return 0 in this case, we assume no header lines. The reads file may have a
* header line beginning with '@', but we can ignore that in the decode function.
*/
public Object readHeader(LineReader reader) {
// we don't require a header line, but it may exist. We'll deal with that above.
return null;
}
@Override
public Class<SAMReadFeature> getFeatureType() {
return SAMReadFeature.class;
}
public Feature decodeLoc(String line) {
return decode(line);
} }
/** /**
@ -131,6 +113,4 @@ public class SAMReadCodec extends AbstractFeatureCodec<SAMReadFeature> {
bases, bases,
qualities); qualities);
} }
} }

View File

@ -23,7 +23,7 @@ import java.util.Arrays;
public class BedTableCodec extends TableCodec implements ReferenceDependentFeatureCodec { public class BedTableCodec extends TableCodec implements ReferenceDependentFeatureCodec {
@Override @Override
public Feature decode(String line) { public TableFeature decode(String line) {
if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter)) if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter))
return null; return null;
String[] split = line.split(delimiterRegex); String[] split = line.split(delimiterRegex);

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.utils.codecs.table; package org.broadinstitute.sting.utils.codecs.table;
import org.broad.tribble.Feature; import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -39,7 +39,7 @@ import java.util.Arrays;
* @author Mark DePristo * @author Mark DePristo
* @since 2009 * @since 2009
*/ */
public class TableCodec implements ReferenceDependentFeatureCodec { public class TableCodec extends AsciiFeatureCodec<TableFeature> implements ReferenceDependentFeatureCodec {
final static protected String delimiterRegex = "\\s+"; final static protected String delimiterRegex = "\\s+";
final static protected String headerDelimiter = "HEADER"; final static protected String headerDelimiter = "HEADER";
final static protected String igvHeaderDelimiter = "track"; final static protected String igvHeaderDelimiter = "track";
@ -52,6 +52,10 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
*/ */
protected GenomeLocParser genomeLocParser; protected GenomeLocParser genomeLocParser;
public TableCodec() {
super(TableFeature.class);
}
/** /**
* Set the parser to use when resolving genetic data. * Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser. * @param genomeLocParser The supplied parser.
@ -61,14 +65,8 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
} }
@Override @Override
public Feature decodeLoc(String line) { public TableFeature decode(String line) {
return decode(line);
}
@Override
public Feature decode(String line) {
if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter)) if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter))
return null; return null;
String[] split = line.split(delimiterRegex); String[] split = line.split(delimiterRegex);
@ -77,11 +75,6 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header); return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header);
} }
@Override
public Class<TableFeature> getFeatureType() {
return TableFeature.class;
}
@Override @Override
public Object readHeader(LineReader reader) { public Object readHeader(LineReader reader) {
String line = ""; String line = "";
@ -106,7 +99,4 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
} }
return header; return header;
} }
public boolean canDecode(final String potentialInput) { return false; }
} }

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.codecs.vcf; package org.broadinstitute.sting.utils.codecs.vcf;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec; import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException; import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
@ -10,14 +10,20 @@ import org.broad.tribble.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.LazyGenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.io.*; import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.*; import java.util.*;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20); public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static Logger log = Logger.getLogger(VCFCodec.class);
@ -61,6 +67,10 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
protected Map<String, String> stringCache = new HashMap<String, String>(); protected Map<String, String> stringCache = new HashMap<String, String>();
protected AbstractVCFCodec() {
super(VariantContext.class);
}
/** /**
* Creates a LazyParser for a LazyGenotypesContext to use to decode * Creates a LazyParser for a LazyGenotypesContext to use to decode
* our genotypes only when necessary. We do this instead of eagarly * our genotypes only when necessary. We do this instead of eagarly
@ -266,7 +276,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
* @param line the line * @param line the line
* @return a VariantContext * @return a VariantContext
*/ */
public Feature decode(String line) { public VariantContext decode(String line) {
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
@ -378,14 +388,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
return vc; return vc;
} }
/**
*
* @return the type of record
*/
public Class<VariantContext> getFeatureType() {
return VariantContext.class;
}
/** /**
* get the name of this codec * get the name of this codec
* @return our set name * @return our set name

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils; package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
@ -63,7 +64,7 @@ public class CombineVariantsUnitTest {
private VCFHeader createHeader(String headerStr) { private VCFHeader createHeader(String headerStr) {
VCFCodec codec = new VCFCodec(); VCFCodec codec = new VCFCodec();
VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr))); VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr))));
return head; return head;
} }

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.codecs.hapmap;
import org.broad.tribble.annotation.Strand; import org.broad.tribble.annotation.Strand;
import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@ -152,7 +153,7 @@ public class HapMapUnitTest {
public AsciiLineReader getReader() { public AsciiLineReader getReader() {
try { try {
return new AsciiLineReader(new FileInputStream(hapMapFile)); return new AsciiLineReader(new PositionalBufferedStream(new FileInputStream(hapMapFile)));
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
Assert.fail("Unable to open hapmap file : " + hapMapFile); Assert.fail("Unable to open hapmap file : " + hapMapFile);
} }

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.vcf; package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert; import org.testng.Assert;
@ -24,7 +25,7 @@ public class VCFHeaderUnitTest extends BaseTest {
private VCFHeader createHeader(String headerStr) { private VCFHeader createHeader(String headerStr) {
VCFCodec codec = new VCFCodec(); VCFCodec codec = new VCFCodec();
VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr))); VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr))));
Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount); Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount);
return header; return header;
} }

View File

@ -1,7 +1,10 @@
package org.broadinstitute.sting.utils.genotype.vcf; package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.Tribble; import org.broad.tribble.Tribble;
import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -59,16 +62,10 @@ public class VCFWriterUnitTest extends BaseTest {
writer.add(createVC(header)); writer.add(createVC(header));
writer.add(createVC(header)); writer.add(createVC(header));
writer.close(); writer.close();
VCFCodec reader = new VCFCodec(); VCFCodec codec = new VCFCodec();
AsciiLineReader lineReader;
VCFHeader headerFromFile = null; VCFHeader headerFromFile = null;
try { FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(fakeVCFFile.getAbsolutePath(), codec, false);
lineReader = new AsciiLineReader(new FileInputStream(fakeVCFFile)); headerFromFile = (VCFHeader)reader.getHeader();
headerFromFile = (VCFHeader)reader.readHeader(lineReader);
}
catch (FileNotFoundException e ) {
throw new ReviewedStingException(e.getMessage());
}
int counter = 0; int counter = 0;
@ -76,12 +73,9 @@ public class VCFWriterUnitTest extends BaseTest {
validateHeader(headerFromFile); validateHeader(headerFromFile);
try { try {
while(true) { Iterator<VariantContext> it = reader.iterator();
String line = lineReader.readLine(); while(it.hasNext()) {
if (line == null) VariantContext vc = it.next();
break;
VariantContext vc = (VariantContext)reader.decode(line);
counter++; counter++;
} }
Assert.assertEquals(counter, 2); Assert.assertEquals(counter, 2);

View File

@ -78,30 +78,31 @@ public class VariantContextBenchmark extends SimpleBenchmark {
private GenomeLocParser b37GenomeLocParser; private GenomeLocParser b37GenomeLocParser;
@Override protected void setUp() { @Override protected void setUp() {
try { // TODO -- update for new tribble interface
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference)); // try {
b37GenomeLocParser = new GenomeLocParser(seq); // ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference));
} catch ( FileNotFoundException e) { // b37GenomeLocParser = new GenomeLocParser(seq);
throw new RuntimeException(e); // } catch ( FileNotFoundException e) {
} // throw new RuntimeException(e);
// }
// read it into a String so that we don't try to benchmark IO issues //
try { // // read it into a String so that we don't try to benchmark IO issues
FileInputStream s = new FileInputStream(new File(vcfFile)); // try {
AsciiLineReader lineReader = new AsciiLineReader(s); // FileInputStream s = new FileInputStream(new File(vcfFile));
int counter = 0; // AsciiLineReader lineReader = new AsciiLineReader(s);
StringBuffer sb = new StringBuffer(); // int counter = 0;
while (counter++ < linesToRead ) { // StringBuffer sb = new StringBuffer();
String line = lineReader.readLine(); // while (counter++ < linesToRead ) {
if ( line == null ) // String line = lineReader.readLine();
break; // if ( line == null )
sb.append(line + "\n"); // break;
} // sb.append(line + "\n");
s.close(); // }
INPUT_STRING = sb.toString(); // s.close();
} catch (IOException e) { // INPUT_STRING = sb.toString();
throw new RuntimeException(e); // } catch (IOException e) {
} // throw new RuntimeException(e);
// }
} }
private interface FunctionToBenchmark<T extends Feature> { private interface FunctionToBenchmark<T extends Feature> {
@ -109,23 +110,24 @@ public class VariantContextBenchmark extends SimpleBenchmark {
} }
private <T extends Feature> void runBenchmark(FeatureCodec<T> codec, FunctionToBenchmark<T> func) { private <T extends Feature> void runBenchmark(FeatureCodec<T> codec, FunctionToBenchmark<T> func) {
try { // TODO -- update for new Tribble interface
InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes()); // try {
AsciiLineReader lineReader = new AsciiLineReader(is); // InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes());
codec.readHeader(lineReader); // AsciiLineReader lineReader = new AsciiLineReader(is);
// codec.readHeader(lineReader);
int counter = 0; //
while (counter++ < linesToRead ) { // int counter = 0;
String line = lineReader.readLine(); // while (counter++ < linesToRead ) {
if ( line == null ) // String line = lineReader.readLine();
break; // if ( line == null )
// break;
T vc = codec.decode(line); //
func.run(vc); // T vc = codec.decode(line);
} // func.run(vc);
} catch (Exception e) { // }
System.out.println("Benchmarking run failure because of " + e.getMessage()); // } catch (Exception e) {
} // System.out.println("Benchmarking run failure because of " + e.getMessage());
// }
} }
public void timeV14(int rep) { public void timeV14(int rep) {

View File

@ -6,12 +6,14 @@ import scala.io.Source._
import net.sf.samtools.SAMFileReader import net.sf.samtools.SAMFileReader
import org.broadinstitute.sting.utils.codecs.vcf.{VCFHeader, VCFCodec} import org.broadinstitute.sting.utils.codecs.vcf.{VCFHeader, VCFCodec}
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import org.broad.tribble.AbstractFeatureReader import org.broad.tribble.{FeatureCodec, AbstractFeatureReader}
object VCF_BAM_utilities { object VCF_BAM_utilities {
def getSamplesFromVCF(vcfFile: File): List[String] = { def getSamplesFromVCF(vcfFile: File): List[String] = {
return AbstractFeatureReader.getFeatureReader(vcfFile.getPath(), new VCFCodec()).getHeader().asInstanceOf[VCFHeader].getGenotypeSamples().toList List();
// TODO -- ask khalid for help here with type error
//return AbstractFeatureReader.getFeatureReader(vcfFile.getPath(), new VCFCodec()).getHeader().asInstanceOf[VCFHeader].getGenotypeSamples().toList
} }
def getSamplesInBAM(bam: File): List[String] = { def getSamplesInBAM(bam: File): List[String] = {

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0"> <ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="94" status="integration" /> <info organisation="org.broad" module="tribble" revision="98" status="integration" />
</ivy-module> </ivy-module>