Rev Tribble to r97, adding binary feature support

From tribble logs:

Binary feature support in tribble

-- Massive refactoring and cleanup
-- Many bug fixes throughout
-- FeatureCodec is now general, with decode etc. taking a PositionalBufferedStream
as an argument, not a String
-- See ExampleBinaryCodec for an example binary codec
-- AbstractAsciiFeatureCodec provides its subclasses with the same String decode
and readHeader functionality as before.  Old ASCII codecs should inherit from this
base class, and will work without additional modifications
-- Split AsciiLineReader into a position tracking stream
(PositionalBufferedStream).  The new AsciiLineReader takes as an argument a
PositionalBufferedStream and provides the readLine() functionality of before.
Could potentially use optimizations (it's a TODO in the code)
-- The Positional interface includes some more functionality that's now
necessary to support the more general decoding of binary features
-- FeatureReaders now work using the general FeatureCodec interface, so they can
index binary features
-- Bugfix for an off-by-one error in LinearIndexCreator when setting the end block
position
-- Deleted VariantType, since this wasn't used anywhere and it's a particularly
clean way of thinking about the problem
-- Moved DiploidGenotype, which is specific to Gelitext, to the gelitext package
-- TabixReader requires an AsciiFeatureCodec as it's currently only implemented
to handle line oriented records
-- Renamed AsciiFeatureReader to TribbleIndexedFeatureReader now that it handles
Ascii and binary features
-- Removed unused functions here and there as encountered
-- Fixed build.xml to be truly headless
-- FeatureCodec readHeader returns a FeatureCodecHeader object that contains a
value and the position in the file where the header ends (not inclusive).
TribbleReaders now skip the header if the position is set, so it's no longer
necessary, if one implements the general readHeader(PositionalBufferedStream)
version, to see header lines in the decode functions.  Necessary for binary
codecs but a nice side benefit for ascii codecs as well
-- Cleaned up the IndexFactory interface so there's a truly general createIndex
function that takes the enumerated index type.  Added a writeIndex() function
that writes an index to disk.
-- Vastly expanded the index unit tests and reader tests to really test linear,
interval, and tabix indexed files.  Updated test.bed, and created a tabix
version of it as well.
-- Significant BinaryFeaturesTest suite.
-- Some test files have indent changes
This commit is contained in:
Mark DePristo 2012-05-03 07:02:28 -04:00
parent 58c470a6c5
commit 43d97c2e00
19 changed files with 131 additions and 221 deletions

View File

@ -25,8 +25,10 @@
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
@ -77,27 +79,15 @@ public final class IntervalBinding<T extends Feature> {
if ( featureIntervals != null ) {
intervals = new ArrayList<GenomeLoc>();
//RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(),
// toolkit.getGenomeLocParser(),
// toolkit.getArguments().unsafe);
// TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
final FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
if ( codec instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());
try {
final FileInputStream fis = new FileInputStream(new File(featureIntervals.getSource()));
final AsciiLineReader lineReader = new AsciiLineReader(fis);
codec.readHeader(lineReader);
String line = lineReader.readLine();
while ( line != null ) {
final Feature feature = codec.decodeLoc(line);
if ( feature == null )
throw new UserException.MalformedFile(featureIntervals.getSource(), "Couldn't parse line '" + line + "'");
FeatureReader<Feature> reader = AbstractFeatureReader.getFeatureReader(featureIntervals.getSource(), codec, false);
for ( Feature feature : reader.iterator() )
intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature));
line = lineReader.readLine();
}
} catch (Exception e) {
throw new UserException.MalformedFile(featureIntervals.getSource(), "Problem reading the interval file", e);
}

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
* A HACK. Tribble should contain all the information it needs to decode the unqualified position of
* a feature.
*/
public interface ReferenceDependentFeatureCodec<T extends org.broad.tribble.Feature> extends FeatureCodec<T> {
public interface ReferenceDependentFeatureCodec {
/**
* Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features.
* @param genomeLocParser The parser to supply.

View File

@ -25,15 +25,16 @@
package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.*;
import java.util.Iterator;
import java.util.Map;
@ -56,16 +57,14 @@ public class VCFDiffableReader implements DiffableReader {
DiffNode root = DiffNode.rooted(file.getName());
try {
// read the version line from the file
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
final String version = lineReader.readLine();
BufferedReader br = new BufferedReader(new FileReader(file));
final String version = br.readLine();
root.add("VERSION", version);
lineReader.close();
lineReader = new AsciiLineReader(new FileInputStream(file));
VCFCodec vcfCodec = new VCFCodec();
br.close();
// must be read as state is stored in reader itself
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
VCFHeader header = (VCFHeader)reader.getHeader();
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
String key = headerLine.getKey();
if ( headerLine instanceof VCFIDHeaderLine)
@ -76,14 +75,14 @@ public class VCFDiffableReader implements DiffableReader {
root.add(key, headerLine.toString());
}
String line = lineReader.readLine();
int count = 0, nRecordsAtPos = 1;
String prevName = "";
while ( line != null ) {
Iterator<VariantContext> it = reader.iterator();
while ( it.hasNext() ) {
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break;
VariantContext vc = (VariantContext)vcfCodec.decode(line);
VariantContext vc = it.next();
String name = vc.getChr() + ":" + vc.getStart();
if ( name.equals(prevName) ) {
name += "_" + ++nRecordsAtPos;
@ -121,10 +120,9 @@ public class VCFDiffableReader implements DiffableReader {
}
root.add(vcRoot);
line = lineReader.readLine();
}
lineReader.close();
reader.close();
} catch ( IOException e ) {
return null;
}

View File

@ -25,16 +25,14 @@ package org.broadinstitute.sting.utils.codecs.beagle;
*/
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@ -63,7 +61,7 @@ import java.util.regex.Pattern;
* @author Mark DePristo
* @since 2010
*/
public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature> {
public class BeagleCodec extends AsciiFeatureCodec<BeagleFeature> implements ReferenceDependentFeatureCodec {
private String[] header;
public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2};
private BeagleReaderType readerType;
@ -80,25 +78,16 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
*/
private GenomeLocParser genomeLocParser;
public BeagleCodec() {
super(BeagleFeature.class);
}
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
*/
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
public Feature decodeLoc(String line) {
return decode(line);
}
public static String[] readHeader(final File source) throws IOException {
FileInputStream is = new FileInputStream(source);
try {
return readHeader(new AsciiLineReader(is), null);
} finally {
is.close();
}
}
public Object readHeader(LineReader reader)
@ -183,11 +172,6 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)");
@Override
public Class<BeagleFeature> getFeatureType() {
return BeagleFeature.class;
}
public BeagleFeature decode(String line) {
String[] tokens;

View File

@ -24,8 +24,7 @@
package org.broadinstitute.sting.utils.codecs.hapmap;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.annotation.Strand;
import org.broad.tribble.readers.LineReader;
@ -71,18 +70,14 @@ import java.util.Arrays;
* @author Mark DePristo
* @since 2010
*/
public class RawHapMapCodec extends AbstractFeatureCodec {
public class RawHapMapCodec extends AsciiFeatureCodec<RawHapMapFeature> {
// the minimum number of features in the HapMap file line
private static final int minimumFeatureCount = 11;
private String headerLine;
/**
* decode the location only
* @param line the input line to decode
* @return a HapMapFeature
*/
public Feature decodeLoc(String line) {
return decode(line);
public RawHapMapCodec() {
super(RawHapMapFeature.class);
}
/**
@ -90,7 +85,7 @@ public class RawHapMapCodec extends AbstractFeatureCodec {
* @param line the input line to decode
* @return a HapMapFeature, with the given fields
*/
public Feature decode(String line) {
public RawHapMapFeature decode(String line) {
String[] array = line.split("\\s+");
// make sure the split was successful - that we got an appropriate number of fields
@ -113,10 +108,6 @@ public class RawHapMapCodec extends AbstractFeatureCodec {
headerLine);
}
public Class<RawHapMapFeature> getFeatureType() {
return RawHapMapFeature.class;
}
public Object readHeader(LineReader reader) {
try {
headerLine = reader.readLine();

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.codecs.refseq;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -46,13 +46,18 @@ import java.util.ArrayList;
* @author Mark DePristo
* @since 2010
*/
public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature> {
public class RefSeqCodec extends AsciiFeatureCodec<RefSeqFeature> implements ReferenceDependentFeatureCodec {
/**
* The parser to use when resolving genome-wide locations.
*/
private GenomeLocParser genomeLocParser;
private boolean zero_coding_length_user_warned = false;
public RefSeqCodec() {
super(RefSeqFeature.class);
}
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
@ -130,17 +135,4 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
feature.setExon_frames(exon_frames);
return feature;
}
@Override
public Object readHeader(LineReader reader) {
return null;
}
@Override
public Class<RefSeqFeature> getFeatureType() {
return RefSeqFeature.class;
}
public boolean canDecode(final String potentialInput) { return false; }
}

View File

@ -25,10 +25,9 @@
package org.broadinstitute.sting.utils.codecs.sampileup;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils;
import java.util.ArrayList;
@ -76,7 +75,7 @@ import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.V
* @author Matt Hanna
* @since 2009
*/
public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
public class SAMPileupCodec extends AsciiFeatureCodec<SAMPileupFeature> {
// the number of tokens we expect to parse from a pileup line
private static final int expectedTokenCount = 10;
private static final char fldDelim = '\t';
@ -88,24 +87,8 @@ public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
private static final String baseT = "T";
private static final String emptyStr = ""; // we will use this for "reference" allele in insertions
/**
* Return the # of header lines for this file.
*
* @param reader the line reader
* @return 0 in this case, we assume no header lines.
*/
public Object readHeader(LineReader reader) {
// we don't require a header line, but it may exist. We'll deal with that above.
return null;
}
@Override
public Class<SAMPileupFeature> getFeatureType() {
return SAMPileupFeature.class;
}
public Feature decodeLoc(String line) {
return decode(line);
public SAMPileupCodec() {
super(SAMPileupFeature.class);
}
public SAMPileupFeature decode(String line) {
@ -285,5 +268,4 @@ public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
feature.setPileupBases(baseBuilder.toString());
feature.setPileupQuals(qualBuilder.toString());
}
}

View File

@ -27,10 +27,9 @@ package org.broadinstitute.sting.utils.codecs.samread;
import net.sf.samtools.Cigar;
import net.sf.samtools.TextCigarCodec;
import net.sf.samtools.util.StringUtil;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils;
/**
@ -52,31 +51,14 @@ import org.broad.tribble.util.ParsingUtils;
* @author Matt Hanna
* @since 2009
*/
public class SAMReadCodec extends AbstractFeatureCodec<SAMReadFeature> {
public class SAMReadCodec extends AsciiFeatureCodec<SAMReadFeature> {
/* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */
// the number of tokens we expect to parse from a read line
private static final int expectedTokenCount = 11;
/**
* Return the # of header lines for this file.
*
* @param reader the line reader
* @return 0 in this case, we assume no header lines. The reads file may have a
* header line beginning with '@', but we can ignore that in the decode function.
*/
public Object readHeader(LineReader reader) {
// we don't require a header line, but it may exist. We'll deal with that above.
return null;
}
@Override
public Class<SAMReadFeature> getFeatureType() {
return SAMReadFeature.class;
}
public Feature decodeLoc(String line) {
return decode(line);
public SAMReadCodec() {
super(SAMReadFeature.class);
}
/**
@ -131,6 +113,4 @@ public class SAMReadCodec extends AbstractFeatureCodec<SAMReadFeature> {
bases,
qualities);
}
}

View File

@ -23,7 +23,7 @@ import java.util.Arrays;
public class BedTableCodec extends TableCodec implements ReferenceDependentFeatureCodec {
@Override
public Feature decode(String line) {
public TableFeature decode(String line) {
if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter))
return null;
String[] split = line.split(delimiterRegex);

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.utils.codecs.table;
import org.broad.tribble.Feature;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -39,7 +39,7 @@ import java.util.Arrays;
* @author Mark DePristo
* @since 2009
*/
public class TableCodec implements ReferenceDependentFeatureCodec {
public class TableCodec extends AsciiFeatureCodec<TableFeature> implements ReferenceDependentFeatureCodec {
final static protected String delimiterRegex = "\\s+";
final static protected String headerDelimiter = "HEADER";
final static protected String igvHeaderDelimiter = "track";
@ -52,6 +52,10 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
*/
protected GenomeLocParser genomeLocParser;
public TableCodec() {
super(TableFeature.class);
}
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
@ -61,14 +65,8 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
this.genomeLocParser = genomeLocParser;
}
@Override
public Feature decodeLoc(String line) {
return decode(line);
}
@Override
public Feature decode(String line) {
public TableFeature decode(String line) {
if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter))
return null;
String[] split = line.split(delimiterRegex);
@ -77,11 +75,6 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header);
}
@Override
public Class<TableFeature> getFeatureType() {
return TableFeature.class;
}
@Override
public Object readHeader(LineReader reader) {
String line = "";
@ -106,7 +99,4 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
}
return header;
}
public boolean canDecode(final String potentialInput) { return false; }
}

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import org.apache.log4j.Logger;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
@ -10,14 +10,20 @@ import org.broad.tribble.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.LazyGenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.io.*;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.zip.GZIPInputStream;
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
protected final static Logger log = Logger.getLogger(VCFCodec.class);
@ -61,6 +67,10 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
protected Map<String, String> stringCache = new HashMap<String, String>();
protected AbstractVCFCodec() {
super(VariantContext.class);
}
/**
* Creates a LazyParser for a LazyGenotypesContext to use to decode
* our genotypes only when necessary. We do this instead of eagerly
@ -266,7 +276,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
* @param line the line
* @return a VariantContext
*/
public Feature decode(String line) {
public VariantContext decode(String line) {
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
@ -378,14 +388,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
return vc;
}
/**
*
* @return the type of record
*/
public Class<VariantContext> getFeatureType() {
return VariantContext.class;
}
/**
* get the name of this codec
* @return our set name

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
@ -63,7 +64,7 @@ public class CombineVariantsUnitTest {
private VCFHeader createHeader(String headerStr) {
VCFCodec codec = new VCFCodec();
VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr)));
VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr))));
return head;
}

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.codecs.hapmap;
import org.broad.tribble.annotation.Strand;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -152,7 +153,7 @@ public class HapMapUnitTest {
public AsciiLineReader getReader() {
try {
return new AsciiLineReader(new FileInputStream(hapMapFile));
return new AsciiLineReader(new PositionalBufferedStream(new FileInputStream(hapMapFile)));
} catch (FileNotFoundException e) {
Assert.fail("Unable to open hapmap file : " + hapMapFile);
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
@ -24,7 +25,7 @@ public class VCFHeaderUnitTest extends BaseTest {
private VCFHeader createHeader(String headerStr) {
VCFCodec codec = new VCFCodec();
VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr)));
VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr))));
Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount);
return header;
}

View File

@ -1,7 +1,10 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.Tribble;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -59,16 +62,10 @@ public class VCFWriterUnitTest extends BaseTest {
writer.add(createVC(header));
writer.add(createVC(header));
writer.close();
VCFCodec reader = new VCFCodec();
AsciiLineReader lineReader;
VCFCodec codec = new VCFCodec();
VCFHeader headerFromFile = null;
try {
lineReader = new AsciiLineReader(new FileInputStream(fakeVCFFile));
headerFromFile = (VCFHeader)reader.readHeader(lineReader);
}
catch (FileNotFoundException e ) {
throw new ReviewedStingException(e.getMessage());
}
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(fakeVCFFile.getAbsolutePath(), codec, false);
headerFromFile = (VCFHeader)reader.getHeader();
int counter = 0;
@ -76,12 +73,9 @@ public class VCFWriterUnitTest extends BaseTest {
validateHeader(headerFromFile);
try {
while(true) {
String line = lineReader.readLine();
if (line == null)
break;
VariantContext vc = (VariantContext)reader.decode(line);
Iterator<VariantContext> it = reader.iterator();
while(it.hasNext()) {
VariantContext vc = it.next();
counter++;
}
Assert.assertEquals(counter, 2);

View File

@ -78,30 +78,31 @@ public class VariantContextBenchmark extends SimpleBenchmark {
private GenomeLocParser b37GenomeLocParser;
@Override protected void setUp() {
try {
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference));
b37GenomeLocParser = new GenomeLocParser(seq);
} catch ( FileNotFoundException e) {
throw new RuntimeException(e);
}
// read it into a String so that we don't try to benchmark IO issues
try {
FileInputStream s = new FileInputStream(new File(vcfFile));
AsciiLineReader lineReader = new AsciiLineReader(s);
int counter = 0;
StringBuffer sb = new StringBuffer();
while (counter++ < linesToRead ) {
String line = lineReader.readLine();
if ( line == null )
break;
sb.append(line + "\n");
}
s.close();
INPUT_STRING = sb.toString();
} catch (IOException e) {
throw new RuntimeException(e);
}
// TODO -- update for new tribble interface
// try {
// ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference));
// b37GenomeLocParser = new GenomeLocParser(seq);
// } catch ( FileNotFoundException e) {
// throw new RuntimeException(e);
// }
//
// // read it into a String so that we don't try to benchmark IO issues
// try {
// FileInputStream s = new FileInputStream(new File(vcfFile));
// AsciiLineReader lineReader = new AsciiLineReader(s);
// int counter = 0;
// StringBuffer sb = new StringBuffer();
// while (counter++ < linesToRead ) {
// String line = lineReader.readLine();
// if ( line == null )
// break;
// sb.append(line + "\n");
// }
// s.close();
// INPUT_STRING = sb.toString();
// } catch (IOException e) {
// throw new RuntimeException(e);
// }
}
private interface FunctionToBenchmark<T extends Feature> {
@ -109,23 +110,24 @@ public class VariantContextBenchmark extends SimpleBenchmark {
}
private <T extends Feature> void runBenchmark(FeatureCodec<T> codec, FunctionToBenchmark<T> func) {
try {
InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes());
AsciiLineReader lineReader = new AsciiLineReader(is);
codec.readHeader(lineReader);
int counter = 0;
while (counter++ < linesToRead ) {
String line = lineReader.readLine();
if ( line == null )
break;
T vc = codec.decode(line);
func.run(vc);
}
} catch (Exception e) {
System.out.println("Benchmarking run failure because of " + e.getMessage());
}
// TODO -- update for new Tribble interface
// try {
// InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes());
// AsciiLineReader lineReader = new AsciiLineReader(is);
// codec.readHeader(lineReader);
//
// int counter = 0;
// while (counter++ < linesToRead ) {
// String line = lineReader.readLine();
// if ( line == null )
// break;
//
// T vc = codec.decode(line);
// func.run(vc);
// }
// } catch (Exception e) {
// System.out.println("Benchmarking run failure because of " + e.getMessage());
// }
}
public void timeV14(int rep) {

View File

@ -6,12 +6,14 @@ import scala.io.Source._
import net.sf.samtools.SAMFileReader
import org.broadinstitute.sting.utils.codecs.vcf.{VCFHeader, VCFCodec}
import scala.collection.JavaConversions._
import org.broad.tribble.AbstractFeatureReader
import org.broad.tribble.{FeatureCodec, AbstractFeatureReader}
object VCF_BAM_utilities {
def getSamplesFromVCF(vcfFile: File): List[String] = {
return AbstractFeatureReader.getFeatureReader(vcfFile.getPath(), new VCFCodec()).getHeader().asInstanceOf[VCFHeader].getGenotypeSamples().toList
List();
// TODO -- ask khalid for help here with type error
//return AbstractFeatureReader.getFeatureReader(vcfFile.getPath(), new VCFCodec()).getHeader().asInstanceOf[VCFHeader].getGenotypeSamples().toList
}
def getSamplesInBAM(bam: File): List[String] = {

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="94" status="integration" />
<info organisation="org.broad" module="tribble" revision="98" status="integration" />
</ivy-module>