diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 84098c5f0..20692c68a 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescripto import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.Walker; @@ -204,26 +205,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram { */ private List unpackBAMFileList(GATKArgumentCollection argCollection) { List unpackedReads = new ArrayList(); - for( File inputFile: argCollection.samFiles ) { - if (inputFile.getName().toLowerCase().endsWith(".list") ) { + for( String inputFileName: argCollection.samFiles ) { + List inputFileNameTags = parser.getTags(inputFileName); + inputFileName = expandFileName(inputFileName); + if (inputFileName.toLowerCase().endsWith(".list") ) { try { - for(String fileName : new XReadLines(inputFile)) - unpackedReads.add(new SAMReaderID(new File(fileName),parser.getTags(inputFile))); + for(String fileName : new XReadLines(new File(inputFileName))) + unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName))); } catch( FileNotFoundException ex ) { - throw new UserException.CouldNotReadInputFile(inputFile, "Unable to find file while unpacking reads", ex); + throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex); } } - else if(inputFile.getName().toLowerCase().endsWith(".bam")) { - 
unpackedReads.add( new SAMReaderID(inputFile,parser.getTags(inputFile)) ); + else if(inputFileName.toLowerCase().endsWith(".bam")) { + unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); } - else if(inputFile.getName().equals("-")) { - unpackedReads.add(new SAMReaderID(new File("/dev/stdin"),Collections.emptyList())); + else if(inputFileName.endsWith("stdin")) { + unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); } else { throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " + "with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " + - "of BAM files is in the correct format, update the extension, and try again.",inputFile.getName())); + "of BAM files is in the correct format, update the extension, and try again.",inputFileName)); } } return unpackedReads; @@ -236,27 +239,47 @@ public abstract class CommandLineExecutable extends CommandLineProgram { private Collection unpackRODBindings(GATKArgumentCollection argCollection) { Collection rodBindings = new ArrayList(); - for (String binding: argCollection.RODBindings) { - if(parser.getTags(binding).size() != 2) + + for (String fileName: argCollection.RODBindings) { + List parameters = parser.getTags(fileName); + fileName = expandFileName(fileName); + RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE; + + if(parameters.size() != 2) throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " + "Please use the following syntax when providing reference-ordered " + "data: -B:, ."); // Assume that if tags are present, those tags are name and type. // Name is always first, followed by type. 
- List parameters = parser.getTags(binding); String name = parameters.get(0); String type = parameters.get(1); - rodBindings.add(new RMDTriplet(name,type,binding)); + rodBindings.add(new RMDTriplet(name,type,fileName,storageType)); } if (argCollection.DBSNPFile != null) { if(argCollection.DBSNPFile.toLowerCase().contains("vcf")) throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf ."); - rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile)); + + String fileName = expandFileName(argCollection.DBSNPFile); + RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE; + + rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME,"dbsnp",fileName,storageType)); } return rodBindings; } - + /** + * Expand any special characters that appear in the filename. Right now, '-' is expanded to + * '/dev/stdin' only, but in the future, special characters like '~' and '*' that are passed + * directly to the command line in some circumstances could be expanded as well. Be careful + * when adding UNIX-isms. + * @param argument the text appearing on the command-line. + * @return An expanded string suitable for opening by Java/UNIX file handling utilities. + */ + private String expandFileName(String argument) { + if(argument.trim().equals("-")) + return "/dev/stdin"; + return argument; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 29a8c51c6..4bb14e25c 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -148,7 +148,7 @@ public class GenomeAnalysisEngine { /** * A processed collection of SAM reader identifiers. 
*/ - private Collection samReaderIDs; + private Collection samReaderIDs = Collections.emptyList(); /** * Set the SAM/BAM files over which to traverse. @@ -334,12 +334,12 @@ public class GenomeAnalysisEngine { * * @param rods Reference-ordered data to load. */ - protected void validateSuppliedReferenceOrderedData(List rods) { + protected void validateSuppliedReferenceOrderedData(List rods) { // Check to make sure that all required metadata is present. List allRequired = WalkerManager.getRequiredMetaData(walker); for (RMD required : allRequired) { boolean found = false; - for (RMDTrack rod : rods) { + for (ReferenceOrderedDataSource rod : rods) { if (rod.matchesNameAndRecordType(required.name(), required.type())) found = true; } @@ -349,7 +349,7 @@ public class GenomeAnalysisEngine { } // Check to see that no forbidden rods are present. - for (RMDTrack rod : rods) { + for (ReferenceOrderedDataSource rod : rods) { if (!WalkerManager.isAllowed(walker, rod)) throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName())); } @@ -614,7 +614,7 @@ public class GenomeAnalysisEngine { for (String str : rodNames.keySet()) if (str.equals(rodName)) { logger.info("Adding interval list from track (ROD) named " + rodName); - RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str).getReferenceOrderedData()); + RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str)); ret.addAll(intervalGenerator.toGenomeLocList()); } } @@ -661,14 +661,7 @@ public class GenomeAnalysisEngine { sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles); // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference - RMDTrackBuilder manager = new RMDTrackBuilder(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); - List tracks = 
manager.getReferenceMetaDataSources(this,argCollection); - validateSuppliedReferenceOrderedData(tracks); - - // validate all the sequence dictionaries against the reference - validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks, manager); - - rodDataSources = getReferenceOrderedDataSources(tracks); + rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); } /** @@ -788,10 +781,10 @@ public class GenomeAnalysisEngine { * * @param reads Reads data source. * @param reference Reference data source. - * @param tracks a collection of the reference ordered data tracks + * @param rods a collection of the reference ordered data tracks */ - private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection tracks, RMDTrackBuilder manager) { - if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null ) + private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection rods, RMDTrackBuilder manager) { + if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null ) return; // Compile a set of sequence names that exist in the reference file. 
@@ -815,9 +808,8 @@ public class GenomeAnalysisEngine { SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary); } - // compare the tracks to the reference, if they have a sequence dictionary - for (RMDTrack track : tracks) - manager.validateTrackSequenceDictionary(track.getName(),track.getSequenceDictionary(),referenceDictionary); + for (ReferenceOrderedDataSource rod : rods) + manager.validateTrackSequenceDictionary(rod.getName(),rod.getSequenceDictionary(),referenceDictionary); } /** @@ -862,18 +854,34 @@ public class GenomeAnalysisEngine { /** * Open the reference-ordered data sources. * - * @param rods the reference order data to execute using + * @param referenceMetaDataFiles collection of RMD descriptors to load and validate. + * @param sequenceDictionary GATK-wide sequence dictionary to use for validation. + * @param genomeLocParser to use when creating and validating GenomeLocs. + * @param validationExclusionType potentially indicate which validations to include / exclude. + * + * @return A list of reference-ordered data sources. 
*/ - private List getReferenceOrderedDataSources(List rods) { + private List getReferenceOrderedDataSources(Collection referenceMetaDataFiles, + SAMSequenceDictionary sequenceDictionary, + GenomeLocParser genomeLocParser, + ValidationExclusion.TYPE validationExclusionType) { + RMDTrackBuilder builder = new RMDTrackBuilder(referenceMetaDataFiles,sequenceDictionary,genomeLocParser,validationExclusionType); + // try and make the tracks given their requests + // create live instances of the tracks + List tracks = new ArrayList(); + List dataSources = new ArrayList(); - for (RMDTrack rod : rods) - dataSources.add(new ReferenceOrderedDataSource(referenceMetaDataFiles, - referenceDataSource.getReference().getSequenceDictionary(), + for (RMDTriplet fileDescriptor : referenceMetaDataFiles) + dataSources.add(new ReferenceOrderedDataSource(fileDescriptor, + builder, + sequenceDictionary, genomeLocParser, - argCollection.unsafe, - rod, flashbackData())); + + // validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match. 
+ validateSuppliedReferenceOrderedData(dataSources); + validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder); + return dataSources; } diff --git a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 635da4a03..a75f30173 100755 --- a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.walkers.*; @@ -236,7 +237,7 @@ public class WalkerManager extends PluginManager { * @param rod Source to check. * @return True if the walker forbids this data type. False otherwise. */ - public static boolean isAllowed(Class walkerClass, RMDTrack rod) { + public static boolean isAllowed(Class walkerClass, ReferenceOrderedDataSource rod) { Allows allowsDataSource = getWalkerAllowed(walkerClass); // Allows is less restrictive than requires. If an allows @@ -263,7 +264,7 @@ public class WalkerManager extends PluginManager { * @param rod Source to check. * @return True if the walker forbids this data type. False otherwise. 
*/ - public static boolean isAllowed(Walker walker, RMDTrack rod) { + public static boolean isAllowed(Walker walker, ReferenceOrderedDataSource rod) { return isAllowed(walker.getClass(), rod); } diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index a56d321da..0d5010e0d 100755 --- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -69,7 +69,7 @@ public class GATKArgumentCollection { // parameters and their defaults @ElementList(required = false) @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) - public List samFiles = new ArrayList(); + public List samFiles = new ArrayList(); // parameters and their defaults @ElementList(required = false) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index 19ec928b7..f661a8803 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -1,25 +1,20 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import net.sf.samtools.SAMSequenceDictionary; -import org.broad.tribble.FeatureSource; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import 
org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.File; import java.io.IOException; -import java.util.Collection; +import java.lang.reflect.Type; import java.util.List; /** * User: hanna @@ -41,7 +36,22 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { /** * The reference-ordered data itself. */ - private final RMDTrack rod; + private final RMDTriplet fileDescriptor; + + /** + * The header associated with this VCF, if any. + */ + private final Object header; + + /** + * The private sequence dictionary associated with this RMD. + */ + private final SAMSequenceDictionary sequenceDictionary; + + /** + * The builder to use when constructing new reference-ordered data readers. + */ + private final RMDTrackBuilder builder; /** * A pool of iterators for navigating through the genome. @@ -50,21 +60,28 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { /** * Create a new reference-ordered data source. 
- * @param rod the reference ordered data */ - public ReferenceOrderedDataSource(Collection refMetaDataDescriptors, - SAMSequenceDictionary sequenceDictionary, + public ReferenceOrderedDataSource(RMDTriplet fileDescriptor, + RMDTrackBuilder builder, + SAMSequenceDictionary referenceSequenceDictionary, GenomeLocParser genomeLocParser, - ValidationExclusion.TYPE validationExclusionType, - RMDTrack rod, boolean flashbackData ) { - this.rod = rod; - if (rod.supportsQuery()) - iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary, - genomeLocParser, - new RMDTrackBuilder(refMetaDataDescriptors,sequenceDictionary,genomeLocParser,validationExclusionType), - rod); - else - iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData ); + boolean flashbackData ) { + this.fileDescriptor = fileDescriptor; + this.builder = builder; + if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) { + iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor, + builder, + referenceSequenceDictionary, + genomeLocParser); + header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader(); + this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary(); + } + else { + RMDTrack track = builder.createInstanceOfTrack(fileDescriptor); + header = track.getHeader(); + this.sequenceDictionary = track.getSequenceDictionary(); + iteratorPool = new ReferenceOrderedDataPool(track,referenceSequenceDictionary,genomeLocParser,flashbackData); + } } /** @@ -72,15 +89,43 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @return Name of the underlying rod. 
*/ public String getName() { - return this.rod.getName(); + return fileDescriptor.getName(); + } + + public Class getType() { + return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + } + + public Class getRecordType() { + return builder.createCodec(getType(),getName()).getFeatureType(); + } + + public File getFile() { + return new File(fileDescriptor.getFile()); + } + + public Object getHeader() { + return header; } /** - * Return the underlying reference-ordered data. - * @return the underlying rod. + * Retrieves the sequence dictionary created by this ROD. + * @return */ - public RMDTrack getReferenceOrderedData() { - return this.rod; + public SAMSequenceDictionary getSequenceDictionary() { + return sequenceDictionary; + } + + /** + * helper function for determining if we are the same track based on name and record type + * + * @param name the name to match + * @param type the type to match + * + * @return true on a match, false if the name or type is different + */ + public boolean matchesNameAndRecordType(String name, Type type) { + return (name.equals(fileDescriptor.getName()) && (type.getClass().isAssignableFrom(getType().getClass()))); } /** @@ -120,12 +165,12 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * A pool of reference-ordered data iterators. 
*/ class ReferenceOrderedDataPool extends ResourcePool { - private final RMDTrack rod; + private final RMDTrack track; boolean flashbackData = false; - public ReferenceOrderedDataPool( SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) { + public ReferenceOrderedDataPool( RMDTrack track, SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, boolean flashbackData ) { super(sequenceDictionary,genomeLocParser); + this.track = track; this.flashbackData = flashbackData; - this.rod = rod; } /** @@ -134,7 +179,7 @@ class ReferenceOrderedDataPool extends ResourcePool { +class ReferenceOrderedQueryDataPool extends ResourcePool { // the reference-ordered data itself. - private final RMDTrack rod; + private final RMDTriplet fileDescriptor; // our tribble track builder private final RMDTrackBuilder builder; - public ReferenceOrderedQueryDataPool( SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, RMDTrackBuilder builder, RMDTrack rod ) { - super(sequenceDictionary,genomeLocParser); - this.rod = rod; + /** + * The header from this RMD, if present. + */ + private final Object header; + + /** + * The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's. 
+ */ + private final SAMSequenceDictionary sequenceDictionary; + + public ReferenceOrderedQueryDataPool(RMDTriplet fileDescriptor, RMDTrackBuilder builder, SAMSequenceDictionary referenceSequenceDictionary, GenomeLocParser genomeLocParser) { + super(referenceSequenceDictionary,genomeLocParser); + this.fileDescriptor = fileDescriptor; this.builder = builder; - // a little bit of a hack, but it saves us from re-reading the index from the file - this.addNewResource(rod.getReader()); + + // prepopulate one RMDTrack + RMDTrack track = builder.createInstanceOfTrack(fileDescriptor); + this.addNewResource(track); + + // Pull the proper header and sequence dictionary from the prepopulated track. + this.header = track.getHeader(); + this.sequenceDictionary = track.getSequenceDictionary(); + } + + public Object getHeader() { + return header; + } + + public SAMSequenceDictionary getSequenceDictionary() { + return sequenceDictionary; } @Override - protected FeatureSource createNewResource() { - return builder.createFeatureReader(rod.getType(),rod.getFile()).first; + protected RMDTrack createNewResource() { + return builder.createInstanceOfTrack(fileDescriptor); } @Override - protected FeatureSource selectBestExistingResource(DataStreamSegment segment, List availableResources) { - for (FeatureSource reader : availableResources) + protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List availableResources) { + for (RMDTrack reader : availableResources) if (reader != null) return reader; return null; } @Override - protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, FeatureSource resource) { + protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, RMDTrack track) { try { if (position instanceof MappedStreamSegment) { GenomeLoc pos = ((MappedStreamSegment) position).locus; - return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new 
FeatureToGATKFeatureIterator(genomeLocParser,resource.query(pos.getContig(),(int) pos.getStart(), (int) pos.getStop()),rod.getName())); + return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.query(pos)); } else { - return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new FeatureToGATKFeatureIterator(genomeLocParser,resource.iterator(),rod.getName())); + return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator()); } } catch (IOException e) { - throw new ReviewedStingException("Unable to create iterator for rod named " + rod.getName(),e); + throw new ReviewedStingException("Unable to create iterator for rod named " + fileDescriptor.getName(),e); } } @Override - protected void closeResource(FeatureSource resource) { - try { - resource.close(); - } catch (IOException e) { - throw new UserException.CouldNotReadInputFile("Unable to close reader for rod named " + rod.getName(),e); - } + protected void closeResource(RMDTrack track) { + track.close(); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java index 6797cb613..22f00ac67 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java @@ -30,7 +30,7 @@ abstract class ResourcePool { /** * Sequence dictionary. */ - protected final SAMSequenceDictionary sequenceDictionary; + protected final SAMSequenceDictionary referenceSequenceDictionary; /** * Builder/parser for GenomeLocs. 
@@ -52,8 +52,8 @@ abstract class ResourcePool { */ private Map resourceAssignments = new HashMap(); - protected ResourcePool(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser) { - this.sequenceDictionary = sequenceDictionary; + protected ResourcePool(SAMSequenceDictionary referenceSequenceDictionary,GenomeLocParser genomeLocParser) { + this.referenceSequenceDictionary = referenceSequenceDictionary; this.genomeLocParser = genomeLocParser; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java index 733190541..847ad2441 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java @@ -32,6 +32,15 @@ public class SAMReaderID { this.tags = tags; } + /** + * Creates an identifier for a SAM file based on read. + * @param samFileName The source filename for SAM data. + * @param tags tags to use when creating a reader ID. + */ + public SAMReaderID(String samFileName, List tags) { + this(new File(samFileName),tags); + } + /** * Gets the tags associated with the given BAM file. * @return A collection of the tags associated with this file. 
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index ab11b9333..0a3168b56 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -80,23 +80,6 @@ public class RMDTrack { return file; } - public Class getRecordType() { - return recordType; - } - - /** - * helper function for determining if we are the same track based on name and record type - * - * @param name the name to match - * @param type the type to match - * - * @return true on a match, false if the name or type is different - */ - public boolean matchesNameAndRecordType(String name, Type type) { - return (name.equals(this.name) && (type.getClass().isAssignableFrom(this.type.getClass()))); - } - - /** * Create a track * @@ -130,15 +113,6 @@ public class RMDTrack { } } - /** - * do we support the query interface? - * - * @return true - */ - public boolean supportsQuery() { - return true; - } - public CloseableIterator query(GenomeLoc interval) throws IOException { return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index 038243cee..b3340b018 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -33,14 +33,12 @@ import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broad.tribble.source.BasicFeatureSource; import org.broad.tribble.util.LittleEndianOutputStream; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import 
org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SequenceDictionaryUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -55,6 +53,7 @@ import java.io.*; import java.util.*; + /** * * @author aaron @@ -146,45 +145,48 @@ public class RMDTrackBuilder extends PluginManager { /** * create a RMDTrack of the specified type * - * @param targetClass the target class of track - * @param name what to call the track - * @param inputFile the input file + * @param fileDescriptor a description of the type of track to build. 
* * @return an instance of the track * @throws RMDTrackCreationException * if we don't know of the target class or we couldn't create it */ - public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException { + public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException { + String name = fileDescriptor.getName(); + File inputFile = new File(fileDescriptor.getFile()); + + Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + if (featureCodecClass == null) + throw new UserException.BadArgumentValue("-B",fileDescriptor.getType()); + // return a feature reader track - Pair pair = createFeatureReader(targetClass, name, inputFile); - if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); - return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(targetClass,name)); - } - - /** - * create a tribble feature reader class, given the target class and the input file - * @param targetClass the target class, of a Tribble Codec type - * @param inputFile the input file, that corresponds to the feature type - * @return a pair of - */ - public Pair createFeatureReader(Class targetClass, File inputFile) { - return createFeatureReader(targetClass, "anonymous", inputFile); - } - - /** - * create a feature reader of the specified type - * @param targetClass the target codec type - * @param name the target name - * @param inputFile the input file to create the track from (of the codec type) - * @return the FeatureReader instance - */ - public Pair createFeatureReader(Class targetClass, String name, File inputFile) { Pair pair; if (inputFile.getAbsolutePath().endsWith(".gz")) - pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile); + pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile); 
else - pair = getFeatureSource(targetClass, name, inputFile); - return pair; + pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType()); + if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); + return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name)); + } + + /** + * Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream. + * @param targetClass Type of Tribble class to build. + * @param inputFile Input file type to use. + * @return An RMDTrack, suitable for accessing reference metadata. + */ + public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) { + // TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics. + String typeName = null; + for(Map.Entry trackType: getAvailableTrackNamesAndTypes().entrySet()) { + if(trackType.getValue().equals(targetClass)) + typeName = trackType.getKey(); + } + + if(typeName == null) + throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName()); + + return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE)); } /** @@ -197,7 +199,7 @@ public class RMDTrackBuilder extends PluginManager { * @param inputFile the file to load * @return a feature reader implementation */ - private Pair createBasicFeatureSourceNoAssumedIndex(Class targetClass, String name, File inputFile) { + private Pair createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) { // we might not know the index type, try loading with the default reader constructor logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file"); try { @@ -213,7 +215,7 @@ public class RMDTrackBuilder extends PluginManager { * @param name the name to assign this 
codec * @return the feature codec itself */ - private FeatureCodec createCodec(Class targetClass, String name) { + public FeatureCodec createCodec(Class targetClass, String name) { FeatureCodec codex = this.createByType(targetClass); if ( codex instanceof NameAwareCodec ) ((NameAwareCodec)codex).setName(name); @@ -227,33 +229,48 @@ public class RMDTrackBuilder extends PluginManager { * @param targetClass the target class * @param name the name of the codec * @param inputFile the tribble file to parse + * @param storageType How the RMD is streamed into the input file. * @return the input file as a FeatureReader */ - private Pair getFeatureSource(Class targetClass, String name, File inputFile) { - Pair reader; - try { - Index index = loadIndex(inputFile, createCodec(targetClass, name)); - try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } - catch ( ReviewedStingException e) { } + private Pair getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) { + // Feature source and sequence dictionary to use as the ultimate reference + FeatureSource featureSource = null; + SAMSequenceDictionary sequenceDictionary = null; - SAMSequenceDictionary dictFromIndex = getSequenceDictionaryFromProperties(index); + FeatureCodec codec = createCodec(targetClass, name); - // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match - if (dictFromIndex.size() == 0 && dict != null) { - File indexFile = Tribble.indexFile(inputFile); - setIndexSequenceDictionary(inputFile,index,dict,indexFile,true); - dictFromIndex = getSequenceDictionaryFromProperties(index); + // Detect whether or not this source should be indexed. 
+ boolean canBeIndexed = (storageType == RMDStorageType.FILE); + + if(canBeIndexed) { + try { + Index index = loadIndex(inputFile, codec); + try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } + catch (ReviewedStingException e) { } + + sequenceDictionary = getSequenceDictionaryFromProperties(index); + + // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match + if (sequenceDictionary.size() == 0 && dict != null) { + File indexFile = Tribble.indexFile(inputFile); + setIndexSequenceDictionary(inputFile,index,dict,indexFile,true); + sequenceDictionary = getSequenceDictionaryFromProperties(index); + } + + featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, codec); + } + catch (TribbleException e) { + throw new UserException(e.getMessage()); + } + catch (IOException e) { + throw new UserException.CouldNotCreateOutputFile(inputFile, "unable to write Tribble index", e); } - - FeatureSource source = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name)); - //source = new CachingFeatureSource(source, 100, 100000); - reader = new Pair(source, dictFromIndex); - } catch (TribbleException e) { - throw new UserException(e.getMessage()); - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(inputFile, "unable to write Tribble index", e); } - return reader; + else { + featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),codec,false); + } + + return new Pair(featureSource,sequenceDictionary); } /** @@ -402,29 +419,6 @@ public class RMDTrackBuilder extends PluginManager { return names; } - /** - * find the associated reference meta data - * - * @param argCollection the input arguments to the GATK. 
- * @param engine the GATK engine to bind the tracks to - * - * @return a list of RMDTracks, one for each -B option - */ - public List getReferenceMetaDataSources(GenomeAnalysisEngine engine, GATKArgumentCollection argCollection) { - // try and make the tracks given their requests - // create of live instances of the tracks - List tracks = new ArrayList(); - - // create instances of each of the requested types - for (RMDTriplet trip : inputs) { - Class featureCodecClass = getAvailableTrackNamesAndTypes().get(trip.getType().toUpperCase()); - if (featureCodecClass == null) - throw new UserException.BadArgumentValue("-B",trip.getType()); - tracks.add(createInstanceOfTrack(featureCodecClass, trip.getName(), new File(trip.getFile()))); - } - return tracks; - } - // --------------------------------------------------------------------------------------------------------- // static functions to work with the sequence dictionaries of indexes // --------------------------------------------------------------------------------------------------------- diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java index 8817b8d49..59ece0ee5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java @@ -16,15 +16,15 @@ import java.util.*; * Creates an interval list, given an RMDTrack */ public class RMDIntervalGenerator { - public RMDTrack track; + public ReferenceOrderedDataSource dataSource; /** * create a interval representation of a ROD track - * @param track the track + * @param dataSource the track */ - public RMDIntervalGenerator(RMDTrack track) { - if (track == null) throw new IllegalArgumentException("Track cannot be null"); - this.track = track; + public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) { + if (dataSource == null) 
throw new IllegalArgumentException("Data source cannot be null"); + this.dataSource = dataSource; } /** @@ -32,10 +32,10 @@ public class RMDIntervalGenerator { * @return a list of genome locations */ public List toGenomeLocList() { - Iterator iter = track.getIterator(); + Iterator iter = dataSource.seek((GenomeLoc)null); List locations = new ArrayList(); while (iter.hasNext()) { - GATKFeature feature = iter.next(); + RODRecordList feature = iter.next(); GenomeLoc loc = feature.getLocation(); if (loc != null) locations.add(loc); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java index 7fe3b6731..3f23d94c8 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java @@ -24,27 +24,55 @@ package org.broadinstitute.sting.gatk.refdata.utils; -/** a helper class to manage our triplets of data for the -B command line option (name, type, file) */ +/** + * a helper class to manage our triplets of data for the -B command line option (name, type, file) + * TODO: The presence of four datapoints here suggests that this class' name isn't sufficient to describe its function. Rename. + */ public class RMDTriplet { - private String name; - private String type; - private String file; + public enum RMDStorageType { FILE, STREAM }; - public RMDTriplet(String name, String type, String file) { + private final String name; + private final String type; + private final String file; + private final RMDStorageType storageType; + + public RMDTriplet(final String name, final String type, final String file, final RMDStorageType storageType) { this.name = name; this.type = type; this.file = file; + this.storageType = storageType; } + /** + * Gets the name of this track. RefMetaDataTrackers can use this identifier to retrieve data of a certain type. + * @return Name associated with this track. 
+ */ public String getName() { return name; } + /** + * Gets the type of this track. Informs the GATK how to parse this file type. + * @return Type associated with this track. + */ public String getType() { return type; } + /** + * Gets the filename representing this track. Data is loaded from this file. + * @return Filename of the RMD. + */ public String getFile() { return file; } + + /** + * The type of storage being used for this metadata track. Right now, can be either a + * file type (can be indexed) or a stream type (can't be indexed). + * @return Storage type for this RMD 'triplet'. + */ + public RMDStorageType getStorageType() { + return storageType; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java index ad18dbf92..d1b0c953c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java @@ -176,7 +176,7 @@ public class VariantsToVCF extends RodWalker { throw new UserException.BadInput("No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records"); RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe); - dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, dbsnpDataSource.getReferenceOrderedData().getFile()).getIterator(); + dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, dbsnpDataSource.getFile()).getIterator(); // Note that we should really use some sort of seekable iterator here so that the search doesn't take forever // (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to) } diff --git 
a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index 761576c73..c54eb190f 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -94,11 +94,11 @@ public class GenomicAnnotator extends RodWalker implements Tre final Set allFullyQualifiedColumnNames = new LinkedHashSet(); final Set allBindingNames = new LinkedHashSet(); for(ReferenceOrderedDataSource ds : getToolkit().getRodDataSources()) { - if(! ds.getReferenceOrderedData().getType().equals(AnnotatorInputTableCodec.class)) { + if(! ds.getType().equals(AnnotatorInputTableCodec.class)) { continue; //skip all non-AnnotatorInputTable files. } final String bindingName = ds.getName(); - File file = ds.getReferenceOrderedData().getFile(); + File file = ds.getFile(); allBindingNames.add(bindingName); try { final ArrayList header = AnnotatorInputTableCodec.readHeader(file); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java index 074bac4b5..fe6b2c012 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java @@ -172,14 +172,14 @@ public class TranscriptToGenomicInfo extends RodWalker { final ArrayList header; try { - header = AnnotatorInputTableCodec.readHeader(transcriptsDataSource.getReferenceOrderedData().getFile()); + header = AnnotatorInputTableCodec.readHeader(transcriptsDataSource.getFile()); } catch(Exception e) { - throw new 
UserException.MalformedFile(transcriptsDataSource.getReferenceOrderedData().getFile(), "Failed when attempting to read header from file", e); + throw new UserException.MalformedFile(transcriptsDataSource.getFile(), "Failed when attempting to read header from file", e); } for ( String columnName : GENE_NAME_COLUMNS ) { if ( !header.contains(columnName) ) - throw new UserException.CommandLineException("The column name '" + columnName + "' provided to -n doesn't match any of the column names in: " + transcriptsDataSource.getReferenceOrderedData().getFile()); + throw new UserException.CommandLineException("The column name '" + columnName + "' provided to -n doesn't match any of the column names in: " + transcriptsDataSource.getFile()); } //init outputColumnNames list diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index f713e341e..37e9dc4d4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -96,9 +96,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { final List dataSources = this.getToolkit().getRodDataSources(); for( final ReferenceOrderedDataSource source : dataSources ) { - final RMDTrack rod = source.getReferenceOrderedData(); - - if (rod.getName().equals(COMP_ROD_NAME)) { + if (source.getName().equals(COMP_ROD_NAME)) { hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java 
b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 56443415b..972dbe511 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -26,13 +26,13 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; -import org.broad.tribble.FeatureSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.*; import org.broadinstitute.sting.gatk.walkers.*; @@ -44,7 +44,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; -import java.io.IOException; import java.io.PrintStream; import java.util.*; @@ -406,15 +405,9 @@ public class DepthOfCoverageWalker extends LocusWalker intervalStats) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index b97a92b8a..869705173 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -110,7 +110,7 @@ public class VariantFiltrationWalker extends RodWalker { List dataSources = getToolkit().getRodDataSources(); for ( 
ReferenceOrderedDataSource source : dataSources ) { - if ( source.getReferenceOrderedData().getName().equals("mask") ) { + if ( source.getName().equals("mask") ) { hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask")); break; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index eaea000e4..1f3263590 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.*; -import org.broad.tribble.FeatureSource; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.Genotype; @@ -35,9 +34,10 @@ import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -56,7 +56,6 @@ import org.broadinstitute.sting.utils.collections.CircularArray; import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.commandline.Argument; import 
org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.commandline.Hidden; import java.io.*; @@ -228,8 +227,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { for ( Map.Entry commandLineArg : commandLineArgs.entrySet() ) headerInfo.add(new VCFHeaderLine(String.format("IGv2_%s", commandLineArg.getKey()), commandLineArg.getValue())); // also, the list of input bams - for ( File file : getToolkit().getArguments().samFiles ) - headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", file.getName())); + for ( String fileName : getToolkit().getArguments().samFiles ) + headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", fileName)); return headerInfo; } @@ -251,15 +250,11 @@ public class IndelGenotyperV2Walker extends ReadWalker { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe); - FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; + RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName)); - try { - refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), - getToolkit().getGenomeLocParser(), - new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq")); - } catch (IOException e) { - throw new UserException.CouldNotReadInputFile(new File(RefseqFileName), "Write failed", e); - } + refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + refseq.getIterator()); } if ( refseqIterator == null ) logger.info("No gene annotations available"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java 
b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java index 7451565c8..5a14a921e 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java @@ -164,7 +164,7 @@ public class CycleQualityWalker extends ReadWalker { if ( HTML ) { out.println("

Cycle Quality QC

\n"); out.println("File(s) analyzed:
"); - for ( File f : getToolkit().getArguments().samFiles) out.println(f.toString()+"
"); + for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"
"); out.println("
"); } if ( HTML ) out.println("

"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java index 1fd673940..13110f725 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java @@ -126,7 +126,7 @@ public class ProfileRodSystem extends RodWalker { private File getRodFile() { List rods = this.getToolkit().getRodDataSources(); ReferenceOrderedDataSource rod = rods.get(0); - return rod.getReferenceOrderedData().getFile(); + return rod.getFile(); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 21ef32e87..187b26426 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -187,9 +187,9 @@ public class CountCovariatesWalker extends LocusWalker { ReferenceOrderedData snp_mask; if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe); - CloseableIterator iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator(); + CloseableIterator iter = builder.createInstanceOfTrack(DbSNPCodec.class,new java.io.File(SNP_MASK)).getIterator(); snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter); } else { diff --git 
a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index ab2c301e9..64840c597 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -65,7 +65,7 @@ public class ValidateVariants extends RodWalker { public void initialize() { for ( ReferenceOrderedDataSource source : getToolkit().getRodDataSources() ) { if ( source.getName().equals(TARGET_ROD_NAME) ) { - file = source.getReferenceOrderedData().getFile(); + file = source.getFile(); break; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java index 7f4666231..9c370a597 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java @@ -1,15 +1,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers; -import org.broad.tribble.FeatureSource; +import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import 
org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; @@ -34,7 +35,7 @@ import java.io.PrintStream; public class DbSNPWindowCounter extends LocusWalker { // what we read in new tracks with - private FeatureSource reader; + private RMDTrack track; @Output private PrintStream out; @@ -50,12 +51,12 @@ public class DbSNPWindowCounter extends LocusWalker { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe); - reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first; + track = builder.createInstanceOfTrack(DbSNPCodec.class,myDbSNPFile); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - CloseableTribbleIterator dbSNPs; + CloseableIterator dbSNPs; // our upstream and downstream window locations int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0); @@ -63,19 +64,17 @@ public class DbSNPWindowCounter extends LocusWalker { // query the dnSNP iterator try { - dbSNPs = reader.query(context.getContig(), - windowStart, - windowStop); + dbSNPs = track.query(getToolkit().getGenomeLocParser().createGenomeLoc(context.getContig(),windowStart,windowStop)); } catch (IOException e) { throw new UserException.CouldNotReadInputFile(myDbSNPFile, e); } // count the number of dbSNPs we've seen int counter = 0; - for (DbSNPFeature feature: dbSNPs) + while(dbSNPs.hasNext()) counter++; out.println(context.getContig() + ":" + windowStart + "-" + context.getContig() + ":" + windowStop + "=" + - counter + " (dnSNP records)"); + counter + " (dbSNP records)"); return 1; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index d6277a2b2..6dc1a4443 
100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -10,18 +10,16 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.vcf.VCFUtils; import java.io.File; -import java.io.IOException; import java.util.*; public class IndelAnnotator extends RodWalker { @@ -38,15 +36,10 @@ public class IndelAnnotator extends RodWalker { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe); - FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; + RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName)); - try { - refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), - getToolkit().getGenomeLocParser(), - new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq")); - } catch 
(IOException e) { - throw new UserException.CouldNotReadInputFile(RefseqFileName, e); - } + refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(),refseq.getIterator()); logger.info("Using RefSeq annotations from " + RefseqFileName); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java index 5abbe87dc..5c09d984f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java @@ -86,11 +86,10 @@ public class VCF4WriterTestWalker extends RodWalker { vcfWriter = new StandardVCFWriter(new File(OUTPUT_FILE)); VCFHeader header = null; for( final ReferenceOrderedDataSource source : dataSources ) { - final RMDTrack rod = source.getReferenceOrderedData(); - if(rod.getName().equalsIgnoreCase(INPUT_ROD_NAME)) { + if(source.getName().equalsIgnoreCase(INPUT_ROD_NAME)) { try { - AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath())); + AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(source.getFile().getAbsolutePath())); header = (VCFHeader)vcf4codec.readHeader(lineReader); out.printf("Read %d header lines%n", header.getMetaData().size()); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java index 85d687e48..ae72d6b7f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java @@ -55,9 +55,9 @@ public class RodSystemValidationWalker extends RodWalker 
{ // enumerate the list of ROD's we've loaded rodList = this.getToolkit().getRodDataSources(); for (ReferenceOrderedDataSource rod : rodList) { - out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getType()); - out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getFile()); - out.println(rod.getName() + DIVIDER + md5sum(rod.getReferenceOrderedData().getFile())); + out.println(rod.getName() + DIVIDER + rod.getType()); + out.println(rod.getName() + DIVIDER + rod.getFile()); + out.println(rod.getName() + DIVIDER + md5sum(rod.getFile())); } out.println("Data:"); } diff --git a/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java index d43492da9..6ad1a6b97 100755 --- a/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java @@ -57,9 +57,8 @@ public class VCFUtils { if ( rodNames != null && !rodNames.contains(source.getName()) ) continue; - RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader ) - data.put(rod.getName(), (VCFHeader)rod.getHeader()); + if ( source.getHeader() != null && source.getHeader() instanceof VCFHeader ) + data.put(source.getName(), (VCFHeader)source.getHeader()); } return data; @@ -96,9 +95,8 @@ public class VCFUtils { if ( rodNames != null && !rodNames.contains(source.getName()) ) continue; - RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getRecordType().equals(VariantContext.class)) { - VCFHeader header = (VCFHeader)rod.getHeader(); + if ( source.getRecordType().equals(VariantContext.class)) { + VCFHeader header = (VCFHeader)source.getHeader(); if ( header != null ) fields.addAll(header.getMetaData()); } diff --git a/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java b/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java index 
6f1a7e332..59edf934e 100755 --- a/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java @@ -76,8 +76,8 @@ public class GATKArgumentCollectionUnitTest extends BaseTest { wArgs.put("wArgType3", "Arg3"); collect.walkerArgs = wArgs; - List input = new ArrayList(); - input.add(new File("test.file")); + List input = new ArrayList(); + input.add("test.file"); collect.samFiles = input; collect.strictnessLevel = SAMFileReader.ValidationStringency.STRICT; collect.referenceFile = new File("referenceFile".toLowerCase()); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 2dc458b2a..0ab804eb9 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -7,10 +7,9 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.table.TableCodec; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; @@ -78,14 +77,9 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { */ @Test public void 
testSingleBinding() { - File file = new File(testDir + "TabularDataTest.dat"); - RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file); - ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(Collections.singleton(new RMDTriplet("tableTest","Table",file.getAbsolutePath())), - seq.getSequenceDictionary(), - genomeLocParser, - null, - track, - false); + String fileName = testDir + "TabularDataTest.dat"; + RMDTriplet triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE); + ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); @@ -107,14 +101,11 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { public void testMultipleBinding() { File file = new File(testDir + "TabularDataTest.dat"); - RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath()); - RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file); - ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(Collections.singleton(testTriplet1),seq.getSequenceDictionary(),genomeLocParser,null,track,false); - - RMDTriplet testTriplet2 = new RMDTriplet("tableTest2","Table",file.getAbsolutePath()); - RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file); - ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(Collections.singleton(testTriplet2),seq.getSequenceDictionary(),genomeLocParser,null,track2,false); + RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath(),RMDStorageType.FILE); + ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(testTriplet1,builder,seq.getSequenceDictionary(),genomeLocParser,false); + RMDTriplet testTriplet2 = new 
RMDTriplet("tableTest2","Table",file.getAbsolutePath(),RMDStorageType.FILE); + ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(testTriplet2,builder,seq.getSequenceDictionary(),genomeLocParser,false); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java index 6caf64bd8..555e429b8 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java @@ -2,11 +2,12 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.features.table.TableCodec; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; @@ -60,14 +61,14 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @BeforeMethod public void setUp() { - File file = new File(testDir + "TabularDataTest.dat"); + String fileName = testDir + "TabularDataTest.dat"; RMDTrackBuilder builder = new 
RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null); - rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file); + rod = builder.createInstanceOfTrack(new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE)); } @Test public void testCreateSingleIterator() { - ResourcePool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); + ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -88,7 +89,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testCreateMultipleIterators() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); // Create a new iterator at position 2. 
@@ -138,7 +139,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorConservation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -173,7 +174,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorCreation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser, rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");