From cba18116e4dbb56c6ce91f8c6b5de70959d9842e Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 31 Dec 2010 04:52:22 +0000 Subject: [PATCH] A significant refactoring of the ROD system, done largely to simplify the process of streaming/piping VCFs into the GATK. Notable changes: - Public interface to RMDTrackBuilder is greatly simplified; users can use it only to build RMDTracks and lookup codecs. - RODDataSource and RMDTrack are no longer functionally at the same level; RODDataSources now manage RMDTracks on behalf of the GATK, and the only direct consumers of the RMDTrack class are the walkers that feel the need to access the ROD system directly. (We need to stamp out this access pattern.) A few minor warts were introduced as part of this process, labeled with TODOs. These'll be fixed as part of the VCF streaming project. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4915 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/CommandLineExecutable.java | 55 ++++-- .../sting/gatk/GenomeAnalysisEngine.java | 60 ++++--- .../sting/gatk/WalkerManager.java | 5 +- .../arguments/GATKArgumentCollection.java | 2 +- .../ReferenceOrderedDataSource.java | 169 ++++++++++++------ .../simpleDataSources/ResourcePool.java | 6 +- .../simpleDataSources/SAMReaderID.java | 9 + .../sting/gatk/refdata/tracks/RMDTrack.java | 26 --- .../tracks/builders/RMDTrackBuilder.java | 152 ++++++++-------- .../refdata/utils/RMDIntervalGenerator.java | 14 +- .../sting/gatk/refdata/utils/RMDTriplet.java | 38 +++- .../sting/gatk/walkers/VariantsToVCF.java | 2 +- .../genomicannotator/GenomicAnnotator.java | 4 +- .../TranscriptToGenomicInfo.java | 6 +- .../beagle/BeagleOutputToVCFWalker.java | 4 +- .../coverage/DepthOfCoverageWalker.java | 15 +- .../filters/VariantFiltrationWalker.java | 2 +- .../indels/IndelGenotyperV2Walker.java | 21 +-- .../gatk/walkers/qc/CycleQualityWalker.java | 2 +- .../gatk/walkers/qc/ProfileRodSystem.java | 2 +- .../recalibration/CountCovariatesWalker.java 
| 6 +- .../walkers/sequenom/PickSequenomProbes.java | 2 +- .../variantutils/ValidateVariants.java | 2 +- .../walkers/DbSNPWindowCounter.java | 21 ++- .../walkers/IndelAnnotator.java | 17 +- .../walkers/VCF4WriterTestWalker.java | 5 +- .../validation/RodSystemValidationWalker.java | 6 +- .../sting/utils/vcf/VCFUtils.java | 10 +- .../GATKArgumentCollectionUnitTest.java | 4 +- .../ReferenceOrderedViewUnitTest.java | 25 +-- .../ReferenceOrderedDataPoolUnitTest.java | 15 +- 31 files changed, 388 insertions(+), 319 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 84098c5f0..20692c68a 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescripto import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.Walker; @@ -204,26 +205,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram { */ private List unpackBAMFileList(GATKArgumentCollection argCollection) { List unpackedReads = new ArrayList(); - for( File inputFile: argCollection.samFiles ) { - if (inputFile.getName().toLowerCase().endsWith(".list") ) { + for( String inputFileName: argCollection.samFiles ) { + List inputFileNameTags = parser.getTags(inputFileName); + inputFileName = expandFileName(inputFileName); + if (inputFileName.toLowerCase().endsWith(".list") ) { try { - for(String fileName : new XReadLines(inputFile)) - unpackedReads.add(new SAMReaderID(new 
File(fileName),parser.getTags(inputFile))); + for(String fileName : new XReadLines(new File(inputFileName))) + unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName))); } catch( FileNotFoundException ex ) { - throw new UserException.CouldNotReadInputFile(inputFile, "Unable to find file while unpacking reads", ex); + throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex); } } - else if(inputFile.getName().toLowerCase().endsWith(".bam")) { - unpackedReads.add( new SAMReaderID(inputFile,parser.getTags(inputFile)) ); + else if(inputFileName.toLowerCase().endsWith(".bam")) { + unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); } - else if(inputFile.getName().equals("-")) { - unpackedReads.add(new SAMReaderID(new File("/dev/stdin"),Collections.emptyList())); + else if(inputFileName.endsWith("stdin")) { + unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); } else { throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " + "with the .list extension, but the file %s has neither extension. 
Please ensure that your BAM file or list " + - "of BAM files is in the correct format, update the extension, and try again.",inputFile.getName())); + "of BAM files is in the correct format, update the extension, and try again.",inputFileName)); } } return unpackedReads; @@ -236,27 +239,47 @@ public abstract class CommandLineExecutable extends CommandLineProgram { private Collection unpackRODBindings(GATKArgumentCollection argCollection) { Collection rodBindings = new ArrayList(); - for (String binding: argCollection.RODBindings) { - if(parser.getTags(binding).size() != 2) + + for (String fileName: argCollection.RODBindings) { + List parameters = parser.getTags(fileName); + fileName = expandFileName(fileName); + RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE; + + if(parameters.size() != 2) throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " + "Please use the following syntax when providing reference-ordered " + "data: -B:, ."); // Assume that if tags are present, those tags are name and type. // Name is always first, followed by type. - List parameters = parser.getTags(binding); String name = parameters.get(0); String type = parameters.get(1); - rodBindings.add(new RMDTriplet(name,type,binding)); + rodBindings.add(new RMDTriplet(name,type,fileName,storageType)); } if (argCollection.DBSNPFile != null) { if(argCollection.DBSNPFile.toLowerCase().contains("vcf")) throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf ."); - rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile)); + + String fileName = expandFileName(argCollection.DBSNPFile); + RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? 
RMDStorageType.STREAM : RMDStorageType.FILE; + + rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME,"dbsnp",fileName,storageType)); } return rodBindings; } - + /** + * Expand any special characters that appear in the filename. Right now, '-' is expanded to + * '/dev/stdin' only, but in the future, special characters like '~' and '*' that are passed + * directly to the command line in some circumstances could be expanded as well. Be careful + * when adding UNIX-isms. + * @param argument the text appearing on the command-line. + * @return An expanded string suitable for opening by Java/UNIX file handling utilities. + */ + private String expandFileName(String argument) { + if(argument.trim().equals("-")) + return "/dev/stdin"; + return argument; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 29a8c51c6..4bb14e25c 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -148,7 +148,7 @@ public class GenomeAnalysisEngine { /** * A processed collection of SAM reader identifiers. */ - private Collection samReaderIDs; + private Collection samReaderIDs = Collections.emptyList(); /** * Set the SAM/BAM files over which to traverse. @@ -334,12 +334,12 @@ public class GenomeAnalysisEngine { * * @param rods Reference-ordered data to load. */ - protected void validateSuppliedReferenceOrderedData(List rods) { + protected void validateSuppliedReferenceOrderedData(List rods) { // Check to make sure that all required metadata is present. 
List allRequired = WalkerManager.getRequiredMetaData(walker); for (RMD required : allRequired) { boolean found = false; - for (RMDTrack rod : rods) { + for (ReferenceOrderedDataSource rod : rods) { if (rod.matchesNameAndRecordType(required.name(), required.type())) found = true; } @@ -349,7 +349,7 @@ public class GenomeAnalysisEngine { } // Check to see that no forbidden rods are present. - for (RMDTrack rod : rods) { + for (ReferenceOrderedDataSource rod : rods) { if (!WalkerManager.isAllowed(walker, rod)) throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName())); } @@ -614,7 +614,7 @@ public class GenomeAnalysisEngine { for (String str : rodNames.keySet()) if (str.equals(rodName)) { logger.info("Adding interval list from track (ROD) named " + rodName); - RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str).getReferenceOrderedData()); + RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str)); ret.addAll(intervalGenerator.toGenomeLocList()); } } @@ -661,14 +661,7 @@ public class GenomeAnalysisEngine { sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles); // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference - RMDTrackBuilder manager = new RMDTrackBuilder(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); - List tracks = manager.getReferenceMetaDataSources(this,argCollection); - validateSuppliedReferenceOrderedData(tracks); - - // validate all the sequence dictionaries against the reference - validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks, manager); - - rodDataSources = getReferenceOrderedDataSources(tracks); + rodDataSources = 
getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); } /** @@ -788,10 +781,10 @@ public class GenomeAnalysisEngine { * * @param reads Reads data source. * @param reference Reference data source. - * @param tracks a collection of the reference ordered data tracks + * @param rods a collection of the reference ordered data tracks */ - private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection tracks, RMDTrackBuilder manager) { - if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null ) + private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection rods, RMDTrackBuilder manager) { + if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null ) return; // Compile a set of sequence names that exist in the reference file. @@ -815,9 +808,8 @@ public class GenomeAnalysisEngine { SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary); } - // compare the tracks to the reference, if they have a sequence dictionary - for (RMDTrack track : tracks) - manager.validateTrackSequenceDictionary(track.getName(),track.getSequenceDictionary(),referenceDictionary); + for (ReferenceOrderedDataSource rod : rods) + manager.validateTrackSequenceDictionary(rod.getName(),rod.getSequenceDictionary(),referenceDictionary); } /** @@ -862,18 +854,34 @@ public class GenomeAnalysisEngine { /** * Open the reference-ordered data sources. * - * @param rods the reference order data to execute using + * @param referenceMetaDataFiles collection of RMD descriptors to load and validate. + * @param sequenceDictionary GATK-wide sequence dictionary to use for validation. + * @param genomeLocParser to use when creating and validating GenomeLocs. 
+ * @param validationExclusionType potentially indicate which validations to include / exclude. + * * @return A list of reference-ordered data sources. */ - private List getReferenceOrderedDataSources(List rods) { + private List getReferenceOrderedDataSources(Collection referenceMetaDataFiles, + SAMSequenceDictionary sequenceDictionary, + GenomeLocParser genomeLocParser, + ValidationExclusion.TYPE validationExclusionType) { + RMDTrackBuilder builder = new RMDTrackBuilder(referenceMetaDataFiles,sequenceDictionary,genomeLocParser,validationExclusionType); + // try and make the tracks given their requests + // create of live instances of the tracks + List tracks = new ArrayList(); + List dataSources = new ArrayList(); - for (RMDTrack rod : rods) - dataSources.add(new ReferenceOrderedDataSource(referenceMetaDataFiles, - referenceDataSource.getReference().getSequenceDictionary(), + for (RMDTriplet fileDescriptor : referenceMetaDataFiles) + dataSources.add(new ReferenceOrderedDataSource(fileDescriptor, + builder, + sequenceDictionary, genomeLocParser, - argCollection.unsafe, - rod, flashbackData())); + + // validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match. 
+ validateSuppliedReferenceOrderedData(dataSources); + validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder); + return dataSources; } diff --git a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 635da4a03..a75f30173 100755 --- a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.walkers.*; @@ -236,7 +237,7 @@ public class WalkerManager extends PluginManager { * @param rod Source to check. * @return True if the walker forbids this data type. False otherwise. */ - public static boolean isAllowed(Class walkerClass, RMDTrack rod) { + public static boolean isAllowed(Class walkerClass, ReferenceOrderedDataSource rod) { Allows allowsDataSource = getWalkerAllowed(walkerClass); // Allows is less restrictive than requires. If an allows @@ -263,7 +264,7 @@ public class WalkerManager extends PluginManager { * @param rod Source to check. * @return True if the walker forbids this data type. False otherwise. 
*/ - public static boolean isAllowed(Walker walker, RMDTrack rod) { + public static boolean isAllowed(Walker walker, ReferenceOrderedDataSource rod) { return isAllowed(walker.getClass(), rod); } diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index a56d321da..0d5010e0d 100755 --- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -69,7 +69,7 @@ public class GATKArgumentCollection { // parameters and their defaults @ElementList(required = false) @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) - public List samFiles = new ArrayList(); + public List samFiles = new ArrayList(); // parameters and their defaults @ElementList(required = false) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index 19ec928b7..f661a8803 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -1,25 +1,20 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import net.sf.samtools.SAMSequenceDictionary; -import org.broad.tribble.FeatureSource; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import 
org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.File; import java.io.IOException; -import java.util.Collection; +import java.lang.reflect.Type; import java.util.List; /** * User: hanna @@ -41,7 +36,22 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { /** * The reference-ordered data itself. */ - private final RMDTrack rod; + private final RMDTriplet fileDescriptor; + + /** + * The header associated with this VCF, if any. + */ + private final Object header; + + /** + * The private sequence dictionary associated with this RMD. + */ + private final SAMSequenceDictionary sequenceDictionary; + + /** + * The builder to use when constructing new reference-ordered data readers. + */ + private final RMDTrackBuilder builder; /** * A pool of iterators for navigating through the genome. @@ -50,21 +60,28 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { /** * Create a new reference-ordered data source. 
- * @param rod the reference ordered data */ - public ReferenceOrderedDataSource(Collection refMetaDataDescriptors, - SAMSequenceDictionary sequenceDictionary, + public ReferenceOrderedDataSource(RMDTriplet fileDescriptor, + RMDTrackBuilder builder, + SAMSequenceDictionary referenceSequenceDictionary, GenomeLocParser genomeLocParser, - ValidationExclusion.TYPE validationExclusionType, - RMDTrack rod, boolean flashbackData ) { - this.rod = rod; - if (rod.supportsQuery()) - iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary, - genomeLocParser, - new RMDTrackBuilder(refMetaDataDescriptors,sequenceDictionary,genomeLocParser,validationExclusionType), - rod); - else - iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData ); + boolean flashbackData ) { + this.fileDescriptor = fileDescriptor; + this.builder = builder; + if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) { + iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor, + builder, + referenceSequenceDictionary, + genomeLocParser); + header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader(); + this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary(); + } + else { + RMDTrack track = builder.createInstanceOfTrack(fileDescriptor); + header = track.getHeader(); + this.sequenceDictionary = track.getSequenceDictionary(); + iteratorPool = new ReferenceOrderedDataPool(track,referenceSequenceDictionary,genomeLocParser,flashbackData); + } } /** @@ -72,15 +89,43 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @return Name of the underlying rod. 
*/ public String getName() { - return this.rod.getName(); + return fileDescriptor.getName(); + } + + public Class getType() { + return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + } + + public Class getRecordType() { + return builder.createCodec(getType(),getName()).getFeatureType(); + } + + public File getFile() { + return new File(fileDescriptor.getFile()); + } + + public Object getHeader() { + return header; } /** - * Return the underlying reference-ordered data. - * @return the underlying rod. + * Retrieves the sequence dictionary created by this ROD. + * @return */ - public RMDTrack getReferenceOrderedData() { - return this.rod; + public SAMSequenceDictionary getSequenceDictionary() { + return sequenceDictionary; + } + + /** + * helper function for determining if we are the same track based on name and record type + * + * @param name the name to match + * @param type the type to match + * + * @return true on a match, false if the name or type is different + */ + public boolean matchesNameAndRecordType(String name, Type type) { + return (name.equals(fileDescriptor.getName()) && (type.getClass().isAssignableFrom(getType().getClass()))); } /** @@ -120,12 +165,12 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * A pool of reference-ordered data iterators. 
*/ class ReferenceOrderedDataPool extends ResourcePool { - private final RMDTrack rod; + private final RMDTrack track; boolean flashbackData = false; - public ReferenceOrderedDataPool( SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) { + public ReferenceOrderedDataPool( RMDTrack track, SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, boolean flashbackData ) { super(sequenceDictionary,genomeLocParser); + this.track = track; this.flashbackData = flashbackData; - this.rod = rod; } /** @@ -134,7 +179,7 @@ class ReferenceOrderedDataPool extends ResourcePool { +class ReferenceOrderedQueryDataPool extends ResourcePool { // the reference-ordered data itself. - private final RMDTrack rod; + private final RMDTriplet fileDescriptor; // our tribble track builder private final RMDTrackBuilder builder; - public ReferenceOrderedQueryDataPool( SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, RMDTrackBuilder builder, RMDTrack rod ) { - super(sequenceDictionary,genomeLocParser); - this.rod = rod; + /** + * The header from this RMD, if present. + */ + private final Object header; + + /** + * The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's. 
+ */ + private final SAMSequenceDictionary sequenceDictionary; + + public ReferenceOrderedQueryDataPool(RMDTriplet fileDescriptor, RMDTrackBuilder builder, SAMSequenceDictionary referenceSequenceDictionary, GenomeLocParser genomeLocParser) { + super(referenceSequenceDictionary,genomeLocParser); + this.fileDescriptor = fileDescriptor; this.builder = builder; - // a little bit of a hack, but it saves us from re-reading the index from the file - this.addNewResource(rod.getReader()); + + // prepopulate one RMDTrack + RMDTrack track = builder.createInstanceOfTrack(fileDescriptor); + this.addNewResource(track); + + // Pull the proper header and sequence dictionary from the prepopulated track. + this.header = track.getHeader(); + this.sequenceDictionary = track.getSequenceDictionary(); + } + + public Object getHeader() { + return header; + } + + public SAMSequenceDictionary getSequenceDictionary() { + return sequenceDictionary; } @Override - protected FeatureSource createNewResource() { - return builder.createFeatureReader(rod.getType(),rod.getFile()).first; + protected RMDTrack createNewResource() { + return builder.createInstanceOfTrack(fileDescriptor); } @Override - protected FeatureSource selectBestExistingResource(DataStreamSegment segment, List availableResources) { - for (FeatureSource reader : availableResources) + protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List availableResources) { + for (RMDTrack reader : availableResources) if (reader != null) return reader; return null; } @Override - protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, FeatureSource resource) { + protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, RMDTrack track) { try { if (position instanceof MappedStreamSegment) { GenomeLoc pos = ((MappedStreamSegment) position).locus; - return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new 
FeatureToGATKFeatureIterator(genomeLocParser,resource.query(pos.getContig(),(int) pos.getStart(), (int) pos.getStop()),rod.getName())); + return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.query(pos)); } else { - return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new FeatureToGATKFeatureIterator(genomeLocParser,resource.iterator(),rod.getName())); + return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator()); } } catch (IOException e) { - throw new ReviewedStingException("Unable to create iterator for rod named " + rod.getName(),e); + throw new ReviewedStingException("Unable to create iterator for rod named " + fileDescriptor.getName(),e); } } @Override - protected void closeResource(FeatureSource resource) { - try { - resource.close(); - } catch (IOException e) { - throw new UserException.CouldNotReadInputFile("Unable to close reader for rod named " + rod.getName(),e); - } + protected void closeResource(RMDTrack track) { + track.close(); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java index 6797cb613..22f00ac67 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java @@ -30,7 +30,7 @@ abstract class ResourcePool { /** * Sequence dictionary. */ - protected final SAMSequenceDictionary sequenceDictionary; + protected final SAMSequenceDictionary referenceSequenceDictionary; /** * Builder/parser for GenomeLocs. 
@@ -52,8 +52,8 @@ abstract class ResourcePool { */ private Map resourceAssignments = new HashMap(); - protected ResourcePool(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser) { - this.sequenceDictionary = sequenceDictionary; + protected ResourcePool(SAMSequenceDictionary referenceSequenceDictionary,GenomeLocParser genomeLocParser) { + this.referenceSequenceDictionary = referenceSequenceDictionary; this.genomeLocParser = genomeLocParser; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java index 733190541..847ad2441 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java @@ -32,6 +32,15 @@ public class SAMReaderID { this.tags = tags; } + /** + * Creates an identifier for a SAM file from the name of the file. + * @param samFileName The source filename for SAM data. + * @param tags tags to use when creating a reader ID. + */ + public SAMReaderID(String samFileName, List tags) { + this(new File(samFileName),tags); + } + /** * Gets the tags associated with the given BAM file. * @return A collection of the tags associated with this file. 
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index ab11b9333..0a3168b56 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -80,23 +80,6 @@ public class RMDTrack { return file; } - public Class getRecordType() { - return recordType; - } - - /** - * helper function for determining if we are the same track based on name and record type - * - * @param name the name to match - * @param type the type to match - * - * @return true on a match, false if the name or type is different - */ - public boolean matchesNameAndRecordType(String name, Type type) { - return (name.equals(this.name) && (type.getClass().isAssignableFrom(this.type.getClass()))); - } - - /** * Create a track * @@ -130,15 +113,6 @@ public class RMDTrack { } } - /** - * do we support the query interface? - * - * @return true - */ - public boolean supportsQuery() { - return true; - } - public CloseableIterator query(GenomeLoc interval) throws IOException { return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index 038243cee..b3340b018 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -33,14 +33,12 @@ import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broad.tribble.source.BasicFeatureSource; import org.broad.tribble.util.LittleEndianOutputStream; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import 
org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SequenceDictionaryUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -55,6 +53,7 @@ import java.io.*; import java.util.*; + /** * * @author aaron @@ -146,45 +145,48 @@ public class RMDTrackBuilder extends PluginManager { /** * create a RMDTrack of the specified type * - * @param targetClass the target class of track - * @param name what to call the track - * @param inputFile the input file + * @param fileDescriptor a description of the type of track to build. 
* * @return an instance of the track * @throws RMDTrackCreationException * if we don't know of the target class or we couldn't create it */ - public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException { + public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException { + String name = fileDescriptor.getName(); + File inputFile = new File(fileDescriptor.getFile()); + + Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + if (featureCodecClass == null) + throw new UserException.BadArgumentValue("-B",fileDescriptor.getType()); + // return a feature reader track - Pair pair = createFeatureReader(targetClass, name, inputFile); - if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); - return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(targetClass,name)); - } - - /** - * create a tribble feature reader class, given the target class and the input file - * @param targetClass the target class, of a Tribble Codec type - * @param inputFile the input file, that corresponds to the feature type - * @return a pair of - */ - public Pair createFeatureReader(Class targetClass, File inputFile) { - return createFeatureReader(targetClass, "anonymous", inputFile); - } - - /** - * create a feature reader of the specified type - * @param targetClass the target codec type - * @param name the target name - * @param inputFile the input file to create the track from (of the codec type) - * @return the FeatureReader instance - */ - public Pair createFeatureReader(Class targetClass, String name, File inputFile) { Pair pair; if (inputFile.getAbsolutePath().endsWith(".gz")) - pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile); + pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile); 
else - pair = getFeatureSource(targetClass, name, inputFile); - return pair; + pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType()); + if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); + return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name)); + } + + /** + * Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream. + * @param targetClass Type of Tribble class to build. + * @param inputFile Input file type to use. + * @return An RMDTrack, suitable for accessing reference metadata. + */ + public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) { + // TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics. + String typeName = null; + for(Map.Entry trackType: getAvailableTrackNamesAndTypes().entrySet()) { + if(trackType.getValue().equals(targetClass)) + typeName = trackType.getKey(); + } + + if(typeName == null) + throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName()); + + return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE)); } /** @@ -197,7 +199,7 @@ public class RMDTrackBuilder extends PluginManager { * @param inputFile the file to load * @return a feature reader implementation */ - private Pair createBasicFeatureSourceNoAssumedIndex(Class targetClass, String name, File inputFile) { + private Pair createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) { // we might not know the index type, try loading with the default reader constructor logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file"); try { @@ -213,7 +215,7 @@ public class RMDTrackBuilder extends PluginManager { * @param name the name to assign this 
codec * @return the feature codec itself */ - private FeatureCodec createCodec(Class targetClass, String name) { + public FeatureCodec createCodec(Class targetClass, String name) { FeatureCodec codex = this.createByType(targetClass); if ( codex instanceof NameAwareCodec ) ((NameAwareCodec)codex).setName(name); @@ -227,33 +229,48 @@ public class RMDTrackBuilder extends PluginManager { * @param targetClass the target class * @param name the name of the codec * @param inputFile the tribble file to parse + * @param storageType How the RMD is streamed into the input file. * @return the input file as a FeatureReader */ - private Pair getFeatureSource(Class targetClass, String name, File inputFile) { - Pair reader; - try { - Index index = loadIndex(inputFile, createCodec(targetClass, name)); - try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } - catch ( ReviewedStingException e) { } + private Pair getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) { + // Feature source and sequence dictionary to use as the ultimate reference + FeatureSource featureSource = null; + SAMSequenceDictionary sequenceDictionary = null; - SAMSequenceDictionary dictFromIndex = getSequenceDictionaryFromProperties(index); + FeatureCodec codec = createCodec(targetClass, name); - // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match - if (dictFromIndex.size() == 0 && dict != null) { - File indexFile = Tribble.indexFile(inputFile); - setIndexSequenceDictionary(inputFile,index,dict,indexFile,true); - dictFromIndex = getSequenceDictionaryFromProperties(index); + // Detect whether or not this source should be indexed. 
+ boolean canBeIndexed = (storageType == RMDStorageType.FILE); + + if(canBeIndexed) { + try { + Index index = loadIndex(inputFile, codec); + try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } + catch (ReviewedStingException e) { } + + sequenceDictionary = getSequenceDictionaryFromProperties(index); + + // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match + if (sequenceDictionary.size() == 0 && dict != null) { + File indexFile = Tribble.indexFile(inputFile); + setIndexSequenceDictionary(inputFile,index,dict,indexFile,true); + sequenceDictionary = getSequenceDictionaryFromProperties(index); + } + + featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, codec); + } + catch (TribbleException e) { + throw new UserException(e.getMessage()); + } + catch (IOException e) { + throw new UserException.CouldNotCreateOutputFile(inputFile, "unable to write Tribble index", e); } - - FeatureSource source = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name)); - //source = new CachingFeatureSource(source, 100, 100000); - reader = new Pair(source, dictFromIndex); - } catch (TribbleException e) { - throw new UserException(e.getMessage()); - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(inputFile, "unable to write Tribble index", e); } - return reader; + else { + featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),codec,false); + } + + return new Pair(featureSource,sequenceDictionary); } /** @@ -402,29 +419,6 @@ public class RMDTrackBuilder extends PluginManager { return names; } - /** - * find the associated reference meta data - * - * @param argCollection the input arguments to the GATK. 
- * @param engine the GATK engine to bind the tracks to - * - * @return a list of RMDTracks, one for each -B option - */ - public List getReferenceMetaDataSources(GenomeAnalysisEngine engine, GATKArgumentCollection argCollection) { - // try and make the tracks given their requests - // create of live instances of the tracks - List tracks = new ArrayList(); - - // create instances of each of the requested types - for (RMDTriplet trip : inputs) { - Class featureCodecClass = getAvailableTrackNamesAndTypes().get(trip.getType().toUpperCase()); - if (featureCodecClass == null) - throw new UserException.BadArgumentValue("-B",trip.getType()); - tracks.add(createInstanceOfTrack(featureCodecClass, trip.getName(), new File(trip.getFile()))); - } - return tracks; - } - // --------------------------------------------------------------------------------------------------------- // static functions to work with the sequence dictionaries of indexes // --------------------------------------------------------------------------------------------------------- diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java index 8817b8d49..59ece0ee5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java @@ -16,15 +16,15 @@ import java.util.*; * Creates an interval list, given an RMDTrack */ public class RMDIntervalGenerator { - public RMDTrack track; + public ReferenceOrderedDataSource dataSource; /** * create a interval representation of a ROD track - * @param track the track + * @param dataSource the track */ - public RMDIntervalGenerator(RMDTrack track) { - if (track == null) throw new IllegalArgumentException("Track cannot be null"); - this.track = track; + public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) { + if (dataSource == null) 
throw new IllegalArgumentException("Data source cannot be null"); + this.dataSource = dataSource; } /** @@ -32,10 +32,10 @@ public class RMDIntervalGenerator { * @return a list of genome locations */ public List toGenomeLocList() { - Iterator iter = track.getIterator(); + Iterator iter = dataSource.seek((GenomeLoc)null); List locations = new ArrayList(); while (iter.hasNext()) { - GATKFeature feature = iter.next(); + RODRecordList feature = iter.next(); GenomeLoc loc = feature.getLocation(); if (loc != null) locations.add(loc); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java index 7fe3b6731..3f23d94c8 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDTriplet.java @@ -24,27 +24,55 @@ package org.broadinstitute.sting.gatk.refdata.utils; -/** a helper class to manage our triplets of data for the -B command line option (name, type, file) */ +/** + * a helper class to manage our triplets of data for the -B command line option (name, type, file) + * TODO: The presence of four datapoints here suggests that this class' name isn't sufficient to describe its function. Rename. + */ public class RMDTriplet { - private String name; - private String type; - private String file; + public enum RMDStorageType { FILE, STREAM }; - public RMDTriplet(String name, String type, String file) { + private final String name; + private final String type; + private final String file; + private final RMDStorageType storageType; + + public RMDTriplet(final String name, final String type, final String file, final RMDStorageType storageType) { this.name = name; this.type = type; this.file = file; + this.storageType = storageType; } + /** + * Gets the name of this track. RefMetaDataTrackers can use this identifier to retrieve data of a certain type. + * @return Name associated with this track. 
+ */ public String getName() { return name; } + /** + * Gets the type of this track. Informs the GATK how to parse this file type. + * @return Type associated with this track. + */ public String getType() { return type; } + /** + * Gets the filename representing this track. Data is loaded from this file. + * @return Filename of the RMD. + */ public String getFile() { return file; } + + /** + * The type of storage being used for this metadata track. Right now, can be either a + * file type (can be indexed) or a stream type (can't be indexed). + * @return Storage type for this RMD 'triplet'. + */ + public RMDStorageType getStorageType() { + return storageType; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java index ad18dbf92..d1b0c953c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java @@ -176,7 +176,7 @@ public class VariantsToVCF extends RodWalker { throw new UserException.BadInput("No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records"); RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe); - dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, dbsnpDataSource.getReferenceOrderedData().getFile()).getIterator(); + dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, dbsnpDataSource.getFile()).getIterator(); // Note that we should really use some sort of seekable iterator here so that the search doesn't take forever // (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to) } diff --git 
a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index 761576c73..c54eb190f 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -94,11 +94,11 @@ public class GenomicAnnotator extends RodWalker implements Tre final Set allFullyQualifiedColumnNames = new LinkedHashSet(); final Set allBindingNames = new LinkedHashSet(); for(ReferenceOrderedDataSource ds : getToolkit().getRodDataSources()) { - if(! ds.getReferenceOrderedData().getType().equals(AnnotatorInputTableCodec.class)) { + if(! ds.getType().equals(AnnotatorInputTableCodec.class)) { continue; //skip all non-AnnotatorInputTable files. } final String bindingName = ds.getName(); - File file = ds.getReferenceOrderedData().getFile(); + File file = ds.getFile(); allBindingNames.add(bindingName); try { final ArrayList header = AnnotatorInputTableCodec.readHeader(file); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java index 074bac4b5..fe6b2c012 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java @@ -172,14 +172,14 @@ public class TranscriptToGenomicInfo extends RodWalker { final ArrayList header; try { - header = AnnotatorInputTableCodec.readHeader(transcriptsDataSource.getReferenceOrderedData().getFile()); + header = AnnotatorInputTableCodec.readHeader(transcriptsDataSource.getFile()); } catch(Exception e) { - throw new 
UserException.MalformedFile(transcriptsDataSource.getReferenceOrderedData().getFile(), "Failed when attempting to read header from file", e); + throw new UserException.MalformedFile(transcriptsDataSource.getFile(), "Failed when attempting to read header from file", e); } for ( String columnName : GENE_NAME_COLUMNS ) { if ( !header.contains(columnName) ) - throw new UserException.CommandLineException("The column name '" + columnName + "' provided to -n doesn't match any of the column names in: " + transcriptsDataSource.getReferenceOrderedData().getFile()); + throw new UserException.CommandLineException("The column name '" + columnName + "' provided to -n doesn't match any of the column names in: " + transcriptsDataSource.getFile()); } //init outputColumnNames list diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index f713e341e..37e9dc4d4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -96,9 +96,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { final List dataSources = this.getToolkit().getRodDataSources(); for( final ReferenceOrderedDataSource source : dataSources ) { - final RMDTrack rod = source.getReferenceOrderedData(); - - if (rod.getName().equals(COMP_ROD_NAME)) { + if (source.getName().equals(COMP_ROD_NAME)) { hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java 
b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 56443415b..972dbe511 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -26,13 +26,13 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; -import org.broad.tribble.FeatureSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.*; import org.broadinstitute.sting.gatk.walkers.*; @@ -44,7 +44,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; -import java.io.IOException; import java.io.PrintStream; import java.util.*; @@ -406,15 +405,9 @@ public class DepthOfCoverageWalker extends LocusWalker intervalStats) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index b97a92b8a..869705173 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -110,7 +110,7 @@ public class VariantFiltrationWalker extends RodWalker { List dataSources = getToolkit().getRodDataSources(); for ( 
ReferenceOrderedDataSource source : dataSources ) { - if ( source.getReferenceOrderedData().getName().equals("mask") ) { + if ( source.getName().equals("mask") ) { hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask")); break; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index eaea000e4..1f3263590 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.*; -import org.broad.tribble.FeatureSource; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.Genotype; @@ -35,9 +34,10 @@ import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -56,7 +56,6 @@ import org.broadinstitute.sting.utils.collections.CircularArray; import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.commandline.Argument; import 
org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.commandline.Hidden; import java.io.*; @@ -228,8 +227,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { for ( Map.Entry commandLineArg : commandLineArgs.entrySet() ) headerInfo.add(new VCFHeaderLine(String.format("IGv2_%s", commandLineArg.getKey()), commandLineArg.getValue())); // also, the list of input bams - for ( File file : getToolkit().getArguments().samFiles ) - headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", file.getName())); + for ( String fileName : getToolkit().getArguments().samFiles ) + headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", fileName)); return headerInfo; } @@ -251,15 +250,11 @@ public class IndelGenotyperV2Walker extends ReadWalker { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe); - FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; + RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName)); - try { - refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), - getToolkit().getGenomeLocParser(), - new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq")); - } catch (IOException e) { - throw new UserException.CouldNotReadInputFile(new File(RefseqFileName), "Write failed", e); - } + refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + refseq.getIterator()); } if ( refseqIterator == null ) logger.info("No gene annotations available"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java 
b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java index 7451565c8..5a14a921e 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java @@ -164,7 +164,7 @@ public class CycleQualityWalker extends ReadWalker { if ( HTML ) { out.println("

Cycle Quality QC

\n"); out.println("File(s) analyzed:
"); - for ( File f : getToolkit().getArguments().samFiles) out.println(f.toString()+"
"); + for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"
"); out.println("
"); } if ( HTML ) out.println("

"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java index 1fd673940..13110f725 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java @@ -126,7 +126,7 @@ public class ProfileRodSystem extends RodWalker { private File getRodFile() { List rods = this.getToolkit().getRodDataSources(); ReferenceOrderedDataSource rod = rods.get(0); - return rod.getReferenceOrderedData().getFile(); + return rod.getFile(); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 21ef32e87..187b26426 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -187,9 +187,9 @@ public class CountCovariatesWalker extends LocusWalker { ReferenceOrderedData snp_mask; if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe); - CloseableIterator iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator(); + CloseableIterator iter = builder.createInstanceOfTrack(DbSNPCodec.class,new java.io.File(SNP_MASK)).getIterator(); snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter); } else { diff --git 
a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index ab2c301e9..64840c597 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -65,7 +65,7 @@ public class ValidateVariants extends RodWalker { public void initialize() { for ( ReferenceOrderedDataSource source : getToolkit().getRodDataSources() ) { if ( source.getName().equals(TARGET_ROD_NAME) ) { - file = source.getReferenceOrderedData().getFile(); + file = source.getFile(); break; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java index 7f4666231..9c370a597 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java @@ -1,15 +1,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers; -import org.broad.tribble.FeatureSource; +import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import 
org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; @@ -34,7 +35,7 @@ import java.io.PrintStream; public class DbSNPWindowCounter extends LocusWalker { // what we read in new tracks with - private FeatureSource reader; + private RMDTrack track; @Output private PrintStream out; @@ -50,12 +51,12 @@ public class DbSNPWindowCounter extends LocusWalker { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe); - reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first; + track = builder.createInstanceOfTrack(DbSNPCodec.class,myDbSNPFile); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - CloseableTribbleIterator dbSNPs; + CloseableIterator dbSNPs; // our upstream and downstream window locations int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0); @@ -63,19 +64,17 @@ public class DbSNPWindowCounter extends LocusWalker { // query the dnSNP iterator try { - dbSNPs = reader.query(context.getContig(), - windowStart, - windowStop); + dbSNPs = track.query(getToolkit().getGenomeLocParser().createGenomeLoc(context.getContig(),windowStart,windowStop)); } catch (IOException e) { throw new UserException.CouldNotReadInputFile(myDbSNPFile, e); } // count the number of dbSNPs we've seen int counter = 0; - for (DbSNPFeature feature: dbSNPs) + for(; dbSNPs.hasNext(); dbSNPs.next()) /* hasNext() does not advance the iterator; next() must consume each record or the loop never terminates */ counter++; out.println(context.getContig() + ":" + windowStart + "-" + context.getContig() + ":" + windowStop + "=" + - counter + " (dnSNP records)"); + counter + " (dbSNP records)"); return 1; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index d6277a2b2..6dc1a4443 
100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -10,18 +10,16 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.vcf.VCFUtils; import java.io.File; -import java.io.IOException; import java.util.*; public class IndelAnnotator extends RodWalker { @@ -38,15 +36,10 @@ public class IndelAnnotator extends RodWalker { RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe); - FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; + RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName)); - try { - refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), - getToolkit().getGenomeLocParser(), - new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq")); - } catch 
(IOException e) { - throw new UserException.CouldNotReadInputFile(RefseqFileName, e); - } + refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(),refseq.getIterator()); logger.info("Using RefSeq annotations from " + RefseqFileName); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java index 5abbe87dc..5c09d984f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java @@ -86,11 +86,10 @@ public class VCF4WriterTestWalker extends RodWalker { vcfWriter = new StandardVCFWriter(new File(OUTPUT_FILE)); VCFHeader header = null; for( final ReferenceOrderedDataSource source : dataSources ) { - final RMDTrack rod = source.getReferenceOrderedData(); - if(rod.getName().equalsIgnoreCase(INPUT_ROD_NAME)) { + if(source.getName().equalsIgnoreCase(INPUT_ROD_NAME)) { try { - AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath())); + AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(source.getFile().getAbsolutePath())); header = (VCFHeader)vcf4codec.readHeader(lineReader); out.printf("Read %d header lines%n", header.getMetaData().size()); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java index 85d687e48..ae72d6b7f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java @@ -55,9 +55,9 @@ public class RodSystemValidationWalker extends RodWalker 
{ // enumerate the list of ROD's we've loaded rodList = this.getToolkit().getRodDataSources(); for (ReferenceOrderedDataSource rod : rodList) { - out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getType()); - out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getFile()); - out.println(rod.getName() + DIVIDER + md5sum(rod.getReferenceOrderedData().getFile())); + out.println(rod.getName() + DIVIDER + rod.getType()); + out.println(rod.getName() + DIVIDER + rod.getFile()); + out.println(rod.getName() + DIVIDER + md5sum(rod.getFile())); } out.println("Data:"); } diff --git a/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java index d43492da9..6ad1a6b97 100755 --- a/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java @@ -57,9 +57,8 @@ public class VCFUtils { if ( rodNames != null && !rodNames.contains(source.getName()) ) continue; - RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader ) - data.put(rod.getName(), (VCFHeader)rod.getHeader()); + if ( source.getHeader() != null && source.getHeader() instanceof VCFHeader ) + data.put(source.getName(), (VCFHeader)source.getHeader()); } return data; @@ -96,9 +95,8 @@ public class VCFUtils { if ( rodNames != null && !rodNames.contains(source.getName()) ) continue; - RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getRecordType().equals(VariantContext.class)) { - VCFHeader header = (VCFHeader)rod.getHeader(); + if ( source.getRecordType().equals(VariantContext.class)) { + VCFHeader header = (VCFHeader)source.getHeader(); if ( header != null ) fields.addAll(header.getMetaData()); } diff --git a/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java b/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java index 
6f1a7e332..59edf934e 100755 --- a/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java @@ -76,8 +76,8 @@ public class GATKArgumentCollectionUnitTest extends BaseTest { wArgs.put("wArgType3", "Arg3"); collect.walkerArgs = wArgs; - List input = new ArrayList(); - input.add(new File("test.file")); + List input = new ArrayList(); + input.add("test.file"); collect.samFiles = input; collect.strictnessLevel = SAMFileReader.ValidationStringency.STRICT; collect.referenceFile = new File("referenceFile".toLowerCase()); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 2dc458b2a..0ab804eb9 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -7,10 +7,9 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.table.TableCodec; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; @@ -78,14 +77,9 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { */ @Test public void 
testSingleBinding() { - File file = new File(testDir + "TabularDataTest.dat"); - RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file); - ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(Collections.singleton(new RMDTriplet("tableTest","Table",file.getAbsolutePath())), - seq.getSequenceDictionary(), - genomeLocParser, - null, - track, - false); + String fileName = testDir + "TabularDataTest.dat"; + RMDTriplet triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE); + ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); @@ -107,14 +101,11 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { public void testMultipleBinding() { File file = new File(testDir + "TabularDataTest.dat"); - RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath()); - RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file); - ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(Collections.singleton(testTriplet1),seq.getSequenceDictionary(),genomeLocParser,null,track,false); - - RMDTriplet testTriplet2 = new RMDTriplet("tableTest2","Table",file.getAbsolutePath()); - RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file); - ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(Collections.singleton(testTriplet2),seq.getSequenceDictionary(),genomeLocParser,null,track2,false); + RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath(),RMDStorageType.FILE); + ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(testTriplet1,builder,seq.getSequenceDictionary(),genomeLocParser,false); + RMDTriplet testTriplet2 = new 
RMDTriplet("tableTest2","Table",file.getAbsolutePath(),RMDStorageType.FILE); + ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(testTriplet2,builder,seq.getSequenceDictionary(),genomeLocParser,false); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java index 6caf64bd8..555e429b8 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java @@ -2,11 +2,12 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.features.table.TableCodec; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; @@ -60,14 +61,14 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @BeforeMethod public void setUp() { - File file = new File(testDir + "TabularDataTest.dat"); + String fileName = testDir + "TabularDataTest.dat"; RMDTrackBuilder builder = new 
RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null); - rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file); + rod = builder.createInstanceOfTrack(new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE)); } @Test public void testCreateSingleIterator() { - ResourcePool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); + ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -88,7 +89,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testCreateMultipleIterators() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); // Create a new iterator at position 2. 
@@ -138,7 +139,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorConservation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -173,7 +174,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorCreation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser, rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");