diff --git a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java index e2ff2b68f..6b06ec97c 100755 --- a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java @@ -326,11 +326,8 @@ public abstract class AbstractGenomeAnalysisEngine { sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles); - RMDTrackBuilder manager = new RMDTrackBuilder(); - // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference - manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); - + RMDTrackBuilder manager = new RMDTrackBuilder(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); List tracks = manager.getReferenceMetaDataSources(this,argCollection); validateSuppliedReferenceOrderedData(tracks); @@ -591,7 +588,11 @@ public abstract class AbstractGenomeAnalysisEngine { private List getReferenceOrderedDataSources(List rods) { List dataSources = new ArrayList(); for (RMDTrack rod : rods) - dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,rod,flashbackData())); + dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(), + genomeLocParser, + argCollection.unsafe, + rod, + flashbackData())); return dataSources; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index c652f32cf..d925f1352 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.FeatureSource; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; @@ -49,10 +50,16 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * Create a new reference-ordered data source. * @param rod the reference ordered data */ - public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) { + public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary, + GenomeLocParser genomeLocParser, + ValidationExclusion.TYPE validationExclusionType, + RMDTrack rod, boolean flashbackData ) { this.rod = rod; if (rod.supportsQuery()) - iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,genomeLocParser,new RMDTrackBuilder(),rod); + iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary, + genomeLocParser, + new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType), + rod); else iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData ); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index bfc6c2b18..e7d12cd52 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -11,6 +11,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -79,7 +80,7 @@ public class RMDIndexer extends CommandLineProgram { genomeLocParser = new GenomeLocParser(ref); // get a track builder - RMDTrackBuilder builder = new RMDTrackBuilder(); + RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL); // find the types available to the track builders Map typeMapping = builder.getAvailableTrackNamesAndTypes(); @@ -100,13 +101,8 @@ public class RMDIndexer extends CommandLineProgram { Index index = IndexFactory.createIndex(inputFileSource, codec, approach); - if (referenceFile != null) { - // create a reference file reader - IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile); - - // add writing of the sequence dictionary, if supplied - builder.setIndexSequenceDictionary(inputFileSource, index, seq.getSequenceDictionary(), indexFile, false); - } + // add writing of the sequence dictionary, if supplied + builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false); // create the output stream, and write the index LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile)); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index 24d36af36..8f8a556ca 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -92,11 +92,6 @@ public class RMDTrackBuilder extends PluginManager { */ private ValidationExclusion.TYPE validationExclusionType; - /** Create a new plugin manager. */ - public RMDTrackBuilder() { - super(FeatureCodec.class, "Codecs", "Codec"); - } - /** * Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined. * @param dict Sequence dictionary to use. @@ -105,20 +100,10 @@ public class RMDTrackBuilder extends PluginManager { */ public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { super(FeatureCodec.class, "Codecs", "Codec"); - setSequenceDictionary(dict,genomeLocParser,validationExclusionType); - } - - /** - * Establish location-aware parsing and services for relevant reference metadata. - * @param dict Sequence dictionary to use. - * @param genomeLocParser Location parser to use. - * @param validationExclusionType Types of validations to exclude, for sequence dictionary verification. - */ - public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser,ValidationExclusion.TYPE validationExclusionType) { this.dict = dict; this.genomeLocParser = genomeLocParser; this.validationExclusionType = validationExclusionType; - } + } /** @return a list of all available track types we currently have access to create */ public Map getAvailableTrackNamesAndTypes() { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index b3ebd57fa..eaea000e4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -248,7 +248,9 @@ public class IndelGenotyperV2Walker extends ReadWalker { if ( RefseqFileName != null ) { logger.info("Using RefSeq annotations from "+RefseqFileName); - RMDTrackBuilder builder = new RMDTrackBuilder(); + RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + getToolkit().getArguments().unsafe); FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; try { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java index 3c5c27173..7f4666231 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java @@ -47,7 +47,9 @@ public class DbSNPWindowCounter extends LocusWalker { public void initialize() { - RMDTrackBuilder builder = new RMDTrackBuilder(); + RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + getToolkit().getArguments().unsafe); reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index c07f2f168..d6277a2b2 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -35,7 +35,9 @@ public class IndelAnnotator extends RodWalker { public void initialize() { if ( RefseqFileName != null ) { - RMDTrackBuilder builder = new RMDTrackBuilder(); + RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + getToolkit().getArguments().unsafe); FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; try { diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 06d7cadef..fb626b859 100644 --- a/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; @@ -67,7 +68,10 @@ public class GATKExtensionsGenerator extends CommandLineProgram { GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine(); WalkerManager walkerManager = new WalkerManager(); FilterManager filterManager = new FilterManager(); - RMDTrackBuilder trackBuilder = new RMDTrackBuilder(); + // HACK: We're currently relying on the fact that RMDTrackBuilder is used only from RMD type lookups, not + // RMD track location. Therefore, no sequence dictionary is required. In the future, we should separate + // RMD track lookups from track creation. + RMDTrackBuilder trackBuilder = new RMDTrackBuilder(null,null,ValidationExclusion.TYPE.ALL); /** * Required main method implementation. diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 5ba5cc034..a7dbf6e7c 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -55,8 +55,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { // sequence seq = new IndexedFastaSequenceFile(new File(hg18Reference)); genomeLocParser = new GenomeLocParser(seq); - builder = new RMDTrackBuilder(); - builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null); + builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null); } /** @@ -79,7 +78,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { public void testSingleBinding() { File file = new File(testDir + "TabularDataTest.dat"); RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file); - ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false); + ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track,false); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); @@ -103,9 +102,9 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file); - ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false); + ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track,false); RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file); - ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track2,false); + ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track2,false); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java index 3652ef3ef..85f82c839 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java @@ -60,8 +60,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @BeforeMethod public void setUp() { File file = new File(testDir + "TabularDataTest.dat"); - RMDTrackBuilder builder = new RMDTrackBuilder(); - builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null); + RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null); rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java index c62175182..da1763b76 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java @@ -60,7 +60,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest { public void setup() { seq = new IndexedFastaSequenceFile(new File(b36KGReference)); genomeLocParser = new GenomeLocParser(seq); - builder = new RMDTrackBuilder(); + builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null); } @Test @@ -153,7 +153,6 @@ public class RMDTrackBuilderUnitTest extends BaseTest { File vcfFile = createCorrectDateIndexFile(new File(validationDataLocation + "/ROD_validation/newerTribbleTrack.vcf")); Long indexTimeStamp = Tribble.indexFile(vcfFile).lastModified(); try { - builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null); Index idx = builder.loadIndex(vcfFile, new VCFCodec()); // catch any exception; this call should pass correctly SAMSequenceDictionary dict = RMDTrackBuilder.getSequenceDictionaryFromProperties(idx);