Change the interface for RMDTrackBuilder so that it always mandates the specification

of a sequence dictionary and related info.  This will hopefully eliminate the cases in
which the refseq track depends on a sequence dictionary / contig parser that hasn't been
specified.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4700 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-11-17 19:00:17 +00:00
parent 367cc9135f
commit 90711d445c
11 changed files with 40 additions and 44 deletions

View File

@ -326,11 +326,8 @@ public abstract class AbstractGenomeAnalysisEngine {
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
RMDTrackBuilder manager = new RMDTrackBuilder();
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
RMDTrackBuilder manager = new RMDTrackBuilder(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
validateSuppliedReferenceOrderedData(tracks);
@ -591,7 +588,11 @@ public abstract class AbstractGenomeAnalysisEngine {
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<RMDTrack> rods) {
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (RMDTrack rod : rods)
dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,rod,flashbackData()));
dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),
genomeLocParser,
argCollection.unsafe,
rod,
flashbackData()));
return dataSources;
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
@ -49,10 +50,16 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* Create a new reference-ordered data source.
* @param rod the reference ordered data
*/
public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) {
public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,
GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType,
RMDTrack rod, boolean flashbackData ) {
this.rod = rod;
if (rod.supportsQuery())
iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,genomeLocParser,new RMDTrackBuilder(),rod);
iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,
genomeLocParser,
new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType),
rod);
else
iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData );
}

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -79,7 +80,7 @@ public class RMDIndexer extends CommandLineProgram {
genomeLocParser = new GenomeLocParser(ref);
// get a track builder
RMDTrackBuilder builder = new RMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
// find the types available to the track builders
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes();
@ -100,13 +101,8 @@ public class RMDIndexer extends CommandLineProgram {
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
if (referenceFile != null) {
// create a reference file reader
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile);
// add writing of the sequence dictionary, if supplied
builder.setIndexSequenceDictionary(inputFileSource, index, seq.getSequenceDictionary(), indexFile, false);
}
// add writing of the sequence dictionary, if supplied
builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);
// create the output stream, and write the index
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));

View File

@ -92,11 +92,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
*/
private ValidationExclusion.TYPE validationExclusionType;
/** Create a new plugin manager. */
public RMDTrackBuilder() {
super(FeatureCodec.class, "Codecs", "Codec");
}
/**
* Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined.
* @param dict Sequence dictionary to use.
@ -105,20 +100,10 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
*/
public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) {
super(FeatureCodec.class, "Codecs", "Codec");
setSequenceDictionary(dict,genomeLocParser,validationExclusionType);
}
/**
* Establish location-aware parsing and services for relevant reference metadata.
* @param dict Sequence dictionary to use.
* @param genomeLocParser Location parser to use.
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
*/
public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser,ValidationExclusion.TYPE validationExclusionType) {
this.dict = dict;
this.genomeLocParser = genomeLocParser;
this.validationExclusionType = validationExclusionType;
}
}
/** @return a list of all available track types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes() {

View File

@ -248,7 +248,9 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
if ( RefseqFileName != null ) {
logger.info("Using RefSeq annotations from "+RefseqFileName);
RMDTrackBuilder builder = new RMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
getToolkit().getArguments().unsafe);
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
try {

View File

@ -47,7 +47,9 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
public void initialize() {
RMDTrackBuilder builder = new RMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
getToolkit().getArguments().unsafe);
reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first;
}

View File

@ -35,7 +35,9 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
public void initialize() {
if ( RefseqFileName != null ) {
RMDTrackBuilder builder = new RMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
getToolkit().getArguments().unsafe);
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
try {

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
@ -67,7 +68,10 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
WalkerManager walkerManager = new WalkerManager();
FilterManager filterManager = new FilterManager();
RMDTrackBuilder trackBuilder = new RMDTrackBuilder();
// HACK: We're currently relying on the fact that RMDTrackBuilder is used only from RMD type lookups, not
// RMD track location. Therefore, no sequence dictionary is required. In the future, we should separate
// RMD track lookups from track creation.
RMDTrackBuilder trackBuilder = new RMDTrackBuilder(null,null,ValidationExclusion.TYPE.ALL);
/**
* Required main method implementation.

View File

@ -55,8 +55,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
// sequence
seq = new IndexedFastaSequenceFile(new File(hg18Reference));
genomeLocParser = new GenomeLocParser(seq);
builder = new RMDTrackBuilder();
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
}
/**
@ -79,7 +78,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
public void testSingleBinding() {
File file = new File(testDir + "TabularDataTest.dat");
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file);
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false);
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track,false);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
@ -103,9 +102,9 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file);
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false);
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track,false);
RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file);
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track2,false);
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track2,false);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));

View File

@ -60,8 +60,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
@BeforeMethod
public void setUp() {
File file = new File(testDir + "TabularDataTest.dat");
RMDTrackBuilder builder = new RMDTrackBuilder();
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file);
}

View File

@ -60,7 +60,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
public void setup() {
seq = new IndexedFastaSequenceFile(new File(b36KGReference));
genomeLocParser = new GenomeLocParser(seq);
builder = new RMDTrackBuilder();
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
}
@Test
@ -153,7 +153,6 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
File vcfFile = createCorrectDateIndexFile(new File(validationDataLocation + "/ROD_validation/newerTribbleTrack.vcf"));
Long indexTimeStamp = Tribble.indexFile(vcfFile).lastModified();
try {
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
Index idx = builder.loadIndex(vcfFile, new VCFCodec());
// catch any exception; this call should pass correctly
SAMSequenceDictionary dict = RMDTrackBuilder.getSequenceDictionaryFromProperties(idx);