Change the interface for RMDTrackBuilder, therefore always mandating the specification
of a sequence dictionary and related info. This will hopefully eliminate the cases in which the refseq track depends on a sequence dictionary / contig parser that hasn't been specified. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4700 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
367cc9135f
commit
90711d445c
|
|
@ -326,11 +326,8 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
|
|
||||||
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
||||||
|
|
||||||
RMDTrackBuilder manager = new RMDTrackBuilder();
|
|
||||||
|
|
||||||
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
||||||
manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
RMDTrackBuilder manager = new RMDTrackBuilder(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||||
|
|
||||||
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
|
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
|
||||||
validateSuppliedReferenceOrderedData(tracks);
|
validateSuppliedReferenceOrderedData(tracks);
|
||||||
|
|
||||||
|
|
@ -591,7 +588,11 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<RMDTrack> rods) {
|
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<RMDTrack> rods) {
|
||||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||||
for (RMDTrack rod : rods)
|
for (RMDTrack rod : rods)
|
||||||
dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,rod,flashbackData()));
|
dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),
|
||||||
|
genomeLocParser,
|
||||||
|
argCollection.unsafe,
|
||||||
|
rod,
|
||||||
|
flashbackData()));
|
||||||
return dataSources;
|
return dataSources;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broad.tribble.FeatureSource;
|
import org.broad.tribble.FeatureSource;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
|
|
@ -49,10 +50,16 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
||||||
* Create a new reference-ordered data source.
|
* Create a new reference-ordered data source.
|
||||||
* @param rod the reference ordered data
|
* @param rod the reference ordered data
|
||||||
*/
|
*/
|
||||||
public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) {
|
public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,
|
||||||
|
GenomeLocParser genomeLocParser,
|
||||||
|
ValidationExclusion.TYPE validationExclusionType,
|
||||||
|
RMDTrack rod, boolean flashbackData ) {
|
||||||
this.rod = rod;
|
this.rod = rod;
|
||||||
if (rod.supportsQuery())
|
if (rod.supportsQuery())
|
||||||
iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,genomeLocParser,new RMDTrackBuilder(),rod);
|
iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,
|
||||||
|
genomeLocParser,
|
||||||
|
new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType),
|
||||||
|
rod);
|
||||||
else
|
else
|
||||||
iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData );
|
iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
@ -79,7 +80,7 @@ public class RMDIndexer extends CommandLineProgram {
|
||||||
genomeLocParser = new GenomeLocParser(ref);
|
genomeLocParser = new GenomeLocParser(ref);
|
||||||
|
|
||||||
// get a track builder
|
// get a track builder
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
|
||||||
|
|
||||||
// find the types available to the track builders
|
// find the types available to the track builders
|
||||||
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes();
|
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes();
|
||||||
|
|
@ -100,13 +101,8 @@ public class RMDIndexer extends CommandLineProgram {
|
||||||
|
|
||||||
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
|
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
|
||||||
|
|
||||||
if (referenceFile != null) {
|
// add writing of the sequence dictionary, if supplied
|
||||||
// create a reference file reader
|
builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);
|
||||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile);
|
|
||||||
|
|
||||||
// add writing of the sequence dictionary, if supplied
|
|
||||||
builder.setIndexSequenceDictionary(inputFileSource, index, seq.getSequenceDictionary(), indexFile, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
// create the output stream, and write the index
|
// create the output stream, and write the index
|
||||||
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||||
|
|
|
||||||
|
|
@ -92,11 +92,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
*/
|
*/
|
||||||
private ValidationExclusion.TYPE validationExclusionType;
|
private ValidationExclusion.TYPE validationExclusionType;
|
||||||
|
|
||||||
/** Create a new plugin manager. */
|
|
||||||
public RMDTrackBuilder() {
|
|
||||||
super(FeatureCodec.class, "Codecs", "Codec");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined.
|
* Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined.
|
||||||
* @param dict Sequence dictionary to use.
|
* @param dict Sequence dictionary to use.
|
||||||
|
|
@ -105,20 +100,10 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
*/
|
*/
|
||||||
public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) {
|
public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) {
|
||||||
super(FeatureCodec.class, "Codecs", "Codec");
|
super(FeatureCodec.class, "Codecs", "Codec");
|
||||||
setSequenceDictionary(dict,genomeLocParser,validationExclusionType);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Establish location-aware parsing and services for relevant reference metadata.
|
|
||||||
* @param dict Sequence dictionary to use.
|
|
||||||
* @param genomeLocParser Location parser to use.
|
|
||||||
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
|
|
||||||
*/
|
|
||||||
public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser,ValidationExclusion.TYPE validationExclusionType) {
|
|
||||||
this.dict = dict;
|
this.dict = dict;
|
||||||
this.genomeLocParser = genomeLocParser;
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.validationExclusionType = validationExclusionType;
|
this.validationExclusionType = validationExclusionType;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @return a list of all available track types we currently have access to create */
|
/** @return a list of all available track types we currently have access to create */
|
||||||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
||||||
|
|
|
||||||
|
|
@ -248,7 +248,9 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
if ( RefseqFileName != null ) {
|
if ( RefseqFileName != null ) {
|
||||||
logger.info("Using RefSeq annotations from "+RefseqFileName);
|
logger.info("Using RefSeq annotations from "+RefseqFileName);
|
||||||
|
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||||
|
getToolkit().getGenomeLocParser(),
|
||||||
|
getToolkit().getArguments().unsafe);
|
||||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,9 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
|
||||||
|
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||||
|
getToolkit().getGenomeLocParser(),
|
||||||
|
getToolkit().getArguments().unsafe);
|
||||||
reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first;
|
reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,9 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
if ( RefseqFileName != null ) {
|
if ( RefseqFileName != null ) {
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||||
|
getToolkit().getGenomeLocParser(),
|
||||||
|
getToolkit().getArguments().unsafe);
|
||||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||||
|
|
@ -67,7 +68,10 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
||||||
GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
|
GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
|
||||||
WalkerManager walkerManager = new WalkerManager();
|
WalkerManager walkerManager = new WalkerManager();
|
||||||
FilterManager filterManager = new FilterManager();
|
FilterManager filterManager = new FilterManager();
|
||||||
RMDTrackBuilder trackBuilder = new RMDTrackBuilder();
|
// HACK: We're currently relying on the fact that RMDTrackBuilder is used only from RMD type lookups, not
|
||||||
|
// RMD track location. Therefore, no sequence dictionary is required. In the future, we should separate
|
||||||
|
// RMD track lookups from track creation.
|
||||||
|
RMDTrackBuilder trackBuilder = new RMDTrackBuilder(null,null,ValidationExclusion.TYPE.ALL);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Required main method implementation.
|
* Required main method implementation.
|
||||||
|
|
|
||||||
|
|
@ -55,8 +55,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
// sequence
|
// sequence
|
||||||
seq = new IndexedFastaSequenceFile(new File(hg18Reference));
|
seq = new IndexedFastaSequenceFile(new File(hg18Reference));
|
||||||
genomeLocParser = new GenomeLocParser(seq);
|
genomeLocParser = new GenomeLocParser(seq);
|
||||||
builder = new RMDTrackBuilder();
|
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
|
||||||
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -79,7 +78,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
public void testSingleBinding() {
|
public void testSingleBinding() {
|
||||||
File file = new File(testDir + "TabularDataTest.dat");
|
File file = new File(testDir + "TabularDataTest.dat");
|
||||||
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file);
|
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file);
|
||||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false);
|
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track,false);
|
||||||
|
|
||||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
|
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||||
|
|
||||||
|
|
@ -103,9 +102,9 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
|
||||||
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file);
|
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file);
|
||||||
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false);
|
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track,false);
|
||||||
RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file);
|
RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file);
|
||||||
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track2,false);
|
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track2,false);
|
||||||
|
|
||||||
|
|
||||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
|
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||||
|
|
|
||||||
|
|
@ -60,8 +60,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void setUp() {
|
public void setUp() {
|
||||||
File file = new File(testDir + "TabularDataTest.dat");
|
File file = new File(testDir + "TabularDataTest.dat");
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
|
||||||
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
|
|
||||||
rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file);
|
rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
||||||
public void setup() {
|
public void setup() {
|
||||||
seq = new IndexedFastaSequenceFile(new File(b36KGReference));
|
seq = new IndexedFastaSequenceFile(new File(b36KGReference));
|
||||||
genomeLocParser = new GenomeLocParser(seq);
|
genomeLocParser = new GenomeLocParser(seq);
|
||||||
builder = new RMDTrackBuilder();
|
builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -153,7 +153,6 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
||||||
File vcfFile = createCorrectDateIndexFile(new File(validationDataLocation + "/ROD_validation/newerTribbleTrack.vcf"));
|
File vcfFile = createCorrectDateIndexFile(new File(validationDataLocation + "/ROD_validation/newerTribbleTrack.vcf"));
|
||||||
Long indexTimeStamp = Tribble.indexFile(vcfFile).lastModified();
|
Long indexTimeStamp = Tribble.indexFile(vcfFile).lastModified();
|
||||||
try {
|
try {
|
||||||
builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser,null);
|
|
||||||
Index idx = builder.loadIndex(vcfFile, new VCFCodec());
|
Index idx = builder.loadIndex(vcfFile, new VCFCodec());
|
||||||
// catch any exception; this call should pass correctly
|
// catch any exception; this call should pass correctly
|
||||||
SAMSequenceDictionary dict = RMDTrackBuilder.getSequenceDictionaryFromProperties(idx);
|
SAMSequenceDictionary dict = RMDTrackBuilder.getSequenceDictionaryFromProperties(idx);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue