Push command-line expansions, such as BAM list unpacking and -B tag parsing, out
into the CommandLine* classes. This makes it easier for external functionality (such as the VCF streamer) to use GenomeAnalysisEngine directly. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4897 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
66cca7de0f
commit
e313eeede8
|
|
@ -28,16 +28,23 @@ package org.broadinstitute.sting.gatk;
|
|||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -76,6 +83,11 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
|
||||
try {
|
||||
engine.setArguments(getArgumentCollection());
|
||||
|
||||
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
||||
engine.setSAMFileIDs(unpackBAMFileList(getArgumentCollection()));
|
||||
engine.setReferenceMetaDataFiles(unpackRODBindings(getArgumentCollection()));
|
||||
|
||||
engine.setWalker(walker);
|
||||
walker.setToolkit(engine);
|
||||
|
||||
|
|
@ -182,4 +194,69 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
protected String getArgumentSourceName( Class argumentSource ) {
|
||||
return engine.getWalkerName((Class<Walker>)argumentSource);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpack the bam files to be processed, given a list of files. That list of files can
|
||||
* itself contain entries which are lists of other files to be read (note: you cannot have lists of lists of lists)
|
||||
*
|
||||
* @param argCollection the command-line arguments from which to extract the BAM file list.
|
||||
* @return a flattened list of the bam files provided
|
||||
*/
|
||||
private List<SAMReaderID> unpackBAMFileList(GATKArgumentCollection argCollection) {
|
||||
List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
|
||||
for( File inputFile: argCollection.samFiles ) {
|
||||
if (inputFile.getName().toLowerCase().endsWith(".list") ) {
|
||||
try {
|
||||
for(String fileName : new XReadLines(inputFile))
|
||||
unpackedReads.add(new SAMReaderID(new File(fileName),parser.getTags(inputFile)));
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new UserException.CouldNotReadInputFile(inputFile, "Unable to find file while unpacking reads", ex);
|
||||
}
|
||||
}
|
||||
else if(inputFile.getName().toLowerCase().endsWith(".bam")) {
|
||||
unpackedReads.add( new SAMReaderID(inputFile,parser.getTags(inputFile)) );
|
||||
}
|
||||
else if(inputFile.getName().equals("-")) {
|
||||
unpackedReads.add(new SAMReaderID(new File("/dev/stdin"),Collections.<String>emptyList()));
|
||||
}
|
||||
else {
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
|
||||
"of BAM files is in the correct format, update the extension, and try again.",inputFile.getName()));
|
||||
}
|
||||
}
|
||||
return unpackedReads;
|
||||
}
|
||||
/**
|
||||
* Convert command-line argument representation of ROD bindings to something more easily understandable by the engine.
|
||||
* @param argCollection input arguments to the GATK.
|
||||
* @return a list of expanded, bound RODs.
|
||||
*/
|
||||
private Collection<RMDTriplet> unpackRODBindings(GATKArgumentCollection argCollection) {
|
||||
Collection<RMDTriplet> rodBindings = new ArrayList<RMDTriplet>();
|
||||
|
||||
for (String binding: argCollection.RODBindings) {
|
||||
if(parser.getTags(binding).size() != 2)
|
||||
throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " +
|
||||
"Please use the following syntax when providing reference-ordered " +
|
||||
"data: -B:<name>,<type> <filename>.");
|
||||
// Assume that if tags are present, those tags are name and type.
|
||||
// Name is always first, followed by type.
|
||||
List<String> parameters = parser.getTags(binding);
|
||||
String name = parameters.get(0);
|
||||
String type = parameters.get(1);
|
||||
rodBindings.add(new RMDTriplet(name,type,binding));
|
||||
}
|
||||
|
||||
if (argCollection.DBSNPFile != null) {
|
||||
if(argCollection.DBSNPFile.toLowerCase().contains("vcf"))
|
||||
throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf <filename>.");
|
||||
rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile));
|
||||
}
|
||||
|
||||
return rodBindings;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
|||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -144,6 +145,32 @@ public class GenomeAnalysisEngine {
|
|||
this.walker = walker;
|
||||
}
|
||||
|
||||
/**
|
||||
* A processed collection of SAM reader identifiers.
|
||||
*/
|
||||
private Collection<SAMReaderID> samReaderIDs;
|
||||
|
||||
/**
 * Set the SAM/BAM files over which to traverse.
 * The supplied collection is stored as-is (by reference); no copy is made.
 * @param samReaderIDs Collection of ids to use during this traversal.
 */
|
||||
public void setSAMFileIDs(Collection<SAMReaderID> samReaderIDs) {
|
||||
this.samReaderIDs = samReaderIDs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collection of reference metadata files over which to traverse.
|
||||
*/
|
||||
private Collection<RMDTriplet> referenceMetaDataFiles;
|
||||
|
||||
/**
 * Set the reference metadata files to use for this traversal.
 * The supplied collection is stored as-is (by reference); no copy is made.
 * @param referenceMetaDataFiles Collection of files and descriptors over which to traverse.
 */
|
||||
public void setReferenceMetaDataFiles(Collection<RMDTriplet> referenceMetaDataFiles) {
|
||||
this.referenceMetaDataFiles = referenceMetaDataFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Actually run the GATK with the specified walker.
|
||||
*
|
||||
|
|
@ -613,15 +640,6 @@ public class GenomeAnalysisEngine {
|
|||
outputs.add(stub);
|
||||
}
|
||||
|
||||
/**
 * Gets the tags associated with a given object, by delegating to the parsing engine.
 * @param key Key for which to find a tag.
 * @return List of tags associated with this key, exactly as returned by the parsing engine.
 */
|
||||
public List<String> getTags(Object key) {
|
||||
return parsingEngine.getTags(key);
|
||||
}
|
||||
|
||||
protected void initializeDataSources() {
|
||||
logger.info("Strictness is " + argCollection.strictnessLevel);
|
||||
|
||||
|
|
@ -643,7 +661,7 @@ public class GenomeAnalysisEngine {
|
|||
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
||||
|
||||
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
||||
RMDTrackBuilder manager = new RMDTrackBuilder(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||
RMDTrackBuilder manager = new RMDTrackBuilder(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
|
||||
validateSuppliedReferenceOrderedData(tracks);
|
||||
|
||||
|
|
@ -680,7 +698,7 @@ public class GenomeAnalysisEngine {
|
|||
* @return Sets of samples in the merged input SAM stream, grouped by readers
|
||||
*/
|
||||
public List<Set<String>> getSamplesByReaders() {
|
||||
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
|
|
@ -709,7 +727,7 @@ public class GenomeAnalysisEngine {
|
|||
public List<Set<String>> getLibrariesByReaders() {
|
||||
|
||||
|
||||
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
|
|
@ -740,7 +758,7 @@ public class GenomeAnalysisEngine {
|
|||
public List<Set<String>> getMergedReadGroupsByReaders() {
|
||||
|
||||
|
||||
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
|
|
@ -814,7 +832,7 @@ public class GenomeAnalysisEngine {
|
|||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
return new SAMDataSource(
|
||||
unpackBAMFileList(argCollection.samFiles),
|
||||
samReaderIDs,
|
||||
genomeLocParser,
|
||||
argCollection.useOriginalBaseQualities,
|
||||
argCollection.strictnessLevel,
|
||||
|
|
@ -850,7 +868,8 @@ public class GenomeAnalysisEngine {
|
|||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<RMDTrack> rods) {
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (RMDTrack rod : rods)
|
||||
dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),
|
||||
dataSources.add(new ReferenceOrderedDataSource(referenceMetaDataFiles,
|
||||
referenceDataSource.getReference().getSequenceDictionary(),
|
||||
genomeLocParser,
|
||||
argCollection.unsafe,
|
||||
rod,
|
||||
|
|
@ -971,41 +990,6 @@ public class GenomeAnalysisEngine {
|
|||
return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpack the bam files to be processed, given a list of files. That list of files can
|
||||
* itself contain entries which are lists of other files to be read (note: you cannot have lists of lists of lists)
|
||||
*
|
||||
* @param inputFiles a list of files that represent either bam files themselves, or a file containing a list of bam files to process
|
||||
*
|
||||
* @return a flattened list of the bam files provided
|
||||
*/
|
||||
private List<SAMReaderID> unpackBAMFileList( List<File> inputFiles ) {
|
||||
List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
|
||||
for( File inputFile: inputFiles ) {
|
||||
if (inputFile.getName().toLowerCase().endsWith(".list") ) {
|
||||
try {
|
||||
for(String fileName : new XReadLines(inputFile))
|
||||
unpackedReads.add(new SAMReaderID(new File(fileName),getTags(inputFile)));
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new UserException.CouldNotReadInputFile(inputFile, "Unable to find file while unpacking reads", ex);
|
||||
}
|
||||
}
|
||||
else if(inputFile.getName().toLowerCase().endsWith(".bam")) {
|
||||
unpackedReads.add( new SAMReaderID(inputFile,getTags(inputFile)) );
|
||||
}
|
||||
else if(inputFile.getName().equals("-")) {
|
||||
unpackedReads.add(new SAMReaderID(new File("/dev/stdin"),Collections.<String>emptyList()));
|
||||
}
|
||||
else {
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
|
||||
"of BAM files is in the correct format, update the extension, and try again.",inputFile.getName()));
|
||||
}
|
||||
}
|
||||
return unpackedReads;
|
||||
}
|
||||
|
||||
/**
 * Gets the sample data source held by this engine.
 * @return the engine's sampleDataSource field itself (not a copy).
 */
public SampleDataSource getSampleMetadata() {
|
||||
return this.sampleDataSource;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import java.util.Collection;
|
|||
* information about how they should be downsampled, sorted, and filtered.
|
||||
*/
|
||||
public class ReadProperties {
|
||||
private List<SAMReaderID> readers = null;
|
||||
private Collection<SAMReaderID> readers = null;
|
||||
private SAMFileHeader header = null;
|
||||
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
|
||||
private Integer readBufferSize = null;
|
||||
|
|
@ -71,7 +71,7 @@ public class ReadProperties {
|
|||
* Gets a list of the files acting as sources of reads.
|
||||
* @return A list of files storing reads data.
|
||||
*/
|
||||
public List<SAMReaderID> getSAMReaderIDs() {
|
||||
public Collection<SAMReaderID> getSAMReaderIDs() {
|
||||
return readers;
|
||||
}
|
||||
|
||||
|
|
@ -157,7 +157,7 @@ public class ReadProperties {
|
|||
* @param qmode How should we apply the BAQ calculation to the reads?
|
||||
* @param refReader if applyBAQ is true, must be a valid pointer to a indexed fasta file reads so we can get the ref bases for BAQ calculation
|
||||
*/
|
||||
public ReadProperties( List<SAMReaderID> samFiles,
|
||||
public ReadProperties( Collection<SAMReaderID> samFiles,
|
||||
SAMFileHeader header,
|
||||
boolean useOriginalBaseQualities,
|
||||
SAMFileReader.ValidationStringency strictness,
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
|||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -18,6 +19,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
|
|
@ -50,7 +52,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* Create a new reference-ordered data source.
|
||||
* @param rod the reference ordered data
|
||||
*/
|
||||
public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,
|
||||
public ReferenceOrderedDataSource(Collection<RMDTriplet> refMetaDataDescriptors,
|
||||
SAMSequenceDictionary sequenceDictionary,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType,
|
||||
RMDTrack rod, boolean flashbackData ) {
|
||||
|
|
@ -58,7 +61,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
if (rod.supportsQuery())
|
||||
iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,
|
||||
genomeLocParser,
|
||||
new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType),
|
||||
new RMDTrackBuilder(refMetaDataDescriptors,sequenceDictionary,genomeLocParser,validationExclusionType),
|
||||
rod);
|
||||
else
|
||||
iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData );
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
/**
|
||||
* Identifiers for the readers driving this data source.
|
||||
*/
|
||||
private final List<SAMReaderID> readerIDs;
|
||||
private final Collection<SAMReaderID> readerIDs;
|
||||
|
||||
/**
|
||||
* How strict are the readers driving this data source.
|
||||
|
|
@ -129,7 +129,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* Create a new SAM data source given the supplied read metadata.
|
||||
* @param samFiles list of reads files.
|
||||
*/
|
||||
public SAMDataSource(List<SAMReaderID> samFiles,GenomeLocParser genomeLocParser) {
|
||||
public SAMDataSource(Collection<SAMReaderID> samFiles,GenomeLocParser genomeLocParser) {
|
||||
this(
|
||||
samFiles,
|
||||
genomeLocParser,
|
||||
|
|
@ -148,7 +148,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* See complete constructor. Does not enable BAQ by default.
|
||||
*/
|
||||
public SAMDataSource(
|
||||
List<SAMReaderID> samFiles,
|
||||
Collection<SAMReaderID> samFiles,
|
||||
GenomeLocParser genomeLocParser,
|
||||
boolean useOriginalBaseQualities,
|
||||
SAMFileReader.ValidationStringency strictness,
|
||||
|
|
@ -189,7 +189,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* bases will be seen in the pileups, and the deletions will be skipped silently.
|
||||
*/
|
||||
public SAMDataSource(
|
||||
List<SAMReaderID> samFiles,
|
||||
Collection<SAMReaderID> samFiles,
|
||||
GenomeLocParser genomeLocParser,
|
||||
boolean useOriginalBaseQualities,
|
||||
SAMFileReader.ValidationStringency strictness,
|
||||
|
|
@ -301,7 +301,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* Returns readers used by this data source.
|
||||
* @return A list of SAM reader IDs.
|
||||
*/
|
||||
public List<SAMReaderID> getReaderIDs() {
|
||||
public Collection<SAMReaderID> getReaderIDs() {
|
||||
return readerIDs;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
||||
|
||||
// the input strings we use to create RODs from
|
||||
private final List<RMDTriplet> inputs = new ArrayList<RMDTriplet>();
|
||||
private final Collection<RMDTriplet> inputs;
|
||||
|
||||
private Map<String, Class> classes = null;
|
||||
|
||||
|
|
@ -93,18 +93,37 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
private ValidationExclusion.TYPE validationExclusionType;
|
||||
|
||||
/**
|
||||
* Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined.
|
||||
* Create a new RMDTrackBuilder, predefined to use a given set of reference metadata.
|
||||
* @param referenceMetaDataDescriptors file descriptors to build out during trackbuilder construction.
|
||||
* @param dict Sequence dictionary to use.
|
||||
* @param genomeLocParser Location parser to use.
|
||||
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
|
||||
*/
|
||||
public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) {
|
||||
public RMDTrackBuilder(Collection<RMDTriplet> referenceMetaDataDescriptors,
|
||||
SAMSequenceDictionary dict,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
super(FeatureCodec.class, "Codecs", "Codec");
|
||||
this.inputs = referenceMetaDataDescriptors;
|
||||
this.dict = dict;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.validationExclusionType = validationExclusionType;
|
||||
}
|
||||
|
||||
/**
 * Construct an RMDTrackBuilder with no predefined reference metadata descriptors, allowing the user to define
 * tracks to build after-the-fact.  This is generally used when walkers want to directly manage the ROD system
 * for whatever reason.  Before using this constructor, please talk through your approach with the SE team.
 * Delegates to the full constructor, passing an empty descriptor list.
 * @param dict Sequence dictionary to use.
 * @param genomeLocParser Location parser to use.
 * @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
 */
|
||||
public RMDTrackBuilder(SAMSequenceDictionary dict,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
this(Collections.<RMDTriplet>emptyList(),dict,genomeLocParser,validationExclusionType);
|
||||
}
|
||||
|
||||
/** @return a list of all available track types we currently have access to create */
|
||||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
||||
classes = new HashMap<String, Class>();
|
||||
|
|
@ -392,52 +411,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
* @return a list of RMDTracks, one for each -B option
|
||||
*/
|
||||
public List<RMDTrack> getReferenceMetaDataSources(GenomeAnalysisEngine engine, GATKArgumentCollection argCollection) {
|
||||
initializeConvenienceBindings(engine,argCollection);
|
||||
initializeFullBindings(engine,argCollection);
|
||||
// try and make the tracks given their requests
|
||||
return createRequestedTrackObjects();
|
||||
}
|
||||
|
||||
private void initializeConvenienceBindings(GenomeAnalysisEngine engine, GATKArgumentCollection argCollection) {
|
||||
if (argCollection.DBSNPFile != null) {
|
||||
if(argCollection.DBSNPFile.toLowerCase().contains("vcf"))
|
||||
throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf <filename>.");
|
||||
inputs.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize our lists of bindings
|
||||
* @param engine The engine, used to populate tags.
|
||||
* @param argCollection input arguments to the GATK.
|
||||
*/
|
||||
private void initializeFullBindings(GenomeAnalysisEngine engine,GATKArgumentCollection argCollection) {
|
||||
// NOTE: Method acts as a static. Once the inputs have been passed once they are locked in.
|
||||
if (argCollection.RODBindings.size() == 0)
|
||||
return;
|
||||
|
||||
for (String binding: argCollection.RODBindings) {
|
||||
if(engine != null) {
|
||||
if(engine.getTags(binding).size() != 2)
|
||||
throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " +
|
||||
"Please use the following syntax when providing reference-ordered " +
|
||||
"data: -B:<name>,<type> <filename>.");
|
||||
// Assume that if tags are present, those tags are name and type.
|
||||
// Name is always first, followed by type.
|
||||
List<String> parameters = engine.getTags(binding);
|
||||
String name = parameters.get(0);
|
||||
String type = parameters.get(1);
|
||||
inputs.add(new RMDTriplet(name,type,binding));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create the requested track objects
|
||||
*
|
||||
* @return a list of the tracks, one for each of the requested input tracks
|
||||
*/
|
||||
private List<RMDTrack> createRequestedTrackObjects() {
|
||||
// create of live instances of the tracks
|
||||
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
|
||||
|
||||
|
|
@ -451,7 +425,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
return tracks;
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
// static functions to work with the sequence dictionaries of indexes
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
|
|
@ -79,7 +80,12 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
public void testSingleBinding() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file);
|
||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track,false);
|
||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(Collections.singleton(new RMDTriplet("tableTest","Table",file.getAbsolutePath())),
|
||||
seq.getSequenceDictionary(),
|
||||
genomeLocParser,
|
||||
null,
|
||||
track,
|
||||
false);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
|
||||
|
|
@ -101,11 +107,13 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
public void testMultipleBinding() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
|
||||
|
||||
RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath());
|
||||
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file);
|
||||
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track,false);
|
||||
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(Collections.singleton(testTriplet1),seq.getSequenceDictionary(),genomeLocParser,null,track,false);
|
||||
|
||||
RMDTriplet testTriplet2 = new RMDTriplet("tableTest2","Table",file.getAbsolutePath());
|
||||
RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file);
|
||||
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,null,track2,false);
|
||||
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(Collections.singleton(testTriplet2),seq.getSequenceDictionary(),genomeLocParser,null,track2,false);
|
||||
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
|
|
|
|||
Loading…
Reference in New Issue