A significant refactoring of the ROD system, done largely to simplify the process of
streaming/piping VCFs into the GATK. Notable changes: - Public interface to RMDTrackBuilder is greatly simplified; users can use it only to build RMDTracks and look up codecs. - RODDataSource and RMDTrack are no longer functionally at the same level; RODDataSources now manage RMDTracks on behalf of the GATK, and the only direct consumers of the RMDTrack class are the walkers that feel the need to access the ROD system directly. (We need to stamp out this access pattern.) A few minor warts were introduced as part of this process, labeled with TODOs. These'll be fixed as part of the VCF streaming project. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4915 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d70483c50a
commit
cba18116e4
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescripto
|
|||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
||||
|
|
@ -204,26 +205,28 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
*/
|
||||
private List<SAMReaderID> unpackBAMFileList(GATKArgumentCollection argCollection) {
|
||||
List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
|
||||
for( File inputFile: argCollection.samFiles ) {
|
||||
if (inputFile.getName().toLowerCase().endsWith(".list") ) {
|
||||
for( String inputFileName: argCollection.samFiles ) {
|
||||
List<String> inputFileNameTags = parser.getTags(inputFileName);
|
||||
inputFileName = expandFileName(inputFileName);
|
||||
if (inputFileName.toLowerCase().endsWith(".list") ) {
|
||||
try {
|
||||
for(String fileName : new XReadLines(inputFile))
|
||||
unpackedReads.add(new SAMReaderID(new File(fileName),parser.getTags(inputFile)));
|
||||
for(String fileName : new XReadLines(new File(inputFileName)))
|
||||
unpackedReads.add(new SAMReaderID(fileName,parser.getTags(inputFileName)));
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new UserException.CouldNotReadInputFile(inputFile, "Unable to find file while unpacking reads", ex);
|
||||
throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex);
|
||||
}
|
||||
}
|
||||
else if(inputFile.getName().toLowerCase().endsWith(".bam")) {
|
||||
unpackedReads.add( new SAMReaderID(inputFile,parser.getTags(inputFile)) );
|
||||
else if(inputFileName.toLowerCase().endsWith(".bam")) {
|
||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else if(inputFile.getName().equals("-")) {
|
||||
unpackedReads.add(new SAMReaderID(new File("/dev/stdin"),Collections.<String>emptyList()));
|
||||
else if(inputFileName.endsWith("stdin")) {
|
||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else {
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
|
||||
"of BAM files is in the correct format, update the extension, and try again.",inputFile.getName()));
|
||||
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
|
||||
}
|
||||
}
|
||||
return unpackedReads;
|
||||
|
|
@ -236,27 +239,47 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
private Collection<RMDTriplet> unpackRODBindings(GATKArgumentCollection argCollection) {
|
||||
Collection<RMDTriplet> rodBindings = new ArrayList<RMDTriplet>();
|
||||
|
||||
for (String binding: argCollection.RODBindings) {
|
||||
if(parser.getTags(binding).size() != 2)
|
||||
|
||||
for (String fileName: argCollection.RODBindings) {
|
||||
List<String> parameters = parser.getTags(fileName);
|
||||
fileName = expandFileName(fileName);
|
||||
RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE;
|
||||
|
||||
if(parameters.size() != 2)
|
||||
throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " +
|
||||
"Please use the following syntax when providing reference-ordered " +
|
||||
"data: -B:<name>,<type> <filename>.");
|
||||
// Assume that if tags are present, those tags are name and type.
|
||||
// Name is always first, followed by type.
|
||||
List<String> parameters = parser.getTags(binding);
|
||||
String name = parameters.get(0);
|
||||
String type = parameters.get(1);
|
||||
rodBindings.add(new RMDTriplet(name,type,binding));
|
||||
rodBindings.add(new RMDTriplet(name,type,fileName,storageType));
|
||||
}
|
||||
|
||||
if (argCollection.DBSNPFile != null) {
|
||||
if(argCollection.DBSNPFile.toLowerCase().contains("vcf"))
|
||||
throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf <filename>.");
|
||||
rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile));
|
||||
|
||||
String fileName = expandFileName(argCollection.DBSNPFile);
|
||||
RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE;
|
||||
|
||||
rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME,"dbsnp",fileName,storageType));
|
||||
}
|
||||
|
||||
return rodBindings;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expand any special characters that appear in the filename. Right now, '-' is expanded to
|
||||
* '/dev/stdin' only, but in the future, special characters like '~' and '*' that are passed
|
||||
* directly to the command line in some circumstances could be expanded as well. Be careful
|
||||
* when adding UNIX-isms.
|
||||
* @param argument the text appearing on the command-line.
|
||||
* @return An expanded string suitable for opening by Java/UNIX file handling utilities.
|
||||
*/
|
||||
private String expandFileName(String argument) {
|
||||
if(argument.trim().equals("-"))
|
||||
return "/dev/stdin";
|
||||
return argument;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ public class GenomeAnalysisEngine {
|
|||
/**
|
||||
* A processed collection of SAM reader identifiers.
|
||||
*/
|
||||
private Collection<SAMReaderID> samReaderIDs;
|
||||
private Collection<SAMReaderID> samReaderIDs = Collections.emptyList();
|
||||
|
||||
/**
|
||||
* Set the SAM/BAM files over which to traverse.
|
||||
|
|
@ -334,12 +334,12 @@ public class GenomeAnalysisEngine {
|
|||
*
|
||||
* @param rods Reference-ordered data to load.
|
||||
*/
|
||||
protected void validateSuppliedReferenceOrderedData(List<RMDTrack> rods) {
|
||||
protected void validateSuppliedReferenceOrderedData(List<ReferenceOrderedDataSource> rods) {
|
||||
// Check to make sure that all required metadata is present.
|
||||
List<RMD> allRequired = WalkerManager.getRequiredMetaData(walker);
|
||||
for (RMD required : allRequired) {
|
||||
boolean found = false;
|
||||
for (RMDTrack rod : rods) {
|
||||
for (ReferenceOrderedDataSource rod : rods) {
|
||||
if (rod.matchesNameAndRecordType(required.name(), required.type()))
|
||||
found = true;
|
||||
}
|
||||
|
|
@ -349,7 +349,7 @@ public class GenomeAnalysisEngine {
|
|||
}
|
||||
|
||||
// Check to see that no forbidden rods are present.
|
||||
for (RMDTrack rod : rods) {
|
||||
for (ReferenceOrderedDataSource rod : rods) {
|
||||
if (!WalkerManager.isAllowed(walker, rod))
|
||||
throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName()));
|
||||
}
|
||||
|
|
@ -614,7 +614,7 @@ public class GenomeAnalysisEngine {
|
|||
for (String str : rodNames.keySet())
|
||||
if (str.equals(rodName)) {
|
||||
logger.info("Adding interval list from track (ROD) named " + rodName);
|
||||
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str).getReferenceOrderedData());
|
||||
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str));
|
||||
ret.addAll(intervalGenerator.toGenomeLocList());
|
||||
}
|
||||
}
|
||||
|
|
@ -661,14 +661,7 @@ public class GenomeAnalysisEngine {
|
|||
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
||||
|
||||
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
||||
RMDTrackBuilder manager = new RMDTrackBuilder(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
|
||||
validateSuppliedReferenceOrderedData(tracks);
|
||||
|
||||
// validate all the sequence dictionaries against the reference
|
||||
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), tracks, manager);
|
||||
|
||||
rodDataSources = getReferenceOrderedDataSources(tracks);
|
||||
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -788,10 +781,10 @@ public class GenomeAnalysisEngine {
|
|||
*
|
||||
* @param reads Reads data source.
|
||||
* @param reference Reference data source.
|
||||
* @param tracks a collection of the reference ordered data tracks
|
||||
* @param rods a collection of the reference ordered data tracks
|
||||
*/
|
||||
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks, RMDTrackBuilder manager) {
|
||||
if ((reads.isEmpty() && (tracks == null || tracks.isEmpty())) || reference == null )
|
||||
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) {
|
||||
if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null )
|
||||
return;
|
||||
|
||||
// Compile a set of sequence names that exist in the reference file.
|
||||
|
|
@ -815,9 +808,8 @@ public class GenomeAnalysisEngine {
|
|||
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary);
|
||||
}
|
||||
|
||||
// compare the tracks to the reference, if they have a sequence dictionary
|
||||
for (RMDTrack track : tracks)
|
||||
manager.validateTrackSequenceDictionary(track.getName(),track.getSequenceDictionary(),referenceDictionary);
|
||||
for (ReferenceOrderedDataSource rod : rods)
|
||||
manager.validateTrackSequenceDictionary(rod.getName(),rod.getSequenceDictionary(),referenceDictionary);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -862,18 +854,34 @@ public class GenomeAnalysisEngine {
|
|||
/**
|
||||
* Open the reference-ordered data sources.
|
||||
*
|
||||
* @param rods the reference order data to execute using
|
||||
* @param referenceMetaDataFiles collection of RMD descriptors to load and validate.
|
||||
* @param sequenceDictionary GATK-wide sequence dictionary to use for validation.
|
||||
* @param genomeLocParser to use when creating and validating GenomeLocs.
|
||||
* @param validationExclusionType potentially indicate which validations to include / exclude.
|
||||
*
|
||||
* @return A list of reference-ordered data sources.
|
||||
*/
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<RMDTrack> rods) {
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(Collection<RMDTriplet> referenceMetaDataFiles,
|
||||
SAMSequenceDictionary sequenceDictionary,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(referenceMetaDataFiles,sequenceDictionary,genomeLocParser,validationExclusionType);
|
||||
// try and make the tracks given their requests
|
||||
// create of live instances of the tracks
|
||||
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
|
||||
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (RMDTrack rod : rods)
|
||||
dataSources.add(new ReferenceOrderedDataSource(referenceMetaDataFiles,
|
||||
referenceDataSource.getReference().getSequenceDictionary(),
|
||||
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
|
||||
dataSources.add(new ReferenceOrderedDataSource(fileDescriptor,
|
||||
builder,
|
||||
sequenceDictionary,
|
||||
genomeLocParser,
|
||||
argCollection.unsafe,
|
||||
rod,
|
||||
flashbackData()));
|
||||
|
||||
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
|
||||
validateSuppliedReferenceOrderedData(dataSources);
|
||||
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
|
||||
|
||||
return dataSources;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk;
|
|||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -236,7 +237,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @param rod Source to check.
|
||||
* @return True if the walker forbids this data type. False otherwise.
|
||||
*/
|
||||
public static boolean isAllowed(Class<? extends Walker> walkerClass, RMDTrack rod) {
|
||||
public static boolean isAllowed(Class<? extends Walker> walkerClass, ReferenceOrderedDataSource rod) {
|
||||
Allows allowsDataSource = getWalkerAllowed(walkerClass);
|
||||
|
||||
// Allows is less restrictive than requires. If an allows
|
||||
|
|
@ -263,7 +264,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @param rod Source to check.
|
||||
* @return True if the walker forbids this data type. False otherwise.
|
||||
*/
|
||||
public static boolean isAllowed(Walker walker, RMDTrack rod) {
|
||||
public static boolean isAllowed(Walker walker, ReferenceOrderedDataSource rod) {
|
||||
return isAllowed(walker.getClass(), rod);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class GATKArgumentCollection {
|
|||
// parameters and their defaults
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
|
||||
public List<File> samFiles = new ArrayList<File>();
|
||||
public List<String> samFiles = new ArrayList<String>();
|
||||
|
||||
// parameters and their defaults
|
||||
@ElementList(required = false)
|
||||
|
|
|
|||
|
|
@ -1,25 +1,20 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broad.tribble.FeatureSource;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
|
|
@ -41,7 +36,22 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
/**
|
||||
* The reference-ordered data itself.
|
||||
*/
|
||||
private final RMDTrack rod;
|
||||
private final RMDTriplet fileDescriptor;
|
||||
|
||||
/**
|
||||
* The header associated with this VCF, if any.
|
||||
*/
|
||||
private final Object header;
|
||||
|
||||
/**
|
||||
* The private sequence dictionary associated with this RMD.
|
||||
*/
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
|
||||
/**
|
||||
* The builder to use when constructing new reference-ordered data readers.
|
||||
*/
|
||||
private final RMDTrackBuilder builder;
|
||||
|
||||
/**
|
||||
* A pool of iterators for navigating through the genome.
|
||||
|
|
@ -50,21 +60,28 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
|
||||
/**
|
||||
* Create a new reference-ordered data source.
|
||||
* @param rod the reference ordered data
|
||||
*/
|
||||
public ReferenceOrderedDataSource(Collection<RMDTriplet> refMetaDataDescriptors,
|
||||
SAMSequenceDictionary sequenceDictionary,
|
||||
public ReferenceOrderedDataSource(RMDTriplet fileDescriptor,
|
||||
RMDTrackBuilder builder,
|
||||
SAMSequenceDictionary referenceSequenceDictionary,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType,
|
||||
RMDTrack rod, boolean flashbackData ) {
|
||||
this.rod = rod;
|
||||
if (rod.supportsQuery())
|
||||
iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,
|
||||
genomeLocParser,
|
||||
new RMDTrackBuilder(refMetaDataDescriptors,sequenceDictionary,genomeLocParser,validationExclusionType),
|
||||
rod);
|
||||
else
|
||||
iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData );
|
||||
boolean flashbackData ) {
|
||||
this.fileDescriptor = fileDescriptor;
|
||||
this.builder = builder;
|
||||
if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) {
|
||||
iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor,
|
||||
builder,
|
||||
referenceSequenceDictionary,
|
||||
genomeLocParser);
|
||||
header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader();
|
||||
this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary();
|
||||
}
|
||||
else {
|
||||
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
|
||||
header = track.getHeader();
|
||||
this.sequenceDictionary = track.getSequenceDictionary();
|
||||
iteratorPool = new ReferenceOrderedDataPool(track,referenceSequenceDictionary,genomeLocParser,flashbackData);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -72,15 +89,43 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @return Name of the underlying rod.
|
||||
*/
|
||||
public String getName() {
|
||||
return this.rod.getName();
|
||||
return fileDescriptor.getName();
|
||||
}
|
||||
|
||||
public Class getType() {
|
||||
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
||||
}
|
||||
|
||||
public Class getRecordType() {
|
||||
return builder.createCodec(getType(),getName()).getFeatureType();
|
||||
}
|
||||
|
||||
public File getFile() {
|
||||
return new File(fileDescriptor.getFile());
|
||||
}
|
||||
|
||||
public Object getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the underlying reference-ordered data.
|
||||
* @return the underlying rod.
|
||||
* Retrieves the sequence dictionary created by this ROD.
|
||||
* @return
|
||||
*/
|
||||
public RMDTrack getReferenceOrderedData() {
|
||||
return this.rod;
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return sequenceDictionary;
|
||||
}
|
||||
|
||||
/**
|
||||
* helper function for determining if we are the same track based on name and record type
|
||||
*
|
||||
* @param name the name to match
|
||||
* @param type the type to match
|
||||
*
|
||||
* @return true on a match, false if the name or type is different
|
||||
*/
|
||||
public boolean matchesNameAndRecordType(String name, Type type) {
|
||||
return (name.equals(fileDescriptor.getName()) && (type.getClass().isAssignableFrom(getType().getClass())));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -120,12 +165,12 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* A pool of reference-ordered data iterators.
|
||||
*/
|
||||
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
||||
private final RMDTrack rod;
|
||||
private final RMDTrack track;
|
||||
boolean flashbackData = false;
|
||||
public ReferenceOrderedDataPool( SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) {
|
||||
public ReferenceOrderedDataPool( RMDTrack track, SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, boolean flashbackData ) {
|
||||
super(sequenceDictionary,genomeLocParser);
|
||||
this.track = track;
|
||||
this.flashbackData = flashbackData;
|
||||
this.rod = rod;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -134,7 +179,7 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
|||
* @return The newly created resource.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator createNewResource() {
|
||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(sequenceDictionary,genomeLocParser,rod.getIterator());
|
||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
||||
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
||||
}
|
||||
|
||||
|
|
@ -183,61 +228,81 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
|||
* kill the buffers in the iterator
|
||||
*/
|
||||
public void closeResource( LocationAwareSeekableRODIterator resource ) {
|
||||
if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close();
|
||||
if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* a data pool for the new query based RODs
|
||||
*/
|
||||
class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureSource, LocationAwareSeekableRODIterator> {
|
||||
class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareSeekableRODIterator> {
|
||||
// the reference-ordered data itself.
|
||||
private final RMDTrack rod;
|
||||
private final RMDTriplet fileDescriptor;
|
||||
|
||||
// our tribble track builder
|
||||
private final RMDTrackBuilder builder;
|
||||
|
||||
public ReferenceOrderedQueryDataPool( SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, RMDTrackBuilder builder, RMDTrack rod ) {
|
||||
super(sequenceDictionary,genomeLocParser);
|
||||
this.rod = rod;
|
||||
/**
|
||||
* The header from this RMD, if present.
|
||||
*/
|
||||
private final Object header;
|
||||
|
||||
/**
|
||||
* The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's.
|
||||
*/
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
|
||||
public ReferenceOrderedQueryDataPool(RMDTriplet fileDescriptor, RMDTrackBuilder builder, SAMSequenceDictionary referenceSequenceDictionary, GenomeLocParser genomeLocParser) {
|
||||
super(referenceSequenceDictionary,genomeLocParser);
|
||||
this.fileDescriptor = fileDescriptor;
|
||||
this.builder = builder;
|
||||
// a little bit of a hack, but it saves us from re-reading the index from the file
|
||||
this.addNewResource(rod.getReader());
|
||||
|
||||
// prepopulate one RMDTrack
|
||||
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
|
||||
this.addNewResource(track);
|
||||
|
||||
// Pull the proper header and sequence dictionary from the prepopulated track.
|
||||
this.header = track.getHeader();
|
||||
this.sequenceDictionary = track.getSequenceDictionary();
|
||||
}
|
||||
|
||||
public Object getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return sequenceDictionary;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FeatureSource createNewResource() {
|
||||
return builder.createFeatureReader(rod.getType(),rod.getFile()).first;
|
||||
protected RMDTrack createNewResource() {
|
||||
return builder.createInstanceOfTrack(fileDescriptor);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FeatureSource selectBestExistingResource(DataStreamSegment segment, List<FeatureSource> availableResources) {
|
||||
for (FeatureSource reader : availableResources)
|
||||
protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List<RMDTrack> availableResources) {
|
||||
for (RMDTrack reader : availableResources)
|
||||
if (reader != null) return reader;
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, FeatureSource resource) {
|
||||
protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, RMDTrack track) {
|
||||
try {
|
||||
if (position instanceof MappedStreamSegment) {
|
||||
GenomeLoc pos = ((MappedStreamSegment) position).locus;
|
||||
return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new FeatureToGATKFeatureIterator(genomeLocParser,resource.query(pos.getContig(),(int) pos.getStart(), (int) pos.getStop()),rod.getName()));
|
||||
return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.query(pos));
|
||||
} else {
|
||||
return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new FeatureToGATKFeatureIterator(genomeLocParser,resource.iterator(),rod.getName()));
|
||||
return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new ReviewedStingException("Unable to create iterator for rod named " + rod.getName(),e);
|
||||
throw new ReviewedStingException("Unable to create iterator for rod named " + fileDescriptor.getName(),e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void closeResource(FeatureSource resource) {
|
||||
try {
|
||||
resource.close();
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile("Unable to close reader for rod named " + rod.getName(),e);
|
||||
}
|
||||
protected void closeResource(RMDTrack track) {
|
||||
track.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ abstract class ResourcePool <T,I extends Iterator> {
|
|||
/**
|
||||
* Sequence dictionary.
|
||||
*/
|
||||
protected final SAMSequenceDictionary sequenceDictionary;
|
||||
protected final SAMSequenceDictionary referenceSequenceDictionary;
|
||||
|
||||
/**
|
||||
* Builder/parser for GenomeLocs.
|
||||
|
|
@ -52,8 +52,8 @@ abstract class ResourcePool <T,I extends Iterator> {
|
|||
*/
|
||||
private Map<I,T> resourceAssignments = new HashMap<I,T>();
|
||||
|
||||
protected ResourcePool(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser) {
|
||||
this.sequenceDictionary = sequenceDictionary;
|
||||
protected ResourcePool(SAMSequenceDictionary referenceSequenceDictionary,GenomeLocParser genomeLocParser) {
|
||||
this.referenceSequenceDictionary = referenceSequenceDictionary;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,15 @@ public class SAMReaderID {
|
|||
this.tags = tags;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an identifier for a SAM file based on read.
|
||||
* @param samFileName The source filename for SAM data.
|
||||
* @param tags tags to use when creating a reader ID.
|
||||
*/
|
||||
public SAMReaderID(String samFileName, List<String> tags) {
|
||||
this(new File(samFileName),tags);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the tags associated with the given BAM file.
|
||||
* @return A collection of the tags associated with this file.
|
||||
|
|
|
|||
|
|
@ -80,23 +80,6 @@ public class RMDTrack {
|
|||
return file;
|
||||
}
|
||||
|
||||
public Class getRecordType() {
|
||||
return recordType;
|
||||
}
|
||||
|
||||
/**
|
||||
* helper function for determining if we are the same track based on name and record type
|
||||
*
|
||||
* @param name the name to match
|
||||
* @param type the type to match
|
||||
*
|
||||
* @return true on a match, false if the name or type is different
|
||||
*/
|
||||
public boolean matchesNameAndRecordType(String name, Type type) {
|
||||
return (name.equals(this.name) && (type.getClass().isAssignableFrom(this.type.getClass())));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a track
|
||||
*
|
||||
|
|
@ -130,15 +113,6 @@ public class RMDTrack {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* do we support the query interface?
|
||||
*
|
||||
* @return true
|
||||
*/
|
||||
public boolean supportsQuery() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,14 +33,12 @@ import org.broad.tribble.index.Index;
|
|||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.source.BasicFeatureSource;
|
||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -55,6 +53,7 @@ import java.io.*;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
|
|
@ -146,45 +145,48 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
/**
|
||||
* create a RMDTrack of the specified type
|
||||
*
|
||||
* @param targetClass the target class of track
|
||||
* @param name what to call the track
|
||||
* @param inputFile the input file
|
||||
* @param fileDescriptor a description of the type of track to build.
|
||||
*
|
||||
* @return an instance of the track
|
||||
* @throws RMDTrackCreationException
|
||||
* if we don't know of the target class or we couldn't create it
|
||||
*/
|
||||
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
|
||||
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException {
|
||||
String name = fileDescriptor.getName();
|
||||
File inputFile = new File(fileDescriptor.getFile());
|
||||
|
||||
Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
||||
if (featureCodecClass == null)
|
||||
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
|
||||
|
||||
// return a feature reader track
|
||||
Pair<FeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
|
||||
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
||||
return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(targetClass,name));
|
||||
}
|
||||
|
||||
/**
|
||||
* create a tribble feature reader class, given the target class and the input file
|
||||
* @param targetClass the target class, of a Tribble Codec type
|
||||
* @param inputFile the input file, that corresponds to the feature type
|
||||
* @return a pair of <BasicFeatureSource, SAMSequenceDictionary>
|
||||
*/
|
||||
public Pair<FeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
|
||||
return createFeatureReader(targetClass, "anonymous", inputFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a feature reader of the specified type
|
||||
* @param targetClass the target codec type
|
||||
* @param name the target name
|
||||
* @param inputFile the input file to create the track from (of the codec type)
|
||||
* @return the FeatureReader instance
|
||||
*/
|
||||
public Pair<FeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, String name, File inputFile) {
|
||||
Pair<FeatureSource, SAMSequenceDictionary> pair;
|
||||
if (inputFile.getAbsolutePath().endsWith(".gz"))
|
||||
pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile);
|
||||
pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile);
|
||||
else
|
||||
pair = getFeatureSource(targetClass, name, inputFile);
|
||||
return pair;
|
||||
pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType());
|
||||
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
||||
return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream.
|
||||
* @param targetClass Type of Tribble class to build.
|
||||
* @param inputFile Input file to use.
|
||||
* @return An RMDTrack, suitable for accessing reference metadata.
|
||||
*/
|
||||
public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) {
|
||||
// TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics.
|
||||
String typeName = null;
|
||||
for(Map.Entry<String,Class> trackType: getAvailableTrackNamesAndTypes().entrySet()) {
|
||||
if(trackType.getValue().equals(targetClass))
|
||||
typeName = trackType.getKey();
|
||||
}
|
||||
|
||||
if(typeName == null)
|
||||
throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName());
|
||||
|
||||
return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -197,7 +199,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
* @param inputFile the file to load
|
||||
* @return a feature reader implementation
|
||||
*/
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> createBasicFeatureSourceNoAssumedIndex(Class targetClass, String name, File inputFile) {
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) {
|
||||
// we might not know the index type, try loading with the default reader constructor
|
||||
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
|
||||
try {
|
||||
|
|
@ -213,7 +215,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
* @param name the name to assign this codec
|
||||
* @return the feature codec itself
|
||||
*/
|
||||
private FeatureCodec createCodec(Class targetClass, String name) {
|
||||
public FeatureCodec createCodec(Class targetClass, String name) {
|
||||
FeatureCodec codex = this.createByType(targetClass);
|
||||
if ( codex instanceof NameAwareCodec )
|
||||
((NameAwareCodec)codex).setName(name);
|
||||
|
|
@ -227,33 +229,48 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
* @param targetClass the target class
|
||||
* @param name the name of the codec
|
||||
* @param inputFile the tribble file to parse
|
||||
* @param storageType How the RMD is stored: as a file (can be indexed) or as a stream (can't be indexed).
|
||||
* @return the input file as a FeatureReader
|
||||
*/
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(Class targetClass, String name, File inputFile) {
|
||||
Pair<FeatureSource, SAMSequenceDictionary> reader;
|
||||
try {
|
||||
Index index = loadIndex(inputFile, createCodec(targetClass, name));
|
||||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||
catch ( ReviewedStingException e) { }
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) {
|
||||
// Feature source and sequence dictionary to use as the ultimate reference
|
||||
FeatureSource featureSource = null;
|
||||
SAMSequenceDictionary sequenceDictionary = null;
|
||||
|
||||
SAMSequenceDictionary dictFromIndex = getSequenceDictionaryFromProperties(index);
|
||||
FeatureCodec codec = createCodec(targetClass, name);
|
||||
|
||||
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
||||
if (dictFromIndex.size() == 0 && dict != null) {
|
||||
File indexFile = Tribble.indexFile(inputFile);
|
||||
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
|
||||
dictFromIndex = getSequenceDictionaryFromProperties(index);
|
||||
// Detect whether or not this source should be indexed.
|
||||
boolean canBeIndexed = (storageType == RMDStorageType.FILE);
|
||||
|
||||
if(canBeIndexed) {
|
||||
try {
|
||||
Index index = loadIndex(inputFile, codec);
|
||||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||
catch (ReviewedStingException e) { }
|
||||
|
||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
||||
|
||||
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
||||
if (sequenceDictionary.size() == 0 && dict != null) {
|
||||
File indexFile = Tribble.indexFile(inputFile);
|
||||
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
|
||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
||||
}
|
||||
|
||||
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, codec);
|
||||
}
|
||||
catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage());
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(inputFile, "unable to write Tribble index", e);
|
||||
}
|
||||
|
||||
FeatureSource source = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name));
|
||||
//source = new CachingFeatureSource(source, 100, 100000);
|
||||
reader = new Pair<FeatureSource, SAMSequenceDictionary>(source, dictFromIndex);
|
||||
} catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage());
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(inputFile, "unable to write Tribble index", e);
|
||||
}
|
||||
return reader;
|
||||
else {
|
||||
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),codec,false);
|
||||
}
|
||||
|
||||
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -402,29 +419,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
return names;
|
||||
}
|
||||
|
||||
/**
|
||||
* find the associated reference meta data
|
||||
*
|
||||
* @param argCollection the input arguments to the GATK.
|
||||
* @param engine the GATK engine to bind the tracks to
|
||||
*
|
||||
* @return a list of RMDTracks, one for each -B option
|
||||
*/
|
||||
public List<RMDTrack> getReferenceMetaDataSources(GenomeAnalysisEngine engine, GATKArgumentCollection argCollection) {
|
||||
// try and make the tracks given their requests
|
||||
// create of live instances of the tracks
|
||||
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
|
||||
|
||||
// create instances of each of the requested types
|
||||
for (RMDTriplet trip : inputs) {
|
||||
Class featureCodecClass = getAvailableTrackNamesAndTypes().get(trip.getType().toUpperCase());
|
||||
if (featureCodecClass == null)
|
||||
throw new UserException.BadArgumentValue("-B",trip.getType());
|
||||
tracks.add(createInstanceOfTrack(featureCodecClass, trip.getName(), new File(trip.getFile())));
|
||||
}
|
||||
return tracks;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
// static functions to work with the sequence dictionaries of indexes
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -16,15 +16,15 @@ import java.util.*;
|
|||
* Creates an interval list, given an RMDTrack
|
||||
*/
|
||||
public class RMDIntervalGenerator {
|
||||
public RMDTrack track;
|
||||
public ReferenceOrderedDataSource dataSource;
|
||||
|
||||
/**
|
||||
* create an interval representation of a ROD track
|
||||
* @param track the track
|
||||
* @param dataSource the reference-ordered data source to convert into intervals
|
||||
*/
|
||||
public RMDIntervalGenerator(RMDTrack track) {
|
||||
if (track == null) throw new IllegalArgumentException("Track cannot be null");
|
||||
this.track = track;
|
||||
public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) {
|
||||
if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null");
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -32,10 +32,10 @@ public class RMDIntervalGenerator {
|
|||
* @return a list of genome locations
|
||||
*/
|
||||
public List<GenomeLoc> toGenomeLocList() {
|
||||
Iterator<GATKFeature> iter = track.getIterator();
|
||||
Iterator<RODRecordList> iter = dataSource.seek((GenomeLoc)null);
|
||||
List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
|
||||
while (iter.hasNext()) {
|
||||
GATKFeature feature = iter.next();
|
||||
RODRecordList feature = iter.next();
|
||||
GenomeLoc loc = feature.getLocation();
|
||||
if (loc != null) locations.add(loc);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,27 +24,55 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
|
||||
/** a helper class to manage our triplets of data for the -B command line option (name, type, file) */
|
||||
/**
|
||||
* a helper class to manage our triplets of data for the -B command line option (name, type, file)
|
||||
* TODO: The presence of four datapoints here suggests that this class' name isn't sufficient to describe its function. Rename.
|
||||
*/
|
||||
public class RMDTriplet {
|
||||
private String name;
|
||||
private String type;
|
||||
private String file;
|
||||
public enum RMDStorageType { FILE, STREAM };
|
||||
|
||||
public RMDTriplet(String name, String type, String file) {
|
||||
private final String name;
|
||||
private final String type;
|
||||
private final String file;
|
||||
private final RMDStorageType storageType;
|
||||
|
||||
public RMDTriplet(final String name, final String type, final String file, final RMDStorageType storageType) {
|
||||
this.name = name;
|
||||
this.type = type;
|
||||
this.file = file;
|
||||
this.storageType = storageType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of this track. RefMetaDataTrackers can use this identifier to retrieve data of a certain type.
|
||||
* @return Name associated with this track.
|
||||
*/
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the type of this track. Informs the GATK how to parse this file type.
|
||||
* @return Type associated with this track.
|
||||
*/
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the filename representing this track. Data is loaded from this file.
|
||||
* @return Filename of the RMD.
|
||||
*/
|
||||
public String getFile() {
|
||||
return file;
|
||||
}
|
||||
|
||||
/**
|
||||
* The type of storage being used for this metadata track. Right now, can be either a
|
||||
* file type (can be indexed) or a stream type (can't be indexed).
|
||||
* @return Storage type for this RMD 'triplet'.
|
||||
*/
|
||||
public RMDStorageType getStorageType() {
|
||||
return storageType;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
throw new UserException.BadInput("No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records");
|
||||
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
|
||||
dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, dbsnpDataSource.getReferenceOrderedData().getFile()).getIterator();
|
||||
dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, dbsnpDataSource.getFile()).getIterator();
|
||||
// Note that we should really use some sort of seekable iterator here so that the search doesn't take forever
|
||||
// (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,11 +94,11 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> implements Tre
|
|||
final Set<String> allFullyQualifiedColumnNames = new LinkedHashSet<String>();
|
||||
final Set<String> allBindingNames = new LinkedHashSet<String>();
|
||||
for(ReferenceOrderedDataSource ds : getToolkit().getRodDataSources()) {
|
||||
if(! ds.getReferenceOrderedData().getType().equals(AnnotatorInputTableCodec.class)) {
|
||||
if(! ds.getType().equals(AnnotatorInputTableCodec.class)) {
|
||||
continue; //skip all non-AnnotatorInputTable files.
|
||||
}
|
||||
final String bindingName = ds.getName();
|
||||
File file = ds.getReferenceOrderedData().getFile();
|
||||
File file = ds.getFile();
|
||||
allBindingNames.add(bindingName);
|
||||
try {
|
||||
final ArrayList<String> header = AnnotatorInputTableCodec.readHeader(file);
|
||||
|
|
|
|||
|
|
@ -172,14 +172,14 @@ public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
|
|||
|
||||
final ArrayList<String> header;
|
||||
try {
|
||||
header = AnnotatorInputTableCodec.readHeader(transcriptsDataSource.getReferenceOrderedData().getFile());
|
||||
header = AnnotatorInputTableCodec.readHeader(transcriptsDataSource.getFile());
|
||||
} catch(Exception e) {
|
||||
throw new UserException.MalformedFile(transcriptsDataSource.getReferenceOrderedData().getFile(), "Failed when attempting to read header from file", e);
|
||||
throw new UserException.MalformedFile(transcriptsDataSource.getFile(), "Failed when attempting to read header from file", e);
|
||||
}
|
||||
|
||||
for ( String columnName : GENE_NAME_COLUMNS ) {
|
||||
if ( !header.contains(columnName) )
|
||||
throw new UserException.CommandLineException("The column name '" + columnName + "' provided to -n doesn't match any of the column names in: " + transcriptsDataSource.getReferenceOrderedData().getFile());
|
||||
throw new UserException.CommandLineException("The column name '" + columnName + "' provided to -n doesn't match any of the column names in: " + transcriptsDataSource.getFile());
|
||||
}
|
||||
|
||||
//init outputColumnNames list
|
||||
|
|
|
|||
|
|
@ -96,9 +96,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||
|
||||
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||
final RMDTrack rod = source.getReferenceOrderedData();
|
||||
|
||||
if (rod.getName().equals(COMP_ROD_NAME)) {
|
||||
if (source.getName().equals(COMP_ROD_NAME)) {
|
||||
hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
|
||||
hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
|
||||
hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site"));
|
||||
|
|
|
|||
|
|
@ -26,13 +26,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broad.tribble.FeatureSource;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -44,7 +44,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -406,15 +405,9 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
|
|||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
getToolkit().getArguments().unsafe);
|
||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,refSeqGeneList).first;
|
||||
try {
|
||||
return new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),
|
||||
refseq.iterator(),"refseq"));
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(refSeqGeneList, "Unable to open file", e);
|
||||
}
|
||||
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,refSeqGeneList);
|
||||
return new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),refseq.getIterator());
|
||||
}
|
||||
|
||||
private void printTargetSummary(PrintStream output, Pair<?,DepthOfCoverageStats> intervalStats) {
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
if ( source.getReferenceOrderedData().getName().equals("mask") ) {
|
||||
if ( source.getName().equals("mask") ) {
|
||||
hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask"));
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broad.tribble.FeatureSource;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
|
|
@ -35,9 +34,10 @@ import org.broadinstitute.sting.gatk.filters.*;
|
|||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
|
@ -56,7 +56,6 @@ import org.broadinstitute.sting.utils.collections.CircularArray;
|
|||
import org.broadinstitute.sting.utils.collections.PrimitivePair;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
|
||||
import java.io.*;
|
||||
|
|
@ -228,8 +227,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
|
||||
headerInfo.add(new VCFHeaderLine(String.format("IGv2_%s", commandLineArg.getKey()), commandLineArg.getValue()));
|
||||
// also, the list of input bams
|
||||
for ( File file : getToolkit().getArguments().samFiles )
|
||||
headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", file.getName()));
|
||||
for ( String fileName : getToolkit().getArguments().samFiles )
|
||||
headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", fileName));
|
||||
|
||||
return headerInfo;
|
||||
}
|
||||
|
|
@ -251,15 +250,11 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
getToolkit().getArguments().unsafe);
|
||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
||||
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName));
|
||||
|
||||
try {
|
||||
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq"));
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(new File(RefseqFileName), "Write failed", e);
|
||||
}
|
||||
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
refseq.getIterator());
|
||||
}
|
||||
|
||||
if ( refseqIterator == null ) logger.info("No gene annotations available");
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ public class CycleQualityWalker extends ReadWalker<Integer,Integer> {
|
|||
if ( HTML ) {
|
||||
out.println("<h3>Cycle Quality QC</h3>\n");
|
||||
out.println("File(s) analyzed: <br>");
|
||||
for ( File f : getToolkit().getArguments().samFiles) out.println(f.toString()+"<br>");
|
||||
for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"<br>");
|
||||
out.println("<br>");
|
||||
}
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
|
|||
private File getRodFile() {
|
||||
List<ReferenceOrderedDataSource> rods = this.getToolkit().getRodDataSources();
|
||||
ReferenceOrderedDataSource rod = rods.get(0);
|
||||
return rod.getReferenceOrderedData().getFile();
|
||||
return rod.getFile();
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -187,9 +187,9 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
boolean foundDBSNP = false;
|
||||
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
|
||||
if( rod != null ) {
|
||||
if( rod.getReferenceOrderedData().getType().equals(DbSNPCodec.class) ||
|
||||
rod.getReferenceOrderedData().getType().equals(VCFCodec.class) ||
|
||||
rod.getReferenceOrderedData().getType().equals(BEDCodec.class) ) {
|
||||
if( rod.getType().equals(DbSNPCodec.class) ||
|
||||
rod.getType().equals(VCFCodec.class) ||
|
||||
rod.getType().equals(BEDCodec.class) ) {
|
||||
foundDBSNP = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
ReferenceOrderedData snp_mask;
|
||||
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
|
||||
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
|
||||
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,new java.io.File(SNP_MASK)).getIterator();
|
||||
snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter);
|
||||
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
public void initialize() {
|
||||
for ( ReferenceOrderedDataSource source : getToolkit().getRodDataSources() ) {
|
||||
if ( source.getName().equals(TARGET_ROD_NAME) ) {
|
||||
file = source.getReferenceOrderedData().getFile();
|
||||
file = source.getFile();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,15 +1,16 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.FeatureSource;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
|
|
@ -34,7 +35,7 @@ import java.io.PrintStream;
|
|||
public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
|
||||
|
||||
// what we read in new tracks with
|
||||
private FeatureSource reader;
|
||||
private RMDTrack track;
|
||||
|
||||
@Output
|
||||
private PrintStream out;
|
||||
|
|
@ -50,12 +51,12 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
|
|||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
getToolkit().getArguments().unsafe);
|
||||
reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first;
|
||||
track = builder.createInstanceOfTrack(DbSNPCodec.class,myDbSNPFile);
|
||||
}
|
||||
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
CloseableTribbleIterator<DbSNPFeature> dbSNPs;
|
||||
CloseableIterator<GATKFeature> dbSNPs;
|
||||
|
||||
// our upstream and downstream window locations
|
||||
int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0);
|
||||
|
|
@ -63,19 +64,17 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
|
|||
|
||||
// query the dbSNP iterator
|
||||
try {
|
||||
dbSNPs = reader.query(context.getContig(),
|
||||
windowStart,
|
||||
windowStop);
|
||||
dbSNPs = track.query(getToolkit().getGenomeLocParser().createGenomeLoc(context.getContig(),windowStart,windowStop));
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(myDbSNPFile, e);
|
||||
}
|
||||
|
||||
// count the number of dbSNPs we've seen
|
||||
int counter = 0;
|
||||
for (DbSNPFeature feature: dbSNPs)
|
||||
while(dbSNPs.hasNext())
|
||||
counter++;
|
||||
out.println(context.getContig() + ":" + windowStart + "-" + context.getContig() + ":" + windowStop + "=" +
|
||||
counter + " (dnSNP records)");
|
||||
counter + " (dbSNP records)");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,18 +10,16 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class IndelAnnotator extends RodWalker<Integer,Long> {
|
||||
|
|
@ -38,15 +36,10 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
|
|||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
getToolkit().getArguments().unsafe);
|
||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
||||
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName));
|
||||
|
||||
try {
|
||||
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq"));
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(RefseqFileName, e);
|
||||
}
|
||||
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),refseq.getIterator());
|
||||
|
||||
logger.info("Using RefSeq annotations from " + RefseqFileName);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,11 +86,10 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
|
|||
vcfWriter = new StandardVCFWriter(new File(OUTPUT_FILE));
|
||||
VCFHeader header = null;
|
||||
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||
final RMDTrack rod = source.getReferenceOrderedData();
|
||||
if(rod.getName().equalsIgnoreCase(INPUT_ROD_NAME)) {
|
||||
if(source.getName().equalsIgnoreCase(INPUT_ROD_NAME)) {
|
||||
|
||||
try {
|
||||
AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath()));
|
||||
AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(source.getFile().getAbsolutePath()));
|
||||
header = (VCFHeader)vcf4codec.readHeader(lineReader);
|
||||
out.printf("Read %d header lines%n", header.getMetaData().size());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,9 +55,9 @@ public class RodSystemValidationWalker extends RodWalker<Integer,Integer> {
|
|||
// enumerate the list of RODs we've loaded
|
||||
rodList = this.getToolkit().getRodDataSources();
|
||||
for (ReferenceOrderedDataSource rod : rodList) {
|
||||
out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getType());
|
||||
out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getFile());
|
||||
out.println(rod.getName() + DIVIDER + md5sum(rod.getReferenceOrderedData().getFile()));
|
||||
out.println(rod.getName() + DIVIDER + rod.getType());
|
||||
out.println(rod.getName() + DIVIDER + rod.getFile());
|
||||
out.println(rod.getName() + DIVIDER + md5sum(rod.getFile()));
|
||||
}
|
||||
out.println("Data:");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,9 +57,8 @@ public class VCFUtils {
|
|||
if ( rodNames != null && !rodNames.contains(source.getName()) )
|
||||
continue;
|
||||
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader )
|
||||
data.put(rod.getName(), (VCFHeader)rod.getHeader());
|
||||
if ( source.getHeader() != null && source.getHeader() instanceof VCFHeader )
|
||||
data.put(source.getName(), (VCFHeader)source.getHeader());
|
||||
}
|
||||
|
||||
return data;
|
||||
|
|
@ -96,9 +95,8 @@ public class VCFUtils {
|
|||
if ( rodNames != null && !rodNames.contains(source.getName()) )
|
||||
continue;
|
||||
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getRecordType().equals(VariantContext.class)) {
|
||||
VCFHeader header = (VCFHeader)rod.getHeader();
|
||||
if ( source.getRecordType().equals(VariantContext.class)) {
|
||||
VCFHeader header = (VCFHeader)source.getHeader();
|
||||
if ( header != null )
|
||||
fields.addAll(header.getMetaData());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,8 +76,8 @@ public class GATKArgumentCollectionUnitTest extends BaseTest {
|
|||
wArgs.put("wArgType3", "Arg3");
|
||||
collect.walkerArgs = wArgs;
|
||||
|
||||
List<File> input = new ArrayList<File>();
|
||||
input.add(new File("test.file"));
|
||||
List<String> input = new ArrayList<String>();
|
||||
input.add("test.file");
|
||||
collect.samFiles = input;
|
||||
collect.strictnessLevel = SAMFileReader.ValidationStringency.STRICT;
|
||||
collect.referenceFile = new File("referenceFile".toLowerCase());
|
||||
|
|
|
|||
|
|
@ -7,10 +7,9 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
|||
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
||||
|
|
@ -78,14 +77,9 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
*/
|
||||
@Test
|
||||
public void testSingleBinding() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file);
|
||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(Collections.singleton(new RMDTriplet("tableTest","Table",file.getAbsolutePath())),
|
||||
seq.getSequenceDictionary(),
|
||||
genomeLocParser,
|
||||
null,
|
||||
track,
|
||||
false);
|
||||
String fileName = testDir + "TabularDataTest.dat";
|
||||
RMDTriplet triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE);
|
||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
|
||||
|
|
@ -107,14 +101,11 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
public void testMultipleBinding() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
|
||||
RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath());
|
||||
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file);
|
||||
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(Collections.singleton(testTriplet1),seq.getSequenceDictionary(),genomeLocParser,null,track,false);
|
||||
|
||||
RMDTriplet testTriplet2 = new RMDTriplet("tableTest2","Table",file.getAbsolutePath());
|
||||
RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file);
|
||||
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(Collections.singleton(testTriplet2),seq.getSequenceDictionary(),genomeLocParser,null,track2,false);
|
||||
RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath(),RMDStorageType.FILE);
|
||||
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(testTriplet1,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
|
||||
RMDTriplet testTriplet2 = new RMDTriplet("tableTest2","Table",file.getAbsolutePath(),RMDStorageType.FILE);
|
||||
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(testTriplet2,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
|
||||
|
|
|
|||
|
|
@ -2,11 +2,12 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
|||
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -60,14 +61,14 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
|
||||
@BeforeMethod
|
||||
public void setUp() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
String fileName = testDir + "TabularDataTest.dat";
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
|
||||
rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file);
|
||||
rod = builder.createInstanceOfTrack(new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateSingleIterator() {
|
||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false);
|
||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||
|
|
@ -88,7 +89,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCreateMultipleIterators() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false);
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
|
||||
// Create a new iterator at position 2.
|
||||
|
|
@ -138,7 +139,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIteratorConservation() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false);
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||
|
|
@ -173,7 +174,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIteratorCreation() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser, rod, false);
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
|
||||
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||
|
|
|
|||
Loading…
Reference in New Issue