Updated the Tribble library to include reading and writing of the reference sequence dictionary. We now check the dictionaries of any tracks that have them against the reference (all new Tribble tracks, and tracks whose out-of-date index is regenerated, will have one). Also renamed some classes to better reflect their function.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3485 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3d055e3d16
commit
0b03e28b60
|
|
@ -334,8 +334,6 @@ public class GenomeAnalysisEngine {
|
||||||
validateSuppliedReferenceAgainstWalker(my_walker, argCollection);
|
validateSuppliedReferenceAgainstWalker(my_walker, argCollection);
|
||||||
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
|
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
|
||||||
|
|
||||||
validateReadsAndReferenceAreCompatible(readsDataSource, referenceDataSource);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// please don't use these in the future, use the new syntax <- if we're not using these please remove them
|
// please don't use these in the future, use the new syntax <- if we're not using these please remove them
|
||||||
//
|
//
|
||||||
|
|
@ -353,6 +351,9 @@ public class GenomeAnalysisEngine {
|
||||||
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(argCollection.RODBindings);
|
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(argCollection.RODBindings);
|
||||||
validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, tracks);
|
validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, tracks);
|
||||||
|
|
||||||
|
// validate all the sequence dictionaries against the reference
|
||||||
|
validateSourcesAgainstReference(readsDataSource, referenceDataSource, tracks);
|
||||||
|
|
||||||
rodDataSources = getReferenceOrderedDataSources(my_walker, tracks);
|
rodDataSources = getReferenceOrderedDataSources(my_walker, tracks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -604,22 +605,16 @@ public class GenomeAnalysisEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference.
|
* Now that all files are open, validate the sequence dictionaries of the reads and of the reference ordered data (if available) against the reference.
|
||||||
*
|
*
|
||||||
* @param reads Reads data source.
|
* @param reads Reads data source.
|
||||||
* @param reference Reference data source.
|
* @param reference Reference data source.
|
||||||
|
* @param tracks a collection of the reference ordered data tracks
|
||||||
*/
|
*/
|
||||||
private void validateReadsAndReferenceAreCompatible(SAMDataSource reads, ReferenceSequenceFile reference) {
|
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<RMDTrack> tracks) {
|
||||||
if (reads == null || reference == null)
|
if ((reads == null && (tracks == null || tracks.isEmpty())) || reference == null )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Compile a set of sequence names that exist in the BAM files.
|
|
||||||
SAMSequenceDictionary readsDictionary = reads.getHeader().getSequenceDictionary();
|
|
||||||
|
|
||||||
Set<String> readsSequenceNames = new TreeSet<String>();
|
|
||||||
for (SAMSequenceRecord dictionaryEntry : readsDictionary.getSequences())
|
|
||||||
readsSequenceNames.add(dictionaryEntry.getSequenceName());
|
|
||||||
|
|
||||||
// Compile a set of sequence names that exist in the reference file.
|
// Compile a set of sequence names that exist in the reference file.
|
||||||
SAMSequenceDictionary referenceDictionary = reference.getSequenceDictionary();
|
SAMSequenceDictionary referenceDictionary = reference.getSequenceDictionary();
|
||||||
|
|
||||||
|
|
@ -627,32 +622,70 @@ public class GenomeAnalysisEngine {
|
||||||
for (SAMSequenceRecord dictionaryEntry : referenceDictionary.getSequences())
|
for (SAMSequenceRecord dictionaryEntry : referenceDictionary.getSequences())
|
||||||
referenceSequenceNames.add(dictionaryEntry.getSequenceName());
|
referenceSequenceNames.add(dictionaryEntry.getSequenceName());
|
||||||
|
|
||||||
|
|
||||||
|
if (reads != null) {
|
||||||
|
// Compile a set of sequence names that exist in the BAM files.
|
||||||
|
SAMSequenceDictionary readsDictionary = reads.getHeader().getSequenceDictionary();
|
||||||
|
|
||||||
|
Set<String> readsSequenceNames = new TreeSet<String>();
|
||||||
|
for (SAMSequenceRecord dictionaryEntry : readsDictionary.getSequences())
|
||||||
|
readsSequenceNames.add(dictionaryEntry.getSequenceName());
|
||||||
|
|
||||||
|
|
||||||
if (readsSequenceNames.size() == 0) {
|
if (readsSequenceNames.size() == 0) {
|
||||||
logger.info("Reads file is unmapped. Skipping validation against reference.");
|
logger.info("Reads file is unmapped. Skipping validation against reference.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// compare the reads to the reference
|
||||||
|
compareTwoDictionaries("reads", readsDictionary, readsSequenceNames, referenceDictionary, referenceSequenceNames);
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare the tracks to the reference, if they have a sequence dictionary
|
||||||
|
for (RMDTrack track : tracks) {
|
||||||
|
SAMSequenceDictionary trackDict = track.getSequenceDictionary();
|
||||||
|
if (trackDict == null) {
|
||||||
|
logger.info("Track " + track.getName() + "doesn't have a sequence dictionary built in, skipping dictionary validation");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Set<String> trackSequences = new TreeSet<String>();
|
||||||
|
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
||||||
|
trackSequences.add(dictionaryEntry.getSequenceName());
|
||||||
|
compareTwoDictionaries(track.getName(), trackDict, trackSequences, referenceDictionary, referenceSequenceNames);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* compare two dictionaries, warning if one isn't a subset of the other, or erroring out if they have no overlap
|
||||||
|
* @param compareToName the name of the track or bam (used in the output to the user)
|
||||||
|
* @param comparedToDictionary the dictionary to compare to
|
||||||
|
* @param compareToSequenceNames the unique sequence names in the compared to dictionary
|
||||||
|
* @param referenceDictionary the reference dictionary
|
||||||
|
* @param referenceSequenceNames the reference unique sequence names
|
||||||
|
*/
|
||||||
|
private void compareTwoDictionaries(String compareToName, SAMSequenceDictionary comparedToDictionary, Set<String> compareToSequenceNames, SAMSequenceDictionary referenceDictionary, Set<String> referenceSequenceNames) {
|
||||||
// If there's no overlap between reads and reference, data will be bogus. Throw an exception.
|
// If there's no overlap between reads and reference, data will be bogus. Throw an exception.
|
||||||
Set<String> intersectingSequenceNames = new HashSet<String>(readsSequenceNames);
|
Set<String> intersectingSequenceNames = new HashSet<String>(compareToSequenceNames);
|
||||||
intersectingSequenceNames.retainAll(referenceSequenceNames);
|
intersectingSequenceNames.retainAll(referenceSequenceNames);
|
||||||
if (intersectingSequenceNames.size() == 0) {
|
if (intersectingSequenceNames.size() == 0) {
|
||||||
StringBuilder error = new StringBuilder();
|
StringBuilder error = new StringBuilder();
|
||||||
error.append("No overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference. Perhaps you're using the wrong reference?\n");
|
error.append("No overlap exists between sequence dictionary of the " + compareToName + " and the sequence dictionary of the reference. Perhaps you're using the wrong reference?\n");
|
||||||
error.append(System.getProperty("line.separator"));
|
error.append(System.getProperty("line.separator"));
|
||||||
error.append(String.format("Reads contigs: %s%n", prettyPrintSequenceRecords(readsDictionary)));
|
error.append(String.format(compareToName + " contigs: %s%n", prettyPrintSequenceRecords(comparedToDictionary)));
|
||||||
error.append(String.format("Reference contigs: %s%n", prettyPrintSequenceRecords(referenceDictionary)));
|
error.append(String.format("Reference contigs: %s%n", prettyPrintSequenceRecords(referenceDictionary)));
|
||||||
logger.error(error.toString());
|
logger.error(error.toString());
|
||||||
Utils.scareUser("No overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference.");
|
Utils.scareUser("No overlap exists between sequence dictionary of " + compareToName + " and the sequence dictionary of the reference.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the two datasets are not equal and neither is a strict subset of the other, warn the user.
|
// If the two datasets are not equal and neither is a strict subset of the other, warn the user.
|
||||||
if (!readsSequenceNames.equals(referenceSequenceNames) &&
|
if (!compareToSequenceNames.equals(referenceSequenceNames) &&
|
||||||
!readsSequenceNames.containsAll(referenceSequenceNames) &&
|
!compareToSequenceNames.containsAll(referenceSequenceNames) &&
|
||||||
!referenceSequenceNames.containsAll(readsSequenceNames)) {
|
!referenceSequenceNames.containsAll(compareToSequenceNames)) {
|
||||||
StringBuilder warning = new StringBuilder();
|
StringBuilder warning = new StringBuilder();
|
||||||
warning.append("Limited overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference. Perhaps you're using the wrong reference?\n");
|
warning.append("Limited overlap exists between sequence dictionary of the " + compareToName + " and the sequence dictionary of the reference. Perhaps you're using the wrong reference?\n");
|
||||||
warning.append(System.getProperty("line.separator"));
|
warning.append(System.getProperty("line.separator"));
|
||||||
warning.append(String.format("Reads contigs: %s%n", prettyPrintSequenceRecords(readsDictionary)));
|
warning.append(String.format(compareToName + " contigs: %s%n", prettyPrintSequenceRecords(comparedToDictionary)));
|
||||||
warning.append(String.format("Reference contigs: %s%n", prettyPrintSequenceRecords(referenceDictionary)));
|
warning.append(String.format("Reference contigs: %s%n", prettyPrintSequenceRecords(referenceDictionary)));
|
||||||
logger.warn(warning.toString());
|
logger.warn(warning.toString());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
import org.broad.tribble.FeatureReader;
|
import org.broad.tribble.FeatureReader;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||||
|
|
@ -15,8 +15,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
|
|
@ -52,7 +50,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
||||||
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
|
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
|
||||||
this.rod = rod;
|
this.rod = rod;
|
||||||
if (rod.supportsQuery())
|
if (rod.supportsQuery())
|
||||||
iteratorPool = new ReferenceOrderedQueryDataPool(new TribbleRMDTrackBuilder(), (FeatureReaderTrack)rod);
|
iteratorPool = new ReferenceOrderedQueryDataPool(new TribbleRMDTrackBuilder(), (TribbleTrack)rod);
|
||||||
else
|
else
|
||||||
iteratorPool = new ReferenceOrderedDataPool( walker, rod );
|
iteratorPool = new ReferenceOrderedDataPool( walker, rod );
|
||||||
}
|
}
|
||||||
|
|
@ -187,7 +185,7 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, Location
|
||||||
// our tribble track builder
|
// our tribble track builder
|
||||||
private final TribbleRMDTrackBuilder builder;
|
private final TribbleRMDTrackBuilder builder;
|
||||||
|
|
||||||
public ReferenceOrderedQueryDataPool( TribbleRMDTrackBuilder builder, FeatureReaderTrack rod ) {
|
public ReferenceOrderedQueryDataPool( TribbleRMDTrackBuilder builder, TribbleTrack rod ) {
|
||||||
this.rod = rod;
|
this.rod = rod;
|
||||||
this.builder = builder;
|
this.builder = builder;
|
||||||
// a little bit of a hack, but it saves us from re-reading the index from the file
|
// a little bit of a hack, but it saves us from re-reading the index from the file
|
||||||
|
|
@ -196,7 +194,7 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, Location
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected FeatureReader createNewResource() {
|
protected FeatureReader createNewResource() {
|
||||||
return builder.createFeatureReader(rod.getType(),rod.getFile());
|
return builder.createFeatureReader(rod.getType(),rod.getFile()).first;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
|
|
||||||
|
|
@ -112,4 +113,12 @@ public abstract class RMDTrack {
|
||||||
* @return true if we can be cast to the QueryableTrack interface
|
* @return true if we can be cast to the QueryableTrack interface
|
||||||
*/
|
*/
|
||||||
public abstract boolean supportsQuery();
|
public abstract boolean supportsQuery();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the sequence dictionary from the track, if available
|
||||||
|
* @return a SAMSequenceDictionary if available, null if unavailable
|
||||||
|
*/
|
||||||
|
public SAMSequenceDictionary getSequenceDictionary() {
|
||||||
|
return null; // default, others can override this
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -23,6 +23,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import org.broad.tribble.FeatureReader;
|
import org.broad.tribble.FeatureReader;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||||
|
|
@ -32,21 +33,23 @@ import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @author aaron
|
* @author aaron
|
||||||
*
|
*
|
||||||
* Class FeatureReaderTrack
|
* Class TribbleTrack
|
||||||
*
|
*
|
||||||
* A feature reader track, implementing the RMDTrack for tracks that are generated out of Tribble
|
* A feature reader track, implementing the RMDTrack for tracks that are generated out of Tribble
|
||||||
*/
|
*/
|
||||||
public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
|
public class TribbleTrack extends RMDTrack implements QueryableTrack {
|
||||||
// our feature reader - allows queries
|
// our feature reader - allows queries
|
||||||
private FeatureReader reader;
|
private FeatureReader reader;
|
||||||
|
|
||||||
|
// our sequence dictionary, which can be null
|
||||||
|
private final SAMSequenceDictionary dictionary;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a track
|
* Create a track
|
||||||
*
|
*
|
||||||
|
|
@ -55,10 +58,12 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
|
||||||
* @param name the name of this specific track
|
* @param name the name of this specific track
|
||||||
* @param file the associated file, for reference or recreating the reader
|
* @param file the associated file, for reference or recreating the reader
|
||||||
* @param reader the feature reader to use as the underlying data source
|
* @param reader the feature reader to use as the underlying data source
|
||||||
|
* @param dict the sam sequence dictionary
|
||||||
*/
|
*/
|
||||||
public FeatureReaderTrack(Class type, Class recordType, String name, File file, FeatureReader reader) {
|
public TribbleTrack(Class type, Class recordType, String name, File file, FeatureReader reader, SAMSequenceDictionary dict) {
|
||||||
super(type, recordType, name, file);
|
super(type, recordType, name, file);
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
|
this.dictionary = dict;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -117,4 +122,12 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
|
||||||
public FeatureReader getReader() {
|
public FeatureReader getReader() {
|
||||||
return reader;
|
return reader;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the sequence dictionary from the track, if available
|
||||||
|
* @return a SAMSequenceDictionary if available, null if unavailable
|
||||||
|
*/
|
||||||
|
public SAMSequenceDictionary getSequenceDictionary() {
|
||||||
|
return dictionary;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -25,16 +25,17 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
|
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.*;
|
import org.broad.tribble.*;
|
||||||
import org.broad.tribble.index.Index;
|
import org.broad.tribble.index.Index;
|
||||||
import org.broad.tribble.index.linear.LinearIndex;
|
import org.broad.tribble.index.linear.LinearIndex;
|
||||||
import org.broad.tribble.index.linear.LinearIndexCreator;
|
import org.broad.tribble.index.linear.LinearIndexCreator;
|
||||||
import org.broad.tribble.readers.BasicFeatureReader;
|
import org.broad.tribble.readers.BasicFeatureReader;
|
||||||
import org.broad.tribble.util.LineReader;
|
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.file.FSLock;
|
import org.broadinstitute.sting.utils.file.FSLock;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
@ -94,7 +95,9 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
@Override
|
@Override
|
||||||
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
|
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
|
||||||
// return a feature reader track
|
// return a feature reader track
|
||||||
return new FeatureReaderTrack(targetClass, this.createByType(targetClass).getFeatureType(), name, inputFile, createFeatureReader(targetClass, inputFile));
|
Pair<BasicFeatureReader, SAMSequenceDictionary> pair = createFeatureReader(targetClass, inputFile);
|
||||||
|
if (pair == null) throw new StingException("Unable to make the feature reader for input file " + inputFile);
|
||||||
|
return new TribbleTrack(targetClass, this.createByType(targetClass).getFeatureType(), name, inputFile, pair.first, pair.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -103,13 +106,13 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
* @param inputFile the input file to create the track from (of the codec type)
|
* @param inputFile the input file to create the track from (of the codec type)
|
||||||
* @return the FeatureReader instance
|
* @return the FeatureReader instance
|
||||||
*/
|
*/
|
||||||
public FeatureReader createFeatureReader(Class targetClass, File inputFile) {
|
public Pair<BasicFeatureReader, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
|
||||||
FeatureReader reader = null;
|
Pair<BasicFeatureReader, SAMSequenceDictionary> pair = null;
|
||||||
if (inputFile.getAbsolutePath().endsWith(".gz"))
|
if (inputFile.getAbsolutePath().endsWith(".gz"))
|
||||||
reader = createBasicFeatureReaderNoAssumedIndex(targetClass, inputFile);
|
pair = createBasicFeatureReaderNoAssumedIndex(targetClass, inputFile);
|
||||||
else
|
else
|
||||||
reader = getLinearFeatureReader(targetClass, inputFile);
|
pair = getLinearFeatureReader(targetClass, inputFile);
|
||||||
return reader;
|
return pair;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -121,11 +124,11 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
* @param inputFile the file to load
|
* @param inputFile the file to load
|
||||||
* @return a feature reader implementation
|
* @return a feature reader implementation
|
||||||
*/
|
*/
|
||||||
private BasicFeatureReader createBasicFeatureReaderNoAssumedIndex(Class targetClass, File inputFile) {
|
private Pair<BasicFeatureReader, SAMSequenceDictionary> createBasicFeatureReaderNoAssumedIndex(Class targetClass, File inputFile) {
|
||||||
// we might not know the index type, try loading with the default reader constructor
|
// we might not know the index type, try loading with the default reader constructor
|
||||||
logger.debug("Attempting to blindly load " + inputFile);
|
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
|
||||||
try {
|
try {
|
||||||
return new BasicFeatureReader(inputFile.getAbsolutePath(),this.createByType(targetClass));
|
return new Pair<BasicFeatureReader, SAMSequenceDictionary>(new BasicFeatureReader(inputFile.getAbsolutePath(),this.createByType(targetClass)),null);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new StingException("Unable to create feature reader from file " + inputFile);
|
throw new StingException("Unable to create feature reader from file " + inputFile);
|
||||||
}
|
}
|
||||||
|
|
@ -137,11 +140,11 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
* @param inputFile the tribble file to parse
|
* @param inputFile the tribble file to parse
|
||||||
* @return the input file as a FeatureReader
|
* @return the input file as a FeatureReader
|
||||||
*/
|
*/
|
||||||
private FeatureReader getLinearFeatureReader(Class targetClass, File inputFile) {
|
private Pair<BasicFeatureReader, SAMSequenceDictionary> getLinearFeatureReader(Class targetClass, File inputFile) {
|
||||||
FeatureReader reader;
|
Pair<BasicFeatureReader, SAMSequenceDictionary> reader;
|
||||||
try {
|
try {
|
||||||
Index index = loadIndex(inputFile, this.createByType(targetClass), true);
|
Index index = loadIndex(inputFile, this.createByType(targetClass), true);
|
||||||
reader = new BasicFeatureReader(inputFile.getAbsolutePath(), index, this.createByType(targetClass));
|
reader = new Pair<BasicFeatureReader, SAMSequenceDictionary>(new BasicFeatureReader(inputFile.getAbsolutePath(), index, this.createByType(targetClass)),index.getSequenceDictionary());
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
throw new StingException("Unable to create reader with file " + inputFile, e);
|
throw new StingException("Unable to create reader with file " + inputFile, e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
|
@ -160,9 +163,6 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
*/
|
*/
|
||||||
public synchronized static Index loadIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
|
public synchronized static Index loadIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
|
||||||
|
|
||||||
// our return index
|
|
||||||
LinearIndex returnIndex = null;
|
|
||||||
|
|
||||||
// create the index file name, locking on the index file name
|
// create the index file name, locking on the index file name
|
||||||
File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension);
|
File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension);
|
||||||
FSLock lock = new FSLock(indexFile);
|
FSLock lock = new FSLock(indexFile);
|
||||||
|
|
@ -178,7 +178,12 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
// if the file exists, and we can read it, load the index from disk (i.e. wasn't deleted in the last step).
|
// if the file exists, and we can read it, load the index from disk (i.e. wasn't deleted in the last step).
|
||||||
if (indexFile.exists() && indexFile.canRead() && obtainedLock) {
|
if (indexFile.exists() && indexFile.canRead() && obtainedLock) {
|
||||||
logger.info("Loading Tribble index from disk for file " + inputFile);
|
logger.info("Loading Tribble index from disk for file " + inputFile);
|
||||||
return LinearIndex.createIndex(indexFile);
|
Index index = LinearIndex.createIndex(indexFile);
|
||||||
|
if (index.isCurrentVersion())
|
||||||
|
return index;
|
||||||
|
|
||||||
|
logger.warn("Index file " + indexFile + " is out of date (old version), deleting and updating the index file");
|
||||||
|
indexFile.delete();
|
||||||
}
|
}
|
||||||
return writeIndexToDisk(inputFile, codec, onDisk, indexFile, obtainedLock);
|
return writeIndexToDisk(inputFile, codec, onDisk, indexFile, obtainedLock);
|
||||||
}
|
}
|
||||||
|
|
@ -200,6 +205,9 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
*/
|
*/
|
||||||
private static LinearIndex writeIndexToDisk(File inputFile, FeatureCodec codec, boolean onDisk, File indexFile, boolean obtainedLock) throws IOException {
|
private static LinearIndex writeIndexToDisk(File inputFile, FeatureCodec codec, boolean onDisk, File indexFile, boolean obtainedLock) throws IOException {
|
||||||
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
|
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
|
||||||
|
|
||||||
|
// this can take a while, let them know what we're doing
|
||||||
|
logger.info("Creating Tribble index in memory for file " + inputFile);
|
||||||
LinearIndex index = create.createIndex(null); // we don't want to write initially, so we pass in null
|
LinearIndex index = create.createIndex(null); // we don't want to write initially, so we pass in null
|
||||||
|
|
||||||
// if the index doesn't exist, and we can write to the directory, and we got a lock: write to the disk
|
// if the index doesn't exist, and we can write to the directory, and we got a lock: write to the disk
|
||||||
|
|
@ -207,7 +215,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
(!indexFile.exists() || indexFile.canWrite()) &&
|
(!indexFile.exists() || indexFile.canWrite()) &&
|
||||||
onDisk &&
|
onDisk &&
|
||||||
obtainedLock) {
|
obtainedLock) {
|
||||||
logger.info("Creating Tribble Index on disk for file " + inputFile);
|
logger.info("Writing Tribble index to disk for file " + inputFile);
|
||||||
index.write(indexFile);
|
index.write(indexFile);
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
|
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
|
||||||
reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile);
|
reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,7 @@ public class RMDTrackManagerUnitTest extends BaseTest {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
Iterator<GATKFeature> fIter;
|
Iterator<GATKFeature> fIter;
|
||||||
try {
|
try {
|
||||||
fIter = ((FeatureReaderTrack) t).query("1", 1, 5000);
|
fIter = ((TribbleTrack) t).query("1", 1, 5000);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new StingException("blah I/O exception");
|
throw new StingException("blah I/O exception");
|
||||||
}
|
}
|
||||||
|
|
@ -126,7 +126,7 @@ public class RMDTrackManagerUnitTest extends BaseTest {
|
||||||
long firstTime = System.currentTimeMillis();
|
long firstTime = System.currentTimeMillis();
|
||||||
long count = 0;
|
long count = 0;
|
||||||
try {
|
try {
|
||||||
fIter = ((FeatureReaderTrack) t).query("1", x, x + intervalSize);
|
fIter = ((TribbleTrack) t).query("1", x, x + intervalSize);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new StingException("blah I/O exception");
|
throw new StingException("blah I/O exception");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,8 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
|
||||||
Assert.fail("IO exception unexpected" + e.getMessage());
|
Assert.fail("IO exception unexpected" + e.getMessage());
|
||||||
}
|
}
|
||||||
// make sure we didn't write the file (check that it's timestamp is within bounds)
|
// make sure we didn't write the file (check that it's timestamp is within bounds)
|
||||||
Assert.assertTrue(Math.abs(1274210993000l - new File(vcfFile + TribbleRMDTrackBuilder.linearIndexExtension).lastModified()) < 100);
|
//System.err.println(new File(vcfFile + TribbleRMDTrackBuilder.linearIndexExtension).lastModified());
|
||||||
|
Assert.assertTrue(Math.abs(1275597793000l - new File(vcfFile + TribbleRMDTrackBuilder.linearIndexExtension).lastModified()) < 100);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
<ivy-module version="1.0">
|
|
||||||
<info organisation="org.broad" module="tribble" revision="87" status="integration" publication="20100528124200" />
|
|
||||||
</ivy-module>
|
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
||||||
|
<ivy-module version="1.0">
|
||||||
|
<info organisation="org.broad" module="tribble" revision="88" status="integration" publication="20100603124200" />
|
||||||
|
</ivy-module>
|
||||||
Loading…
Reference in New Issue