Convert GenomeLocParser into an instance variable. This change is required
for anything that needs to be simultaneously aware of multiple references, eg Queue's interval sharding code, liftover support, distributed GATK etc. GenomeLocParser instances must now be used to create/parse GenomeLocs. GenomeLocParser instances are available in walkers by calling either -getToolkit().getGenomeLocParser() or -refContext.getGenomeLocParser() This is an intermediate change; GenomeLocParser will eventually be merged with the reference, but we're not clear exactly how to do that yet. This will become clearer when contig aliasing is implemented. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4642 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
760f06cf8c
commit
8e36a07bea
|
|
@ -48,7 +48,6 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
|
|
@ -69,6 +68,11 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
*/
|
*/
|
||||||
private ParsingEngine parsingEngine;
|
private ParsingEngine parsingEngine;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The genomeLocParser can create and parse GenomeLocs.
|
||||||
|
*/
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accessor for sharded read data.
|
* Accessor for sharded read data.
|
||||||
*/
|
*/
|
||||||
|
|
@ -82,6 +86,10 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
return referenceDataSource;
|
return referenceDataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public GenomeLocParser getGenomeLocParser() {
|
||||||
|
return genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accessor for sharded reference data.
|
* Accessor for sharded reference data.
|
||||||
*/
|
*/
|
||||||
|
|
@ -136,6 +144,14 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
this.parsingEngine = parsingEngine;
|
this.parsingEngine = parsingEngine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Explicitly set the GenomeLocParser, for unit testing.
|
||||||
|
* @param genomeLocParser GenomeLocParser to use.
|
||||||
|
*/
|
||||||
|
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Actually run the engine.
|
* Actually run the engine.
|
||||||
* @return the value of this traversal.
|
* @return the value of this traversal.
|
||||||
|
|
@ -188,7 +204,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
|
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
|
||||||
loadIntervals(argCollection.intervals,
|
loadIntervals(argCollection.intervals,
|
||||||
argCollection.intervalMerging,
|
argCollection.intervalMerging,
|
||||||
GenomeLocParser.mergeIntervalLocations(checkRODToIntervalArgument(),argCollection.intervalMerging)));
|
genomeLocParser.mergeIntervalLocations(checkRODToIntervalArgument(),argCollection.intervalMerging)));
|
||||||
|
|
||||||
// if no exclude arguments, can return parseIntervalArguments directly
|
// if no exclude arguments, can return parseIntervalArguments directly
|
||||||
if (argCollection.excludeIntervals == null)
|
if (argCollection.excludeIntervals == null)
|
||||||
|
|
@ -221,11 +237,11 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
IntervalMergingRule mergingRule,
|
IntervalMergingRule mergingRule,
|
||||||
List<GenomeLoc> additionalIntervals) {
|
List<GenomeLoc> additionalIntervals) {
|
||||||
|
|
||||||
return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals,
|
return IntervalUtils.sortAndMergeIntervals(genomeLocParser,IntervalUtils.mergeListsBySetOperator(additionalIntervals,
|
||||||
IntervalUtils.parseIntervalArguments(argList,
|
IntervalUtils.parseIntervalArguments(genomeLocParser,argList,
|
||||||
this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST),
|
this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST),
|
||||||
argCollection.BTIMergeRule),
|
argCollection.BTIMergeRule),
|
||||||
mergingRule);
|
mergingRule);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -298,22 +314,22 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
protected void initializeDataSources() {
|
protected void initializeDataSources() {
|
||||||
logger.info("Strictness is " + argCollection.strictnessLevel);
|
logger.info("Strictness is " + argCollection.strictnessLevel);
|
||||||
|
|
||||||
|
validateSuppliedReference();
|
||||||
|
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
|
||||||
|
|
||||||
validateSuppliedReads();
|
validateSuppliedReads();
|
||||||
readsDataSource = createReadsDataSource();
|
readsDataSource = createReadsDataSource(genomeLocParser);
|
||||||
|
|
||||||
for (SamRecordFilter filter : filters)
|
for (SamRecordFilter filter : filters)
|
||||||
if (filter instanceof SamRecordHeaderFilter)
|
if (filter instanceof SamRecordHeaderFilter)
|
||||||
((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader());
|
((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader());
|
||||||
|
|
||||||
validateSuppliedReference();
|
|
||||||
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
|
|
||||||
|
|
||||||
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
||||||
|
|
||||||
RMDTrackBuilder manager = new RMDTrackBuilder();
|
RMDTrackBuilder manager = new RMDTrackBuilder();
|
||||||
|
|
||||||
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
||||||
manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
|
manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser);
|
||||||
|
|
||||||
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
|
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection);
|
||||||
validateSuppliedReferenceOrderedData(tracks);
|
validateSuppliedReferenceOrderedData(tracks);
|
||||||
|
|
@ -330,7 +346,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
* @return A unique identifier for the source file of this read. Exception if not found.
|
* @return A unique identifier for the source file of this read. Exception if not found.
|
||||||
*/
|
*/
|
||||||
public SAMReaderID getReaderIDForRead(final SAMRecord read) {
|
public SAMReaderID getReaderIDForRead(final SAMRecord read) {
|
||||||
return getDataSource().getReaderID(read);
|
return getReadsDataSource().getReaderID(read);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -339,7 +355,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
* @return The source filename for this read.
|
* @return The source filename for this read.
|
||||||
*/
|
*/
|
||||||
public File getSourceFileForReaderID(final SAMReaderID id) {
|
public File getSourceFileForReaderID(final SAMReaderID id) {
|
||||||
return getDataSource().getSAMFile(id);
|
return getReadsDataSource().getSAMFile(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -351,7 +367,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
* @return Sets of samples in the merged input SAM stream, grouped by readers
|
* @return Sets of samples in the merged input SAM stream, grouped by readers
|
||||||
*/
|
*/
|
||||||
public List<Set<String>> getSamplesByReaders() {
|
public List<Set<String>> getSamplesByReaders() {
|
||||||
List<SAMReaderID> readers = getDataSource().getReaderIDs();
|
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||||
|
|
||||||
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
|
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
|
||||||
|
|
||||||
|
|
@ -360,7 +376,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
Set<String> samples = new HashSet<String>(1);
|
Set<String> samples = new HashSet<String>(1);
|
||||||
sample_sets.add(samples);
|
sample_sets.add(samples);
|
||||||
|
|
||||||
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
|
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||||
samples.add(g.getSample());
|
samples.add(g.getSample());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -380,7 +396,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
public List<Set<String>> getLibrariesByReaders() {
|
public List<Set<String>> getLibrariesByReaders() {
|
||||||
|
|
||||||
|
|
||||||
List<SAMReaderID> readers = getDataSource().getReaderIDs();
|
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||||
|
|
||||||
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
|
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
|
||||||
|
|
||||||
|
|
@ -389,7 +405,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
Set<String> libs = new HashSet<String>(2);
|
Set<String> libs = new HashSet<String>(2);
|
||||||
lib_sets.add(libs);
|
lib_sets.add(libs);
|
||||||
|
|
||||||
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
|
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||||
libs.add(g.getLibrary());
|
libs.add(g.getLibrary());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -406,22 +422,22 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
|
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
|
||||||
// populate the file -> read group mapping
|
// populate the file -> read group mapping
|
||||||
Map<File, Set<String>> fileToReadGroupIdMap = new HashMap<File, Set<String>>();
|
Map<File, Set<String>> fileToReadGroupIdMap = new HashMap<File, Set<String>>();
|
||||||
for (SAMReaderID id: getDataSource().getReaderIDs()) {
|
for (SAMReaderID id: getReadsDataSource().getReaderIDs()) {
|
||||||
Set<String> readGroups = new HashSet<String>(5);
|
Set<String> readGroups = new HashSet<String>(5);
|
||||||
|
|
||||||
for (SAMReadGroupRecord g : getDataSource().getHeader(id).getReadGroups()) {
|
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(id).getReadGroups()) {
|
||||||
if (getDataSource().hasReadGroupCollisions()) {
|
if (getReadsDataSource().hasReadGroupCollisions()) {
|
||||||
// Check if there were read group clashes.
|
// Check if there were read group clashes.
|
||||||
// If there were, use the SamFileHeaderMerger to translate from the
|
// If there were, use the SamFileHeaderMerger to translate from the
|
||||||
// original read group id to the read group id in the merged stream
|
// original read group id to the read group id in the merged stream
|
||||||
readGroups.add(getDataSource().getReadGroupId(id,g.getReadGroupId()));
|
readGroups.add(getReadsDataSource().getReadGroupId(id,g.getReadGroupId()));
|
||||||
} else {
|
} else {
|
||||||
// otherwise, pass through the unmapped read groups since this is what Picard does as well
|
// otherwise, pass through the unmapped read groups since this is what Picard does as well
|
||||||
readGroups.add(g.getReadGroupId());
|
readGroups.add(g.getReadGroupId());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fileToReadGroupIdMap.put(getDataSource().getSAMFile(id),readGroups);
|
fileToReadGroupIdMap.put(getReadsDataSource().getSAMFile(id),readGroups);
|
||||||
}
|
}
|
||||||
|
|
||||||
return fileToReadGroupIdMap;
|
return fileToReadGroupIdMap;
|
||||||
|
|
@ -440,7 +456,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
public List<Set<String>> getMergedReadGroupsByReaders() {
|
public List<Set<String>> getMergedReadGroupsByReaders() {
|
||||||
|
|
||||||
|
|
||||||
List<SAMReaderID> readers = getDataSource().getReaderIDs();
|
List<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||||
|
|
||||||
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
|
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
|
||||||
|
|
||||||
|
|
@ -449,11 +465,11 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
Set<String> groups = new HashSet<String>(5);
|
Set<String> groups = new HashSet<String>(5);
|
||||||
rg_sets.add(groups);
|
rg_sets.add(groups);
|
||||||
|
|
||||||
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
|
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||||
if (getDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
|
if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
|
||||||
// use HeaderMerger to translate original read group id from the reader into the read group id in the
|
// use HeaderMerger to translate original read group id from the reader into the read group id in the
|
||||||
// merged stream, and save that remapped read group id to associate it with specific reader
|
// merged stream, and save that remapped read group id to associate it with specific reader
|
||||||
groups.add(getDataSource().getReadGroupId(r, g.getReadGroupId()));
|
groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
|
||||||
} else {
|
} else {
|
||||||
// otherwise, pass through the unmapped read groups since this is what Picard does as well
|
// otherwise, pass through the unmapped read groups since this is what Picard does as well
|
||||||
groups.add(g.getReadGroupId());
|
groups.add(g.getReadGroupId());
|
||||||
|
|
@ -533,29 +549,17 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convenience function that binds RODs using the old-style command line parser to the new style list for
|
|
||||||
* a uniform processing.
|
|
||||||
*
|
|
||||||
* @param name the name of the rod
|
|
||||||
* @param type its type
|
|
||||||
* @param file the file to load the rod from
|
|
||||||
*/
|
|
||||||
private void bindConvenienceRods(final String name, final String type, final String file) {
|
|
||||||
argCollection.RODBindings.add(Utils.join(",", new String[]{name, type, file}));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a data source for the given set of reads.
|
* Gets a data source for the given set of reads.
|
||||||
*
|
*
|
||||||
* @return A data source for the given set of reads.
|
* @return A data source for the given set of reads.
|
||||||
*/
|
*/
|
||||||
private SAMDataSource createReadsDataSource() {
|
private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser) {
|
||||||
DownsamplingMethod method = getDownsamplingMethod();
|
DownsamplingMethod method = getDownsamplingMethod();
|
||||||
|
|
||||||
return new SAMDataSource(
|
return new SAMDataSource(
|
||||||
unpackBAMFileList(argCollection.samFiles),
|
unpackBAMFileList(argCollection.samFiles),
|
||||||
|
genomeLocParser,
|
||||||
argCollection.useOriginalBaseQualities,
|
argCollection.useOriginalBaseQualities,
|
||||||
argCollection.strictnessLevel,
|
argCollection.strictnessLevel,
|
||||||
argCollection.readBufferSize,
|
argCollection.readBufferSize,
|
||||||
|
|
@ -574,7 +578,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
*/
|
*/
|
||||||
private ReferenceDataSource openReferenceSequenceFile(File refFile) {
|
private ReferenceDataSource openReferenceSequenceFile(File refFile) {
|
||||||
ReferenceDataSource ref = new ReferenceDataSource(refFile);
|
ReferenceDataSource ref = new ReferenceDataSource(refFile);
|
||||||
GenomeLocParser.setupRefContigOrdering(ref.getReference());
|
genomeLocParser = new GenomeLocParser(ref.getReference());
|
||||||
return ref;
|
return ref;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -587,7 +591,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<RMDTrack> rods) {
|
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<RMDTrack> rods) {
|
||||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||||
for (RMDTrack rod : rods)
|
for (RMDTrack rod : rods)
|
||||||
dataSources.add(new ReferenceOrderedDataSource(rod, flashbackData()));
|
dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,rod,flashbackData()));
|
||||||
return dataSources;
|
return dataSources;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -614,10 +618,12 @@ public abstract class AbstractGenomeAnalysisEngine {
|
||||||
*
|
*
|
||||||
* @return the reads data source
|
* @return the reads data source
|
||||||
*/
|
*/
|
||||||
public SAMDataSource getDataSource() {
|
public SAMDataSource getReadsDataSource() {
|
||||||
return this.readsDataSource;
|
return this.readsDataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the collection of GATK main application arguments.
|
* Sets the collection of GATK main application arguments.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -165,7 +165,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
|
||||||
throw new UserException.CommandLineException("Read-based traversals require a reference file but none was given");
|
throw new UserException.CommandLineException("Read-based traversals require a reference file but none was given");
|
||||||
}
|
}
|
||||||
|
|
||||||
return MicroScheduler.create(this,my_walker,this.getDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads);
|
return MicroScheduler.create(this,my_walker,this.getReadsDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -258,7 +258,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
|
||||||
*/
|
*/
|
||||||
protected ShardStrategy getShardStrategy(ReferenceSequenceFile drivingDataSource) {
|
protected ShardStrategy getShardStrategy(ReferenceSequenceFile drivingDataSource) {
|
||||||
GenomeLocSortedSet intervals = this.getIntervals();
|
GenomeLocSortedSet intervals = this.getIntervals();
|
||||||
SAMDataSource readsDataSource = this.getDataSource();
|
SAMDataSource readsDataSource = this.getReadsDataSource();
|
||||||
ValidationExclusion exclusions = (readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
|
ValidationExclusion exclusions = (readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
|
||||||
ReferenceDataSource referenceDataSource = this.getReferenceDataSource();
|
ReferenceDataSource referenceDataSource = this.getReferenceDataSource();
|
||||||
// Use monolithic sharding if no index is present. Monolithic sharding is always required for the original
|
// Use monolithic sharding if no index is present. Monolithic sharding is always required for the original
|
||||||
|
|
@ -286,7 +286,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
|
||||||
else {
|
else {
|
||||||
region = new ArrayList<GenomeLoc>();
|
region = new ArrayList<GenomeLoc>();
|
||||||
for(SAMSequenceRecord sequenceRecord: drivingDataSource.getSequenceDictionary().getSequences())
|
for(SAMSequenceRecord sequenceRecord: drivingDataSource.getSequenceDictionary().getSequences())
|
||||||
region.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
|
region.add(getGenomeLocParser().createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new MonolithicShardStrategy(readsDataSource,shardType,region);
|
return new MonolithicShardStrategy(readsDataSource,shardType,region);
|
||||||
|
|
@ -309,13 +309,14 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
|
||||||
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
|
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE,
|
SHARD_SIZE,
|
||||||
|
getGenomeLocParser(),
|
||||||
intervals);
|
intervals);
|
||||||
} else
|
} else
|
||||||
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
|
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
|
||||||
referenceDataSource.getReference(),
|
referenceDataSource.getReference(),
|
||||||
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
|
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE);
|
SHARD_SIZE,getGenomeLocParser());
|
||||||
} else if (walker instanceof ReadWalker ||
|
} else if (walker instanceof ReadWalker ||
|
||||||
walker instanceof DuplicateWalker) {
|
walker instanceof DuplicateWalker) {
|
||||||
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL;
|
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL;
|
||||||
|
|
@ -326,13 +327,15 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
|
||||||
shardType,
|
shardType,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE,
|
SHARD_SIZE,
|
||||||
|
getGenomeLocParser(),
|
||||||
intervals);
|
intervals);
|
||||||
} else {
|
} else {
|
||||||
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
|
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
|
||||||
referenceDataSource.getReference(),
|
referenceDataSource.getReference(),
|
||||||
shardType,
|
shardType,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE);
|
SHARD_SIZE,
|
||||||
|
getGenomeLocParser());
|
||||||
}
|
}
|
||||||
} else if (walker instanceof ReadPairWalker) {
|
} else if (walker instanceof ReadPairWalker) {
|
||||||
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
|
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
|
||||||
|
|
@ -344,7 +347,8 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
|
||||||
referenceDataSource.getReference(),
|
referenceDataSource.getReference(),
|
||||||
ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
|
ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE);
|
SHARD_SIZE,
|
||||||
|
getGenomeLocParser());
|
||||||
} else
|
} else
|
||||||
throw new ReviewedStingException("Unable to support walker of type" + walker.getClass().getName());
|
throw new ReviewedStingException("Unable to support walker of type" + walker.getClass().getName());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.contexts;
|
package org.broadinstitute.sting.gatk.contexts;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
@ -41,6 +42,11 @@ import net.sf.samtools.util.StringUtil;
|
||||||
public class ReferenceContext {
|
public class ReferenceContext {
|
||||||
final public static boolean UPPERCASE_REFERENCE = true;
|
final public static boolean UPPERCASE_REFERENCE = true;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Facilitates creation of new GenomeLocs.
|
||||||
|
*/
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The locus.
|
* The locus.
|
||||||
*/
|
*/
|
||||||
|
|
@ -101,18 +107,18 @@ public class ReferenceContext {
|
||||||
* @param locus locus of interest.
|
* @param locus locus of interest.
|
||||||
* @param base reference base at that locus.
|
* @param base reference base at that locus.
|
||||||
*/
|
*/
|
||||||
public ReferenceContext( GenomeLoc locus, byte base ) {
|
public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, byte base ) {
|
||||||
this( locus, locus, new ForwardingProvider(base) );
|
this( genomeLocParser, locus, locus, new ForwardingProvider(base) );
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReferenceContext( GenomeLoc locus, GenomeLoc window, byte[] bases ) {
|
public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, GenomeLoc window, byte[] bases ) {
|
||||||
this( locus, window, new ForwardingProvider(bases) );
|
this( genomeLocParser, locus, window, new ForwardingProvider(bases) );
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReferenceContext( GenomeLoc locus, GenomeLoc window, ReferenceContextRefProvider basesProvider ) {
|
public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, GenomeLoc window, ReferenceContextRefProvider basesProvider ) {
|
||||||
// if( !window.containsP(locus) )
|
// if( !window.containsP(locus) )
|
||||||
// throw new StingException("Invalid locus or window; window does not contain locus");
|
// throw new StingException("Invalid locus or window; window does not contain locus");
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.locus = locus;
|
this.locus = locus;
|
||||||
this.window = window;
|
this.window = window;
|
||||||
this.basesProvider = basesProvider;
|
this.basesProvider = basesProvider;
|
||||||
|
|
@ -125,6 +131,10 @@ public class ReferenceContext {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public GenomeLocParser getGenomeLocParser() {
|
||||||
|
return genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The locus currently being examined.
|
* The locus currently being examined.
|
||||||
* @return The current locus.
|
* @return The current locus.
|
||||||
|
|
|
||||||
|
|
@ -219,8 +219,8 @@ public class VariantContextUtils {
|
||||||
* @param exp expression
|
* @param exp expression
|
||||||
* @return true if there is a match
|
* @return true if there is a match
|
||||||
*/
|
*/
|
||||||
public static boolean match(VariantContext vc, JexlVCMatchExp exp) {
|
public static boolean match(GenomeLocParser genomeLocParser,VariantContext vc, JexlVCMatchExp exp) {
|
||||||
return match(vc,Arrays.asList(exp)).get(exp);
|
return match(genomeLocParser,vc,Arrays.asList(exp)).get(exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -233,8 +233,8 @@ public class VariantContextUtils {
|
||||||
* @param exps expressions
|
* @param exps expressions
|
||||||
* @return true if there is a match
|
* @return true if there is a match
|
||||||
*/
|
*/
|
||||||
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Collection<JexlVCMatchExp> exps) {
|
public static Map<JexlVCMatchExp, Boolean> match(GenomeLocParser genomeLocParser,VariantContext vc, Collection<JexlVCMatchExp> exps) {
|
||||||
return new JEXLMap(exps,vc);
|
return new JEXLMap(genomeLocParser,exps,vc);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -245,8 +245,8 @@ public class VariantContextUtils {
|
||||||
* @param exp expression
|
* @param exp expression
|
||||||
* @return true if there is a match
|
* @return true if there is a match
|
||||||
*/
|
*/
|
||||||
public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
|
public static boolean match(GenomeLocParser genomeLocParser,VariantContext vc, Genotype g, JexlVCMatchExp exp) {
|
||||||
return match(vc,g,Arrays.asList(exp)).get(exp);
|
return match(genomeLocParser,vc,g,Arrays.asList(exp)).get(exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -260,8 +260,8 @@ public class VariantContextUtils {
|
||||||
* @param exps expressions
|
* @param exps expressions
|
||||||
* @return true if there is a match
|
* @return true if there is a match
|
||||||
*/
|
*/
|
||||||
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps) {
|
public static Map<JexlVCMatchExp, Boolean> match(GenomeLocParser genomeLocParser,VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps) {
|
||||||
return new JEXLMap(exps,vc,g);
|
return new JEXLMap(genomeLocParser,exps,vc,g);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -306,8 +306,8 @@ public class VariantContextUtils {
|
||||||
UNION, INTERSECT
|
UNION, INTERSECT
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, byte refBase) {
|
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
|
||||||
return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase);
|
return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -322,14 +322,14 @@ public class VariantContextUtils {
|
||||||
* @param genotypeMergeOptions
|
* @param genotypeMergeOptions
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
|
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
|
||||||
|
|
||||||
return simpleMerge(unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false);
|
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
||||||
boolean filteredAreUncalled ) {
|
boolean filteredAreUncalled ) {
|
||||||
|
|
@ -357,7 +357,7 @@ public class VariantContextUtils {
|
||||||
// establish the baseline info from the first VC
|
// establish the baseline info from the first VC
|
||||||
VariantContext first = VCs.get(0);
|
VariantContext first = VCs.get(0);
|
||||||
String name = first.getSource();
|
String name = first.getSource();
|
||||||
GenomeLoc loc = getLocation(first);
|
GenomeLoc loc = getLocation(genomeLocParser,first);
|
||||||
|
|
||||||
Set<Allele> alleles = new TreeSet<Allele>();
|
Set<Allele> alleles = new TreeSet<Allele>();
|
||||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||||
|
|
@ -380,8 +380,8 @@ public class VariantContextUtils {
|
||||||
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) )
|
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) )
|
||||||
throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
|
throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
|
||||||
|
|
||||||
if ( getLocation(vc).size() > loc.size() )
|
if ( getLocation(genomeLocParser,vc).size() > loc.size() )
|
||||||
loc = getLocation(vc); // get the longest location
|
loc = getLocation(genomeLocParser,vc); // get the longest location
|
||||||
|
|
||||||
nFiltered += vc.isFiltered() ? 1 : 0;
|
nFiltered += vc.isFiltered() ? 1 : 0;
|
||||||
nVariant += vc.isVariant() ? 1 : 0;
|
nVariant += vc.isVariant() ? 1 : 0;
|
||||||
|
|
@ -753,13 +753,13 @@ public class VariantContextUtils {
|
||||||
* @param vc the variant context
|
* @param vc the variant context
|
||||||
* @return the genomeLoc
|
* @return the genomeLoc
|
||||||
*/
|
*/
|
||||||
public static final GenomeLoc getLocation(VariantContext vc) {
|
public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) {
|
||||||
return GenomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd());
|
return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd());
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: returns null if vc1 and vc2 are not mergeable into a single MNP record
|
// NOTE: returns null if vc1 and vc2 are not mergeable into a single MNP record
|
||||||
public static VariantContext mergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
|
public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
|
||||||
if (!mergeIntoMNPvalidationCheck(vc1, vc2))
|
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// Check that it's logically possible to merge the VCs, and that there's a point in doing so (e.g., annotations could be changed):
|
// Check that it's logically possible to merge the VCs, and that there's a point in doing so (e.g., annotations could be changed):
|
||||||
|
|
@ -974,9 +974,9 @@ public class VariantContextUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean mergeIntoMNPvalidationCheck(VariantContext vc1, VariantContext vc2) {
|
private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2) {
|
||||||
GenomeLoc loc1 = VariantContextUtils.getLocation(vc1);
|
GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser,vc1);
|
||||||
GenomeLoc loc2 = VariantContextUtils.getLocation(vc2);
|
GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser,vc2);
|
||||||
|
|
||||||
if (!loc1.onSameContig(loc2))
|
if (!loc1.onSameContig(loc2))
|
||||||
throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");
|
throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ import org.apache.commons.jexl2.JexlContext;
|
||||||
import org.apache.commons.jexl2.MapContext;
|
import org.apache.commons.jexl2.MapContext;
|
||||||
import org.broad.tribble.util.variantcontext.Genotype;
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broad.tribble.vcf.VCFConstants;
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -49,6 +50,7 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class VariantJEXLContext implements JexlContext {
|
class VariantJEXLContext implements JexlContext {
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
// our stored variant context
|
// our stored variant context
|
||||||
private VariantContext vc;
|
private VariantContext vc;
|
||||||
|
|
||||||
|
|
@ -73,7 +75,8 @@ class VariantJEXLContext implements JexlContext {
|
||||||
x.put("homVarCount", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getHomVarCount(); }});
|
x.put("homVarCount", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getHomVarCount(); }});
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantJEXLContext(VariantContext vc) {
|
public VariantJEXLContext(GenomeLocParser genomeLocParser,VariantContext vc) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.vc = vc;
|
this.vc = vc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -119,6 +122,7 @@ class VariantJEXLContext implements JexlContext {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
// our variant context and/or Genotype
|
// our variant context and/or Genotype
|
||||||
private final VariantContext vc;
|
private final VariantContext vc;
|
||||||
private final Genotype g;
|
private final Genotype g;
|
||||||
|
|
@ -130,18 +134,19 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
||||||
private Map<VariantContextUtils.JexlVCMatchExp,Boolean> jexl;
|
private Map<VariantContextUtils.JexlVCMatchExp,Boolean> jexl;
|
||||||
|
|
||||||
|
|
||||||
public JEXLMap(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc, Genotype g) {
|
public JEXLMap(GenomeLocParser genomeLocParser,Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc, Genotype g) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.vc = vc;
|
this.vc = vc;
|
||||||
this.g = g;
|
this.g = g;
|
||||||
initialize(jexlCollection);
|
initialize(jexlCollection);
|
||||||
}
|
}
|
||||||
|
|
||||||
public JEXLMap(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc) {
|
public JEXLMap(GenomeLocParser genomeLocParser,Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, VariantContext vc) {
|
||||||
this(jexlCollection, vc, null);
|
this(genomeLocParser,jexlCollection, vc, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public JEXLMap(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, Genotype g) {
|
public JEXLMap(GenomeLocParser genomeLocParser,Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection, Genotype g) {
|
||||||
this(jexlCollection, null, g);
|
this(genomeLocParser,jexlCollection, null, g);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initialize(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection) {
|
private void initialize(Collection<VariantContextUtils.JexlVCMatchExp> jexlCollection) {
|
||||||
|
|
@ -159,14 +164,14 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
||||||
private void createContext() {
|
private void createContext() {
|
||||||
if ( g == null ) {
|
if ( g == null ) {
|
||||||
// todo -- remove dependancy on g to the entire system
|
// todo -- remove dependancy on g to the entire system
|
||||||
jContext = new VariantJEXLContext(vc);
|
jContext = new VariantJEXLContext(genomeLocParser,vc);
|
||||||
} else {
|
} else {
|
||||||
Map<String, Object> infoMap = new HashMap<String, Object>();
|
Map<String, Object> infoMap = new HashMap<String, Object>();
|
||||||
|
|
||||||
if ( vc != null ) {
|
if ( vc != null ) {
|
||||||
// create a mapping of what we know about the variant context, its Chromosome, positions, etc.
|
// create a mapping of what we know about the variant context, its Chromosome, positions, etc.
|
||||||
infoMap.put("CHROM", VariantContextUtils.getLocation(vc).getContig());
|
infoMap.put("CHROM", VariantContextUtils.getLocation(genomeLocParser,vc).getContig());
|
||||||
infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(vc).getStart()));
|
infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(genomeLocParser,vc).getStart()));
|
||||||
infoMap.put("TYPE", vc.getType().toString());
|
infoMap.put("TYPE", vc.getType().toString());
|
||||||
infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual()));
|
infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual()));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
|
|
@ -47,7 +48,7 @@ public class AllLocusView extends LocusView {
|
||||||
public AllLocusView(LocusShardDataProvider provider) {
|
public AllLocusView(LocusShardDataProvider provider) {
|
||||||
super( provider );
|
super( provider );
|
||||||
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
||||||
locusIterator = new GenomeLocusIterator(provider.getLocus());
|
locusIterator = new GenomeLocusIterator(genomeLocParser,provider.getLocus());
|
||||||
if( locusIterator.hasNext() ) {
|
if( locusIterator.hasNext() ) {
|
||||||
// cache next position and next alignment context
|
// cache next position and next alignment context
|
||||||
nextPosition = locusIterator.next();
|
nextPosition = locusIterator.next();
|
||||||
|
|
|
||||||
|
|
@ -97,9 +97,9 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
}
|
}
|
||||||
|
|
||||||
if(bounds != null) {
|
if(bounds != null) {
|
||||||
long expandedStart = getWindowStart( bounds );
|
int expandedStart = getWindowStart( bounds );
|
||||||
long expandedStop = getWindowStop( bounds );
|
int expandedStop = getWindowStop( bounds );
|
||||||
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
|
initializeReferenceSequence(genomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -123,12 +123,12 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
if ( loc.getContigIndex() != bounds.getContigIndex() )
|
if ( loc.getContigIndex() != bounds.getContigIndex() )
|
||||||
throw new ReviewedStingException("Illegal attempt to expand reference view bounds to accommodate location on a different contig.");
|
throw new ReviewedStingException("Illegal attempt to expand reference view bounds to accommodate location on a different contig.");
|
||||||
|
|
||||||
bounds = GenomeLocParser.createGenomeLoc(bounds.getContigIndex(),
|
bounds = genomeLocParser.createGenomeLoc(bounds.getContig(),
|
||||||
Math.min(bounds.getStart(),loc.getStart()),
|
Math.min(bounds.getStart(),loc.getStart()),
|
||||||
Math.max(bounds.getStop(),loc.getStop()));
|
Math.max(bounds.getStop(),loc.getStop()));
|
||||||
long expandedStart = getWindowStart( bounds );
|
int expandedStart = getWindowStart( bounds );
|
||||||
long expandedStop = getWindowStop( bounds );
|
int expandedStop = getWindowStop( bounds );
|
||||||
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
|
initializeReferenceSequence(genomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -137,8 +137,8 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
*/
|
*/
|
||||||
private void initializeBounds(LocusShardDataProvider provider) {
|
private void initializeBounds(LocusShardDataProvider provider) {
|
||||||
if(provider.getLocus() != null) {
|
if(provider.getLocus() != null) {
|
||||||
long sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength();
|
int sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength();
|
||||||
bounds = GenomeLocParser.createGenomeLoc(provider.getLocus().getContig(),
|
bounds = genomeLocParser.createGenomeLoc(provider.getLocus().getContig(),
|
||||||
Math.max(provider.getLocus().getStart(),1),
|
Math.max(provider.getLocus().getStart(),1),
|
||||||
Math.min(provider.getLocus().getStop(),sequenceLength));
|
Math.min(provider.getLocus().getStop(),sequenceLength));
|
||||||
}
|
}
|
||||||
|
|
@ -155,10 +155,10 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected GenomeLoc trimToBounds(GenomeLoc l) {
|
protected GenomeLoc trimToBounds(GenomeLoc l) {
|
||||||
long expandedStart = getWindowStart( bounds );
|
int expandedStart = getWindowStart( bounds );
|
||||||
long expandedStop = getWindowStop( bounds );
|
int expandedStop = getWindowStop( bounds );
|
||||||
if ( l.getStart() < expandedStart ) l = GenomeLocParser.setStart(l, expandedStart);
|
if ( l.getStart() < expandedStart ) l = genomeLocParser.setStart(l, expandedStart);
|
||||||
if ( l.getStop() > expandedStop ) l = GenomeLocParser.setStop(l, expandedStop);
|
if ( l.getStop() > expandedStop ) l = genomeLocParser.setStop(l, expandedStop);
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -186,7 +186,7 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) {
|
public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) {
|
||||||
//validateLocation( genomeLoc );
|
//validateLocation( genomeLoc );
|
||||||
|
|
||||||
GenomeLoc window = GenomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
|
GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
|
||||||
|
|
||||||
int refStart = -1;
|
int refStart = -1;
|
||||||
if (bounds != null) {
|
if (bounds != null) {
|
||||||
|
|
@ -200,7 +200,7 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
}
|
}
|
||||||
|
|
||||||
int len = (int)window.size();
|
int len = (int)window.size();
|
||||||
return new ReferenceContext( genomeLoc, window, new Provider(refStart, len));
|
return new ReferenceContext( genomeLocParser, genomeLoc, window, new Provider(refStart, len));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -228,7 +228,7 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
* @param locus The locus to expand.
|
* @param locus The locus to expand.
|
||||||
* @return The expanded window.
|
* @return The expanded window.
|
||||||
*/
|
*/
|
||||||
private long getWindowStart( GenomeLoc locus ) {
|
private int getWindowStart( GenomeLoc locus ) {
|
||||||
// If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can.
|
// If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can.
|
||||||
if(locus.getStart() < 1) return 1;
|
if(locus.getStart() < 1) return 1;
|
||||||
// if(locus.getStart() < 1) return locus.getStart();
|
// if(locus.getStart() < 1) return locus.getStart();
|
||||||
|
|
@ -240,9 +240,9 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
* @param locus The locus to expand.
|
* @param locus The locus to expand.
|
||||||
* @return The expanded window.
|
* @return The expanded window.
|
||||||
*/
|
*/
|
||||||
private long getWindowStop( GenomeLoc locus ) {
|
private int getWindowStop( GenomeLoc locus ) {
|
||||||
// If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can.
|
// If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can.
|
||||||
long sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength();
|
int sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength();
|
||||||
if(locus.getStop() > sequenceLength) return sequenceLength;
|
if(locus.getStop() > sequenceLength) return sequenceLength;
|
||||||
return Math.min( locus.getStop() + windowStop, sequenceLength );
|
return Math.min( locus.getStop() + windowStop, sequenceLength );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Presents data sharded by locus to the traversal engine.
|
* Presents data sharded by locus to the traversal engine.
|
||||||
|
|
@ -22,6 +23,11 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
||||||
*/
|
*/
|
||||||
private final ReadProperties sourceInfo;
|
private final ReadProperties sourceInfo;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The parser, used to create and build new GenomeLocs.
|
||||||
|
*/
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
|
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
|
||||||
*/
|
*/
|
||||||
|
|
@ -37,9 +43,10 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
||||||
* @param shard The chunk of data over which traversals happen.
|
* @param shard The chunk of data over which traversals happen.
|
||||||
* @param reference A getter for a section of the reference.
|
* @param reference A getter for a section of the reference.
|
||||||
*/
|
*/
|
||||||
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||||
super(shard,reference,rods);
|
super(shard,genomeLocParser,reference,rods);
|
||||||
this.sourceInfo = sourceInfo;
|
this.sourceInfo = sourceInfo;
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.locus = locus;
|
this.locus = locus;
|
||||||
this.locusIterator = locusIterator;
|
this.locusIterator = locusIterator;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
@ -33,6 +34,11 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
*/
|
*/
|
||||||
protected GenomeLoc locus;
|
protected GenomeLoc locus;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The GenomeLocParser, used to create new genome locs.
|
||||||
|
*/
|
||||||
|
protected GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Source info for this view. Informs the class about downsampling requirements.
|
* Source info for this view. Informs the class about downsampling requirements.
|
||||||
*/
|
*/
|
||||||
|
|
@ -53,6 +59,7 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
this.locus = provider.getLocus();
|
this.locus = provider.getLocus();
|
||||||
|
|
||||||
this.sourceInfo = provider.getSourceInfo();
|
this.sourceInfo = provider.getSourceInfo();
|
||||||
|
this.genomeLocParser = provider.getGenomeLocParser();
|
||||||
this.loci = provider.getLocusIterator();
|
this.loci = provider.getLocusIterator();
|
||||||
|
|
||||||
seedNextLocus();
|
seedNextLocus();
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,7 @@ public class ReadBasedReferenceOrderedView implements View {
|
||||||
/** stores a window of data, dropping RODs if we've passed the new reads start point. */
|
/** stores a window of data, dropping RODs if we've passed the new reads start point. */
|
||||||
class WindowedData {
|
class WindowedData {
|
||||||
// the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope
|
// the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope
|
||||||
private final TreeMap<Long, RODMetaDataContainer> mapping = new TreeMap<Long, RODMetaDataContainer>();
|
private final TreeMap<Integer, RODMetaDataContainer> mapping = new TreeMap<Integer, RODMetaDataContainer>();
|
||||||
|
|
||||||
// our current location from the last read we processed
|
// our current location from the last read we processed
|
||||||
private GenomeLoc currentLoc;
|
private GenomeLoc currentLoc;
|
||||||
|
|
@ -109,16 +109,16 @@ class WindowedData {
|
||||||
*/
|
*/
|
||||||
private void getStates(ShardDataProvider provider, SAMRecord rec) {
|
private void getStates(ShardDataProvider provider, SAMRecord rec) {
|
||||||
|
|
||||||
long stop = Integer.MAX_VALUE;
|
int stop = Integer.MAX_VALUE;
|
||||||
// figure out the appropriate alignment stop
|
// figure out the appropriate alignment stop
|
||||||
if (provider.hasReference()) {
|
if (provider.hasReference()) {
|
||||||
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
|
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
|
||||||
}
|
}
|
||||||
|
|
||||||
// calculate the range of positions we need to look at
|
// calculate the range of positions we need to look at
|
||||||
GenomeLoc range = GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(),
|
GenomeLoc range = provider.getGenomeLocParser().createGenomeLoc(rec.getReferenceName(),
|
||||||
rec.getAlignmentStart(),
|
rec.getAlignmentStart(),
|
||||||
stop);
|
stop);
|
||||||
states = new ArrayList<RMDDataState>();
|
states = new ArrayList<RMDDataState>();
|
||||||
if (provider != null && provider.getReferenceOrderedData() != null)
|
if (provider != null && provider.getReferenceOrderedData() != null)
|
||||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||||
|
|
@ -144,7 +144,7 @@ class WindowedData {
|
||||||
*/
|
*/
|
||||||
public ReadMetaDataTracker getTracker(SAMRecord rec) {
|
public ReadMetaDataTracker getTracker(SAMRecord rec) {
|
||||||
updatePosition(rec);
|
updatePosition(rec);
|
||||||
return new ReadMetaDataTracker(rec, mapping);
|
return new ReadMetaDataTracker(provider.getGenomeLocParser(), rec, mapping);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -154,7 +154,7 @@ class WindowedData {
|
||||||
*/
|
*/
|
||||||
private void updatePosition(SAMRecord rec) {
|
private void updatePosition(SAMRecord rec) {
|
||||||
if (states == null) getStates(this.provider, rec);
|
if (states == null) getStates(this.provider, rec);
|
||||||
currentLoc = GenomeLocParser.createGenomeLoc(rec);
|
currentLoc = provider.getGenomeLocParser().createGenomeLoc(rec);
|
||||||
|
|
||||||
// flush the queue looking for records we've passed over
|
// flush the queue looking for records we've passed over
|
||||||
while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart())
|
while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart())
|
||||||
|
|
|
||||||
|
|
@ -67,10 +67,10 @@ public class ReadReferenceView extends ReferenceView {
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReferenceContext getReferenceContext( SAMRecord read ) {
|
public ReferenceContext getReferenceContext( SAMRecord read ) {
|
||||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
|
GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
||||||
// byte[] bases = super.getReferenceBases(loc);
|
// byte[] bases = super.getReferenceBases(loc);
|
||||||
// return new ReferenceContext( loc, loc, bases );
|
// return new ReferenceContext( loc, loc, bases );
|
||||||
return new ReferenceContext( loc, loc, getReferenceBasesProvider(loc) );
|
return new ReferenceContext( genomeLocParser, loc, loc, getReferenceBasesProvider(loc) );
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Present data sharded by read to a traversal engine.
|
* Present data sharded by read to a traversal engine.
|
||||||
|
|
@ -26,8 +27,8 @@ public class ReadShardDataProvider extends ShardDataProvider {
|
||||||
* @param shard The chunk of data over which traversals happen.
|
* @param shard The chunk of data over which traversals happen.
|
||||||
* @param reference A getter for a section of the reference.
|
* @param reference A getter for a section of the reference.
|
||||||
*/
|
*/
|
||||||
public ReadShardDataProvider(Shard shard, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
public ReadShardDataProvider(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||||
super(shard,reference,rods);
|
super(shard,genomeLocParser,reference,rods);
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,11 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
* A view into the reference backing this shard.
|
* A view into the reference backing this shard.
|
||||||
*/
|
*/
|
||||||
public class ReferenceView implements View {
|
public class ReferenceView implements View {
|
||||||
|
/**
|
||||||
|
* The parser, used to create and parse GenomeLocs.
|
||||||
|
*/
|
||||||
|
protected final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The source of reference data.
|
* The source of reference data.
|
||||||
*/
|
*/
|
||||||
|
|
@ -38,6 +43,7 @@ public class ReferenceView implements View {
|
||||||
* @param provider
|
* @param provider
|
||||||
*/
|
*/
|
||||||
public ReferenceView( ShardDataProvider provider ) {
|
public ReferenceView( ShardDataProvider provider ) {
|
||||||
|
this.genomeLocParser = provider.getGenomeLocParser();
|
||||||
this.reference = provider.getReference();
|
this.reference = provider.getReference();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -68,7 +74,7 @@ public class ReferenceView implements View {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected byte[] getReferenceBases( SAMRecord read ) {
|
protected byte[] getReferenceBases( SAMRecord read ) {
|
||||||
return getReferenceBases(GenomeLocParser.createGenomeLoc(read));
|
return getReferenceBases(genomeLocParser.createGenomeLoc(read));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
// the iterator to immediately before it, so that it can be added to the merging iterator primed for
|
// the iterator to immediately before it, so that it can be added to the merging iterator primed for
|
||||||
// next() to return the first real ROD in this shard
|
// next() to return the first real ROD in this shard
|
||||||
LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard());
|
LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard());
|
||||||
it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1));
|
it.seekForward(genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart()-1));
|
||||||
|
|
||||||
states.add(new ReferenceOrderedDataState(dataSource,it));
|
states.add(new ReferenceOrderedDataState(dataSource,it));
|
||||||
|
|
||||||
|
|
@ -128,7 +128,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
tracker = createTracker(allTracksHere);
|
tracker = createTracker(allTracksHere);
|
||||||
|
|
||||||
GenomeLoc rodSite = datum.getLocation();
|
GenomeLoc rodSite = datum.getLocation();
|
||||||
GenomeLoc site = GenomeLocParser.createGenomeLoc( rodSite.getContigIndex(), rodSite.getStart(), rodSite.getStart());
|
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("rodLocusView.next() is at %s%n", site);
|
if ( DEBUG ) System.out.printf("rodLocusView.next() is at %s%n", site);
|
||||||
|
|
||||||
|
|
@ -167,7 +167,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
*/
|
*/
|
||||||
private long getSkippedBases( GenomeLoc currentPos ) {
|
private long getSkippedBases( GenomeLoc currentPos ) {
|
||||||
// the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped
|
// the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped
|
||||||
Long compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop();
|
Integer compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop();
|
||||||
long skippedBases = currentPos.getStart() - compStop - 1;
|
long skippedBases = currentPos.getStart() - compStop - 1;
|
||||||
|
|
||||||
if ( skippedBases < -1 ) { // minus 1 value is ok
|
if ( skippedBases < -1 ) { // minus 1 value is ok
|
||||||
|
|
@ -182,7 +182,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public GenomeLoc getLocOneBeyondShard() {
|
public GenomeLoc getLocOneBeyondShard() {
|
||||||
return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStop()+1);
|
return genomeLocParser.createGenomeLoc(locus.getContig(),locus.getStop()+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -37,6 +38,11 @@ public abstract class ShardDataProvider {
|
||||||
*/
|
*/
|
||||||
private final Shard shard;
|
private final Shard shard;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The parser, used to create and build new GenomeLocs.
|
||||||
|
*/
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provider of reference data for this particular shard.
|
* Provider of reference data for this particular shard.
|
||||||
*/
|
*/
|
||||||
|
|
@ -47,6 +53,14 @@ public abstract class ShardDataProvider {
|
||||||
*/
|
*/
|
||||||
private final Collection<ReferenceOrderedDataSource> referenceOrderedData;
|
private final Collection<ReferenceOrderedDataSource> referenceOrderedData;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the GenomeLocParser associated with this traversal.
|
||||||
|
* @return The associated parser.
|
||||||
|
*/
|
||||||
|
public GenomeLocParser getGenomeLocParser() {
|
||||||
|
return genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the shard associated with this data provider.
|
* Retrieves the shard associated with this data provider.
|
||||||
* @return The shard associated with this data provider.
|
* @return The shard associated with this data provider.
|
||||||
|
|
@ -86,8 +100,9 @@ public abstract class ShardDataProvider {
|
||||||
* @param shard The chunk of data over which traversals happen.
|
* @param shard The chunk of data over which traversals happen.
|
||||||
* @param reference A getter for a section of the reference.
|
* @param reference A getter for a section of the reference.
|
||||||
*/
|
*/
|
||||||
public ShardDataProvider(Shard shard,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
|
public ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
|
||||||
this.shard = shard;
|
this.shard = shard;
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.reference = reference;
|
this.reference = reference;
|
||||||
this.referenceOrderedData = rods;
|
this.referenceOrderedData = rods;
|
||||||
}
|
}
|
||||||
|
|
@ -96,8 +111,8 @@ public abstract class ShardDataProvider {
|
||||||
* Skeletal, package protected constructor for unit tests which require a ShardDataProvider.
|
* Skeletal, package protected constructor for unit tests which require a ShardDataProvider.
|
||||||
* @param shard the shard
|
* @param shard the shard
|
||||||
*/
|
*/
|
||||||
ShardDataProvider(Shard shard) {
|
ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser) {
|
||||||
this(shard,null,null);
|
this(shard,genomeLocParser,null,null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
public class IntervalSharder {
|
public class IntervalSharder {
|
||||||
private static Logger logger = Logger.getLogger(IntervalSharder.class);
|
private static Logger logger = Logger.getLogger(IntervalSharder.class);
|
||||||
|
|
||||||
public static Iterator<FilePointer> shardIntervals(final SAMDataSource dataSource, final List<GenomeLoc> loci) {
|
public static Iterator<FilePointer> shardIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
|
||||||
return new FilePointerIterator(dataSource,loci);
|
return new FilePointerIterator(dataSource,loci);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -55,11 +55,13 @@ public class IntervalSharder {
|
||||||
*/
|
*/
|
||||||
private static class FilePointerIterator implements Iterator<FilePointer> {
|
private static class FilePointerIterator implements Iterator<FilePointer> {
|
||||||
final SAMDataSource dataSource;
|
final SAMDataSource dataSource;
|
||||||
|
final GenomeLocSortedSet loci;
|
||||||
final PeekableIterator<GenomeLoc> locusIterator;
|
final PeekableIterator<GenomeLoc> locusIterator;
|
||||||
final Queue<FilePointer> cachedFilePointers = new LinkedList<FilePointer>();
|
final Queue<FilePointer> cachedFilePointers = new LinkedList<FilePointer>();
|
||||||
|
|
||||||
public FilePointerIterator(final SAMDataSource dataSource, final List<GenomeLoc> loci) {
|
public FilePointerIterator(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
|
this.loci = loci;
|
||||||
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
|
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
|
||||||
advance();
|
advance();
|
||||||
}
|
}
|
||||||
|
|
@ -82,7 +84,7 @@ public class IntervalSharder {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void advance() {
|
private void advance() {
|
||||||
List<GenomeLoc> nextBatch = new ArrayList<GenomeLoc>();
|
GenomeLocSortedSet nextBatch = new GenomeLocSortedSet(loci.getGenomeLocParser());
|
||||||
String contig = null;
|
String contig = null;
|
||||||
|
|
||||||
while(locusIterator.hasNext() && nextBatch.isEmpty()) {
|
while(locusIterator.hasNext() && nextBatch.isEmpty()) {
|
||||||
|
|
@ -99,7 +101,7 @@ public class IntervalSharder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<FilePointer> shardIntervalsOnContig(final SAMDataSource dataSource, final String contig, final List<GenomeLoc> loci) {
|
private static List<FilePointer> shardIntervalsOnContig(final SAMDataSource dataSource, final String contig, final GenomeLocSortedSet loci) {
|
||||||
// Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
|
// Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
|
||||||
List<FilePointer> filePointers = new ArrayList<FilePointer>();
|
List<FilePointer> filePointers = new ArrayList<FilePointer>();
|
||||||
FilePointer lastFilePointer = null;
|
FilePointer lastFilePointer = null;
|
||||||
|
|
@ -171,7 +173,7 @@ public class IntervalSharder {
|
||||||
|
|
||||||
final int regionStop = Math.min(locationStop,binStart-1);
|
final int regionStop = Math.min(locationStop,binStart-1);
|
||||||
|
|
||||||
GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop);
|
GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop);
|
||||||
lastFilePointer = new FilePointer(subset);
|
lastFilePointer = new FilePointer(subset);
|
||||||
|
|
||||||
locationStart = regionStop + 1;
|
locationStart = regionStop + 1;
|
||||||
|
|
@ -184,7 +186,7 @@ public class IntervalSharder {
|
||||||
lastBAMOverlap = null;
|
lastBAMOverlap = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,locationStop);
|
GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,locationStop);
|
||||||
filePointers.add(new FilePointer(subset));
|
filePointers.add(new FilePointer(subset));
|
||||||
|
|
||||||
locationStart = locationStop + 1;
|
locationStart = locationStop + 1;
|
||||||
|
|
@ -195,7 +197,7 @@ public class IntervalSharder {
|
||||||
|
|
||||||
// The start of the region overlaps the bin. Add the overlapping subset.
|
// The start of the region overlaps the bin. Add the overlapping subset.
|
||||||
final int regionStop = Math.min(locationStop,binStop);
|
final int regionStop = Math.min(locationStop,binStop);
|
||||||
lastFilePointer.addLocation(GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop));
|
lastFilePointer.addLocation(loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop));
|
||||||
locationStart = regionStop + 1;
|
locationStart = regionStop + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -58,14 +58,14 @@ public class LocusShardStrategy implements ShardStrategy {
|
||||||
* @param reads Data source from which to load index data.
|
* @param reads Data source from which to load index data.
|
||||||
* @param locations List of locations for which to load data.
|
* @param locations List of locations for which to load data.
|
||||||
*/
|
*/
|
||||||
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocSortedSet locations) {
|
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) {
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
if(!reads.isEmpty()) {
|
if(!reads.isEmpty()) {
|
||||||
List<GenomeLoc> intervals;
|
GenomeLocSortedSet intervals;
|
||||||
if(locations == null) {
|
if(locations == null) {
|
||||||
// If no locations were passed in, shard the entire BAM file.
|
// If no locations were passed in, shard the entire BAM file.
|
||||||
SAMFileHeader header = reads.getHeader();
|
SAMFileHeader header = reads.getHeader();
|
||||||
intervals = new ArrayList<GenomeLoc>();
|
intervals = new GenomeLocSortedSet(genomeLocParser);
|
||||||
|
|
||||||
for(SAMSequenceRecord readsSequenceRecord: header.getSequenceDictionary().getSequences()) {
|
for(SAMSequenceRecord readsSequenceRecord: header.getSequenceDictionary().getSequences()) {
|
||||||
// Check this sequence against the reference sequence dictionary.
|
// Check this sequence against the reference sequence dictionary.
|
||||||
|
|
@ -73,12 +73,12 @@ public class LocusShardStrategy implements ShardStrategy {
|
||||||
SAMSequenceRecord refSequenceRecord = reference.getSequenceDictionary().getSequence(readsSequenceRecord.getSequenceName());
|
SAMSequenceRecord refSequenceRecord = reference.getSequenceDictionary().getSequence(readsSequenceRecord.getSequenceName());
|
||||||
if(refSequenceRecord != null) {
|
if(refSequenceRecord != null) {
|
||||||
final int length = Math.min(readsSequenceRecord.getSequenceLength(),refSequenceRecord.getSequenceLength());
|
final int length = Math.min(readsSequenceRecord.getSequenceLength(),refSequenceRecord.getSequenceLength());
|
||||||
intervals.add(GenomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length));
|
intervals.add(genomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
intervals = locations.toList();
|
intervals = locations;
|
||||||
|
|
||||||
this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals);
|
this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals);
|
||||||
}
|
}
|
||||||
|
|
@ -89,15 +89,15 @@ public class LocusShardStrategy implements ShardStrategy {
|
||||||
for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) {
|
for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) {
|
||||||
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
|
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
|
||||||
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
|
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
|
||||||
filePointers.add(new FilePointer(GenomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)));
|
filePointers.add(new FilePointer(genomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for(GenomeLoc interval: locations) {
|
for(GenomeLoc interval: locations) {
|
||||||
while(interval.size() > maxShardSize) {
|
while(interval.size() > maxShardSize) {
|
||||||
filePointers.add(new FilePointer(GenomeLocParser.createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1)));
|
filePointers.add(new FilePointer(locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1)));
|
||||||
interval = GenomeLocParser.createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop());
|
interval = locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop());
|
||||||
}
|
}
|
||||||
filePointers.add(new FilePointer(interval));
|
filePointers.add(new FilePointer(interval));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,7 @@ public class ReadShardStrategy implements ShardStrategy {
|
||||||
this.locations = locations;
|
this.locations = locations;
|
||||||
|
|
||||||
if(locations != null)
|
if(locations != null)
|
||||||
filePointerIterator = IntervalSharder.shardIntervals(this.dataSource,locations.toList());
|
filePointerIterator = IntervalSharder.shardIntervals(this.dataSource,locations);
|
||||||
else
|
else
|
||||||
filePointerIterator = filePointers.iterator();
|
filePointerIterator = filePointers.iterator();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||||
|
|
@ -50,8 +51,8 @@ public class ShardStrategyFactory {
|
||||||
* @param startingSize the starting size
|
* @param startingSize the starting size
|
||||||
* @return a shard strategy capable of dividing input data into shards.
|
* @return a shard strategy capable of dividing input data into shards.
|
||||||
*/
|
*/
|
||||||
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
|
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser) {
|
||||||
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, -1L);
|
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, -1L);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -64,10 +65,10 @@ public class ShardStrategyFactory {
|
||||||
* @param startingSize the starting size
|
* @param startingSize the starting size
|
||||||
* @return a shard strategy capable of dividing input data into shards.
|
* @return a shard strategy capable of dividing input data into shards.
|
||||||
*/
|
*/
|
||||||
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) {
|
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, long limitByCount) {
|
||||||
switch (strat) {
|
switch (strat) {
|
||||||
case LOCUS_EXPERIMENTAL:
|
case LOCUS_EXPERIMENTAL:
|
||||||
return new LocusShardStrategy(readsDataSource,referenceDataSource,null);
|
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null);
|
||||||
case READS_EXPERIMENTAL:
|
case READS_EXPERIMENTAL:
|
||||||
return new ReadShardStrategy(readsDataSource,null);
|
return new ReadShardStrategy(readsDataSource,null);
|
||||||
default:
|
default:
|
||||||
|
|
@ -87,8 +88,8 @@ public class ShardStrategyFactory {
|
||||||
* @param startingSize the starting size
|
* @param startingSize the starting size
|
||||||
* @return a shard strategy capable of dividing input data into shards.
|
* @return a shard strategy capable of dividing input data into shards.
|
||||||
*/
|
*/
|
||||||
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) {
|
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst) {
|
||||||
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, lst, -1l);
|
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, lst, -1l);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -102,10 +103,10 @@ public class ShardStrategyFactory {
|
||||||
* @param startingSize the starting size
|
* @param startingSize the starting size
|
||||||
* @return A strategy for shattering this data.
|
* @return A strategy for shattering this data.
|
||||||
*/
|
*/
|
||||||
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) {
|
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst, long limitDataCount) {
|
||||||
switch (strat) {
|
switch (strat) {
|
||||||
case LOCUS_EXPERIMENTAL:
|
case LOCUS_EXPERIMENTAL:
|
||||||
return new LocusShardStrategy(readsDataSource,referenceDataSource,lst);
|
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst);
|
||||||
case READS_EXPERIMENTAL:
|
case READS_EXPERIMENTAL:
|
||||||
return new ReadShardStrategy(readsDataSource,lst);
|
return new ReadShardStrategy(readsDataSource,lst);
|
||||||
default:
|
default:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broad.tribble.FeatureSource;
|
import org.broad.tribble.FeatureSource;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||||
|
|
@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -47,12 +49,12 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
||||||
* Create a new reference-ordered data source.
|
* Create a new reference-ordered data source.
|
||||||
* @param rod the reference ordered data
|
* @param rod the reference ordered data
|
||||||
*/
|
*/
|
||||||
public ReferenceOrderedDataSource( RMDTrack rod, boolean flashbackData ) {
|
public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) {
|
||||||
this.rod = rod;
|
this.rod = rod;
|
||||||
if (rod.supportsQuery())
|
if (rod.supportsQuery())
|
||||||
iteratorPool = new ReferenceOrderedQueryDataPool(new RMDTrackBuilder(),rod);
|
iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,genomeLocParser,new RMDTrackBuilder(),rod);
|
||||||
else
|
else
|
||||||
iteratorPool = new ReferenceOrderedDataPool( rod, flashbackData );
|
iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -110,7 +112,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
||||||
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
||||||
private final RMDTrack rod;
|
private final RMDTrack rod;
|
||||||
boolean flashbackData = false;
|
boolean flashbackData = false;
|
||||||
public ReferenceOrderedDataPool( RMDTrack rod, boolean flashbackData ) {
|
public ReferenceOrderedDataPool( SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) {
|
||||||
|
super(sequenceDictionary,genomeLocParser);
|
||||||
this.flashbackData = flashbackData;
|
this.flashbackData = flashbackData;
|
||||||
this.rod = rod;
|
this.rod = rod;
|
||||||
}
|
}
|
||||||
|
|
@ -121,7 +124,7 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
||||||
* @return The newly created resource.
|
* @return The newly created resource.
|
||||||
*/
|
*/
|
||||||
public LocationAwareSeekableRODIterator createNewResource() {
|
public LocationAwareSeekableRODIterator createNewResource() {
|
||||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(rod.getIterator());
|
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(sequenceDictionary,genomeLocParser,rod.getIterator());
|
||||||
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -134,7 +137,7 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
||||||
*/
|
*/
|
||||||
public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
|
public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
|
||||||
if(segment instanceof MappedStreamSegment) {
|
if(segment instanceof MappedStreamSegment) {
|
||||||
GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation();
|
GenomeLoc position = ((MappedStreamSegment)segment).getLocation();
|
||||||
|
|
||||||
for( LocationAwareSeekableRODIterator RODIterator : resources ) {
|
for( LocationAwareSeekableRODIterator RODIterator : resources ) {
|
||||||
|
|
||||||
|
|
@ -178,14 +181,14 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
||||||
* a data pool for the new query based RODs
|
* a data pool for the new query based RODs
|
||||||
*/
|
*/
|
||||||
class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureSource, LocationAwareSeekableRODIterator> {
|
class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureSource, LocationAwareSeekableRODIterator> {
|
||||||
|
|
||||||
// the reference-ordered data itself.
|
// the reference-ordered data itself.
|
||||||
private final RMDTrack rod;
|
private final RMDTrack rod;
|
||||||
|
|
||||||
// our tribble track builder
|
// our tribble track builder
|
||||||
private final RMDTrackBuilder builder;
|
private final RMDTrackBuilder builder;
|
||||||
|
|
||||||
public ReferenceOrderedQueryDataPool( RMDTrackBuilder builder, RMDTrack rod ) {
|
public ReferenceOrderedQueryDataPool( SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, RMDTrackBuilder builder, RMDTrack rod ) {
|
||||||
|
super(sequenceDictionary,genomeLocParser);
|
||||||
this.rod = rod;
|
this.rod = rod;
|
||||||
this.builder = builder;
|
this.builder = builder;
|
||||||
// a little bit of a hack, but it saves us from re-reading the index from the file
|
// a little bit of a hack, but it saves us from re-reading the index from the file
|
||||||
|
|
@ -209,9 +212,9 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureSource, Location
|
||||||
try {
|
try {
|
||||||
if (position instanceof MappedStreamSegment) {
|
if (position instanceof MappedStreamSegment) {
|
||||||
GenomeLoc pos = ((MappedStreamSegment) position).locus;
|
GenomeLoc pos = ((MappedStreamSegment) position).locus;
|
||||||
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.query(pos.getContig(),(int) pos.getStart(), (int) pos.getStop()),rod.getName()));
|
return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new FeatureToGATKFeatureIterator(genomeLocParser,resource.query(pos.getContig(),(int) pos.getStart(), (int) pos.getStop()),rod.getName()));
|
||||||
} else {
|
} else {
|
||||||
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.iterator(),rod.getName()));
|
return new SeekableRODIterator(sequenceDictionary,genomeLocParser,new FeatureToGATKFeatureIterator(genomeLocParser,resource.iterator(),rod.getName()));
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new ReviewedStingException("Unable to create iterator for rod named " + rod.getName(),e);
|
throw new ReviewedStingException("Unable to create iterator for rod named " + rod.getName(),e);
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
@ -26,6 +27,16 @@ import java.util.Map;
|
||||||
* A pool of open resources, all of which can create a closeable iterator.
|
* A pool of open resources, all of which can create a closeable iterator.
|
||||||
*/
|
*/
|
||||||
abstract class ResourcePool <T,I extends Iterator> {
|
abstract class ResourcePool <T,I extends Iterator> {
|
||||||
|
/**
|
||||||
|
* Sequence dictionary.
|
||||||
|
*/
|
||||||
|
protected final SAMSequenceDictionary sequenceDictionary;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builder/parser for GenomeLocs.
|
||||||
|
*/
|
||||||
|
protected final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* All iterators of this reference-ordered data.
|
* All iterators of this reference-ordered data.
|
||||||
*/
|
*/
|
||||||
|
|
@ -41,6 +52,11 @@ abstract class ResourcePool <T,I extends Iterator> {
|
||||||
*/
|
*/
|
||||||
private Map<I,T> resourceAssignments = new HashMap<I,T>();
|
private Map<I,T> resourceAssignments = new HashMap<I,T>();
|
||||||
|
|
||||||
|
protected ResourcePool(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser) {
|
||||||
|
this.sequenceDictionary = sequenceDictionary;
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get an iterator whose position is before the specified location. Create a new one if none exists.
|
* Get an iterator whose position is before the specified location. Create a new one if none exists.
|
||||||
* @param segment Target position for the iterator.
|
* @param segment Target position for the iterator.
|
||||||
|
|
@ -180,36 +196,11 @@ class MappedStreamSegment implements DataStreamSegment {
|
||||||
* Retrieves the first location covered by a mapped stream segment.
|
* Retrieves the first location covered by a mapped stream segment.
|
||||||
* @return Location of the first base in this segment.
|
* @return Location of the first base in this segment.
|
||||||
*/
|
*/
|
||||||
public GenomeLoc getFirstLocation() {
|
public GenomeLoc getLocation() {
|
||||||
return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStart());
|
return locus;
|
||||||
}
|
}
|
||||||
|
|
||||||
public MappedStreamSegment(GenomeLoc locus) {
|
public MappedStreamSegment(GenomeLoc locus) {
|
||||||
this.locus = locus;
|
this.locus = locus;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Models a position within the unmapped reads in a stream of GATK input data.
|
|
||||||
*/
|
|
||||||
class UnmappedStreamSegment implements DataStreamSegment {
|
|
||||||
/**
|
|
||||||
* Where does this region start, given 0 = the position of the first unmapped read.
|
|
||||||
*/
|
|
||||||
public final long position;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* How many reads wide is this region? This size is generally treated as an upper bound.
|
|
||||||
*/
|
|
||||||
public final long size;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new target location in an unmapped read stream.
|
|
||||||
* @param position The 0-based index into the unmapped reads. Position 0 represents the first unmapped read.
|
|
||||||
* @param size the size of the segment.
|
|
||||||
*/
|
|
||||||
public UnmappedStreamSegment( long position, long size ) {
|
|
||||||
this.position = position;
|
|
||||||
this.size = size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
|
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
|
@ -63,17 +64,22 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
/**
|
/**
|
||||||
* Runtime metrics of reads filtered, etc.
|
* Runtime metrics of reads filtered, etc.
|
||||||
*/
|
*/
|
||||||
protected final ReadMetrics readMetrics;
|
private final ReadMetrics readMetrics;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tools for parsing GenomeLocs, for verifying BAM ordering against general ordering.
|
||||||
|
*/
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Identifiers for the readers driving this data source.
|
* Identifiers for the readers driving this data source.
|
||||||
*/
|
*/
|
||||||
protected final List<SAMReaderID> readerIDs;
|
private final List<SAMReaderID> readerIDs;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* How strict are the readers driving this data source.
|
* How strict are the readers driving this data source.
|
||||||
*/
|
*/
|
||||||
protected final SAMFileReader.ValidationStringency validationStringency;
|
private final SAMFileReader.ValidationStringency validationStringency;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* How far along is each reader?
|
* How far along is each reader?
|
||||||
|
|
@ -113,9 +119,10 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* Create a new SAM data source given the supplied read metadata.
|
* Create a new SAM data source given the supplied read metadata.
|
||||||
* @param samFiles list of reads files.
|
* @param samFiles list of reads files.
|
||||||
*/
|
*/
|
||||||
public SAMDataSource(List<SAMReaderID> samFiles) {
|
public SAMDataSource(List<SAMReaderID> samFiles,GenomeLocParser genomeLocParser) {
|
||||||
this(
|
this(
|
||||||
samFiles,
|
samFiles,
|
||||||
|
genomeLocParser,
|
||||||
false,
|
false,
|
||||||
SAMFileReader.ValidationStringency.STRICT,
|
SAMFileReader.ValidationStringency.STRICT,
|
||||||
null,
|
null,
|
||||||
|
|
@ -145,6 +152,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
*/
|
*/
|
||||||
public SAMDataSource(
|
public SAMDataSource(
|
||||||
List<SAMReaderID> samFiles,
|
List<SAMReaderID> samFiles,
|
||||||
|
GenomeLocParser genomeLocParser,
|
||||||
boolean useOriginalBaseQualities,
|
boolean useOriginalBaseQualities,
|
||||||
SAMFileReader.ValidationStringency strictness,
|
SAMFileReader.ValidationStringency strictness,
|
||||||
Integer readBufferSize,
|
Integer readBufferSize,
|
||||||
|
|
@ -155,6 +163,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
boolean generateExtendedEvents
|
boolean generateExtendedEvents
|
||||||
) {
|
) {
|
||||||
this.readMetrics = new ReadMetrics();
|
this.readMetrics = new ReadMetrics();
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
|
||||||
readerIDs = samFiles;
|
readerIDs = samFiles;
|
||||||
validationStringency = strictness;
|
validationStringency = strictness;
|
||||||
|
|
@ -520,7 +529,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
|
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
|
||||||
// verify the read ordering by applying a sort order iterator
|
// verify the read ordering by applying a sort order iterator
|
||||||
if (!noValidationOfReadOrder && enableVerification)
|
if (!noValidationOfReadOrder && enableVerification)
|
||||||
wrappedIterator = new VerifyingSamIterator(wrappedIterator);
|
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
|
||||||
|
|
||||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -53,9 +53,9 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
// New experimental code for managing locus intervals.
|
// New experimental code for managing locus intervals.
|
||||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||||
LocusWalker lWalker = (LocusWalker)walker;
|
LocusWalker lWalker = (LocusWalker)walker;
|
||||||
WindowMaker windowMaker = new WindowMaker(shard, getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards());
|
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards());
|
||||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||||
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods);
|
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
|
||||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||||
accumulator.accumulate(dataProvider,result);
|
accumulator.accumulate(dataProvider,result);
|
||||||
dataProvider.close();
|
dataProvider.close();
|
||||||
|
|
@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
windowMaker.close();
|
windowMaker.close();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,getReadIterator(shard),reference,rods);
|
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods);
|
||||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||||
accumulator.accumulate(dataProvider,result);
|
accumulator.accumulate(dataProvider,result);
|
||||||
dataProvider.close();
|
dataProvider.close();
|
||||||
|
|
|
||||||
|
|
@ -158,6 +158,12 @@ public abstract class MicroScheduler {
|
||||||
traversalEngine.printOnTraversalDone(metrics);
|
traversalEngine.printOnTraversalDone(metrics);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the engine that created this microscheduler.
|
||||||
|
* @return The engine owning this microscheduler.
|
||||||
|
*/
|
||||||
|
public GenomeAnalysisEngine getEngine() { return engine; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns data source maintained by this scheduler
|
* Returns data source maintained by this scheduler
|
||||||
* @return
|
* @return
|
||||||
|
|
|
||||||
|
|
@ -61,11 +61,11 @@ public class ShardTraverser implements Callable {
|
||||||
|
|
||||||
Object accumulator = walker.reduceInit();
|
Object accumulator = walker.reduceInit();
|
||||||
LocusWalker lWalker = (LocusWalker)walker;
|
LocusWalker lWalker = (LocusWalker)walker;
|
||||||
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards());
|
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards());
|
||||||
ShardDataProvider dataProvider = null;
|
ShardDataProvider dataProvider = null;
|
||||||
|
|
||||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||||
dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
|
dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
|
||||||
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
|
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
|
||||||
dataProvider.close();
|
dataProvider.close();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
import net.sf.picard.util.PeekableIterator;
|
import net.sf.picard.util.PeekableIterator;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Buffer shards of data which may or may not contain multiple loci into
|
* Buffer shards of data which may or may not contain multiple loci into
|
||||||
|
|
@ -51,11 +52,11 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
||||||
* @param intervals The set of intervals over which to traverse.
|
* @param intervals The set of intervals over which to traverse.
|
||||||
* @param discards a filter at that indicates read position relative to some locus?
|
* @param discards a filter at that indicates read position relative to some locus?
|
||||||
*/
|
*/
|
||||||
public WindowMaker(Shard shard, StingSAMIterator iterator, List<GenomeLoc> intervals, List<LocusIteratorFilter> discards ) {
|
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, List<LocusIteratorFilter> discards ) {
|
||||||
this.sourceInfo = shard.getReadProperties();
|
this.sourceInfo = shard.getReadProperties();
|
||||||
this.readIterator = iterator;
|
this.readIterator = iterator;
|
||||||
|
|
||||||
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,discards);
|
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,discards);
|
||||||
|
|
||||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
|
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
|
||||||
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,11 @@ import java.util.Iterator;
|
||||||
* Iterates through all of the loci provided in the reference.
|
* Iterates through all of the loci provided in the reference.
|
||||||
*/
|
*/
|
||||||
public class GenomeLocusIterator implements Iterator<GenomeLoc> {
|
public class GenomeLocusIterator implements Iterator<GenomeLoc> {
|
||||||
|
/**
|
||||||
|
* Builds individual loci.
|
||||||
|
*/
|
||||||
|
private GenomeLocParser parser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The entire region over which we're iterating.
|
* The entire region over which we're iterating.
|
||||||
*/
|
*/
|
||||||
|
|
@ -38,9 +43,10 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
|
||||||
* @param completeLocus Data provider to use as a backing source.
|
* @param completeLocus Data provider to use as a backing source.
|
||||||
* Provider must have a reference (hasReference() == true).
|
* Provider must have a reference (hasReference() == true).
|
||||||
*/
|
*/
|
||||||
public GenomeLocusIterator( GenomeLoc completeLocus ) {
|
public GenomeLocusIterator( GenomeLocParser parser, GenomeLoc completeLocus ) {
|
||||||
|
this.parser = parser;
|
||||||
this.completeLocus = completeLocus;
|
this.completeLocus = completeLocus;
|
||||||
this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart());
|
this.currentLocus = parser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -59,7 +65,7 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
|
||||||
if( !hasNext() )
|
if( !hasNext() )
|
||||||
throw new NoSuchElementException("No elements remaining in bounded reference region.");
|
throw new NoSuchElementException("No elements remaining in bounded reference region.");
|
||||||
GenomeLoc toReturn = (GenomeLoc)currentLocus.clone();
|
GenomeLoc toReturn = (GenomeLoc)currentLocus.clone();
|
||||||
currentLocus = GenomeLocParser.incPos(currentLocus);
|
currentLocus = parser.incPos(currentLocus);
|
||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,11 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position
|
private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used to create new GenomeLocs.
|
||||||
|
*/
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
private final Collection<String> sampleNames = new ArrayList<String>();
|
private final Collection<String> sampleNames = new ArrayList<String>();
|
||||||
private final ReadStateManager readStates;
|
private final ReadStateManager readStates;
|
||||||
|
|
||||||
|
|
@ -129,8 +134,8 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
|
|
||||||
public int getGenomePosition() { return read.getAlignmentStart() + getGenomeOffset(); }
|
public int getGenomePosition() { return read.getAlignmentStart() + getGenomeOffset(); }
|
||||||
|
|
||||||
public GenomeLoc getLocation() {
|
public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
|
||||||
return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CigarOperator getCurrentCigarOperator() {
|
public CigarOperator getCurrentCigarOperator() {
|
||||||
|
|
@ -268,12 +273,13 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// constructors and other basic operations
|
// constructors and other basic operations
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation ) {
|
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser ) {
|
||||||
this(samIterator, readInformation, NO_FILTERS);
|
this(samIterator, readInformation, genomeLocParser, NO_FILTERS);
|
||||||
}
|
}
|
||||||
|
|
||||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, List<LocusIteratorFilter> filters ) {
|
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, List<LocusIteratorFilter> filters ) {
|
||||||
this.readInfo = readInformation;
|
this.readInfo = readInformation;
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.filters = filters;
|
this.filters = filters;
|
||||||
// Aggregate all sample names.
|
// Aggregate all sample names.
|
||||||
sampleNames.addAll(SampleUtils.getSAMFileSamples(readInfo.getHeader()));
|
sampleNames.addAll(SampleUtils.getSAMFileSamples(readInfo.getHeader()));
|
||||||
|
|
@ -310,7 +316,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
|
|
||||||
private GenomeLoc getLocation() {
|
private GenomeLoc getLocation() {
|
||||||
return readStates.isEmpty() ? null : readStates.getFirst().getLocation();
|
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -354,7 +360,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
SAMRecordState our1stState = readStates.getFirst();
|
SAMRecordState our1stState = readStates.getFirst();
|
||||||
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
||||||
// are associated with the *previous* reference base
|
// are associated with the *previous* reference base
|
||||||
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
|
GenomeLoc loc = genomeLocParser.incPos(our1stState.getLocation(genomeLocParser),-1);
|
||||||
|
|
||||||
boolean hasBeenSampled = false;
|
boolean hasBeenSampled = false;
|
||||||
for(String sampleName: sampleNames) {
|
for(String sampleName: sampleNames) {
|
||||||
|
|
|
||||||
|
|
@ -16,11 +16,13 @@ import java.util.Iterator;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class VerifyingSamIterator implements StingSAMIterator {
|
public class VerifyingSamIterator implements StingSAMIterator {
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
StingSAMIterator it;
|
StingSAMIterator it;
|
||||||
SAMRecord last = null;
|
SAMRecord last = null;
|
||||||
boolean checkOrderP = true;
|
boolean checkOrderP = true;
|
||||||
|
|
||||||
public VerifyingSamIterator(StingSAMIterator it) {
|
public VerifyingSamIterator(GenomeLocParser genomeLocParser,StingSAMIterator it) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.it = it;
|
this.it = it;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -35,27 +37,19 @@ public class VerifyingSamIterator implements StingSAMIterator {
|
||||||
return cur;
|
return cur;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private void verifyRecord( final SAMRecord last, final SAMRecord cur ) {
|
||||||
* If true, enables ordered checking of the reads in the file. By default this is enabled.
|
|
||||||
* @param checkP If true, sam records will be checked to insure they come in order
|
|
||||||
*/
|
|
||||||
public void setCheckOrderP( boolean checkP ) {
|
|
||||||
checkOrderP = checkP;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void verifyRecord( final SAMRecord last, final SAMRecord cur ) {
|
|
||||||
if ( checkOrderP && isOutOfOrder(last, cur) ) {
|
if ( checkOrderP && isOutOfOrder(last, cur) ) {
|
||||||
this.last = null;
|
this.last = null;
|
||||||
throw new RuntimeIOException(String.format("Reads are out of order:%nlast:%n%s%ncurrent:%n%s%n", last.format(), cur.format()) );
|
throw new RuntimeIOException(String.format("Reads are out of order:%nlast:%n%s%ncurrent:%n%s%n", last.format(), cur.format()) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isOutOfOrder( final SAMRecord last, final SAMRecord cur ) {
|
private boolean isOutOfOrder( final SAMRecord last, final SAMRecord cur ) {
|
||||||
if ( last == null || cur.getReadUnmappedFlag() )
|
if ( last == null || cur.getReadUnmappedFlag() )
|
||||||
return false;
|
return false;
|
||||||
else {
|
else {
|
||||||
GenomeLoc lastLoc = GenomeLocParser.createGenomeLoc( last );
|
GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last );
|
||||||
GenomeLoc curLoc = GenomeLocParser.createGenomeLoc( cur );
|
GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur );
|
||||||
return curLoc.compareTo(lastLoc) == -1;
|
return curLoc.compareTo(lastLoc) == -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,10 +43,15 @@ import java.util.TreeMap;
|
||||||
* a read-based meta data tracker
|
* a read-based meta data tracker
|
||||||
*/
|
*/
|
||||||
public class ReadMetaDataTracker {
|
public class ReadMetaDataTracker {
|
||||||
|
/**
|
||||||
|
* The parser, used to create new GenomeLocs.
|
||||||
|
*/
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
private final SAMRecord record;
|
private final SAMRecord record;
|
||||||
|
|
||||||
// the buffer of positions and RODs we've stored
|
// the buffer of positions and RODs we've stored
|
||||||
private final TreeMap<Long, RODMetaDataContainer> mapping;
|
private final TreeMap<Integer, RODMetaDataContainer> mapping;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a read meta data tracker, given the read and a queue of RODatum positions
|
* create a read meta data tracker, given the read and a queue of RODatum positions
|
||||||
|
|
@ -54,7 +59,8 @@ public class ReadMetaDataTracker {
|
||||||
* @param record the read to create offset from
|
* @param record the read to create offset from
|
||||||
* @param mapping the mapping of reference ordered datum
|
* @param mapping the mapping of reference ordered datum
|
||||||
*/
|
*/
|
||||||
public ReadMetaDataTracker(SAMRecord record, TreeMap<Long, RODMetaDataContainer> mapping) {
|
public ReadMetaDataTracker(GenomeLocParser genomeLocParser, SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.record = record;
|
this.record = record;
|
||||||
this.mapping = mapping;
|
this.mapping = mapping;
|
||||||
}
|
}
|
||||||
|
|
@ -69,13 +75,13 @@ public class ReadMetaDataTracker {
|
||||||
*
|
*
|
||||||
* @return a mapping from the position in the read to the reference ordered datum
|
* @return a mapping from the position in the read to the reference ordered datum
|
||||||
*/
|
*/
|
||||||
private Map<Long, Collection<GATKFeature>> createReadAlignment(SAMRecord record, TreeMap<Long, RODMetaDataContainer> queue, Class cl, String name) {
|
private Map<Integer, Collection<GATKFeature>> createReadAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> queue, Class cl, String name) {
|
||||||
if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified");
|
if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified");
|
||||||
Map<Long, Collection<GATKFeature>> ret = new LinkedHashMap<Long, Collection<GATKFeature>>();
|
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
|
||||||
GenomeLoc location = GenomeLocParser.createGenomeLoc(record);
|
GenomeLoc location = genomeLocParser.createGenomeLoc(record);
|
||||||
int length = record.getReadLength();
|
int length = record.getReadLength();
|
||||||
for (Long loc : queue.keySet()) {
|
for (Integer loc : queue.keySet()) {
|
||||||
Long position = loc - location.getStart();
|
Integer position = loc - location.getStart();
|
||||||
if (position >= 0 && position < length) {
|
if (position >= 0 && position < length) {
|
||||||
Collection<GATKFeature> set;
|
Collection<GATKFeature> set;
|
||||||
if (cl != null)
|
if (cl != null)
|
||||||
|
|
@ -95,11 +101,11 @@ public class ReadMetaDataTracker {
|
||||||
*
|
*
|
||||||
* @return a mapping from the position in the read to the reference ordered datum
|
* @return a mapping from the position in the read to the reference ordered datum
|
||||||
*/
|
*/
|
||||||
private Map<Long, Collection<GATKFeature>> createGenomeLocAlignment(SAMRecord record, TreeMap<Long, RODMetaDataContainer> mapping, Class cl, String name) {
|
private Map<Integer, Collection<GATKFeature>> createGenomeLocAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping, Class cl, String name) {
|
||||||
Map<Long, Collection<GATKFeature>> ret = new LinkedHashMap<Long, Collection<GATKFeature>>();
|
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
|
||||||
int start = record.getAlignmentStart();
|
int start = record.getAlignmentStart();
|
||||||
int stop = record.getAlignmentEnd();
|
int stop = record.getAlignmentEnd();
|
||||||
for (Long location : mapping.keySet()) {
|
for (Integer location : mapping.keySet()) {
|
||||||
if (location >= start && location <= stop)
|
if (location >= start && location <= stop)
|
||||||
if (cl != null)
|
if (cl != null)
|
||||||
ret.put(location, mapping.get(location).getSet(cl));
|
ret.put(location, mapping.get(location).getSet(cl));
|
||||||
|
|
@ -114,7 +120,7 @@ public class ReadMetaDataTracker {
|
||||||
*
|
*
|
||||||
* @return a mapping of read offset to ROD(s)
|
* @return a mapping of read offset to ROD(s)
|
||||||
*/
|
*/
|
||||||
public Map<Long, Collection<GATKFeature>> getReadOffsetMapping() {
|
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping() {
|
||||||
return createReadAlignment(record, mapping, null, null);
|
return createReadAlignment(record, mapping, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -123,7 +129,7 @@ public class ReadMetaDataTracker {
|
||||||
*
|
*
|
||||||
* @return a mapping of genome loc position to ROD(s)
|
* @return a mapping of genome loc position to ROD(s)
|
||||||
*/
|
*/
|
||||||
public Map<Long, Collection<GATKFeature>> getContigOffsetMapping() {
|
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping() {
|
||||||
return createGenomeLocAlignment(record, mapping, null, null);
|
return createGenomeLocAlignment(record, mapping, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -132,7 +138,7 @@ public class ReadMetaDataTracker {
|
||||||
*
|
*
|
||||||
* @return a mapping of read offset to ROD(s)
|
* @return a mapping of read offset to ROD(s)
|
||||||
*/
|
*/
|
||||||
public Map<Long, Collection<GATKFeature>> getReadOffsetMapping(String name) {
|
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(String name) {
|
||||||
return createReadAlignment(record, mapping, null, name);
|
return createReadAlignment(record, mapping, null, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -141,7 +147,7 @@ public class ReadMetaDataTracker {
|
||||||
*
|
*
|
||||||
* @return a mapping of genome loc position to ROD(s)
|
* @return a mapping of genome loc position to ROD(s)
|
||||||
*/
|
*/
|
||||||
public Map<Long, Collection<GATKFeature>> getContigOffsetMapping(String name) {
|
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(String name) {
|
||||||
return createGenomeLocAlignment(record, mapping, null, name);
|
return createGenomeLocAlignment(record, mapping, null, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -150,7 +156,7 @@ public class ReadMetaDataTracker {
|
||||||
*
|
*
|
||||||
* @return a mapping of read offset to ROD(s)
|
* @return a mapping of read offset to ROD(s)
|
||||||
*/
|
*/
|
||||||
public Map<Long, Collection<GATKFeature>> getReadOffsetMapping(Class cl) {
|
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(Class cl) {
|
||||||
return createReadAlignment(record, mapping, cl, null);
|
return createReadAlignment(record, mapping, cl, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -159,7 +165,7 @@ public class ReadMetaDataTracker {
|
||||||
*
|
*
|
||||||
* @return a mapping of genome loc position to ROD(s)
|
* @return a mapping of genome loc position to ROD(s)
|
||||||
*/
|
*/
|
||||||
public Map<Long, Collection<GATKFeature>> getContigOffsetMapping(Class cl) {
|
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(Class cl) {
|
||||||
return createGenomeLocAlignment(record, mapping, cl, null);
|
return createGenomeLocAlignment(record, mapping, cl, null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,18 +22,21 @@
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
|
import org.broad.tribble.FeatureCodec;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A suite of utilities for working with the GenomeLocParser
|
* An interface marking that a given Tribble feature/codec is actually dependent on context within the
|
||||||
* in the context of the sequence dictionary.
|
* reference, rather than having a dependency only on the contig, start, and stop of the given feature.
|
||||||
|
* A HACK. Tribble should contain all the information in needs to decode the unqualified position of
|
||||||
|
* a feature.
|
||||||
*/
|
*/
|
||||||
public class GenomeLocParserTestUtils {
|
public interface ReferenceDependentFeatureCodec<T extends org.broad.tribble.Feature> extends FeatureCodec<T> {
|
||||||
/**
|
/**
|
||||||
* Clear out the sequence dictionary associated with
|
* Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features.
|
||||||
* the genomeloc creator.
|
* @param genomeLocParser The parser to supply.
|
||||||
*/
|
*/
|
||||||
public static void clearSequenceDictionary() {
|
public void setGenomeLocParser(GenomeLocParser genomeLocParser);
|
||||||
GenomeLocParser.clearRefContigOrdering();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
|
|
@ -38,21 +39,26 @@ import java.util.List;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
|
/**
|
||||||
|
* The parser, used to construct new genome locs.
|
||||||
|
*/
|
||||||
|
private final GenomeLocParser parser;
|
||||||
|
|
||||||
private PushbackIterator<GATKFeature> it;
|
private PushbackIterator<GATKFeature> it;
|
||||||
List<GATKFeature> records = null; // here we will keep a pile of records overlaping with current position; when we iterate
|
List<GATKFeature> records = null; // here we will keep a pile of records overlaping with current position; when we iterate
|
||||||
// and step out of record's scope, we purge it from the list
|
// and step out of record's scope, we purge it from the list
|
||||||
String name = null; // name of the ROD track wrapped by this iterator. Will be pulled from underlying iterator.
|
String name = null; // name of the ROD track wrapped by this iterator. Will be pulled from underlying iterator.
|
||||||
|
|
||||||
long curr_position = 0; // where the iterator is currently positioned on the genome
|
int curr_position = 0; // where the iterator is currently positioned on the genome
|
||||||
long max_position = 0; // the rightmost stop position of currently loaded records
|
int max_position = 0; // the rightmost stop position of currently loaded records
|
||||||
int curr_contig = -1; // what contig the iterator is currently on
|
String curr_contig = null; // what contig the iterator is currently on
|
||||||
boolean next_is_allowed = true; // see discussion below. next() is illegal after seek-forward queries of length > 1
|
boolean next_is_allowed = true; // see discussion below. next() is illegal after seek-forward queries of length > 1
|
||||||
|
|
||||||
// the stop position of the last query. We can query only in forward direction ("seek forward");
|
// the stop position of the last query. We can query only in forward direction ("seek forward");
|
||||||
// it is not only the start position of every successive query that can not be before the start
|
// it is not only the start position of every successive query that can not be before the start
|
||||||
// of the previous one (curr_start), but it is also illegal for a query interval to *end* before
|
// of the previous one (curr_start), but it is also illegal for a query interval to *end* before
|
||||||
// the end of previous query, otherwise we can end up in an inconsistent state
|
// the end of previous query, otherwise we can end up in an inconsistent state
|
||||||
long curr_query_end = -1;
|
int curr_query_end = -1;
|
||||||
|
|
||||||
// EXAMPLE of inconsistency curr_query_end guards against:
|
// EXAMPLE of inconsistency curr_query_end guards against:
|
||||||
// record 1 record 2
|
// record 1 record 2
|
||||||
|
|
@ -80,7 +86,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
// This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1,
|
// This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1,
|
||||||
// but re-enables next() again after a length-1 query.
|
// but re-enables next() again after a length-1 query.
|
||||||
|
|
||||||
public SeekableRODIterator(CloseableIterator<GATKFeature> it) {
|
public SeekableRODIterator(SAMSequenceDictionary dictionary,GenomeLocParser parser,CloseableIterator<GATKFeature> it) {
|
||||||
|
this.parser = parser;
|
||||||
this.it = new PushbackIterator<GATKFeature>(it);
|
this.it = new PushbackIterator<GATKFeature>(it);
|
||||||
records = new LinkedList<GATKFeature>();
|
records = new LinkedList<GATKFeature>();
|
||||||
// the following is a trick: we would like the iterator to know the actual name assigned to
|
// the following is a trick: we would like the iterator to know the actual name assigned to
|
||||||
|
|
@ -91,6 +98,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
GATKFeature r = null;
|
GATKFeature r = null;
|
||||||
if (this.it.hasNext()) r = this.it.element();
|
if (this.it.hasNext()) r = this.it.element();
|
||||||
name = (r==null?null:r.getName());
|
name = (r==null?null:r.getName());
|
||||||
|
|
||||||
|
curr_contig = dictionary.getSequence(0).getSequenceName();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -111,14 +120,14 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
// Returns point location (i.e. genome loc of length 1) on the reference, to which this iterator will advance
|
// Returns point location (i.e. genome loc of length 1) on the reference, to which this iterator will advance
|
||||||
// upon next call to next().
|
// upon next call to next().
|
||||||
public GenomeLoc peekNextLocation() {
|
public GenomeLoc peekNextLocation() {
|
||||||
if ( curr_position + 1 <= max_position ) return GenomeLocParser.createGenomeLoc(curr_contig,curr_position+1);
|
if ( curr_position + 1 <= max_position ) return parser.createGenomeLoc(curr_contig,curr_position+1);
|
||||||
|
|
||||||
// sorry, next reference position is not covered by the RODs we are currently holding. In this case,
|
// sorry, next reference position is not covered by the RODs we are currently holding. In this case,
|
||||||
// the location we will jump to upon next call to next() is the start of the next ROD record that we did
|
// the location we will jump to upon next call to next() is the start of the next ROD record that we did
|
||||||
// not read yet:
|
// not read yet:
|
||||||
if ( it.hasNext() ) {
|
if ( it.hasNext() ) {
|
||||||
GATKFeature r = it.element(); // peek, do not load!
|
GATKFeature r = it.element(); // peek, do not load!
|
||||||
return GenomeLocParser.createGenomeLoc(r.getLocation().getContigIndex(),r.getLocation().getStart());
|
return parser.createGenomeLoc(r.getLocation().getContig(),r.getLocation().getStart());
|
||||||
}
|
}
|
||||||
return null; // underlying iterator has no more records, there is no next location!
|
return null; // underlying iterator has no more records, there is no next location!
|
||||||
}
|
}
|
||||||
|
|
@ -147,7 +156,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
records.clear();
|
records.clear();
|
||||||
GATKFeature r = it.next(); // if hasNext() previously returned true, we are guaranteed that this call to reader.next() is safe
|
GATKFeature r = it.next(); // if hasNext() previously returned true, we are guaranteed that this call to reader.next() is safe
|
||||||
records.add( r );
|
records.add( r );
|
||||||
curr_contig = r.getLocation().getContigIndex();
|
curr_contig = r.getLocation().getContig();
|
||||||
curr_position = r.getLocation().getStart();
|
curr_position = r.getLocation().getStart();
|
||||||
max_position = r.getLocation().getStop();
|
max_position = r.getLocation().getStop();
|
||||||
}
|
}
|
||||||
|
|
@ -163,11 +172,14 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
it.next();
|
it.next();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int that_contig = r.getLocation().getContigIndex();
|
|
||||||
if ( curr_contig > that_contig )
|
GenomeLoc currentContig = parser.createOverEntireContig(curr_contig);
|
||||||
|
GenomeLoc thatContig = r.getLocation();
|
||||||
|
|
||||||
|
if ( currentContig.isPast(thatContig) )
|
||||||
throw new UserException("LocationAwareSeekableRODIterator: contig " +r.getLocation().getContig() +
|
throw new UserException("LocationAwareSeekableRODIterator: contig " +r.getLocation().getContig() +
|
||||||
" occurs out of order in track " + r.getName() );
|
" occurs out of order in track " + r.getName() );
|
||||||
if ( curr_contig < that_contig ) break; // next record is on a higher contig, we do not need it yet...
|
if ( currentContig.isBefore(thatContig) ) break; // next record is on a higher contig, we do not need it yet...
|
||||||
|
|
||||||
if ( r.getLocation().getStart() < curr_position )
|
if ( r.getLocation().getStart() < curr_position )
|
||||||
throw new UserException("LocationAwareSeekableRODIterator: track "+r.getName() +
|
throw new UserException("LocationAwareSeekableRODIterator: track "+r.getName() +
|
||||||
|
|
@ -177,7 +189,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
|
|
||||||
r = it.next(); // we got here only if we do need next record, time to load it for real
|
r = it.next(); // we got here only if we do need next record, time to load it for real
|
||||||
|
|
||||||
long stop = r.getLocation().getStop();
|
int stop = r.getLocation().getStop();
|
||||||
if ( stop < curr_position ) throw new ReviewedStingException("DEBUG: encountered contig that should have been loaded earlier"); // this should never happen
|
if ( stop < curr_position ) throw new ReviewedStingException("DEBUG: encountered contig that should have been loaded earlier"); // this should never happen
|
||||||
if ( stop > max_position ) max_position = stop; // max_position keeps the rightmost stop position across all loaded records
|
if ( stop > max_position ) max_position = stop; // max_position keeps the rightmost stop position across all loaded records
|
||||||
records.add(r);
|
records.add(r);
|
||||||
|
|
@ -186,7 +198,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
// 'records' and current position are fully updated. Last, we need to set the location of the whole track
|
// 'records' and current position are fully updated. Last, we need to set the location of the whole track
|
||||||
// (collection of ROD records) to the genomic site we are currently looking at, and return the list
|
// (collection of ROD records) to the genomic site we are currently looking at, and return the list
|
||||||
|
|
||||||
return new RODRecordListImpl(name,records, GenomeLocParser.createGenomeLoc(curr_contig,curr_position));
|
return new RODRecordListImpl(name,records, parser.createGenomeLoc(curr_contig,curr_position));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -218,13 +230,13 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
* @return Current ending position of the iterator, or null if no position exists.
|
* @return Current ending position of the iterator, or null if no position exists.
|
||||||
*/
|
*/
|
||||||
public GenomeLoc position() {
|
public GenomeLoc position() {
|
||||||
if ( curr_contig < 0 ) return null;
|
if ( curr_contig == null ) return null;
|
||||||
if ( curr_query_end > curr_position ) {
|
if ( curr_query_end > curr_position ) {
|
||||||
// do not attempt to reuse this iterator if the position we need it for lies before the end of last query performed
|
// do not attempt to reuse this iterator if the position we need it for lies before the end of last query performed
|
||||||
return GenomeLocParser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end);
|
return parser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return GenomeLocParser.createGenomeLoc(curr_contig,curr_position);
|
return parser.createGenomeLoc(curr_contig,curr_position);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -256,10 +268,11 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
*/
|
*/
|
||||||
public RODRecordList seekForward(GenomeLoc interval) {
|
public RODRecordList seekForward(GenomeLoc interval) {
|
||||||
|
|
||||||
if ( interval.getContigIndex() < curr_contig )
|
if ( interval.isBefore(parser.createOverEntireContig(curr_contig)) &&
|
||||||
|
!(interval.getStart() == 0 && interval.getStop() == 0 && interval.getContig().equals(curr_contig)) ) // This criteria is syntactic sugar for 'seek to right before curr_contig'
|
||||||
throw new ReviewedStingException("Out of order query: query contig "+interval.getContig()+" is located before "+
|
throw new ReviewedStingException("Out of order query: query contig "+interval.getContig()+" is located before "+
|
||||||
"the iterator's current contig");
|
"the iterator's current contig");
|
||||||
if ( interval.getContigIndex() == curr_contig ) {
|
if ( interval.getContig().equals(curr_contig) ) {
|
||||||
if ( interval.getStart() < curr_position )
|
if ( interval.getStart() < curr_position )
|
||||||
throw new ReviewedStingException("Out of order query: query position "+interval +" is located before "+
|
throw new ReviewedStingException("Out of order query: query position "+interval +" is located before "+
|
||||||
"the iterator's current position "+curr_contig + ":" + curr_position);
|
"the iterator's current position "+curr_contig + ":" + curr_position);
|
||||||
|
|
@ -273,7 +286,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
|
|
||||||
next_is_allowed = ( curr_position == curr_query_end ); // we can call next() later only if interval length is 1
|
next_is_allowed = ( curr_position == curr_query_end ); // we can call next() later only if interval length is 1
|
||||||
|
|
||||||
if ( interval.getContigIndex() == curr_contig && curr_position <= max_position ) {
|
if ( interval.getContig().equals(curr_contig) && curr_position <= max_position ) {
|
||||||
// some of the intervals we are currently keeping do overlap with the query interval
|
// some of the intervals we are currently keeping do overlap with the query interval
|
||||||
|
|
||||||
purgeOutOfScopeRecords();
|
purgeOutOfScopeRecords();
|
||||||
|
|
@ -281,7 +294,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
// clean up and get ready for fast-forwarding towards the requested position
|
// clean up and get ready for fast-forwarding towards the requested position
|
||||||
records.clear();
|
records.clear();
|
||||||
max_position = -1;
|
max_position = -1;
|
||||||
curr_contig = interval.getContigIndex();
|
curr_contig = interval.getContig();
|
||||||
}
|
}
|
||||||
|
|
||||||
// curr_contig and curr_position are set to where we asked to scroll to
|
// curr_contig and curr_position are set to where we asked to scroll to
|
||||||
|
|
@ -289,10 +302,12 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
while ( it.hasNext() ) {
|
while ( it.hasNext() ) {
|
||||||
GATKFeature r = it.next();
|
GATKFeature r = it.next();
|
||||||
if ( r == null ) continue;
|
if ( r == null ) continue;
|
||||||
int that_contig = r.getLocation().getContigIndex();
|
|
||||||
|
|
||||||
if ( curr_contig > that_contig ) continue; // did not reach requested contig yet
|
GenomeLoc currentContig = parser.createOverEntireContig(curr_contig);
|
||||||
if ( curr_contig < that_contig ) {
|
GenomeLoc thatContig = r.getLocation();
|
||||||
|
|
||||||
|
if ( currentContig.isPast(thatContig) ) continue; // did not reach requested contig yet
|
||||||
|
if ( currentContig.isBefore(thatContig) ) {
|
||||||
it.pushback(r); // next record is on the higher contig, we do not need it yet...
|
it.pushback(r); // next record is on the higher contig, we do not need it yet...
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -340,4 +355,5 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||||
public void close() {
|
public void close() {
|
||||||
if (this.it != null) ((CloseableIterator)this.it.getUnderlyingIterator()).close();
|
if (this.it != null) ((CloseableIterator)this.it.getUnderlyingIterator()).close();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -181,7 +181,7 @@ public class VariantContextAdaptors {
|
||||||
// add the call to the genotype list, and then use this list to create a VariantContext
|
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||||
genotypes.add(call);
|
genotypes.add(call);
|
||||||
alleles.add(refAllele);
|
alleles.add(refAllele);
|
||||||
VariantContext vc = VariantContextUtils.toVC(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
|
VariantContext vc = VariantContextUtils.toVC(name, ref.getGenomeLocParser().createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
|
||||||
return vc;
|
return vc;
|
||||||
} else
|
} else
|
||||||
return null; // can't handle anything else
|
return null; // can't handle anything else
|
||||||
|
|
|
||||||
|
|
@ -33,15 +33,15 @@ import java.util.StringTokenizer;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.FeatureCodec;
|
|
||||||
import org.broad.tribble.exception.CodecLineParsingException;
|
import org.broad.tribble.exception.CodecLineParsingException;
|
||||||
import org.broad.tribble.readers.AsciiLineReader;
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTableFeature> {
|
public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<AnnotatorInputTableFeature> {
|
||||||
|
|
||||||
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
|
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
|
||||||
|
|
||||||
|
|
@ -49,6 +49,19 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
|
||||||
|
|
||||||
private ArrayList<String> header;
|
private ArrayList<String> header;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The parser to use when resolving genome-wide locations.
|
||||||
|
*/
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the parser to use when resolving genetic data.
|
||||||
|
* @param genomeLocParser The supplied parser.
|
||||||
|
*/
|
||||||
|
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses the header.
|
* Parses the header.
|
||||||
*
|
*
|
||||||
|
|
@ -80,9 +93,9 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
|
||||||
GenomeLoc loc;
|
GenomeLoc loc;
|
||||||
String chr = st.nextToken();
|
String chr = st.nextToken();
|
||||||
if ( chr.indexOf(":") != -1 )
|
if ( chr.indexOf(":") != -1 )
|
||||||
loc = GenomeLocParser.parseGenomeInterval(chr);
|
loc = genomeLocParser.parseGenomeInterval(chr);
|
||||||
else
|
else
|
||||||
loc = GenomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
|
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
|
||||||
return new AnnotatorInputTableFeature(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
|
return new AnnotatorInputTableFeature(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,9 +120,9 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
|
||||||
|
|
||||||
GenomeLoc loc;
|
GenomeLoc loc;
|
||||||
if ( values.get(0).indexOf(":") != -1 )
|
if ( values.get(0).indexOf(":") != -1 )
|
||||||
loc = GenomeLocParser.parseGenomeInterval(values.get(0));
|
loc = genomeLocParser.parseGenomeInterval(values.get(0));
|
||||||
else
|
else
|
||||||
loc = GenomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
|
loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
|
||||||
|
|
||||||
//parse the location
|
//parse the location
|
||||||
feature.setChr(loc.getContig());
|
feature.setChr(loc.getContig());
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle;
|
||||||
|
|
||||||
|
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.FeatureCodec;
|
|
||||||
import org.broad.tribble.readers.AsciiLineReader;
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -37,10 +36,11 @@ import java.util.HashMap;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import org.broad.tribble.exception.CodecLineParsingException;
|
import org.broad.tribble.exception.CodecLineParsingException;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
public class BeagleCodec implements FeatureCodec<BeagleFeature> {
|
public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature> {
|
||||||
private String[] header;
|
private String[] header;
|
||||||
public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2};
|
public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2};
|
||||||
private BeagleReaderType readerType;
|
private BeagleReaderType readerType;
|
||||||
|
|
@ -52,6 +52,19 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
|
||||||
|
|
||||||
private static final String delimiterRegex = "\\s+";
|
private static final String delimiterRegex = "\\s+";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The parser to use when resolving genome-wide locations.
|
||||||
|
*/
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the parser to use when resolving genetic data.
|
||||||
|
* @param genomeLocParser The supplied parser.
|
||||||
|
*/
|
||||||
|
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
public Feature decodeLoc(String line) {
|
public Feature decodeLoc(String line) {
|
||||||
return decode(line);
|
return decode(line);
|
||||||
}
|
}
|
||||||
|
|
@ -147,17 +160,6 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
|
||||||
|
|
||||||
private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)");
|
private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)");
|
||||||
|
|
||||||
private static GenomeLoc parseMarkerName(String markerName) {
|
|
||||||
Matcher m = MARKER_PATTERN.matcher(markerName);
|
|
||||||
if ( m.matches() ) {
|
|
||||||
String contig = m.group(1);
|
|
||||||
long start = Long.valueOf(m.group(2));
|
|
||||||
return GenomeLocParser.createGenomeLoc(contig, start, start);
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("Malformatted marker string: " + markerName + " required format is chrN:position");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Class<BeagleFeature> getFeatureType() {
|
public Class<BeagleFeature> getFeatureType() {
|
||||||
return BeagleFeature.class;
|
return BeagleFeature.class;
|
||||||
|
|
@ -175,7 +177,7 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
|
||||||
|
|
||||||
BeagleFeature bglFeature = new BeagleFeature();
|
BeagleFeature bglFeature = new BeagleFeature();
|
||||||
|
|
||||||
final GenomeLoc loc = GenomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this
|
final GenomeLoc loc = genomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this
|
||||||
|
|
||||||
//parse the location: common to all readers
|
//parse the location: common to all readers
|
||||||
bglFeature.setChr(loc.getContig());
|
bglFeature.setChr(loc.getContig());
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.features.refseq;
|
package org.broadinstitute.sting.gatk.refdata.features.refseq;
|
||||||
|
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.FeatureCodec;
|
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -13,7 +13,21 @@ import java.util.ArrayList;
|
||||||
/**
|
/**
|
||||||
* the ref seq codec
|
* the ref seq codec
|
||||||
*/
|
*/
|
||||||
public class RefSeqCodec implements FeatureCodec {
|
public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The parser to use when resolving genome-wide locations.
|
||||||
|
*/
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the parser to use when resolving genetic data.
|
||||||
|
* @param genomeLocParser The supplied parser.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Feature decodeLoc(String line) {
|
public Feature decodeLoc(String line) {
|
||||||
|
|
@ -21,19 +35,19 @@ public class RefSeqCodec implements FeatureCodec {
|
||||||
String fields[] = line.split("\t");
|
String fields[] = line.split("\t");
|
||||||
if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length);
|
if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length);
|
||||||
String contig_name = fields[2];
|
String contig_name = fields[2];
|
||||||
return new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
|
return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Fills this object from a text line in RefSeq (UCSC) text dump file */
|
/** Fills this object from a text line in RefSeq (UCSC) text dump file */
|
||||||
@Override
|
@Override
|
||||||
public Feature decode(String line) {
|
public RefSeqFeature decode(String line) {
|
||||||
if (line.startsWith("#")) return null;
|
if (line.startsWith("#")) return null;
|
||||||
String fields[] = line.split("\t");
|
String fields[] = line.split("\t");
|
||||||
|
|
||||||
// we reference postion 15 in the split array below, make sure we have at least that many columns
|
// we reference postion 15 in the split array below, make sure we have at least that many columns
|
||||||
if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length);
|
if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length);
|
||||||
String contig_name = fields[2];
|
String contig_name = fields[2];
|
||||||
RefSeqFeature feature = new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
|
RefSeqFeature feature = new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||||
|
|
||||||
feature.setTranscript_id(fields[1]);
|
feature.setTranscript_id(fields[1]);
|
||||||
if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1);
|
if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1);
|
||||||
|
|
@ -41,8 +55,8 @@ public class RefSeqCodec implements FeatureCodec {
|
||||||
else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line);
|
else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line);
|
||||||
|
|
||||||
|
|
||||||
feature.setTranscript_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
feature.setTranscript_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||||
feature.setTranscript_coding_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])));
|
feature.setTranscript_coding_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])));
|
||||||
feature.setGene_name(fields[12]);
|
feature.setGene_name(fields[12]);
|
||||||
String[] exon_starts = fields[9].split(",");
|
String[] exon_starts = fields[9].split(",");
|
||||||
String[] exon_stops = fields[10].split(",");
|
String[] exon_stops = fields[10].split(",");
|
||||||
|
|
@ -57,7 +71,7 @@ public class RefSeqCodec implements FeatureCodec {
|
||||||
ArrayList<Integer> exon_frames = new ArrayList<Integer>(eframes.length);
|
ArrayList<Integer> exon_frames = new ArrayList<Integer>(eframes.length);
|
||||||
|
|
||||||
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
|
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
|
||||||
exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
exons.add(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
||||||
exon_frames.add(Integer.decode(eframes[i]));
|
exon_frames.add(Integer.decode(eframes[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,15 +25,8 @@ public class RefSeqFeature implements Transcript, Feature {
|
||||||
private List<Integer> exon_frames;
|
private List<Integer> exon_frames;
|
||||||
private String name;
|
private String name;
|
||||||
|
|
||||||
// store the contig, start, and stop for this record
|
public RefSeqFeature(GenomeLoc genomeLoc) {
|
||||||
private final String contig;
|
this.transcript_interval = genomeLoc;
|
||||||
private final int start;
|
|
||||||
private final int stop;
|
|
||||||
|
|
||||||
public RefSeqFeature(String contig, int start, int stop) {
|
|
||||||
this.contig = contig;
|
|
||||||
this.start = start;
|
|
||||||
this.stop = stop;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns id of the transcript (RefSeq NM_* id) */
|
/** Returns id of the transcript (RefSeq NM_* id) */
|
||||||
|
|
@ -44,8 +37,6 @@ public class RefSeqFeature implements Transcript, Feature {
|
||||||
|
|
||||||
/** Returns transcript's full genomic interval (includes all exons with UTRs) */
|
/** Returns transcript's full genomic interval (includes all exons with UTRs) */
|
||||||
public GenomeLoc getLocation() {
|
public GenomeLoc getLocation() {
|
||||||
if (transcript_interval == null)
|
|
||||||
transcript_interval = GenomeLocParser.parseGenomeLoc(contig,start,stop);
|
|
||||||
return transcript_interval;
|
return transcript_interval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -270,16 +261,16 @@ public class RefSeqFeature implements Transcript, Feature {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getChr() {
|
public String getChr() {
|
||||||
return contig;
|
return transcript_interval.getContig();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getStart() {
|
public int getStart() {
|
||||||
return start;
|
return transcript_interval.getStart();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getEnd() {
|
public int getEnd() {
|
||||||
return stop;
|
return transcript_interval.getStop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.refdata.features.table;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.FeatureCodec;
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -14,12 +15,27 @@ import java.util.*;
|
||||||
/**
|
/**
|
||||||
* implementation of a simple table (tab or comma delimited format) input files... more improvements to come
|
* implementation of a simple table (tab or comma delimited format) input files... more improvements to come
|
||||||
*/
|
*/
|
||||||
public class TableCodec implements FeatureCodec {
|
public class TableCodec implements ReferenceDependentFeatureCodec {
|
||||||
private String delimiterRegex = "\\s+";
|
private String delimiterRegex = "\\s+";
|
||||||
private String headerDelimiter = "HEADER";
|
private String headerDelimiter = "HEADER";
|
||||||
private String commentDelimiter = "#";
|
private String commentDelimiter = "#";
|
||||||
private ArrayList<String> header = new ArrayList<String>();
|
private ArrayList<String> header = new ArrayList<String>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The parser to use when resolving genome-wide locations.
|
||||||
|
*/
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the parser to use when resolving genetic data.
|
||||||
|
* @param genomeLocParser The supplied parser.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Feature decodeLoc(String line) {
|
public Feature decodeLoc(String line) {
|
||||||
return decode(line);
|
return decode(line);
|
||||||
|
|
@ -34,7 +50,7 @@ public class TableCodec implements FeatureCodec {
|
||||||
throw new IllegalArgumentException("TableCodec line = " + line + " doesn't appear to be a valid table format");
|
throw new IllegalArgumentException("TableCodec line = " + line + " doesn't appear to be a valid table format");
|
||||||
|
|
||||||
|
|
||||||
return new TableFeature(GenomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header);
|
return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import org.broad.tribble.FeatureSource;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -59,6 +60,11 @@ public class RMDTrack {
|
||||||
// our sequence dictionary, which can be null
|
// our sequence dictionary, which can be null
|
||||||
private final SAMSequenceDictionary dictionary;
|
private final SAMSequenceDictionary dictionary;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parser to use when creating/parsing GenomeLocs.
|
||||||
|
*/
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
// our codec type
|
// our codec type
|
||||||
private final FeatureCodec codec;
|
private final FeatureCodec codec;
|
||||||
|
|
||||||
|
|
@ -101,13 +107,14 @@ public class RMDTrack {
|
||||||
* @param dict the sam sequence dictionary
|
* @param dict the sam sequence dictionary
|
||||||
* @param codec the feature codec we use to decode this type
|
* @param codec the feature codec we use to decode this type
|
||||||
*/
|
*/
|
||||||
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, FeatureCodec codec) {
|
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
this.recordType = codec.getFeatureType();
|
this.recordType = codec.getFeatureType();
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.file = file;
|
this.file = file;
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
this.dictionary = dict;
|
this.dictionary = dict;
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.codec = codec;
|
this.codec = codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -117,7 +124,7 @@ public class RMDTrack {
|
||||||
*/
|
*/
|
||||||
public CloseableIterator<GATKFeature> getIterator() {
|
public CloseableIterator<GATKFeature> getIterator() {
|
||||||
try {
|
try {
|
||||||
return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName());
|
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.iterator(),this.getName());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new UserException.CouldNotReadInputFile(getFile(), "Unable to read from file", e);
|
throw new UserException.CouldNotReadInputFile(getFile(), "Unable to read from file", e);
|
||||||
}
|
}
|
||||||
|
|
@ -133,19 +140,19 @@ public class RMDTrack {
|
||||||
}
|
}
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
||||||
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
||||||
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
||||||
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
|
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
|
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
|
||||||
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
|
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
|
|
|
||||||
|
|
@ -35,11 +35,13 @@ import org.broad.tribble.source.BasicFeatureSource;
|
||||||
import org.broad.tribble.source.CachingFeatureSource;
|
import org.broad.tribble.source.CachingFeatureSource;
|
||||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.gatk.AbstractGenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.AbstractGenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
|
|
@ -80,18 +82,34 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
// private sequence dictionary we use to set our tracks with
|
// private sequence dictionary we use to set our tracks with
|
||||||
private SAMSequenceDictionary dict = null;
|
private SAMSequenceDictionary dict = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private genome loc parser to use when building out new locs.
|
||||||
|
*/
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/** Create a new plugin manager. */
|
/** Create a new plugin manager. */
|
||||||
public RMDTrackBuilder() {
|
public RMDTrackBuilder() {
|
||||||
super(FeatureCodec.class, "Codecs", "Codec");
|
super(FeatureCodec.class, "Codecs", "Codec");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined.
|
||||||
|
* @param dict
|
||||||
|
* @param genomeLocParser
|
||||||
|
*/
|
||||||
|
public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) {
|
||||||
|
super(FeatureCodec.class, "Codecs", "Codec");
|
||||||
|
setSequenceDictionary(dict,genomeLocParser);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param dict the sequence dictionary to use as a reference for Tribble track contig length lookups
|
* @param dict the sequence dictionary to use as a reference for Tribble track contig length lookups
|
||||||
*/
|
*/
|
||||||
public void setSequenceDictionary(SAMSequenceDictionary dict) {
|
public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) {
|
||||||
this.dict = dict;
|
this.dict = dict;
|
||||||
}
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
/** @return a list of all available track types we currently have access to create */
|
/** @return a list of all available track types we currently have access to create */
|
||||||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
||||||
|
|
@ -115,6 +133,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
/**
|
/**
|
||||||
* create a RMDTrack of the specified type
|
* create a RMDTrack of the specified type
|
||||||
*
|
*
|
||||||
|
* @param genomeLocParser GenomeLocParser to use, if case track needs additional reference context.
|
||||||
* @param targetClass the target class of track
|
* @param targetClass the target class of track
|
||||||
* @param name what to call the track
|
* @param name what to call the track
|
||||||
* @param inputFile the input file
|
* @param inputFile the input file
|
||||||
|
|
@ -127,7 +146,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
// return a feature reader track
|
// return a feature reader track
|
||||||
Pair<FeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
|
Pair<FeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
|
||||||
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
||||||
return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, createCodec(targetClass, name));
|
return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(targetClass,name));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -186,6 +205,8 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
FeatureCodec codex = this.createByType(targetClass);
|
FeatureCodec codex = this.createByType(targetClass);
|
||||||
if ( codex instanceof NameAwareCodec )
|
if ( codex instanceof NameAwareCodec )
|
||||||
((NameAwareCodec)codex).setName(name);
|
((NameAwareCodec)codex).setName(name);
|
||||||
|
if(codex instanceof ReferenceDependentFeatureCodec)
|
||||||
|
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||||
return codex;
|
return codex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.refdata.utils;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
@ -39,10 +40,12 @@ import java.util.Iterator;
|
||||||
* a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs)
|
* a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs)
|
||||||
*/
|
*/
|
||||||
public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeature> {
|
public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeature> {
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
private final CloseableTribbleIterator<Feature> iterator;
|
private final CloseableTribbleIterator<Feature> iterator;
|
||||||
private final String name;
|
private final String name;
|
||||||
|
|
||||||
public FeatureToGATKFeatureIterator(CloseableTribbleIterator<Feature> iter, String name) {
|
public FeatureToGATKFeatureIterator(GenomeLocParser genomeLocParser,CloseableTribbleIterator<Feature> iter, String name) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.iterator = iter;
|
this.iterator = iter;
|
||||||
}
|
}
|
||||||
|
|
@ -54,7 +57,7 @@ public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeatu
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public GATKFeature next() {
|
public GATKFeature next() {
|
||||||
return new GATKFeature.TribbleGATKFeature(iterator.next(),name);
|
return new GATKFeature.TribbleGATKFeature(genomeLocParser,iterator.next(),name);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -62,15 +62,17 @@ public abstract class GATKFeature implements Feature {
|
||||||
* wrapping a Tribble feature in a GATK friendly interface
|
* wrapping a Tribble feature in a GATK friendly interface
|
||||||
*/
|
*/
|
||||||
public static class TribbleGATKFeature extends GATKFeature {
|
public static class TribbleGATKFeature extends GATKFeature {
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
private final Feature feature;
|
private final Feature feature;
|
||||||
private GenomeLoc position = null;
|
private GenomeLoc position = null;
|
||||||
|
|
||||||
public TribbleGATKFeature(Feature f, String name) {
|
public TribbleGATKFeature(GenomeLocParser genomeLocParser,Feature f, String name) {
|
||||||
super(name);
|
super(name);
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
feature = f;
|
feature = f;
|
||||||
}
|
}
|
||||||
public GenomeLoc getLocation() {
|
public GenomeLoc getLocation() {
|
||||||
if (position == null) position = GenomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd());
|
if (position == null) position = genomeLocParser.createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd());
|
||||||
return position;
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -48,19 +48,22 @@ import java.util.Iterator;
|
||||||
* to parse a string.
|
* to parse a string.
|
||||||
*/
|
*/
|
||||||
public class StringToGenomeLocIteratorAdapter implements Iterator<GenomeLoc> {
|
public class StringToGenomeLocIteratorAdapter implements Iterator<GenomeLoc> {
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
private PushbackIterator<String> it = null;
|
private PushbackIterator<String> it = null;
|
||||||
|
|
||||||
public enum FORMAT { BED, GATK };
|
public enum FORMAT { BED, GATK };
|
||||||
|
|
||||||
FORMAT myFormat = FORMAT.GATK;
|
FORMAT myFormat = FORMAT.GATK;
|
||||||
|
|
||||||
public StringToGenomeLocIteratorAdapter(Iterator<String> it, FORMAT format) {
|
public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator<String> it, FORMAT format) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.it = new PushbackIterator<String>(it);
|
this.it = new PushbackIterator<String>(it);
|
||||||
myFormat = format;
|
myFormat = format;
|
||||||
}
|
}
|
||||||
|
|
||||||
public StringToGenomeLocIteratorAdapter(Iterator<String> it ) {
|
public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator<String> it ) {
|
||||||
this(it,FORMAT.GATK);
|
this(genomeLocParser,it,FORMAT.GATK);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
|
|
@ -81,8 +84,8 @@ public class StringToGenomeLocIteratorAdapter implements Iterator<GenomeLoc> {
|
||||||
|
|
||||||
public GenomeLoc next() {
|
public GenomeLoc next() {
|
||||||
|
|
||||||
if ( myFormat == FORMAT.GATK ) return GenomeLocParser.parseGenomeInterval( it.next() );
|
if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeInterval( it.next() );
|
||||||
return BedParser.parseLocation( it.next() );
|
return BedParser.parseLocation( genomeLocParser,it.next() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void remove() {
|
public void remove() {
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||||
|
|
||||||
private GenomeAnalysisEngine engine;
|
protected GenomeAnalysisEngine engine;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the named traversal type associated with the given traversal.
|
* Gets the named traversal type associated with the given traversal.
|
||||||
|
|
@ -74,7 +74,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
||||||
public void printProgress(Shard shard,GenomeLoc loc) {
|
public void printProgress(Shard shard,GenomeLoc loc) {
|
||||||
// A bypass is inserted here for unit testing.
|
// A bypass is inserted here for unit testing.
|
||||||
// TODO: print metrics outside of the traversal engine to more easily handle cumulative stats.
|
// TODO: print metrics outside of the traversal engine to more easily handle cumulative stats.
|
||||||
ReadMetrics cumulativeMetrics = engine != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics();
|
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics();
|
||||||
cumulativeMetrics.incrementMetrics(shard.getReadMetrics());
|
cumulativeMetrics.incrementMetrics(shard.getReadMetrics());
|
||||||
printProgress(loc, cumulativeMetrics, false);
|
printProgress(loc, cumulativeMetrics, false);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -59,12 +59,12 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) {
|
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) {
|
||||||
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
|
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||||
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
|
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
|
||||||
|
|
||||||
l.add(read);
|
l.add(read);
|
||||||
for (SAMRecord read2 : iter) {
|
for (SAMRecord read2 : iter) {
|
||||||
GenomeLoc site2 = GenomeLocParser.createGenomeLoc(read2);
|
GenomeLoc site2 = engine.getGenomeLocParser().createGenomeLoc(read2);
|
||||||
|
|
||||||
// the next read starts too late
|
// the next read starts too late
|
||||||
if (site2.getStart() != site.getStart()) {
|
if (site2.getStart() != site.getStart()) {
|
||||||
|
|
@ -114,7 +114,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
||||||
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) {
|
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) {
|
||||||
if ( read.getReadPairedFlag() ) {
|
if ( read.getReadPairedFlag() ) {
|
||||||
// paired
|
// paired
|
||||||
final GenomeLoc readMateLoc = GenomeLocParser.createGenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
|
final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
|
||||||
|
|
||||||
for (List<SAMRecord> reads : readSets) {
|
for (List<SAMRecord> reads : readSets) {
|
||||||
SAMRecord key = reads.get(0);
|
SAMRecord key = reads.get(0);
|
||||||
|
|
@ -123,7 +123,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
||||||
// share a mate location or the read is flagged as a duplicate
|
// share a mate location or the read is flagged as a duplicate
|
||||||
if ( read.getAlignmentStart() == key.getAlignmentStart() && key.getReadPairedFlag() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) ) {
|
if ( read.getAlignmentStart() == key.getAlignmentStart() && key.getReadPairedFlag() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) ) {
|
||||||
// at least one has to be marked as a duplicate
|
// at least one has to be marked as a duplicate
|
||||||
final GenomeLoc keyMateLoc = GenomeLocParser.createGenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
|
final GenomeLoc keyMateLoc = engine.getGenomeLocParser().createGenomeLoc(key.getMateReferenceName(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
|
||||||
if ( readMateLoc.compareTo(keyMateLoc) == 0 ) {
|
if ( readMateLoc.compareTo(keyMateLoc) == 0 ) {
|
||||||
// we are at the same position as the dup and have the same mat pos, it's a dup
|
// we are at the same position as the dup and have the same mat pos, it's a dup
|
||||||
if (DEBUG) logger.debug(String.format(" => Adding read to dups list: %s %d %s vs. %s", read, reads.size(), readMateLoc, keyMateLoc));
|
if (DEBUG) logger.debug(String.format(" => Adding read to dups list: %s %d %s vs. %s", read, reads.size(), readMateLoc, keyMateLoc));
|
||||||
|
|
@ -176,7 +176,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
||||||
*/
|
*/
|
||||||
for (SAMRecord read : iter) {
|
for (SAMRecord read : iter) {
|
||||||
// get the genome loc from the read
|
// get the genome loc from the read
|
||||||
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
|
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||||
|
|
||||||
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter));
|
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter));
|
||||||
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
|
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
||||||
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
|
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
|
||||||
// associated with the current site), we need to update the location. The updated location still starts
|
// associated with the current site), we need to update the location. The updated location still starts
|
||||||
// at the current genomic position, but it has to span the length of the longest deletion (if any).
|
// at the current genomic position, but it has to span the length of the longest deletion (if any).
|
||||||
location = GenomeLocParser.setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
|
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
|
||||||
|
|
||||||
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
|
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
|
||||||
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
|
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
|
||||||
|
|
|
||||||
|
|
@ -102,7 +102,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
||||||
sum = walker.reduce(x, sum);
|
sum = walker.reduce(x, sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart());
|
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
|
||||||
printProgress(dataProvider.getShard(),locus);
|
printProgress(dataProvider.getShard(),locus);
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
|
|
||||||
|
|
@ -225,7 +225,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
||||||
vcfWriter.add(annotatedVC, ref.getBase());
|
vcfWriter.add(annotatedVC, ref.getBase());
|
||||||
} else {
|
} else {
|
||||||
// check to see if the buffered context is different (in location) this context
|
// check to see if the buffered context is different (in location) this context
|
||||||
if ( indelBufferContext != null && ! VariantContextUtils.getLocation(indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(annotatedVCs.iterator().next())) ) {
|
if ( indelBufferContext != null && ! VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),annotatedVCs.iterator().next())) ) {
|
||||||
for ( VariantContext annotatedVC : indelBufferContext )
|
for ( VariantContext annotatedVC : indelBufferContext )
|
||||||
vcfWriter.add(annotatedVC, ref.getBase());
|
vcfWriter.add(annotatedVC, ref.getBase());
|
||||||
indelBufferContext = annotatedVCs;
|
indelBufferContext = annotatedVCs;
|
||||||
|
|
|
||||||
|
|
@ -165,10 +165,10 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
|
public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
|
||||||
GenomeLoc currentLoc = VariantContextUtils.getLocation(preferredVC);
|
GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC);
|
||||||
beagleWriter.print(String.format("%s:%d ",currentLoc.getContig(),currentLoc.getStart()));
|
beagleWriter.print(String.format("%s:%d ",currentLoc.getContig(),currentLoc.getStart()));
|
||||||
if ( beagleGenotypesWriter != null ) {
|
if ( beagleGenotypesWriter != null ) {
|
||||||
beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString()));
|
beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC).toString()));
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( Allele allele : preferredVC.getAlleles() ) {
|
for ( Allele allele : preferredVC.getAlleles() ) {
|
||||||
|
|
|
||||||
|
|
@ -100,10 +100,12 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class CallableBaseState {
|
public static class CallableBaseState {
|
||||||
|
public GenomeLocParser genomeLocParser;
|
||||||
public GenomeLoc loc;
|
public GenomeLoc loc;
|
||||||
public CalledState state;
|
public CalledState state;
|
||||||
|
|
||||||
public CallableBaseState(GenomeLoc loc, CalledState state) {
|
public CallableBaseState(GenomeLocParser genomeLocParser,GenomeLoc loc, CalledState state) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.loc = loc;
|
this.loc = loc;
|
||||||
this.state = state;
|
this.state = state;
|
||||||
}
|
}
|
||||||
|
|
@ -126,7 +128,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
||||||
* @param newStop
|
* @param newStop
|
||||||
*/
|
*/
|
||||||
public void update(GenomeLoc newStop) {
|
public void update(GenomeLoc newStop) {
|
||||||
loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), newStop.getStop());
|
loc = genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), newStop.getStop());
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
@ -174,7 +176,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CallableBaseState(context.getLocation(), state);
|
return new CallableBaseState(getToolkit().getGenomeLocParser(),context.getLocation(), state);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integrator reduce(CallableBaseState state, Integrator integrator) {
|
public Integrator reduce(CallableBaseState state, Integrator integrator) {
|
||||||
|
|
|
||||||
|
|
@ -98,9 +98,9 @@ public class CompareCallableLociWalker extends RodWalker<List<CallableLociWalker
|
||||||
}
|
}
|
||||||
|
|
||||||
FullBEDFeature bed = (FullBEDFeature)bindings.get(0);
|
FullBEDFeature bed = (FullBEDFeature)bindings.get(0);
|
||||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(bed.getChr(), bed.getStart(), bed.getEnd());
|
GenomeLoc loc = getToolkit().getGenomeLocParser().createGenomeLoc(bed.getChr(), bed.getStart(), bed.getEnd());
|
||||||
CallableLociWalker.CalledState state = CallableLociWalker.CalledState.valueOf(bed.getName());
|
CallableLociWalker.CalledState state = CallableLociWalker.CalledState.valueOf(bed.getName());
|
||||||
return new CallableLociWalker.CallableBaseState(loc, state);
|
return new CallableLociWalker.CallableBaseState(getToolkit().getGenomeLocParser(),loc, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -406,7 +406,10 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
RMDTrackBuilder builder = new RMDTrackBuilder();
|
||||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,refSeqGeneList).first;
|
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,refSeqGeneList).first;
|
||||||
try {
|
try {
|
||||||
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
|
return new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||||
|
getToolkit().getGenomeLocParser(),
|
||||||
|
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),
|
||||||
|
refseq.iterator(),"refseq"));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new UserException.CouldNotReadInputFile(refSeqGeneList, "Unable to open file", e);
|
throw new UserException.CouldNotReadInputFile(refSeqGeneList, "Unable to open file", e);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,7 @@ public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, Gen
|
||||||
}
|
}
|
||||||
// otherwise, merge them
|
// otherwise, merge them
|
||||||
else {
|
else {
|
||||||
sum = GenomeLocParser.setStop(sum, value.first.getStop());
|
sum = getToolkit().getGenomeLocParser().setStop(sum, value.first.getStop());
|
||||||
fasta.append(value.second);
|
fasta.append(value.second);
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,16 @@ package org.broadinstitute.sting.gatk.walkers.filters;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
public class ClusteredSnps {
|
public class ClusteredSnps {
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
private int window = 10;
|
private int window = 10;
|
||||||
private int snpThreshold = 3;
|
private int snpThreshold = 3;
|
||||||
|
|
||||||
public ClusteredSnps(int snpThreshold, int window) {
|
public ClusteredSnps(GenomeLocParser genomeLocParser,int snpThreshold, int window) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.window = window;
|
this.window = window;
|
||||||
this.snpThreshold = snpThreshold;
|
this.snpThreshold = snpThreshold;
|
||||||
if ( window < 1 || snpThreshold < 1 )
|
if ( window < 1 || snpThreshold < 1 )
|
||||||
|
|
@ -29,7 +32,7 @@ public class ClusteredSnps {
|
||||||
throw new UserException.BadInput("The clustered SNPs filter does not work in the presence of non-variant records; see the documentation for more details");
|
throw new UserException.BadInput("The clustered SNPs filter does not work in the presence of non-variant records; see the documentation for more details");
|
||||||
|
|
||||||
// find the nth variant
|
// find the nth variant
|
||||||
GenomeLoc left = VariantContextUtils.getLocation(variants[i].getVariantContext());
|
GenomeLoc left = VariantContextUtils.getLocation(genomeLocParser,variants[i].getVariantContext());
|
||||||
GenomeLoc right = null;
|
GenomeLoc right = null;
|
||||||
int snpsSeen = 1;
|
int snpsSeen = 1;
|
||||||
|
|
||||||
|
|
@ -37,7 +40,7 @@ public class ClusteredSnps {
|
||||||
while ( ++currentIndex < variants.length ) {
|
while ( ++currentIndex < variants.length ) {
|
||||||
if ( variants[currentIndex] != null && variants[currentIndex].getVariantContext() != null && variants[currentIndex].getVariantContext().isVariant() ) {
|
if ( variants[currentIndex] != null && variants[currentIndex].getVariantContext() != null && variants[currentIndex].getVariantContext().isVariant() ) {
|
||||||
if ( ++snpsSeen == snpThreshold ) {
|
if ( ++snpsSeen == snpThreshold ) {
|
||||||
right = VariantContextUtils.getLocation(variants[currentIndex].getVariantContext());
|
right = VariantContextUtils.getLocation(genomeLocParser,variants[currentIndex].getVariantContext());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
if ( clusterWindow > 0 )
|
if ( clusterWindow > 0 )
|
||||||
clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow);
|
clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow);
|
||||||
|
|
||||||
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
|
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
|
||||||
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
|
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
|
||||||
|
|
@ -188,7 +188,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||||
|
|
||||||
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
||||||
if ( VariantContextUtils.match(vc, g, exp) )
|
if ( VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, g, exp) )
|
||||||
filters.add(exp.name);
|
filters.add(exp.name);
|
||||||
}
|
}
|
||||||
genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased()));
|
genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased()));
|
||||||
|
|
@ -211,7 +211,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
filters.add(CLUSTERED_SNP_FILTER_NAME);
|
filters.add(CLUSTERED_SNP_FILTER_NAME);
|
||||||
|
|
||||||
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
|
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
|
||||||
if ( VariantContextUtils.match(vc, exp) )
|
if ( VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, exp) )
|
||||||
filters.add(exp.name);
|
filters.add(exp.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -133,7 +133,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
|
||||||
}
|
}
|
||||||
|
|
||||||
// merge the variant contexts
|
// merge the variant contexts
|
||||||
return VariantContextUtils.simpleMerge(calls, ref.getBase());
|
return VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), calls, ref.getBase());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static AlignmentContext filterForSamples(ReadBackedPileup pileup, Set<String> samples) {
|
public static AlignmentContext filterForSamples(ReadBackedPileup pileup, Set<String> samples) {
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.contexts.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
|
public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
private int MIN_COVERAGE = 6;
|
private int MIN_COVERAGE = 6;
|
||||||
private double MIN_FRACTION = 0.3;
|
private double MIN_FRACTION = 0.3;
|
||||||
|
|
@ -20,7 +21,9 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
|
||||||
// the previous normal event context
|
// the previous normal event context
|
||||||
// private Map<String, StratifiedAlignmentContext> cachedContext;
|
// private Map<String, StratifiedAlignmentContext> cachedContext;
|
||||||
|
|
||||||
protected SimpleIndelCalculationModel() {}
|
protected SimpleIndelCalculationModel(GenomeLocParser genomeLocParser) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
private int totalIndels = 0;
|
private int totalIndels = 0;
|
||||||
private int totalCoverage = 0;
|
private int totalCoverage = 0;
|
||||||
|
|
@ -70,7 +73,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
|
||||||
if ( bestEvent.charAt(0) == '-' ) {
|
if ( bestEvent.charAt(0) == '-' ) {
|
||||||
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
|
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
|
||||||
alleles.add( Allele.create(bestEvent.substring(1), true ));
|
alleles.add( Allele.create(bestEvent.substring(1), true ));
|
||||||
loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-1);
|
loc = genomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-1);
|
||||||
} else
|
} else
|
||||||
throw new ReviewedStingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent);
|
throw new ReviewedStingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -239,7 +239,9 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
|
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||||
|
getToolkit().getGenomeLocParser(),
|
||||||
|
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq"));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new UserException.CouldNotReadInputFile(new File(RefseqFileName), "Write failed", e);
|
throw new UserException.CouldNotReadInputFile(new File(RefseqFileName), "Write failed", e);
|
||||||
}
|
}
|
||||||
|
|
@ -257,7 +259,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
int nNorm = 0;
|
int nNorm = 0;
|
||||||
int nTum = 0;
|
int nTum = 0;
|
||||||
for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) {
|
for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) {
|
||||||
List<String> tags = rid.getTags() ;
|
List<String> tags = rid.getTags() ;
|
||||||
if ( tags.isEmpty() && call_somatic )
|
if ( tags.isEmpty() && call_somatic )
|
||||||
throw new UserException.BadInput("In somatic mode all input bam files must be tagged as either 'normal' or 'tumor'. Untagged file: "+
|
throw new UserException.BadInput("In somatic mode all input bam files must be tagged as either 'normal' or 'tumor'. Untagged file: "+
|
||||||
|
|
@ -297,12 +299,12 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) {
|
if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) {
|
||||||
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
||||||
genotypeIntervals = new IntervalFileMergingIterator(
|
genotypeIntervals = new IntervalFileMergingIterator(getToolkit().getGenomeLocParser(),
|
||||||
new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
|
new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
|
||||||
} else {
|
} else {
|
||||||
// read in the whole list of intervals for cleaning
|
// read in the whole list of intervals for cleaning
|
||||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(
|
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
|
||||||
IntervalUtils.parseIntervalArguments(Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
|
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
|
||||||
genotypeIntervals = locs.iterator();
|
genotypeIntervals = locs.iterator();
|
||||||
}
|
}
|
||||||
currentGenotypeInterval = genotypeIntervals.hasNext() ? genotypeIntervals.next() : null;
|
currentGenotypeInterval = genotypeIntervals.hasNext() ? genotypeIntervals.next() : null;
|
||||||
|
|
@ -310,7 +312,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
location = GenomeLocParser.createGenomeLoc(0,1);
|
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
|
||||||
|
|
||||||
// List<Set<String>> readGroupSets = getToolkit().getMergedReadGroupsByReaders();
|
// List<Set<String>> readGroupSets = getToolkit().getMergedReadGroupsByReaders();
|
||||||
// List<Set<String>> sampleSets = getToolkit().getSamplesByReaders();
|
// List<Set<String>> sampleSets = getToolkit().getSamplesByReaders();
|
||||||
|
|
@ -387,8 +389,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
currentPosition = read.getAlignmentStart();
|
currentPosition = read.getAlignmentStart();
|
||||||
refName = new String(read.getReferenceName());
|
refName = new String(read.getReferenceName());
|
||||||
|
|
||||||
location = GenomeLocParser.setContig(location,refName);
|
location = getToolkit().getGenomeLocParser().createGenomeLoc(refName,location.getStart(),location.getStop());
|
||||||
contigLength = GenomeLocParser.getContigInfo(refName).getSequenceLength();
|
contigLength = getToolkit().getGenomeLocParser().getContigInfo(refName).getSequenceLength();
|
||||||
outOfContigUserWarned = false;
|
outOfContigUserWarned = false;
|
||||||
|
|
||||||
normal_context.clear(); // reset coverage window; this will also set reference position to 0
|
normal_context.clear(); // reset coverage window; this will also set reference position to 0
|
||||||
|
|
@ -543,7 +545,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
long move_to = adjustedPosition;
|
long move_to = adjustedPosition;
|
||||||
|
|
||||||
for ( long pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) {
|
for ( int pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) {
|
||||||
|
|
||||||
if ( normal_context.indelsAt(pos).size() == 0 ) continue; // no indels
|
if ( normal_context.indelsAt(pos).size() == 0 ) continue; // no indels
|
||||||
|
|
||||||
|
|
@ -579,8 +581,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
// if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right:
|
// if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right:
|
||||||
if ( right > normal_context.getStop() ) right = normal_context.getStop();
|
if ( right > normal_context.getStop() ) right = normal_context.getStop();
|
||||||
|
|
||||||
location = GenomeLocParser.setStart(location,pos);
|
location = getToolkit().getGenomeLocParser().setStart(location,pos);
|
||||||
location = GenomeLocParser.setStop(location,pos); // retrieve annotation data
|
location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
|
||||||
|
|
||||||
if ( normalCall.isCall() ) {
|
if ( normalCall.isCall() ) {
|
||||||
normalCallsMade++;
|
normalCallsMade++;
|
||||||
|
|
@ -692,7 +694,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
if ( DEBUG ) System.out.println("DEBUG>> Emitting in somatic mode up to "+position+" force shift="+force+" current window="+tumor_context.getStart()+"-"+tumor_context.getStop());
|
if ( DEBUG ) System.out.println("DEBUG>> Emitting in somatic mode up to "+position+" force shift="+force+" current window="+tumor_context.getStart()+"-"+tumor_context.getStop());
|
||||||
|
|
||||||
for ( long pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) {
|
for ( int pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) {
|
||||||
|
|
||||||
if ( tumor_context.indelsAt(pos).size() == 0 ) continue; // no indels in tumor
|
if ( tumor_context.indelsAt(pos).size() == 0 ) continue; // no indels in tumor
|
||||||
|
|
||||||
|
|
@ -735,8 +737,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
|
if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right
|
||||||
|
|
||||||
location = GenomeLocParser.setStart(location,pos);
|
location = getToolkit().getGenomeLocParser().setStart(location,pos);
|
||||||
location = GenomeLocParser.setStop(location,pos); // retrieve annotation data
|
location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
|
||||||
|
|
||||||
if ( tumorCall.isCall() ) {
|
if ( tumorCall.isCall() ) {
|
||||||
tumorCallsMade++;
|
tumorCallsMade++;
|
||||||
|
|
@ -1395,13 +1397,13 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
class WindowContext implements IndelListener {
|
class WindowContext implements IndelListener {
|
||||||
private Set<ExpandedSAMRecord> reads;
|
private Set<ExpandedSAMRecord> reads;
|
||||||
private long start=0; // where the window starts on the ref, 1-based
|
private int start=0; // where the window starts on the ref, 1-based
|
||||||
private CircularArray< List< IndelVariant > > indels;
|
private CircularArray< List< IndelVariant > > indels;
|
||||||
|
|
||||||
private List<IndelVariant> emptyIndelList = new ArrayList<IndelVariant>();
|
private List<IndelVariant> emptyIndelList = new ArrayList<IndelVariant>();
|
||||||
|
|
||||||
|
|
||||||
public WindowContext(long start, int length) {
|
public WindowContext(int start, int length) {
|
||||||
this.start = start;
|
this.start = start;
|
||||||
indels = new CircularArray< List<IndelVariant> >(length);
|
indels = new CircularArray< List<IndelVariant> >(length);
|
||||||
// reads = new LinkedList<SAMRecord>();
|
// reads = new LinkedList<SAMRecord>();
|
||||||
|
|
@ -1412,13 +1414,13 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public long getStart() { return start; }
|
public int getStart() { return start; }
|
||||||
|
|
||||||
/** Returns 1-based reference stop position (inclusive) of the interval this object keeps context for.
|
/** Returns 1-based reference stop position (inclusive) of the interval this object keeps context for.
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public long getStop() { return start + indels.length() - 1; }
|
public int getStop() { return start + indels.length() - 1; }
|
||||||
|
|
||||||
/** Resets reference start position to 0 and clears the context.
|
/** Resets reference start position to 0 and clears the context.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -211,9 +211,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
for (String fileOrInterval : intervalsFile.split(";")) {
|
for (String fileOrInterval : intervalsFile.split(";")) {
|
||||||
// if it's a file, add items to raw interval list
|
// if it's a file, add items to raw interval list
|
||||||
if (IntervalUtils.isIntervalFile(fileOrInterval)) {
|
if (IntervalUtils.isIntervalFile(fileOrInterval)) {
|
||||||
merger.add(new IntervalFileMergingIterator( new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) );
|
merger.add(new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) );
|
||||||
} else {
|
} else {
|
||||||
rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval));
|
rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeInterval(fileOrInterval));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( ! rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator());
|
if ( ! rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator());
|
||||||
|
|
@ -221,7 +221,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
intervals = merger;
|
intervals = merger;
|
||||||
} else {
|
} else {
|
||||||
// read in the whole list of intervals for cleaning
|
// read in the whole list of intervals for cleaning
|
||||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY);
|
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY);
|
||||||
intervals = locs.iterator();
|
intervals = locs.iterator();
|
||||||
}
|
}
|
||||||
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
||||||
|
|
@ -239,9 +239,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
nwayWriters = new HashMap<SAMReaderID,SAMFileWriter>();
|
nwayWriters = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||||
|
|
||||||
for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) {
|
for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) {
|
||||||
|
|
||||||
String fName = getToolkit().getDataSource().getSAMFile(rid).getName();
|
String fName = getToolkit().getReadsDataSource().getSAMFile(rid).getName();
|
||||||
|
|
||||||
int pos ;
|
int pos ;
|
||||||
if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
|
if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
|
||||||
|
|
@ -383,10 +383,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
GenomeLoc readLoc = getToolkit().getGenomeLocParser().createGenomeLoc(read);
|
||||||
// hack to get around unmapped reads having screwy locations
|
// hack to get around unmapped reads having screwy locations
|
||||||
if ( readLoc.getStop() == 0 )
|
if ( readLoc.getStop() == 0 )
|
||||||
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart());
|
readLoc = getToolkit().getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
|
||||||
|
|
||||||
if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) ) {
|
if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) ) {
|
||||||
// TODO -- it would be nice if we could use indels from 454 reads as alternate consenses
|
// TODO -- it would be nice if we could use indels from 454 reads as alternate consenses
|
||||||
|
|
@ -1414,7 +1414,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class ReadBin {
|
private class ReadBin {
|
||||||
|
|
||||||
private final ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
private final ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||||
private byte[] reference = null;
|
private byte[] reference = null;
|
||||||
|
|
@ -1426,11 +1426,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// This can happen if e.g. there's a large known indel with no overlapping reads.
|
// This can happen if e.g. there's a large known indel with no overlapping reads.
|
||||||
public void add(SAMRecord read) {
|
public void add(SAMRecord read) {
|
||||||
|
|
||||||
GenomeLoc locForRead = GenomeLocParser.createGenomeLoc(read);
|
GenomeLoc locForRead = getToolkit().getGenomeLocParser().createGenomeLoc(read);
|
||||||
if ( loc == null )
|
if ( loc == null )
|
||||||
loc = locForRead;
|
loc = locForRead;
|
||||||
else if ( locForRead.getStop() > loc.getStop() )
|
else if ( locForRead.getStop() > loc.getStop() )
|
||||||
loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), locForRead.getStop());
|
loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), loc.getStart(), locForRead.getStop());
|
||||||
|
|
||||||
reads.add(read);
|
reads.add(read);
|
||||||
}
|
}
|
||||||
|
|
@ -1441,9 +1441,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// set up the reference if we haven't done so yet
|
// set up the reference if we haven't done so yet
|
||||||
if ( reference == null ) {
|
if ( reference == null ) {
|
||||||
// first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read)
|
// first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read)
|
||||||
long padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1);
|
int padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1);
|
||||||
long padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
|
int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
|
||||||
loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), padLeft, padRight);
|
loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight);
|
||||||
reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
|
reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
|
||||||
StringUtil.toUpperCase(reference);
|
StringUtil.toUpperCase(reference);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -92,7 +92,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
boolean hasInsertion = false;
|
boolean hasInsertion = false;
|
||||||
boolean hasPointEvent = false;
|
boolean hasPointEvent = false;
|
||||||
|
|
||||||
long furthestStopPos = -1;
|
int furthestStopPos = -1;
|
||||||
|
|
||||||
// look for insertions in the extended context (we'll get deletions from the normal context)
|
// look for insertions in the extended context (we'll get deletions from the normal context)
|
||||||
if ( context.hasExtendedEventPileup() ) {
|
if ( context.hasExtendedEventPileup() ) {
|
||||||
|
|
@ -175,9 +175,9 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
|
|
||||||
GenomeLoc eventLoc = context.getLocation();
|
GenomeLoc eventLoc = context.getLocation();
|
||||||
if ( hasInsertion )
|
if ( hasInsertion )
|
||||||
eventLoc = GenomeLocParser.createGenomeLoc(eventLoc.getContigIndex(), eventLoc.getStart(), eventLoc.getStart()+1);
|
eventLoc = getToolkit().getGenomeLocParser().createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), eventLoc.getStart()+1);
|
||||||
else if ( hasIndel && !context.hasBasePileup() )
|
else if ( hasIndel && !context.hasBasePileup() )
|
||||||
eventLoc = GenomeLocParser.createGenomeLoc(eventLoc.getContigIndex(), eventLoc.getStart(), furthestStopPos);
|
eventLoc = getToolkit().getGenomeLocParser().createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), furthestStopPos);
|
||||||
|
|
||||||
EVENT_TYPE eventType = (hasIndel ? (hasPointEvent ? EVENT_TYPE.BOTH : EVENT_TYPE.INDEL_EVENT) : EVENT_TYPE.POINT_EVENT);
|
EVENT_TYPE eventType = (hasIndel ? (hasPointEvent ? EVENT_TYPE.BOTH : EVENT_TYPE.INDEL_EVENT) : EVENT_TYPE.POINT_EVENT);
|
||||||
|
|
||||||
|
|
@ -217,15 +217,15 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
private enum EVENT_TYPE { POINT_EVENT, INDEL_EVENT, BOTH }
|
private enum EVENT_TYPE { POINT_EVENT, INDEL_EVENT, BOTH }
|
||||||
|
|
||||||
class Event {
|
class Event {
|
||||||
public long furthestStopPos;
|
public int furthestStopPos;
|
||||||
|
|
||||||
public GenomeLoc loc;
|
public GenomeLoc loc;
|
||||||
public long eventStartPos;
|
public int eventStartPos;
|
||||||
private long eventStopPos;
|
private int eventStopPos;
|
||||||
private EVENT_TYPE type;
|
private EVENT_TYPE type;
|
||||||
private ArrayList<Long> pointEvents = new ArrayList<Long>();
|
private ArrayList<Integer> pointEvents = new ArrayList<Integer>();
|
||||||
|
|
||||||
public Event(GenomeLoc loc, long furthestStopPos, EVENT_TYPE type) {
|
public Event(GenomeLoc loc, int furthestStopPos, EVENT_TYPE type) {
|
||||||
this.loc = loc;
|
this.loc = loc;
|
||||||
this.furthestStopPos = furthestStopPos;
|
this.furthestStopPos = furthestStopPos;
|
||||||
this.type = type;
|
this.type = type;
|
||||||
|
|
@ -254,9 +254,9 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( e.type == EVENT_TYPE.POINT_EVENT || e.type == EVENT_TYPE.BOTH ) {
|
if ( e.type == EVENT_TYPE.POINT_EVENT || e.type == EVENT_TYPE.BOTH ) {
|
||||||
long newPosition = e.pointEvents.get(0);
|
int newPosition = e.pointEvents.get(0);
|
||||||
if ( pointEvents.size() > 0 ) {
|
if ( pointEvents.size() > 0 ) {
|
||||||
long lastPosition = pointEvents.get(pointEvents.size()-1);
|
int lastPosition = pointEvents.get(pointEvents.size()-1);
|
||||||
if ( newPosition - lastPosition < windowSize ) {
|
if ( newPosition - lastPosition < windowSize ) {
|
||||||
eventStopPos = Math.max(eventStopPos, newPosition);
|
eventStopPos = Math.max(eventStopPos, newPosition);
|
||||||
furthestStopPos = e.furthestStopPos;
|
furthestStopPos = e.furthestStopPos;
|
||||||
|
|
@ -272,7 +272,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isReportableEvent() {
|
public boolean isReportableEvent() {
|
||||||
return GenomeLocParser.validGenomeLoc(loc.getContig(), eventStartPos, eventStopPos) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
|
return getToolkit().getGenomeLocParser().validGenomeLoc(loc.getContig(), eventStartPos, eventStopPos) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
import org.broad.tribble.vcf.VCFWriter;
|
import org.broad.tribble.vcf.VCFWriter;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
|
@ -44,6 +45,8 @@ import java.util.*;
|
||||||
public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||||
private VCFWriter innerWriter;
|
private VCFWriter innerWriter;
|
||||||
|
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
private ReferenceSequenceFile referenceFileForMNPmerging;
|
private ReferenceSequenceFile referenceFileForMNPmerging;
|
||||||
private int maxGenomicDistanceForMNP;
|
private int maxGenomicDistanceForMNP;
|
||||||
|
|
||||||
|
|
@ -64,8 +67,9 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
||||||
// Should we call innerWriter.close() in close()
|
// Should we call innerWriter.close() in close()
|
||||||
private boolean takeOwnershipOfInner;
|
private boolean takeOwnershipOfInner;
|
||||||
|
|
||||||
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
||||||
this.innerWriter = innerWriter;
|
this.innerWriter = innerWriter;
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.referenceFileForMNPmerging = new IndexedFastaSequenceFile(referenceFile);
|
this.referenceFileForMNPmerging = new IndexedFastaSequenceFile(referenceFile);
|
||||||
this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP;
|
this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP;
|
||||||
this.useSingleSample = singleSample;
|
this.useSingleSample = singleSample;
|
||||||
|
|
@ -83,8 +87,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
||||||
this.takeOwnershipOfInner = takeOwnershipOfInner;
|
this.takeOwnershipOfInner = takeOwnershipOfInner;
|
||||||
}
|
}
|
||||||
|
|
||||||
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) {
|
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) {
|
||||||
this(innerWriter, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics
|
this(innerWriter, genomeLocParser, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics
|
||||||
}
|
}
|
||||||
|
|
||||||
public void writeHeader(VCFHeader header) {
|
public void writeHeader(VCFHeader header) {
|
||||||
|
|
@ -113,7 +117,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug("Next VC input = " + VariantContextUtils.getLocation(vc));
|
logger.debug("Next VC input = " + VariantContextUtils.getLocation(genomeLocParser,vc));
|
||||||
boolean curVcIsNotFiltered = vc.isNotFiltered();
|
boolean curVcIsNotFiltered = vc.isNotFiltered();
|
||||||
|
|
||||||
if (vcfrWaitingToMerge == null) {
|
if (vcfrWaitingToMerge == null) {
|
||||||
|
|
@ -123,20 +127,20 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
||||||
throw new ReviewedStingException("filteredVcfrList should be empty if not waiting to merge a vc!");
|
throw new ReviewedStingException("filteredVcfrList should be empty if not waiting to merge a vc!");
|
||||||
|
|
||||||
if (curVcIsNotFiltered) { // still need to wait before can release vc
|
if (curVcIsNotFiltered) { // still need to wait before can release vc
|
||||||
logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(vc));
|
logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser,vc));
|
||||||
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
|
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
|
||||||
}
|
}
|
||||||
else if (!emitOnlyMergedRecords) { // filtered records are never merged
|
else if (!emitOnlyMergedRecords) { // filtered records are never merged
|
||||||
logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(vc));
|
logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser,vc));
|
||||||
innerWriter.add(vc, refBase);
|
innerWriter.add(vc, refBase);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else { // waiting to merge vcfrWaitingToMerge
|
else { // waiting to merge vcfrWaitingToMerge
|
||||||
logger.debug("Waiting to merge " + VariantContextUtils.getLocation(vcfrWaitingToMerge.vc));
|
logger.debug("Waiting to merge " + VariantContextUtils.getLocation(genomeLocParser,vcfrWaitingToMerge.vc));
|
||||||
|
|
||||||
if (!curVcIsNotFiltered) {
|
if (!curVcIsNotFiltered) {
|
||||||
if (!emitOnlyMergedRecords) { // filtered records are never merged
|
if (!emitOnlyMergedRecords) { // filtered records are never merged
|
||||||
logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(vc));
|
logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser,vc));
|
||||||
filteredVcfrList.add(new VCFRecord(vc, refBase, false));
|
filteredVcfrList.add(new VCFRecord(vc, refBase, false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -164,7 +168,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
||||||
boolean mergedRecords = false;
|
boolean mergedRecords = false;
|
||||||
if (mergeDistanceInRange) {
|
if (mergeDistanceInRange) {
|
||||||
numRecordsWithinDistance++;
|
numRecordsWithinDistance++;
|
||||||
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging);
|
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser,vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging);
|
||||||
if (mergedVc != null) {
|
if (mergedVc != null) {
|
||||||
mergedRecords = true;
|
mergedRecords = true;
|
||||||
vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true);
|
vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true);
|
||||||
|
|
@ -209,8 +213,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
||||||
return numMergedRecords;
|
return numMergedRecords;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int minDistance(VariantContext vc1, VariantContext vc2) {
|
public int minDistance(VariantContext vc1, VariantContext vc2) {
|
||||||
return VariantContextUtils.getLocation(vc1).minDistance(VariantContextUtils.getLocation(vc2));
|
return VariantContextUtils.getLocation(genomeLocParser,vc1).minDistance(VariantContextUtils.getLocation(genomeLocParser,vc2));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -354,10 +358,10 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
||||||
|
|
||||||
if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) {
|
if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) {
|
||||||
aas.segregationUnknown++;
|
aas.segregationUnknown++;
|
||||||
logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2));
|
logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2));
|
||||||
}
|
}
|
||||||
else if (gt1.isHomRef() || gt2.isHomRef()) {
|
else if (gt1.isHomRef() || gt2.isHomRef()) {
|
||||||
logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2));
|
logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2));
|
||||||
aas.eitherNotVariant++;
|
aas.eitherNotVariant++;
|
||||||
}
|
}
|
||||||
else { // BOTH gt1 and gt2 have at least one variant allele (so either hets, or homozygous variant):
|
else { // BOTH gt1 and gt2 have at least one variant allele (so either hets, or homozygous variant):
|
||||||
|
|
@ -386,7 +390,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
||||||
|
|
||||||
// Check MNPs vs. CHets:
|
// Check MNPs vs. CHets:
|
||||||
if (containsRefAllele(site1Alleles) && containsRefAllele(site2Alleles)) {
|
if (containsRefAllele(site1Alleles) && containsRefAllele(site2Alleles)) {
|
||||||
logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2));
|
logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2));
|
||||||
if (logger.isDebugEnabled() && !(gt1.isHet() && gt2.isHet()))
|
if (logger.isDebugEnabled() && !(gt1.isHet() && gt2.isHet()))
|
||||||
throw new ReviewedStingException("Since !gt1.isHomRef() && !gt2.isHomRef(), yet both have ref alleles, they BOTH must be hets!");
|
throw new ReviewedStingException("Since !gt1.isHomRef() && !gt2.isHomRef(), yet both have ref alleles, they BOTH must be hets!");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -78,7 +78,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
|
||||||
|
|
||||||
private void initializeVcfWriter() {
|
private void initializeVcfWriter() {
|
||||||
// false <-> don't take control of writer, since didn't create it:
|
// false <-> don't take control of writer, since didn't create it:
|
||||||
vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
|
vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,getToolkit().getGenomeLocParser(),getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
|
||||||
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
||||||
|
|
||||||
// setup the header fields:
|
// setup the header fields:
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
VCFWriter origWriter = writer;
|
VCFWriter origWriter = writer;
|
||||||
|
|
||||||
if (enableMergePhasedSegregatingPolymorphismsToMNP) // null <-> use ALL samples, false <-> emit all records, false <-> don't track the statistics of alternate alleles being merged:
|
if (enableMergePhasedSegregatingPolymorphismsToMNP) // null <-> use ALL samples, false <-> emit all records, false <-> don't track the statistics of alternate alleles being merged:
|
||||||
writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false);
|
writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,getToolkit().getGenomeLocParser(),getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false);
|
||||||
|
|
||||||
/* Due to discardIrrelevantPhasedSites(), the startDistance spanned by [partiallyPhasedSites.peek(), unphasedSiteQueue.peek()] is <= cacheWindow
|
/* Due to discardIrrelevantPhasedSites(), the startDistance spanned by [partiallyPhasedSites.peek(), unphasedSiteQueue.peek()] is <= cacheWindow
|
||||||
Due to processQueue(), the startDistance spanned by [unphasedSiteQueue.peek(), mostDownstreamLocusReached] is <= cacheWindow
|
Due to processQueue(), the startDistance spanned by [unphasedSiteQueue.peek(), mostDownstreamLocusReached] is <= cacheWindow
|
||||||
|
|
@ -197,7 +197,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) {
|
if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) {
|
||||||
VariantAndReads vr = new VariantAndReads(vc, context);
|
VariantAndReads vr = new VariantAndReads(vc, context);
|
||||||
unphasedSiteQueue.add(vr);
|
unphasedSiteQueue.add(vr);
|
||||||
logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(vr.variant));
|
logger.debug("Added variant to queue = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
|
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
|
||||||
|
|
@ -226,7 +226,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
while (!unphasedSiteQueue.isEmpty()) {
|
while (!unphasedSiteQueue.isEmpty()) {
|
||||||
if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue
|
if (!processAll) { // otherwise, phase until the end of unphasedSiteQueue
|
||||||
VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant;
|
VariantContext nextToPhaseVc = unphasedSiteQueue.peek().variant;
|
||||||
if (startDistancesAreInWindowRange(mostDownstreamLocusReached, VariantContextUtils.getLocation(nextToPhaseVc))) {
|
if (startDistancesAreInWindowRange(mostDownstreamLocusReached, VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),nextToPhaseVc))) {
|
||||||
/* mostDownstreamLocusReached is still not far enough ahead of nextToPhaseVc to have all phasing information for nextToPhaseVc
|
/* mostDownstreamLocusReached is still not far enough ahead of nextToPhaseVc to have all phasing information for nextToPhaseVc
|
||||||
(note that we ASSUME that the VCF is ordered by <contig,locus>).
|
(note that we ASSUME that the VCF is ordered by <contig,locus>).
|
||||||
Note that this will always leave at least one entry (the last one), since mostDownstreamLocusReached is in range of itself.
|
Note that this will always leave at least one entry (the last one), since mostDownstreamLocusReached is in range of itself.
|
||||||
|
|
@ -240,7 +240,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
logger.debug("oldPhasedList(1st) = " + toStringVCL(oldPhasedList));
|
logger.debug("oldPhasedList(1st) = " + toStringVCL(oldPhasedList));
|
||||||
|
|
||||||
VariantAndReads vr = unphasedSiteQueue.remove();
|
VariantAndReads vr = unphasedSiteQueue.remove();
|
||||||
logger.debug("Performing phasing for " + VariantContextUtils.getLocation(vr.variant));
|
logger.debug("Performing phasing for " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant));
|
||||||
phaseSite(vr, phaseStats);
|
phaseSite(vr, phaseStats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -259,7 +259,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
GenomeLoc nextToPhaseLoc = null;
|
GenomeLoc nextToPhaseLoc = null;
|
||||||
if (!unphasedSiteQueue.isEmpty())
|
if (!unphasedSiteQueue.isEmpty())
|
||||||
nextToPhaseLoc = VariantContextUtils.getLocation(unphasedSiteQueue.peek().variant);
|
nextToPhaseLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),unphasedSiteQueue.peek().variant);
|
||||||
|
|
||||||
while (!partiallyPhasedSites.isEmpty()) {
|
while (!partiallyPhasedSites.isEmpty()) {
|
||||||
if (nextToPhaseLoc != null) { // otherwise, unphasedSiteQueue.isEmpty(), and therefore no need to keep any of the "past"
|
if (nextToPhaseLoc != null) { // otherwise, unphasedSiteQueue.isEmpty(), and therefore no need to keep any of the "past"
|
||||||
|
|
@ -284,7 +284,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
private void phaseSite(VariantAndReads vr, PhasingStats phaseStats) {
|
private void phaseSite(VariantAndReads vr, PhasingStats phaseStats) {
|
||||||
VariantContext vc = vr.variant;
|
VariantContext vc = vr.variant;
|
||||||
logger.debug("Will phase vc = " + VariantContextUtils.getLocation(vc));
|
logger.debug("Will phase vc = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
|
||||||
|
|
||||||
UnfinishedVariantAndReads uvr = new UnfinishedVariantAndReads(vr);
|
UnfinishedVariantAndReads uvr = new UnfinishedVariantAndReads(vr);
|
||||||
UnfinishedVariantContext uvc = uvr.unfinishedVariant;
|
UnfinishedVariantContext uvc = uvr.unfinishedVariant;
|
||||||
|
|
@ -321,7 +321,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
|
boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality);
|
||||||
|
|
||||||
if (pr.phasingContainsInconsistencies) {
|
if (pr.phasingContainsInconsistencies) {
|
||||||
logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(vc));
|
logger.debug("MORE than " + (MAX_FRACTION_OF_INCONSISTENT_READS * 100) + "% of the reads are inconsistent for phasing of " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
|
||||||
uvc.setPhasingInconsistent();
|
uvc.setPhasingInconsistent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -360,7 +360,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
if (statsWriter != null)
|
if (statsWriter != null)
|
||||||
statsWriter.addStat(samp, VariantContextUtils.getLocation(vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length);
|
statsWriter.addStat(samp, VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc), startDistance(prevUvc, vc), pr.phaseQuality, phaseWindow.readsAtHetSites.size(), phaseWindow.hetGenotypes.length);
|
||||||
|
|
||||||
PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp);
|
PhaseCounts sampPhaseCounts = samplePhaseStats.get(samp);
|
||||||
if (sampPhaseCounts == null) {
|
if (sampPhaseCounts == null) {
|
||||||
|
|
@ -442,7 +442,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
prevHetAndInteriorIt.previous(); // so that it points to the previous het site [and NOT one after it, due to the last call to next()]
|
prevHetAndInteriorIt.previous(); // so that it points to the previous het site [and NOT one after it, due to the last call to next()]
|
||||||
|
|
||||||
// Add the (het) position to be phased:
|
// Add the (het) position to be phased:
|
||||||
GenomeLoc phaseLocus = VariantContextUtils.getLocation(vr.variant);
|
GenomeLoc phaseLocus = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vr.variant);
|
||||||
GenotypeAndReadBases grbPhase = new GenotypeAndReadBases(vr.variant.getGenotype(sample), vr.sampleReadBases.get(sample), phaseLocus);
|
GenotypeAndReadBases grbPhase = new GenotypeAndReadBases(vr.variant.getGenotype(sample), vr.sampleReadBases.get(sample), phaseLocus);
|
||||||
listHetGenotypes.add(grbPhase);
|
listHetGenotypes.add(grbPhase);
|
||||||
logger.debug("PHASING het site = " + grbPhase.loc + " [phasingSiteIndex = " + phasingSiteIndex + "]");
|
logger.debug("PHASING het site = " + grbPhase.loc + " [phasingSiteIndex = " + phasingSiteIndex + "]");
|
||||||
|
|
@ -456,7 +456,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else if (gt.isHet()) {
|
else if (gt.isHet()) {
|
||||||
GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, nextVr.sampleReadBases.get(sample), VariantContextUtils.getLocation(nextVr.variant));
|
GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, nextVr.sampleReadBases.get(sample), VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),nextVr.variant));
|
||||||
listHetGenotypes.add(grb);
|
listHetGenotypes.add(grb);
|
||||||
logger.debug("Using DOWNSTREAM het site = " + grb.loc);
|
logger.debug("Using DOWNSTREAM het site = " + grb.loc);
|
||||||
}
|
}
|
||||||
|
|
@ -922,15 +922,15 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean startDistancesAreInWindowRange(VariantContext vc1, VariantContext vc2) {
|
private boolean startDistancesAreInWindowRange(VariantContext vc1, VariantContext vc2) {
|
||||||
return startDistancesAreInWindowRange(VariantContextUtils.getLocation(vc1), VariantContextUtils.getLocation(vc2));
|
return startDistancesAreInWindowRange(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc1), VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc2));
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean startDistancesAreInWindowRange(GenomeLoc loc1, GenomeLoc loc2) {
|
private boolean startDistancesAreInWindowRange(GenomeLoc loc1, GenomeLoc loc2) {
|
||||||
return loc1.distance(loc2) <= cacheWindow; // distance() checks: loc1.onSameContig(loc2)
|
return loc1.distance(loc2) <= cacheWindow; // distance() checks: loc1.onSameContig(loc2)
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int startDistance(UnfinishedVariantContext uvc1, VariantContext vc2) {
|
private int startDistance(UnfinishedVariantContext uvc1, VariantContext vc2) {
|
||||||
return uvc1.getLocation().distance(VariantContextUtils.getLocation(vc2));
|
return uvc1.getLocation().distance(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc2));
|
||||||
}
|
}
|
||||||
|
|
||||||
public PhasingStats reduce(PhasingStatsAndOutput statsAndList, PhasingStats stats) {
|
public PhasingStats reduce(PhasingStatsAndOutput statsAndList, PhasingStats stats) {
|
||||||
|
|
@ -1040,7 +1040,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class UnfinishedVariantAndReads {
|
private class UnfinishedVariantAndReads {
|
||||||
public UnfinishedVariantContext unfinishedVariant;
|
public UnfinishedVariantContext unfinishedVariant;
|
||||||
public HashMap<String, ReadBasesAtPosition> sampleReadBases;
|
public HashMap<String, ReadBasesAtPosition> sampleReadBases;
|
||||||
|
|
||||||
|
|
@ -1052,11 +1052,11 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
|
|
||||||
// COULD replace with MutableVariantContext if it worked [didn't throw exceptions when trying to call its set() methods]...
|
// COULD replace with MutableVariantContext if it worked [didn't throw exceptions when trying to call its set() methods]...
|
||||||
|
|
||||||
private static class UnfinishedVariantContext {
|
private class UnfinishedVariantContext {
|
||||||
private String name;
|
private String name;
|
||||||
private String contig;
|
private String contig;
|
||||||
private long start;
|
private int start;
|
||||||
private long stop;
|
private int stop;
|
||||||
private Collection<Allele> alleles;
|
private Collection<Allele> alleles;
|
||||||
private Map<String, Genotype> genotypes;
|
private Map<String, Genotype> genotypes;
|
||||||
private double negLog10PError;
|
private double negLog10PError;
|
||||||
|
|
@ -1080,7 +1080,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
}
|
}
|
||||||
|
|
||||||
public GenomeLoc getLocation() {
|
public GenomeLoc getLocation() {
|
||||||
return GenomeLocParser.createGenomeLoc(contig, start, stop);
|
return getToolkit().getGenomeLocParser().createGenomeLoc(contig, start, stop);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Genotype getGenotype(String sample) {
|
public Genotype getGenotype(String sample) {
|
||||||
|
|
@ -1110,7 +1110,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String toStringVCL(List<VariantContext> vcList) {
|
private String toStringVCL(List<VariantContext> vcList) {
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (VariantContext vc : vcList) {
|
for (VariantContext vc : vcList) {
|
||||||
|
|
@ -1119,7 +1119,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
else
|
else
|
||||||
sb.append(" -- ");
|
sb.append(" -- ");
|
||||||
|
|
||||||
sb.append(VariantContextUtils.getLocation(vc));
|
sb.append(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc));
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -1411,7 +1411,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
||||||
public void outputMultipleBaseCounts() {
|
public void outputMultipleBaseCounts() {
|
||||||
GenomeLoc nextToPhaseLoc = null;
|
GenomeLoc nextToPhaseLoc = null;
|
||||||
if (!unphasedSiteQueue.isEmpty())
|
if (!unphasedSiteQueue.isEmpty())
|
||||||
nextToPhaseLoc = VariantContextUtils.getLocation(unphasedSiteQueue.peek().variant);
|
nextToPhaseLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),unphasedSiteQueue.peek().variant);
|
||||||
|
|
||||||
outputMultipleBaseCounts(nextToPhaseLoc);
|
outputMultipleBaseCounts(nextToPhaseLoc);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -92,8 +94,15 @@ public class CountRodWalker extends RodWalker<CountRodWalker.Datum, Pair<Expandi
|
||||||
GenomeLoc cur = context.getLocation();
|
GenomeLoc cur = context.getLocation();
|
||||||
|
|
||||||
if ( verbose && showSkipped ) {
|
if ( verbose && showSkipped ) {
|
||||||
for ( int i = 0; i < context.getSkippedBases(); i++ ) {
|
for(long i = context.getSkippedBases(); i >= 0; i--) {
|
||||||
out.printf("%s: skipped%n", GenomeLocParser.incPos(cur, i - context.getSkippedBases()));
|
SAMSequenceDictionary dictionary = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary();
|
||||||
|
SAMSequenceRecord contig = dictionary.getSequence(cur.getContig());
|
||||||
|
if(cur.getStop() < contig.getSequenceLength())
|
||||||
|
cur = getToolkit().getGenomeLocParser().incPos(cur,1);
|
||||||
|
else
|
||||||
|
cur = getToolkit().getGenomeLocParser().createGenomeLoc(dictionary.getSequence(contig.getSequenceIndex()+1).getSequenceName(),1,1);
|
||||||
|
out.printf("%s: skipped%n", cur);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -87,11 +87,11 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String pileupDiff(final ReadBackedPileup a, final SAMPileupFeature b, boolean orderDependent)
|
public String pileupDiff(final ReadBackedPileup a, final SAMPileupFeature b, boolean orderDependent)
|
||||||
{
|
{
|
||||||
if ( a.size() != b.size() )
|
if ( a.size() != b.size() )
|
||||||
return "Sizes not equal";
|
return "Sizes not equal";
|
||||||
GenomeLoc featureLocation = GenomeLocParser.createGenomeLoc(b.getChr(),b.getStart(),b.getEnd());
|
GenomeLoc featureLocation = getToolkit().getGenomeLocParser().createGenomeLoc(b.getChr(),b.getStart(),b.getEnd());
|
||||||
if ( a.getLocation().compareTo(featureLocation) != 0 )
|
if ( a.getLocation().compareTo(featureLocation) != 0 )
|
||||||
return "Locations not equal";
|
return "Locations not equal";
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -86,9 +86,9 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
||||||
logger.info("Loading SNP mask... ");
|
logger.info("Loading SNP mask... ");
|
||||||
ReferenceOrderedData snp_mask;
|
ReferenceOrderedData snp_mask;
|
||||||
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
|
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder();
|
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser());
|
||||||
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
|
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
|
||||||
snpMaskIterator = new SeekableRODIterator(iter);
|
snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// TODO: fix me when Plink is back
|
// TODO: fix me when Plink is back
|
||||||
|
|
@ -142,8 +142,8 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
||||||
|
|
||||||
if ( ! haveMaskForWindow ) {
|
if ( ! haveMaskForWindow ) {
|
||||||
String contig = context.getLocation().getContig();
|
String contig = context.getLocation().getContig();
|
||||||
long offset = context.getLocation().getStart();
|
int offset = context.getLocation().getStart();
|
||||||
long true_offset = offset - 200;
|
int true_offset = offset - 200;
|
||||||
|
|
||||||
// we have variant; let's load all the snps falling into the current window and prepare the mask array.
|
// we have variant; let's load all the snps falling into the current window and prepare the mask array.
|
||||||
// we need to do it only once per window, regardless of how many vcs we may have at this location!
|
// we need to do it only once per window, regardless of how many vcs we may have at this location!
|
||||||
|
|
@ -152,7 +152,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
||||||
for ( int i = 0 ; i < 401; i++ )
|
for ( int i = 0 ; i < 401; i++ )
|
||||||
maskFlags[i] = 0;
|
maskFlags[i] = 0;
|
||||||
|
|
||||||
RODRecordList snpList = snpMaskIterator.seekForward(GenomeLocParser.createGenomeLoc(contig,offset-200,offset+200));
|
RODRecordList snpList = snpMaskIterator.seekForward(getToolkit().getGenomeLocParser().createGenomeLoc(contig,offset-200,offset+200));
|
||||||
if ( snpList != null && snpList.size() != 0 ) {
|
if ( snpList != null && snpList.size() != 0 ) {
|
||||||
Iterator<GATKFeature> snpsInWindow = snpList.iterator();
|
Iterator<GATKFeature> snpsInWindow = snpList.iterator();
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
|
||||||
|
|
@ -639,7 +639,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
else if ( group.requiresNovel() && vcKnown )
|
else if ( group.requiresNovel() && vcKnown )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) )
|
if ( group.selectExp != null && ! VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, group.selectExp) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// nothing invalidated our membership in this set
|
// nothing invalidated our membership in this set
|
||||||
|
|
|
||||||
|
|
@ -178,7 +178,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
|
||||||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||||
int iii = 0;
|
int iii = 0;
|
||||||
for( final String key : annotationKeys ) {
|
for( final String key : annotationKeys ) {
|
||||||
annotationValues[iii++] = theModel.decodeAnnotation( key, vc, true );
|
annotationValues[iii++] = theModel.decodeAnnotation( getToolkit().getGenomeLocParser(), key, vc, true );
|
||||||
}
|
}
|
||||||
|
|
||||||
final VariantDatum variantDatum = new VariantDatum();
|
final VariantDatum variantDatum = new VariantDatum();
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
|
|
@ -425,7 +426,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) {
|
public double decodeAnnotation(GenomeLocParser genomeLocParser, final String annotationKey, final VariantContext vc, final boolean jitter ) {
|
||||||
double value;
|
double value;
|
||||||
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // HRun values must be jittered a bit to work in this GMM
|
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // HRun values must be jittered a bit to work in this GMM
|
||||||
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
|
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
|
||||||
|
|
@ -436,18 +437,18 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
try {
|
try {
|
||||||
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
|
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
|
||||||
} catch( Exception e ) {
|
} catch( Exception e ) {
|
||||||
throw new UserException.MalformedFile(vc.getSource(), "No double value detected for annotation = " + annotationKey + " in variant at " + VariantContextUtils.getLocation(vc) + ", reported annotation value = " + vc.getAttribute( annotationKey ), e );
|
throw new UserException.MalformedFile(vc.getSource(), "No double value detected for annotation = " + annotationKey + " in variant at " + VariantContextUtils.getLocation(genomeLocParser,vc) + ", reported annotation value = " + vc.getAttribute( annotationKey ), e );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final double evaluateVariant( final VariantContext vc ) {
|
public final double evaluateVariant( GenomeLocParser genomeLocParser, final VariantContext vc ) {
|
||||||
final double[] pVarInCluster = new double[maxGaussians];
|
final double[] pVarInCluster = new double[maxGaussians];
|
||||||
final double[] annotations = new double[dataManager.numAnnotations];
|
final double[] annotations = new double[dataManager.numAnnotations];
|
||||||
|
|
||||||
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
|
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
|
||||||
final double value = decodeAnnotation( dataManager.annotationKeys.get(jjj), vc, false );
|
final double value = decodeAnnotation( genomeLocParser, dataManager.annotationKeys.get(jjj), vc, false );
|
||||||
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
|
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -283,7 +283,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
priorCache.put( priorLodFactor, false, priorKey );
|
priorCache.put( priorLodFactor, false, priorKey );
|
||||||
}
|
}
|
||||||
|
|
||||||
final double pVar = theModel.evaluateVariant( vc );
|
final double pVar = theModel.evaluateVariant( ref.getGenomeLocParser(),vc );
|
||||||
final double lod = priorLodFactor + Math.log10(pVar);
|
final double lod = priorLodFactor + Math.log10(pVar);
|
||||||
variantDatum.qual = Math.abs( QUALITY_SCALE_FACTOR * QualityUtils.lodToPhredScaleErrorRate(lod) );
|
variantDatum.qual = Math.abs( QUALITY_SCALE_FACTOR * QualityUtils.lodToPhredScaleErrorRate(lod) );
|
||||||
if( variantDatum.qual > maxQualObserved ) {
|
if( variantDatum.qual > maxQualObserved ) {
|
||||||
|
|
|
||||||
|
|
@ -129,7 +129,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
||||||
// get all of the vcf rods at this locus
|
// get all of the vcf rods at this locus
|
||||||
// Need to provide reference bases to simpleMerge starting at current locus
|
// Need to provide reference bases to simpleMerge starting at current locus
|
||||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption,
|
VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
|
||||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
|
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -131,7 +131,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
||||||
// update if necessary and write
|
// update if necessary and write
|
||||||
if ( !newCigar.equals(originalCigar) && newCigar.numCigarElements() > 1 ) {
|
if ( !newCigar.equals(originalCigar) && newCigar.numCigarElements() > 1 ) {
|
||||||
int difference = originalIndex - newCigar.getCigarElement(0).getLength();
|
int difference = originalIndex - newCigar.getCigarElement(0).getLength();
|
||||||
GenomeLoc newLoc = GenomeLocParser.createPotentiallyInvalidGenomeLoc(vc.getChr(), vc.getStart()-difference, vc.getEnd()-difference);
|
GenomeLoc newLoc = getToolkit().getGenomeLocParser().createPotentiallyInvalidGenomeLoc(vc.getChr(), vc.getStart()-difference, vc.getEnd()-difference);
|
||||||
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + newLoc);
|
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + newLoc);
|
||||||
VariantContext newVC = VariantContextUtils.modifyLocation(vc, newLoc);
|
VariantContext newVC = VariantContextUtils.modifyLocation(vc, newLoc);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
vc = VariantContextUtils.reverseComplement(vc);
|
vc = VariantContextUtils.reverseComplement(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
|
vc = VariantContextUtils.modifyLocation(vc, getToolkit().getGenomeLocParser().createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
|
||||||
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false);
|
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false);
|
||||||
|
|
||||||
if ( originalVC.isSNP() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
if ( originalVC.isSNP() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||||
|
|
|
||||||
|
|
@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
||||||
//System.out.printf("%s%n",sub.toString());
|
//System.out.printf("%s%n",sub.toString());
|
||||||
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
||||||
if ( !VariantContextUtils.match(sub, jexl) ) {
|
if ( !VariantContextUtils.match(getToolkit().getGenomeLocParser(),sub, jexl) ) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ public class CreateTiTvTrack extends RodWalker<VariantContext,TiTvWindow> {
|
||||||
|
|
||||||
window.update(VariantContextUtils.isTransition(vc));
|
window.update(VariantContextUtils.isTransition(vc));
|
||||||
if ( window.getTiTv() != null ) {
|
if ( window.getTiTv() != null ) {
|
||||||
writer.writeData(VariantContextUtils.getLocation(vc),window.getTiTv());
|
writer.writeData(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc),window.getTiTv());
|
||||||
}
|
}
|
||||||
|
|
||||||
return window;
|
return window;
|
||||||
|
|
|
||||||
|
|
@ -287,7 +287,7 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void shiftWindows(long pos) {
|
private void shiftWindows(int pos) {
|
||||||
// we shift windows when there is a read that does not fit into the current window.
|
// we shift windows when there is a read that does not fit into the current window.
|
||||||
// the position, to which the shift is performed, is the first position such that the new read
|
// the position, to which the shift is performed, is the first position such that the new read
|
||||||
// can be accomodated. Hence we can safely slide up to pos, only discarding reads that go out of scope -
|
// can be accomodated. Hence we can safely slide up to pos, only discarding reads that go out of scope -
|
||||||
|
|
@ -332,7 +332,7 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
|
||||||
purgeSignal(pos);
|
purgeSignal(pos);
|
||||||
purgeControl(pos);
|
purgeControl(pos);
|
||||||
|
|
||||||
currentWindow = GenomeLocParser.createGenomeLoc(currentWindow.getContigIndex(),pos,pos+WINDOW_SIZE-1);
|
currentWindow = getToolkit().getGenomeLocParser().createGenomeLoc(currentWindow.getContig(),pos,pos+WINDOW_SIZE-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -349,7 +349,8 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
|
||||||
controlReadGroups = readGroupSets.get(1);
|
controlReadGroups = readGroupSets.get(1);
|
||||||
// System.out.println(controlReadGroups.size()+" read groups in control");
|
// System.out.println(controlReadGroups.size()+" read groups in control");
|
||||||
|
|
||||||
currentWindow = GenomeLocParser.createGenomeLoc(0,1,WINDOW_SIZE);
|
String sequenceName = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequence(0).getSequenceName();
|
||||||
|
currentWindow = getToolkit().getGenomeLocParser().createGenomeLoc(sequenceName,1,WINDOW_SIZE);
|
||||||
readsInSignalWindow = new LinkedList<SAMRecord>();
|
readsInSignalWindow = new LinkedList<SAMRecord>();
|
||||||
readsInControlWindow = new LinkedList<SAMRecord>();
|
readsInControlWindow = new LinkedList<SAMRecord>();
|
||||||
|
|
||||||
|
|
@ -366,7 +367,7 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
|
||||||
if ( read.getReferenceIndex() > currentWindow.getContigIndex() ) {
|
if ( read.getReferenceIndex() > currentWindow.getContigIndex() ) {
|
||||||
printRegion(); // print all we had on the previous contig
|
printRegion(); // print all we had on the previous contig
|
||||||
|
|
||||||
currentWindow = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),
|
currentWindow = ref.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),
|
||||||
read.getAlignmentStart(),
|
read.getAlignmentStart(),
|
||||||
read.getAlignmentStart()+WINDOW_SIZE-1);
|
read.getAlignmentStart()+WINDOW_SIZE-1);
|
||||||
currentContig = read.getReferenceName();
|
currentContig = read.getReferenceName();
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( Map.Entry<String,BEDFeature> additionalGenes : currentBedFeatures.entrySet() ) {
|
for ( Map.Entry<String,BEDFeature> additionalGenes : currentBedFeatures.entrySet() ) {
|
||||||
GenomeLoc entryLoc = GenomeLocParser.createGenomeLoc(additionalGenes.getValue().getChr(),additionalGenes.getValue().getStart(),additionalGenes.getValue().getEnd());
|
GenomeLoc entryLoc = getToolkit().getGenomeLocParser().createGenomeLoc(additionalGenes.getValue().getChr(),additionalGenes.getValue().getStart(),additionalGenes.getValue().getEnd());
|
||||||
if ( interval.overlapsP(entryLoc) &&
|
if ( interval.overlapsP(entryLoc) &&
|
||||||
! additionalGenes.getValue().getName().equals("") &&
|
! additionalGenes.getValue().getName().equals("") &&
|
||||||
! intervalBuffer.get(interval).geneNames.contains(additionalGenes.getKey()+"_"+additionalGenes.getValue().getName())) {
|
! intervalBuffer.get(interval).geneNames.contains(additionalGenes.getKey()+"_"+additionalGenes.getValue().getName())) {
|
||||||
|
|
@ -142,7 +142,7 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( Map.Entry<String,BEDFeature> entry : currentBedFeatures.entrySet() ) {
|
for ( Map.Entry<String,BEDFeature> entry : currentBedFeatures.entrySet() ) {
|
||||||
GenomeLoc entryLoc = GenomeLocParser.createGenomeLoc(entry.getValue().getChr(),entry.getValue().getStart(),entry.getValue().getEnd());
|
GenomeLoc entryLoc = getToolkit().getGenomeLocParser().createGenomeLoc(entry.getValue().getChr(),entry.getValue().getStart(),entry.getValue().getEnd());
|
||||||
if ( entryLoc.isBefore(ref.getLocus()) ) {
|
if ( entryLoc.isBefore(ref.getLocus()) ) {
|
||||||
currentBedFeatures.remove(entry.getKey());
|
currentBedFeatures.remove(entry.getKey());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,9 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
|
||||||
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
|
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||||
|
getToolkit().getGenomeLocParser(),
|
||||||
|
new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq"));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new UserException.CouldNotReadInputFile(RefseqFileName, e);
|
throw new UserException.CouldNotReadInputFile(RefseqFileName, e);
|
||||||
}
|
}
|
||||||
|
|
@ -128,14 +130,14 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
|
||||||
} else {
|
} else {
|
||||||
if ( RefSeqFeature.isCoding(ann) ) {
|
if ( RefSeqFeature.isCoding(ann) ) {
|
||||||
//b.append(annIntron); // not in exon, but within the coding region = intron
|
//b.append(annIntron); // not in exon, but within the coding region = intron
|
||||||
GenomeLoc ig = GenomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd());
|
GenomeLoc ig = getToolkit().getGenomeLocParser().createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd());
|
||||||
GenomeLoc cl = t.getCodingLocation();
|
GenomeLoc cl = t.getCodingLocation();
|
||||||
GenomeLoc g = t.getLocation();
|
GenomeLoc g = t.getLocation();
|
||||||
|
|
||||||
boolean spliceSiteDisruption = false;
|
boolean spliceSiteDisruption = false;
|
||||||
|
|
||||||
for (GenomeLoc exon : t.getExons()) {
|
for (GenomeLoc exon : t.getExons()) {
|
||||||
GenomeLoc expandedExon = GenomeLocParser.createGenomeLoc(exon.getContig(), exon.getStart() - 6, exon.getStop() + 6);
|
GenomeLoc expandedExon = getToolkit().getGenomeLocParser().createGenomeLoc(exon.getContig(), exon.getStart() - 6, exon.getStop() + 6);
|
||||||
|
|
||||||
if (ig.overlapsP(expandedExon)) {
|
if (ig.overlapsP(expandedExon)) {
|
||||||
spliceSiteDisruption = true;
|
spliceSiteDisruption = true;
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -71,7 +72,7 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
|
||||||
|
|
||||||
private void finalUpdate(OverlapTabulator tab) {
|
private void finalUpdate(OverlapTabulator tab) {
|
||||||
while ( ! evalContexts.isEmpty() ) {
|
while ( ! evalContexts.isEmpty() ) {
|
||||||
tab.update(emptyOverlapTable());
|
tab.update(emptyOverlapTable(getToolkit().getGenomeLocParser()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -119,25 +120,25 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
|
||||||
|
|
||||||
public OverlapTable getOverlapTable(ReferenceContext ref) {
|
public OverlapTable getOverlapTable(ReferenceContext ref) {
|
||||||
// step 1: check that the eval queue is non-empty and that we are outside the window
|
// step 1: check that the eval queue is non-empty and that we are outside the window
|
||||||
if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) {
|
if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
// step 2: discard all comp variations which come before the window
|
// step 2: discard all comp variations which come before the window
|
||||||
while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(compContexts.get(0)).isBefore(ref.getLocus()) &&
|
while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(0)).isBefore(ref.getLocus()) &&
|
||||||
VariantContextUtils.getLocation(compContexts.get(0)).distance(ref.getLocus()) > indelWindow) {
|
VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(0)).distance(ref.getLocus()) > indelWindow) {
|
||||||
compContexts.remove(0);
|
compContexts.remove(0);
|
||||||
}
|
}
|
||||||
// step 3: see if there are any contexts left; if so then they must be within the window
|
// step 3: see if there are any contexts left; if so then they must be within the window
|
||||||
if ( ! compContexts.isEmpty() ) {
|
if ( ! compContexts.isEmpty() ) {
|
||||||
return nonEmptyOverlapTable(ref);
|
return nonEmptyOverlapTable(ref);
|
||||||
} else {
|
} else {
|
||||||
return emptyOverlapTable();
|
return emptyOverlapTable(ref.getGenomeLocParser());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public OverlapTable emptyOverlapTable() {
|
public OverlapTable emptyOverlapTable(GenomeLocParser genomeLocParser) {
|
||||||
// only eval, no comp
|
// only eval, no comp
|
||||||
OverlapTable ot = new OverlapTable();
|
OverlapTable ot = new OverlapTable(genomeLocParser);
|
||||||
ot.setEvalSizeAndType(evalContexts.get(0));
|
ot.setEvalSizeAndType(evalContexts.get(0));
|
||||||
return ot;
|
return ot;
|
||||||
}
|
}
|
||||||
|
|
@ -145,17 +146,17 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
|
||||||
public OverlapTable nonEmptyOverlapTable(ReferenceContext ref) {
|
public OverlapTable nonEmptyOverlapTable(ReferenceContext ref) {
|
||||||
if ( vcfWriter != null ) {
|
if ( vcfWriter != null ) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).isBefore(VariantContextUtils.getLocation(evalContexts.get(0)))) {
|
while ( i < compContexts.size() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(i)).isBefore(VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0)))) {
|
||||||
vcfWriter.add(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]);
|
vcfWriter.add(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
vcfWriter.add(evalContexts.get(0), ref.getBase());
|
vcfWriter.add(evalContexts.get(0), ref.getBase());
|
||||||
while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).distance(VariantContextUtils.getLocation(evalContexts.get(0))) <= indelWindow) {
|
while ( i < compContexts.size() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(i)).distance(VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0))) <= indelWindow) {
|
||||||
vcfWriter.add(compContexts.get(i), compContexts.get(i).getReference().getBases()[0]);
|
vcfWriter.add(compContexts.get(i), compContexts.get(i).getReference().getBases()[0]);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
OverlapTable ot = new OverlapTable();
|
OverlapTable ot = new OverlapTable(ref.getGenomeLocParser());
|
||||||
ot.setCompOverlaps(compContexts.size());
|
ot.setCompOverlaps(compContexts.size());
|
||||||
ot.setDistances(compContexts,evalContexts.get(0), indelWindow);
|
ot.setDistances(compContexts,evalContexts.get(0), indelWindow);
|
||||||
return ot;
|
return ot;
|
||||||
|
|
@ -164,13 +165,15 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
|
||||||
|
|
||||||
}
|
}
|
||||||
class OverlapTable {
|
class OverlapTable {
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
private int numOverlaps;
|
private int numOverlaps;
|
||||||
private ExpandingArrayList<Integer> distances; // currently unused
|
private ExpandingArrayList<Integer> distances; // currently unused
|
||||||
private int evalSize;
|
private int evalSize;
|
||||||
private boolean isDeletion;
|
private boolean isDeletion;
|
||||||
|
|
||||||
public OverlapTable() {
|
public OverlapTable(GenomeLocParser genomeLocParser) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
numOverlaps = 0;
|
numOverlaps = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -187,8 +190,8 @@ class OverlapTable {
|
||||||
public void setDistances(List<VariantContext> comps, VariantContext eval, int winsize) {
|
public void setDistances(List<VariantContext> comps, VariantContext eval, int winsize) {
|
||||||
distances = new ExpandingArrayList<Integer>();
|
distances = new ExpandingArrayList<Integer>();
|
||||||
for ( VariantContext comp : comps ) {
|
for ( VariantContext comp : comps ) {
|
||||||
if ( VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval)) <= winsize ) {
|
if ( VariantContextUtils.getLocation(genomeLocParser,comp).distance(VariantContextUtils.getLocation(genomeLocParser,eval)) <= winsize ) {
|
||||||
distances.add(VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval)));
|
distances.add(VariantContextUtils.getLocation(genomeLocParser,comp).distance(VariantContextUtils.getLocation(genomeLocParser,eval)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -143,7 +143,7 @@ public class IndelErrorRateWalker extends LocusWalker<Integer,Integer> {
|
||||||
// System.out.println("Non countable indel event at "+pileup.getLocation());
|
// System.out.println("Non countable indel event at "+pileup.getLocation());
|
||||||
countableIndelBuffer.clear();
|
countableIndelBuffer.clear();
|
||||||
coverageBuffer.clear(); // we do not want to count observations (read bases) around non-countable indel as well
|
coverageBuffer.clear(); // we do not want to count observations (read bases) around non-countable indel as well
|
||||||
skipToLoc = GenomeLocParser.createGenomeLoc(pileup.getLocation().getContigIndex(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1);
|
skipToLoc = ref.getGenomeLocParser().createGenomeLoc(pileup.getLocation().getContig(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1);
|
||||||
// System.out.println("Skip to "+skipToLoc);
|
// System.out.println("Skip to "+skipToLoc);
|
||||||
} else {
|
} else {
|
||||||
// pileup does not contain too many indels, we need to store them in the buffer and count them later,
|
// pileup does not contain too many indels, we need to store them in the buffer and count them later,
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,7 @@ public class MarkIntervals extends RodWalker<Long, Long> {
|
||||||
try {
|
try {
|
||||||
for ( String line : new XReadLines(locs, true) ) {
|
for ( String line : new XReadLines(locs, true) ) {
|
||||||
String parts[] = line.split(":");
|
String parts[] = line.split(":");
|
||||||
badSites.add(GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1])));
|
badSites.add(getToolkit().getGenomeLocParser().createGenomeLoc(parts[0], Integer.valueOf(parts[1])));
|
||||||
}
|
}
|
||||||
} catch ( FileNotFoundException e ) {
|
} catch ( FileNotFoundException e ) {
|
||||||
throw new UserException.CouldNotReadInputFile(locs, e);
|
throw new UserException.CouldNotReadInputFile(locs, e);
|
||||||
|
|
|
||||||
|
|
@ -237,7 +237,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
}
|
}
|
||||||
|
|
||||||
public GenomeLoc getLocus() {
|
public GenomeLoc getLocus() {
|
||||||
return VariantContextUtils.getLocation(trio);
|
return VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),trio);
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte getRefBase() {
|
public byte getRefBase() {
|
||||||
|
|
@ -623,7 +623,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
logger.info(String.format("%s,%s,%s,%d,%d",entryRegion.getKey(),entryRegion.getValue().regionStart,entryRegion.getValue().lastSeen,
|
logger.info(String.format("%s,%s,%s,%d,%d",entryRegion.getKey(),entryRegion.getValue().regionStart,entryRegion.getValue().lastSeen,
|
||||||
entryRegion.getValue().deNovoSNPsInRegion,entryRegion.getValue().oppositeHomsInRegion));
|
entryRegion.getValue().deNovoSNPsInRegion,entryRegion.getValue().oppositeHomsInRegion));
|
||||||
int chr_end = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(entryRegion.getValue().getContigStr()).getSequenceLength();
|
int chr_end = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(entryRegion.getValue().getContigStr()).getSequenceLength();
|
||||||
entryRegion.getValue().endedBy = GenomeLocParser.createGenomeLoc(entryRegion.getValue().getContigStr(),chr_end,chr_end);
|
entryRegion.getValue().endedBy = getToolkit().getGenomeLocParser().createGenomeLoc(entryRegion.getValue().getContigStr(),chr_end,chr_end);
|
||||||
to_print.add(entryRegion.getValue());
|
to_print.add(entryRegion.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,8 @@ import java.io.*;
|
||||||
* This walker is designed to be used in conjunction with NeighborhoodQualityWalker.
|
* This walker is designed to be used in conjunction with NeighborhoodQualityWalker.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
public class
|
||||||
|
ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
@Output
|
@Output
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
@Argument(fullName = "inputQualityFile", shortName = "if", doc = "Input quality score file generated by NeighborhoodQualityWalker", required = true)
|
@Argument(fullName = "inputQualityFile", shortName = "if", doc = "Input quality score file generated by NeighborhoodQualityWalker", required = true)
|
||||||
|
|
@ -98,7 +99,7 @@ public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter>
|
||||||
// BUGBUG: This assumes reads will be sorted by start location
|
// BUGBUG: This assumes reads will be sorted by start location
|
||||||
float sumNeighborhoodQuality = 0.0f;
|
float sumNeighborhoodQuality = 0.0f;
|
||||||
int numLines = 0;
|
int numLines = 0;
|
||||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc( read );
|
GenomeLoc readLoc = getToolkit().getGenomeLocParser().createGenomeLoc( read );
|
||||||
if( readLoc.size() > 0 ) { // only calculate mean NQS if the read has a well formed GenomeLoc, if not NQS will be zero
|
if( readLoc.size() > 0 ) { // only calculate mean NQS if the read has a well formed GenomeLoc, if not NQS will be zero
|
||||||
try {
|
try {
|
||||||
if( line == null ) {
|
if( line == null ) {
|
||||||
|
|
@ -106,12 +107,12 @@ public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter>
|
||||||
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file is empty" ); }
|
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file is empty" ); }
|
||||||
}
|
}
|
||||||
String[] halves = line.split( " ", 2 );
|
String[] halves = line.split( " ", 2 );
|
||||||
GenomeLoc curLoc = GenomeLocParser.parseGenomeLoc( halves[0] );
|
GenomeLoc curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] );
|
||||||
while( curLoc.isBefore( readLoc ) ) { // Loop until the beginning of the read
|
while( curLoc.isBefore( readLoc ) ) { // Loop until the beginning of the read
|
||||||
line = inputReader.readLine();
|
line = inputReader.readLine();
|
||||||
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find beginning of read: " + readLoc ); }
|
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find beginning of read: " + readLoc ); }
|
||||||
halves = line.split( " ", 2 );
|
halves = line.split( " ", 2 );
|
||||||
curLoc = GenomeLocParser.parseGenomeLoc( halves[0] );
|
curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] );
|
||||||
}
|
}
|
||||||
// now we have skipped ahead in the input file to where this read starts
|
// now we have skipped ahead in the input file to where this read starts
|
||||||
logger.debug( "Starting: " + curLoc + ", read: " + readLoc + "\t size: " + readLoc.size() );
|
logger.debug( "Starting: " + curLoc + ", read: " + readLoc + "\t size: " + readLoc.size() );
|
||||||
|
|
@ -124,7 +125,7 @@ public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter>
|
||||||
line = inputReader.readLine();
|
line = inputReader.readLine();
|
||||||
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find end of read: " + readLoc ); }
|
if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find end of read: " + readLoc ); }
|
||||||
halves = line.split( " ", 2 );
|
halves = line.split( " ", 2 );
|
||||||
curLoc = GenomeLocParser.parseGenomeLoc( halves[0] );
|
curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] );
|
||||||
}
|
}
|
||||||
// now we have parsed the input file up to where the read ends
|
// now we have parsed the input file up to where the read ends
|
||||||
// reset back to the mark in order to parse the next read in the next call to the reduce function
|
// reset back to the mark in order to parse the next read in the next call to the reduce function
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class RealignedReadCounter extends ReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
||||||
intervals = new IntervalFileMergingIterator( new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
|
intervals = new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
|
||||||
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -69,10 +69,10 @@ public class RealignedReadCounter extends ReadWalker<Integer, Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
GenomeLoc readLoc = ref.getGenomeLocParser().createGenomeLoc(read);
|
||||||
// hack to get around unmapped reads having screwy locations
|
// hack to get around unmapped reads having screwy locations
|
||||||
if ( readLoc.getStop() == 0 )
|
if ( readLoc.getStop() == 0 )
|
||||||
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart());
|
readLoc = ref.getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
|
||||||
|
|
||||||
if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) )
|
if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -130,7 +130,7 @@ public class TestReadFishingWalker extends ReadWalker<Integer,Long> {
|
||||||
else
|
else
|
||||||
throw new ReviewedStingException("Invalid indel type: " + type);
|
throw new ReviewedStingException("Invalid indel type: " + type);
|
||||||
|
|
||||||
aligners.put(GenomeLocParser.createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration()));
|
aligners.put(getToolkit().getGenomeLocParser().createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration()));
|
||||||
if(++numAlignersCreated % 100 == 0)
|
if(++numAlignersCreated % 100 == 0)
|
||||||
out.printf("Created %d aligners in %dms%n",++numAlignersCreated,System.currentTimeMillis()-startTime);
|
out.printf("Created %d aligners in %dms%n",++numAlignersCreated,System.currentTimeMillis()-startTime);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,9 +28,9 @@ public class ValidateRODForReads extends ReadWalker<Integer, Integer> {
|
||||||
@Override
|
@Override
|
||||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||||
if (tracker != null) {
|
if (tracker != null) {
|
||||||
Map<Long, Collection<GATKFeature>> mapping = tracker.getContigOffsetMapping();
|
Map<Integer, Collection<GATKFeature>> mapping = tracker.getContigOffsetMapping();
|
||||||
for (Map.Entry<Long, Collection<GATKFeature>> entry : mapping.entrySet()) {
|
for (Map.Entry<Integer, Collection<GATKFeature>> entry : mapping.entrySet()) {
|
||||||
GenomeLoc location = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),entry.getKey());
|
GenomeLoc location = ref.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),entry.getKey());
|
||||||
if (!map.containsKey(location)) {
|
if (!map.containsKey(location)) {
|
||||||
map.put(location,0);
|
map.put(location,0);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -113,7 +113,7 @@ public class CombineDuplicatesWalker extends DuplicateWalker<List<SAMRecord>, SA
|
||||||
// out.printf("Combining Read %s%n", read.format());
|
// out.printf("Combining Read %s%n", read.format());
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
combinedRead = DupUtils.combineDuplicates(reads, MAX_QUALITY_SCORE);
|
combinedRead = DupUtils.combineDuplicates(getToolkit().getGenomeLocParser(),reads, MAX_QUALITY_SCORE);
|
||||||
//out.printf(" => into %s%n", combinedRead.format());
|
//out.printf(" => into %s%n", combinedRead.format());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -468,10 +468,10 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
// if a read is too long for the reference context, extend the context (being sure not to extend past the end of the chromosome)
|
// if a read is too long for the reference context, extend the context (being sure not to extend past the end of the chromosome)
|
||||||
if ( record.getAlignmentEnd() > refContext.getWindow().getStop() ) {
|
if ( record.getAlignmentEnd() > refContext.getWindow().getStop() ) {
|
||||||
GenomeLoc window = GenomeLocParser.createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength()));
|
GenomeLoc window = refContext.getGenomeLocParser().createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength()));
|
||||||
byte[] bases = referenceReader.getSubsequenceAt(window.getContig(), window.getStart(), window.getStop()).getBases();
|
byte[] bases = referenceReader.getSubsequenceAt(window.getContig(), window.getStart(), window.getStop()).getBases();
|
||||||
StringUtil.toUpperCase(bases);
|
StringUtil.toUpperCase(bases);
|
||||||
refContext = new ReferenceContext(refContext.getLocus(), window, bases);
|
refContext = new ReferenceContext(refContext.getGenomeLocParser(),refContext.getLocus(), window, bases);
|
||||||
}
|
}
|
||||||
|
|
||||||
BitSet mismatches = AlignmentUtils.mismatchesInRefWindow(record, refContext, UAC.MAX_MISMATCHES, MISMATCH_WINDOW_SIZE);
|
BitSet mismatches = AlignmentUtils.mismatchesInRefWindow(record, refContext, UAC.MAX_MISMATCHES, MISMATCH_WINDOW_SIZE);
|
||||||
|
|
|
||||||
|
|
@ -136,10 +136,10 @@ public class RemapAlignments extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
|
|
||||||
h.setSequenceDictionary(reference.getSequenceDictionary());
|
h.setSequenceDictionary(reference.getSequenceDictionary());
|
||||||
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
|
GenomeLocParser genomeLocParser = new GenomeLocParser(reference.getSequenceDictionary());
|
||||||
|
|
||||||
map = new GenomicMap(10000);
|
map = new GenomicMap(10000);
|
||||||
map.read(MAP_FILE);
|
map.read(genomeLocParser,MAP_FILE);
|
||||||
System.out.println("Map loaded successfully: "+map.size()+" contigs");
|
System.out.println("Map loaded successfully: "+map.size()+" contigs");
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,12 +40,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.CigarElement;
|
|
||||||
import net.sf.samtools.CigarOperator;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
@ -95,7 +90,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
|
||||||
* where start, stop are 0 based, closed intervals
|
* where start, stop are 0 based, closed intervals
|
||||||
* @param f
|
* @param f
|
||||||
*/
|
*/
|
||||||
public void readArachne(File f) {
|
public void readArachne(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,File f) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
BufferedReader reader = new BufferedReader( new FileReader(f) );
|
BufferedReader reader = new BufferedReader( new FileReader(f) );
|
||||||
|
|
@ -127,9 +122,10 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
|
||||||
for ( int i = 0 ; i < coord_parts.length ; i += 3 ) {
|
for ( int i = 0 ; i < coord_parts.length ; i += 3 ) {
|
||||||
// Arachne map file contains 0-based, closed intervals, hence +1 below.
|
// Arachne map file contains 0-based, closed intervals, hence +1 below.
|
||||||
int index = Integer.parseInt(coord_parts[i]);
|
int index = Integer.parseInt(coord_parts[i]);
|
||||||
long start = Long.parseLong(coord_parts[i+1]);
|
String contig = sequenceDictionary.getSequence(index).getSequenceName();
|
||||||
long stop = Long.parseLong(coord_parts[i+2]);
|
int start = Integer.parseInt(coord_parts[i+1]);
|
||||||
segments.add(GenomeLocParser.createGenomeLoc(index, start+1, stop+1));
|
int stop = Integer.parseInt(coord_parts[i+2]);
|
||||||
|
segments.add(genomeLocParser.createGenomeLoc(contig, start+1, stop+1));
|
||||||
}
|
}
|
||||||
|
|
||||||
addCustomContig(name, segments);
|
addCustomContig(name, segments);
|
||||||
|
|
@ -148,7 +144,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
|
||||||
* where start, stop are 1 based, closed intervals
|
* where start, stop are 1 based, closed intervals
|
||||||
* @param f
|
* @param f
|
||||||
*/
|
*/
|
||||||
public void read(File f) {
|
public void read(GenomeLocParser genomeLocParser,File f) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
BufferedReader reader = new BufferedReader( new FileReader(f) );
|
BufferedReader reader = new BufferedReader( new FileReader(f) );
|
||||||
|
|
@ -171,7 +167,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
|
||||||
while ( p2 < line.length() && line.charAt(p2) != ',') p2++; // next comma or end-of-line
|
while ( p2 < line.length() && line.charAt(p2) != ',') p2++; // next comma or end-of-line
|
||||||
|
|
||||||
while ( p2 != p1 ) {
|
while ( p2 != p1 ) {
|
||||||
GenomeLoc newSegment = GenomeLocParser.parseGenomeLoc(line.substring(p1, p2));
|
GenomeLoc newSegment = genomeLocParser.parseGenomeLoc(line.substring(p1, p2));
|
||||||
if ( segments.size() > 0 &&
|
if ( segments.size() > 0 &&
|
||||||
segments.get(segments.size()-1).getStop()+1 == newSegment.getStart() &&
|
segments.get(segments.size()-1).getStop()+1 == newSegment.getStart() &&
|
||||||
segments.get(segments.size()-1).getContigIndex() == newSegment.getContigIndex())
|
segments.get(segments.size()-1).getContigIndex() == newSegment.getContigIndex())
|
||||||
|
|
@ -408,7 +404,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
|
||||||
|
|
||||||
|
|
||||||
SAMRecord r = new SAMRecord(reader.getFileHeader());
|
SAMRecord r = new SAMRecord(reader.getFileHeader());
|
||||||
GenomeLocParser.setupRefContigOrdering(reader.getFileHeader().getSequenceDictionary());
|
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||||
|
|
||||||
r.setReferenceName("ENST00000378466");
|
r.setReferenceName("ENST00000378466");
|
||||||
r.setAlignmentStart(1235);
|
r.setAlignmentStart(1235);
|
||||||
|
|
@ -421,9 +417,9 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
|
||||||
|
|
||||||
GenomicMap m = new GenomicMap(5);
|
GenomicMap m = new GenomicMap(5);
|
||||||
|
|
||||||
// m.readArachne(new File("/humgen/gsa-scr1/asivache/cDNA/Ensembl48.transcriptome.map"));
|
// m.readArachne(genomeLocParser,new File("/humgen/gsa-scr1/asivache/cDNA/Ensembl48.transcriptome.map"));
|
||||||
// m.write(new File("/humgen/gsa-scr1/asivache/cDNA/new_pipeline/Ensembl48.new.transcriptome.map"));
|
// m.write(new File("/humgen/gsa-scr1/asivache/cDNA/new_pipeline/Ensembl48.new.transcriptome.map"));
|
||||||
m.read(new File("W:/berger/cDNA_BAM/refs/Ensembl52.plus.Genome.map"));
|
m.read(genomeLocParser,new File("W:/berger/cDNA_BAM/refs/Ensembl52.plus.Genome.map"));
|
||||||
|
|
||||||
m.remapToMasterReference(r,reader.getFileHeader(),true);
|
m.remapToMasterReference(r,reader.getFileHeader(),true);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ import java.io.Serializable;
|
||||||
* Date: Mar 2, 2009
|
* Date: Mar 2, 2009
|
||||||
* Time: 8:50:11 AM
|
* Time: 8:50:11 AM
|
||||||
*
|
*
|
||||||
* Genome location representation. It is *** 1 *** based
|
* Genome location representation. It is *** 1 *** based closed.
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
@ -23,8 +23,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
|
||||||
* start and stop position, and (optionally) the contig name
|
* start and stop position, and (optionally) the contig name
|
||||||
*/
|
*/
|
||||||
protected final int contigIndex;
|
protected final int contigIndex;
|
||||||
protected final long start;
|
protected final int start;
|
||||||
protected final long stop;
|
protected final int stop;
|
||||||
protected final String contigName;
|
protected final String contigName;
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -32,93 +32,25 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
|
||||||
// constructors
|
// constructors
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
/*GenomeLoc( int contigIndex, final long start, final long stop ) {
|
|
||||||
MAX_CONTIG = Integer.MAX_VALUE;
|
|
||||||
if (start < 0) { throw new StingException("Bad start position " + start);}
|
|
||||||
if (stop < -1) { throw new StingException("Bad stop position " + stop); } // a negative -1 indicates it's not a meaningful end position
|
|
||||||
|
|
||||||
this.contigIndex = contigIndex;
|
|
||||||
this.start = start;
|
|
||||||
this.contigName = null; // we just don't know
|
|
||||||
this.stop = stop == -1 ? start : stop;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
protected GenomeLoc(final SAMRecord read) {
|
protected GenomeLoc(final SAMRecord read) {
|
||||||
this(read.getHeader().getSequence(read.getReferenceIndex()).getSequenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
|
this(read.getHeader().getSequence(read.getReferenceIndex()).getSequenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected GenomeLoc( final String contig, final int contigIndex, final long start, final long stop ) {
|
protected GenomeLoc( final String contig, final int contigIndex, final int start, final int stop ) {
|
||||||
this.contigName = contig;
|
this.contigName = contig;
|
||||||
this.contigIndex = contigIndex;
|
this.contigIndex = contigIndex;
|
||||||
this.start = start;
|
this.start = start;
|
||||||
this.stop = stop;
|
this.stop = stop;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*GenomeLoc( final int contig, final long pos ) {
|
|
||||||
this(contig, pos, pos );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
protected GenomeLoc( final GenomeLoc toCopy ) {
|
|
||||||
this( toCopy.getContig(), toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if we have a specified series of locations to process AND we are past the last
|
* Return a new GenomeLoc at this same position.
|
||||||
* location in the list. It means that, in a serial processing of the genome, that we are done.
|
* @return A GenomeLoc with the same contents as the current loc.
|
||||||
*
|
|
||||||
* @param curr Current genome Location
|
|
||||||
* @param locs a list of genomic locations
|
|
||||||
* @return true if we are past the last location to process
|
|
||||||
*/
|
*/
|
||||||
public static boolean pastFinalLocation(GenomeLoc curr, List<GenomeLoc> locs) {
|
@Override
|
||||||
return (locs.size() > 0 && curr.isPast(locs.get(locs.size() - 1)));
|
public GenomeLoc clone() {
|
||||||
}
|
return new GenomeLoc(getContig(),getContigIndex(),getStart(),getStop());
|
||||||
|
|
||||||
/**
|
|
||||||
* A key function that returns true if the proposed GenomeLoc curr is within the list of
|
|
||||||
* locations we are processing in this TraversalEngine
|
|
||||||
*
|
|
||||||
* @param curr the current location
|
|
||||||
* @param locs a list of genomic locations
|
|
||||||
* @return true if we should process GenomeLoc curr, otherwise false
|
|
||||||
*/
|
|
||||||
public static boolean inLocations(GenomeLoc curr, ArrayList<GenomeLoc> locs) {
|
|
||||||
if ( locs.size() == 0 ) {
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
for ( GenomeLoc loc : locs ) {
|
|
||||||
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
|
|
||||||
if (loc.overlapsP(curr))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void removePastLocs(GenomeLoc curr, List<GenomeLoc> locs) {
|
|
||||||
while ( !locs.isEmpty() && curr.isPast(locs.get(0)) ) {
|
|
||||||
//System.out.println("At: " + curr + ", removing: " + locs.get(0));
|
|
||||||
locs.remove(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean overlapswithSortedLocsP(GenomeLoc curr, List<GenomeLoc> locs, boolean returnTrueIfEmpty) {
|
|
||||||
if ( locs.isEmpty() )
|
|
||||||
return returnTrueIfEmpty;
|
|
||||||
|
|
||||||
// skip loci before intervals begin
|
|
||||||
if ( curr.contigIndex < locs.get(0).contigIndex )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
for ( GenomeLoc loc : locs ) {
|
|
||||||
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
|
|
||||||
if ( loc.overlapsP(curr) )
|
|
||||||
return true;
|
|
||||||
if ( curr.compareTo(loc) < 0 )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
@ -129,8 +61,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int getContigIndex() { return this.contigIndex; }
|
public final int getContigIndex() { return this.contigIndex; }
|
||||||
public final long getStart() { return this.start; }
|
public final int getStart() { return this.start; }
|
||||||
public final long getStop() { return this.stop; }
|
public final int getStop() { return this.stop; }
|
||||||
public final String toString() {
|
public final String toString() {
|
||||||
if ( throughEndOfContigP() && atBeginningOfContigP() )
|
if ( throughEndOfContigP() && atBeginningOfContigP() )
|
||||||
return getContig();
|
return getContig();
|
||||||
|
|
@ -139,13 +71,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
|
||||||
else
|
else
|
||||||
return String.format("%s:%d-%d", getContig(), getStart(), getStop());
|
return String.format("%s:%d-%d", getContig(), getStart(), getStop());
|
||||||
}
|
}
|
||||||
|
private boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; }
|
||||||
public final boolean isUnmapped() { return this.contigIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; }
|
private boolean atBeginningOfContigP() { return this.start == 1; }
|
||||||
public final boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; }
|
|
||||||
public final boolean atBeginningOfContigP() { return this.start == 1; }
|
|
||||||
|
|
||||||
|
|
||||||
public final boolean isSingleBP() { return stop == start; }
|
|
||||||
|
|
||||||
public final boolean disjointP(GenomeLoc that) {
|
public final boolean disjointP(GenomeLoc that) {
|
||||||
return this.contigIndex != that.contigIndex || this.start > that.stop || that.start > this.stop;
|
return this.contigIndex != that.contigIndex || this.start > that.stop || that.start > this.stop;
|
||||||
|
|
@ -187,15 +114,6 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
|
||||||
return onSameContig(that) && getStart() <= that.getStart() && getStop() >= that.getStop();
|
return onSameContig(that) && getStart() <= that.getStart() && getStop() >= that.getStop();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if this GenomeLoc contains the start position of GenomeLoc that, on the same contig
|
|
||||||
* @param start
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public final boolean containsStartPosition(long start) {
|
|
||||||
return getStart() <= start && start <= getStop();
|
|
||||||
}
|
|
||||||
|
|
||||||
public final boolean onSameContig(GenomeLoc that) {
|
public final boolean onSameContig(GenomeLoc that) {
|
||||||
return (this.contigIndex == that.contigIndex);
|
return (this.contigIndex == that.contigIndex);
|
||||||
}
|
}
|
||||||
|
|
@ -215,26 +133,26 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
|
||||||
return this.compareTo(left) > -1 && this.compareTo(right) < 1;
|
return this.compareTo(left) > -1 && this.compareTo(right) < 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests whether this contig is completely before contig 'that'.
|
||||||
|
* @param that Contig to test against.
|
||||||
|
* @return true if this contig ends before 'that' starts; false if this is completely after or overlaps 'that'.
|
||||||
|
*/
|
||||||
public final boolean isBefore( GenomeLoc that ) {
|
public final boolean isBefore( GenomeLoc that ) {
|
||||||
int comparison = this.compareContigs(that);
|
int comparison = this.compareContigs(that);
|
||||||
return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() ));
|
return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() ));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests whether this contig is completely after contig 'that'.
|
||||||
|
* @param that Contig to test against.
|
||||||
|
* @return true if this contig starts after 'that' ends; false if this is completely before or overlaps 'that'.
|
||||||
|
*/
|
||||||
public final boolean isPast( GenomeLoc that ) {
|
public final boolean isPast( GenomeLoc that ) {
|
||||||
int comparison = this.compareContigs(that);
|
int comparison = this.compareContigs(that);
|
||||||
return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStop() ));
|
return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStop() ));
|
||||||
}
|
}
|
||||||
|
|
||||||
public final boolean startsBefore( GenomeLoc that ) {
|
|
||||||
int comparison = this.compareContigs(that);
|
|
||||||
return ( comparison == -1 || ( comparison == 0 && this.getStart() < that.getStart() ));
|
|
||||||
}
|
|
||||||
|
|
||||||
public final boolean startsAfter( GenomeLoc that ) {
|
|
||||||
int comparison = this.compareContigs(that);
|
|
||||||
return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStart() ));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the minimum distance between any pair of bases in this and that GenomeLocs:
|
// Return the minimum distance between any pair of bases in this and that GenomeLocs:
|
||||||
public final int minDistance( final GenomeLoc that ) {
|
public final int minDistance( final GenomeLoc that ) {
|
||||||
if (!this.onSameContig(that))
|
if (!this.onSameContig(that))
|
||||||
|
|
@ -281,15 +199,6 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable, Serializable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return a new GenomeLoc at this same position.
|
|
||||||
* @return A GenomeLoc with the same contents as the current loc.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public GenomeLoc clone() {
|
|
||||||
return new GenomeLoc(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* conpare this genomeLoc's contig to another genome loc
|
* conpare this genomeLoc's contig to another genome loc
|
||||||
* @param that the genome loc to compare contigs with
|
* @param that the genome loc to compare contigs with
|
||||||
|
|
|
||||||
|
|
@ -57,24 +57,33 @@ import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
public class GenomeLocParser {
|
public class GenomeLocParser {
|
||||||
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
|
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
|
||||||
|
|
||||||
//private static final Pattern mPattern = Pattern.compile("([\\p{Print}&&[^:]]+):*([\\d,]+)?([\\+-])?([\\d,]+)?$"); // matches case 3
|
|
||||||
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Ugly global variable defining the optional ordering of contig elements
|
// Ugly global variable defining the optional ordering of contig elements
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//public static Map<String, Integer> refContigOrdering = null;
|
//public static Map<String, Integer> refContigOrdering = null;
|
||||||
protected static SAMSequenceDictionary contigInfo = null;
|
protected SAMSequenceDictionary contigInfo = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* do we have a contig ordering setup?
|
* set our internal reference contig order
|
||||||
*
|
* @param refFile the reference file
|
||||||
* @return true if the contig order is setup
|
|
||||||
*/
|
*/
|
||||||
public static boolean hasKnownContigOrdering() {
|
public GenomeLocParser(final ReferenceSequenceFile refFile) {
|
||||||
return contigInfo != null;
|
this(refFile.getSequenceDictionary());
|
||||||
|
}
|
||||||
|
|
||||||
|
public GenomeLocParser(SAMSequenceDictionary seqDict) {
|
||||||
|
if (seqDict == null) { // we couldn't load the reference dictionary
|
||||||
|
//logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
|
||||||
|
throw new UserException.CommandLineException("Failed to load reference dictionary");
|
||||||
|
} else if (contigInfo == null) {
|
||||||
|
contigInfo = seqDict;
|
||||||
|
logger.debug(String.format("Prepared reference sequence contig dictionary"));
|
||||||
|
for (SAMSequenceRecord contig : seqDict.getSequences()) {
|
||||||
|
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -84,7 +93,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return the sam sequence record
|
* @return the sam sequence record
|
||||||
*/
|
*/
|
||||||
public static SAMSequenceRecord getContigInfo(final String contig) {
|
public SAMSequenceRecord getContigInfo(final String contig) {
|
||||||
return contigInfo.getSequence(contig);
|
return contigInfo.getSequence(contig);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -96,53 +105,13 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return the contig index, -1 if not found
|
* @return the contig index, -1 if not found
|
||||||
*/
|
*/
|
||||||
public static int getContigIndex(final String contig, boolean exceptionOut) {
|
public int getContigIndex(final String contig, boolean exceptionOut) {
|
||||||
if (contigInfo.getSequenceIndex(contig) == -1 && exceptionOut)
|
if (contigInfo.getSequenceIndex(contig) == -1 && exceptionOut)
|
||||||
throw new UserException.CommandLineException(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
|
throw new UserException.CommandLineException(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
|
||||||
|
|
||||||
return contigInfo.getSequenceIndex(contig);
|
return contigInfo.getSequenceIndex(contig);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* set our internal reference contig order
|
|
||||||
*
|
|
||||||
* @param refFile the reference file
|
|
||||||
*
|
|
||||||
* @return true if we were successful
|
|
||||||
*/
|
|
||||||
public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) {
|
|
||||||
return setupRefContigOrdering(refFile.getSequenceDictionary());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* setup our internal reference contig order
|
|
||||||
*
|
|
||||||
* @param seqDict the sequence dictionary
|
|
||||||
*
|
|
||||||
* @return true if we were successful
|
|
||||||
*/
|
|
||||||
public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) {
|
|
||||||
if (seqDict == null) { // we couldn't load the reference dictionary
|
|
||||||
//logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
|
|
||||||
throw new UserException.CommandLineException("Failed to load reference dictionary");
|
|
||||||
} else if (contigInfo == null) {
|
|
||||||
contigInfo = seqDict;
|
|
||||||
logger.debug(String.format("Prepared reference sequence contig dictionary"));
|
|
||||||
for (SAMSequenceRecord contig : seqDict.getSequences()) {
|
|
||||||
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A package-protected method that can be used by the test system to reset the sequence dictionary
|
|
||||||
* being used. Use this method sparingly.
|
|
||||||
*/
|
|
||||||
static void clearRefContigOrdering() {
|
|
||||||
contigInfo = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* parse a genome interval, from a location string
|
* parse a genome interval, from a location string
|
||||||
*
|
*
|
||||||
|
|
@ -155,7 +124,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public static GenomeLoc parseGenomeInterval(final String str) {
|
public GenomeLoc parseGenomeInterval(final String str) {
|
||||||
GenomeLoc ret = parseGenomeLoc(str);
|
GenomeLoc ret = parseGenomeLoc(str);
|
||||||
exceptionOnInvalidGenomeLocBounds(ret);
|
exceptionOnInvalidGenomeLocBounds(ret);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -173,13 +142,13 @@ public class GenomeLocParser {
|
||||||
* @return a GenomeLoc representing the String
|
* @return a GenomeLoc representing the String
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc parseGenomeLoc(final String str) {
|
public GenomeLoc parseGenomeLoc(final String str) {
|
||||||
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||||
//System.out.printf("Parsing location '%s'%n", str);
|
//System.out.printf("Parsing location '%s'%n", str);
|
||||||
|
|
||||||
String contig = null;
|
String contig = null;
|
||||||
long start = 1;
|
int start = 1;
|
||||||
long stop = -1;
|
int stop = -1;
|
||||||
|
|
||||||
final int colonIndex = str.indexOf(":");
|
final int colonIndex = str.indexOf(":");
|
||||||
if(colonIndex == -1) {
|
if(colonIndex == -1) {
|
||||||
|
|
@ -210,7 +179,7 @@ public class GenomeLocParser {
|
||||||
if (!isContigValid(contig))
|
if (!isContigValid(contig))
|
||||||
throw new UserException("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
throw new UserException("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
||||||
|
|
||||||
if (stop == Integer.MAX_VALUE && hasKnownContigOrdering())
|
if (stop == Integer.MAX_VALUE)
|
||||||
// lookup the actually stop position!
|
// lookup the actually stop position!
|
||||||
stop = getContigInfo(contig).getSequenceLength();
|
stop = getContigInfo(contig).getSequenceLength();
|
||||||
|
|
||||||
|
|
@ -228,7 +197,7 @@ public class GenomeLocParser {
|
||||||
* Parses a number like 1,000,000 into a long.
|
* Parses a number like 1,000,000 into a long.
|
||||||
* @param pos
|
* @param pos
|
||||||
*/
|
*/
|
||||||
private static long parsePosition(final String pos) {
|
private int parsePosition(final String pos) {
|
||||||
//String x = pos.replaceAll(",", ""); - this was replaced because it uses regexps
|
//String x = pos.replaceAll(",", ""); - this was replaced because it uses regexps
|
||||||
//System.out.println("Parsing position: '" + pos + "'");
|
//System.out.println("Parsing position: '" + pos + "'");
|
||||||
if(pos.indexOf('-') != -1) {
|
if(pos.indexOf('-') != -1) {
|
||||||
|
|
@ -244,13 +213,13 @@ public class GenomeLocParser {
|
||||||
continue;
|
continue;
|
||||||
} else if(c < '0' || c > '9') {
|
} else if(c < '0' || c > '9') {
|
||||||
throw new NumberFormatException("Position: '" + pos + "' contains invalid chars." );
|
throw new NumberFormatException("Position: '" + pos + "' contains invalid chars." );
|
||||||
} else {
|
} else {
|
||||||
buffer.append(c);
|
buffer.append(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Long.parseLong(buffer.toString());
|
return Integer.parseInt(buffer.toString());
|
||||||
} else {
|
} else {
|
||||||
return Long.parseLong(pos);
|
return Integer.parseInt(pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -263,7 +232,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return the list of merged locations
|
* @return the list of merged locations
|
||||||
*/
|
*/
|
||||||
public static List<GenomeLoc> mergeIntervalLocations(final List<GenomeLoc> raw, IntervalMergingRule rule) {
|
public List<GenomeLoc> mergeIntervalLocations(final List<GenomeLoc> raw, IntervalMergingRule rule) {
|
||||||
if (raw.size() <= 1)
|
if (raw.size() <= 1)
|
||||||
return raw;
|
return raw;
|
||||||
else {
|
else {
|
||||||
|
|
@ -292,7 +261,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return True if the contig is valid. False otherwise.
|
* @return True if the contig is valid. False otherwise.
|
||||||
*/
|
*/
|
||||||
private static boolean isContigValid(String contig) {
|
private boolean isContigValid(String contig) {
|
||||||
int contigIndex = contigInfo.getSequenceIndex(contig);
|
int contigIndex = contigInfo.getSequenceIndex(contig);
|
||||||
return contigIndex >= 0 && contigIndex < contigInfo.size();
|
return contigIndex >= 0 && contigIndex < contigInfo.size();
|
||||||
}
|
}
|
||||||
|
|
@ -309,7 +278,7 @@ public class GenomeLocParser {
|
||||||
* Validation: only checks that contig is valid
|
* Validation: only checks that contig is valid
|
||||||
* start/stop could be anything
|
* start/stop could be anything
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) {
|
public GenomeLoc parseGenomeLoc(final String contig, int start, int stop) {
|
||||||
if (!isContigValid(contig))
|
if (!isContigValid(contig))
|
||||||
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
||||||
return new GenomeLoc(contig, getContigIndex(contig,true), start, stop);
|
return new GenomeLoc(contig, getContigIndex(contig,true), start, stop);
|
||||||
|
|
@ -327,7 +296,7 @@ public class GenomeLocParser {
|
||||||
* @param allowEmptyIntervalList if false empty interval lists will return null
|
* @param allowEmptyIntervalList if false empty interval lists will return null
|
||||||
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
|
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
|
||||||
*/
|
*/
|
||||||
public static List<GenomeLoc> intervalFileToList(final String file_name, boolean allowEmptyIntervalList) {
|
public List<GenomeLoc> intervalFileToList(final String file_name, boolean allowEmptyIntervalList) {
|
||||||
// try to open file
|
// try to open file
|
||||||
File inputFile = new File(file_name);
|
File inputFile = new File(file_name);
|
||||||
|
|
||||||
|
|
@ -344,7 +313,7 @@ public class GenomeLocParser {
|
||||||
|
|
||||||
// case: BED file
|
// case: BED file
|
||||||
if (file_name.toUpperCase().endsWith(".BED")) {
|
if (file_name.toUpperCase().endsWith(".BED")) {
|
||||||
BedParser parser = new BedParser(inputFile);
|
BedParser parser = new BedParser(this,inputFile);
|
||||||
return parser.getLocations();
|
return parser.getLocations();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -393,22 +362,8 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return the string that represents that contig name
|
* @return the string that represents that contig name
|
||||||
*/
|
*/
|
||||||
private static String getSequenceNameFromIndex(int contigIndex) {
|
private String getSequenceNameFromIndex(int contigIndex) {
|
||||||
return GenomeLocParser.contigInfo.getSequence(contigIndex).getSequenceName();
|
return contigInfo.getSequence(contigIndex).getSequenceName();
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create a genome loc, given the contig name, start, and stop
|
|
||||||
*
|
|
||||||
* @param contig the contig name
|
|
||||||
* @param start the starting position
|
|
||||||
* @param stop the stop position
|
|
||||||
*
|
|
||||||
* @return a new genome loc
|
|
||||||
*/
|
|
||||||
public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) {
|
|
||||||
checkSetup();
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), start, stop));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -420,31 +375,21 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return a new genome loc - but don't exception out if it is invalid
|
* @return a new genome loc - but don't exception out if it is invalid
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc createPotentiallyInvalidGenomeLoc(String contig, final long start, final long stop) {
|
public GenomeLoc createPotentiallyInvalidGenomeLoc(String contig, final int start, final int stop) {
|
||||||
checkSetup();
|
return new GenomeLoc(contig, getContigIndex(contig,false), start, stop);
|
||||||
return new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,false), start, stop);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a genome loc, given the contig index, start, and stop
|
* create a genome loc, given the contig name, start, and stop
|
||||||
*
|
*
|
||||||
* @param contigIndex the contig index
|
* @param contig the contig name
|
||||||
* @param start the start position
|
* @param start the starting position
|
||||||
* @param stop the stop position
|
* @param stop the stop position
|
||||||
*
|
*
|
||||||
* @return a new genome loc
|
* @return a new genome loc
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc createGenomeLoc(int contigIndex, final long start, final long stop) {
|
public GenomeLoc createGenomeLoc(String contig, final int start, final int stop) {
|
||||||
checkSetup();
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig,true), start, stop));
|
||||||
if (start < 0) {
|
|
||||||
throw new ReviewedStingException("Bad start position " + start);
|
|
||||||
}
|
|
||||||
if (stop < -1) {
|
|
||||||
throw new ReviewedStingException("Bad stop position " + stop);
|
|
||||||
} // a negative -1 indicates it's not a meaningful end position
|
|
||||||
|
|
||||||
|
|
||||||
return new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -454,25 +399,10 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc createGenomeLoc(final SAMRecord read) {
|
public GenomeLoc createGenomeLoc(final SAMRecord read) {
|
||||||
checkSetup();
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create a new genome loc, given the contig position, and a single position
|
|
||||||
*
|
|
||||||
* @param contig the contig name
|
|
||||||
* @param pos the postion
|
|
||||||
*
|
|
||||||
* @return a genome loc representing a single base at the specified postion on the contig
|
|
||||||
*/
|
|
||||||
public static GenomeLoc createGenomeLoc(final int contig, final long pos) {
|
|
||||||
checkSetup();
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a new genome loc, given the contig name, and a single position
|
* create a new genome loc, given the contig name, and a single position
|
||||||
*
|
*
|
||||||
|
|
@ -481,14 +411,8 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return a genome loc representing a single base at the specified postion on the contig
|
* @return a genome loc representing a single base at the specified postion on the contig
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc createGenomeLoc(final String contig, final long pos) {
|
public GenomeLoc createGenomeLoc(final String contig, final int pos) {
|
||||||
checkSetup();
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig,true), pos, pos));
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), pos, pos));
|
|
||||||
}
|
|
||||||
|
|
||||||
public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) {
|
|
||||||
checkSetup();
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(toCopy.getContig(), toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -505,7 +429,7 @@ public class GenomeLocParser {
|
||||||
* @return the genome loc if it's valid, otherwise we throw an exception
|
* @return the genome loc if it's valid, otherwise we throw an exception
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private static GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) {
|
private GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) {
|
||||||
if (toReturn.getStart() < 0) {
|
if (toReturn.getStart() < 0) {
|
||||||
throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0");
|
throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0");
|
||||||
}
|
}
|
||||||
|
|
@ -534,7 +458,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @param locus Locus to verify.
|
* @param locus Locus to verify.
|
||||||
*/
|
*/
|
||||||
private static void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) {
|
private void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) {
|
||||||
int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength();
|
int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength();
|
||||||
if(locus.getStart() > contigSize)
|
if(locus.getStart() > contigSize)
|
||||||
throw new ReviewedStingException(String.format("GenomeLoc is invalid: locus start %d is after the end of contig %s",locus.getStart(),locus.getContig()));
|
throw new ReviewedStingException(String.format("GenomeLoc is invalid: locus start %d is after the end of contig %s",locus.getStart(),locus.getContig()));
|
||||||
|
|
@ -554,8 +478,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* performs interval-style validation: contig is valid and atart and stop less than the end
|
* performs interval-style validation: contig is valid and atart and stop less than the end
|
||||||
*/
|
*/
|
||||||
public static boolean validGenomeLoc(GenomeLoc loc) {
|
public boolean validGenomeLoc(GenomeLoc loc) {
|
||||||
checkSetup();
|
|
||||||
// quick check before we get the contig size, is the contig number valid
|
// quick check before we get the contig size, is the contig number valid
|
||||||
if ((loc.getContigIndex() < 0) || // the contig index has to be positive
|
if ((loc.getContigIndex() < 0) || // the contig index has to be positive
|
||||||
(loc.getContigIndex() >= contigInfo.getSequences().size())) // the contig must be in the integer range of contigs)
|
(loc.getContigIndex() >= contigInfo.getSequences().size())) // the contig must be in the integer range of contigs)
|
||||||
|
|
@ -583,9 +506,8 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* performs interval-style validation: contig is valid and atart and stop less than the end
|
* performs interval-style validation: contig is valid and atart and stop less than the end
|
||||||
*/
|
*/
|
||||||
public static boolean validGenomeLoc(String contig, long start, long stop) {
|
public boolean validGenomeLoc(String contig, int start, int stop) {
|
||||||
checkSetup();
|
return validGenomeLoc(new GenomeLoc(contig, getContigIndex(contig, false), start, stop));
|
||||||
return validGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig, false), start, stop));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -600,58 +522,11 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* performs interval-style validation: contig is valid and atart and stop less than the end
|
* performs interval-style validation: contig is valid and atart and stop less than the end
|
||||||
*/
|
*/
|
||||||
public static boolean validGenomeLoc(int contigIndex, long start, long stop) {
|
public boolean validGenomeLoc(int contigIndex, int start, int stop) {
|
||||||
checkSetup();
|
|
||||||
if (contigIndex < 0 || contigIndex >= contigInfo.size()) return false;
|
if (contigIndex < 0 || contigIndex >= contigInfo.size()) return false;
|
||||||
return validGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop));
|
return validGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Move this Genome loc to the next contig, with a start
|
|
||||||
* and stop of 1.
|
|
||||||
*
|
|
||||||
* @return true if we are not out of contigs, otherwise false if we're
|
|
||||||
* at the end of the genome (no more contigs to jump to).
|
|
||||||
*/
|
|
||||||
public static GenomeLoc toNextContig(GenomeLoc current) {
|
|
||||||
if (current.getContigIndex() + 1 >= contigInfo.getSequences().size()) {
|
|
||||||
return null;
|
|
||||||
} else
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(current.getContigIndex() + 1), current.getContigIndex() + 1, 1, 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create a new genome loc, given an old location and a new contig
|
|
||||||
*
|
|
||||||
* @param loc the old location
|
|
||||||
* @param contig the new contig to set
|
|
||||||
*
|
|
||||||
* @return a new genome loc with an updated contig name and index
|
|
||||||
*/
|
|
||||||
public static GenomeLoc setContig(GenomeLoc loc, String contig) {
|
|
||||||
checkSetup();
|
|
||||||
|
|
||||||
int index = -1;
|
|
||||||
if ((index = contigInfo.getSequenceIndex(contig)) < 0) {
|
|
||||||
throw new ReviewedStingException("Contig name ( " + contig + " ) not in the set sequence dictionary.");
|
|
||||||
}
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, index, loc.start, loc.getStop()));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets contig index. UNSAFE since it 1) does NOT update contig name; 2) does not validate the index
|
|
||||||
*
|
|
||||||
* @param contig
|
|
||||||
*/
|
|
||||||
public static GenomeLoc setContigIndex(GenomeLoc loc, int contig) {
|
|
||||||
checkSetup();
|
|
||||||
if ((contig >= GenomeLocParser.contigInfo.getSequences().size()) || (contig < 0)) {
|
|
||||||
throw new ReviewedStingException("Contig index ( " + contig + " ) is not in the sequence dictionary set.");
|
|
||||||
}
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(GenomeLocParser.contigInfo.getSequence(contig).getSequenceName(), contig, loc.start, loc.getStop()));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a new genome loc from an existing loc, with a new start position
|
* create a new genome loc from an existing loc, with a new start position
|
||||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||||
|
|
@ -662,8 +537,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return the newly created genome loc
|
* @return the newly created genome loc
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc setStart(GenomeLoc loc, long start) {
|
public GenomeLoc setStart(GenomeLoc loc, int start) {
|
||||||
checkSetup();
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop()));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -677,8 +551,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc setStop(GenomeLoc loc, long stop) {
|
public GenomeLoc setStop(GenomeLoc loc, int stop) {
|
||||||
checkSetup();
|
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -689,7 +562,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return a new genome loc
|
* @return a new genome loc
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc incPos(GenomeLoc loc) {
|
public GenomeLoc incPos(GenomeLoc loc) {
|
||||||
return incPos(loc, 1);
|
return incPos(loc, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -701,41 +574,19 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return a new genome loc
|
* @return a new genome loc
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc incPos(GenomeLoc loc, long by) {
|
public GenomeLoc incPos(GenomeLoc loc, int by) {
|
||||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a new genome loc with an incremented position
|
* Creates a GenomeLoc than spans the entire contig.
|
||||||
*
|
* @param contigName Name of the contig.
|
||||||
* @param loc the location
|
* @return A locus spanning the entire contig.
|
||||||
*
|
|
||||||
* @return a new genome loc
|
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc nextLoc(GenomeLoc loc) {
|
public GenomeLoc createOverEntireContig(String contigName) {
|
||||||
return incPos(loc);
|
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
|
||||||
}
|
if(contig == null)
|
||||||
|
throw new ReviewedStingException("Unable to find contig named " + contigName);
|
||||||
/** check to make sure that we've setup the contig information */
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength()));
|
||||||
private static void checkSetup() {
|
}
|
||||||
if (contigInfo == null) {
|
|
||||||
throw new ReviewedStingException("The GenomeLocParser hasn't been setup with a contig sequence yet");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* compare two contig names, in the current context
|
|
||||||
*
|
|
||||||
* @param firstContig
|
|
||||||
* @param secondContig
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static int compareContigs(String firstContig, String secondContig) {
|
|
||||||
checkSetup();
|
|
||||||
Integer ref1 = GenomeLocParser.getContigIndex(firstContig,true);
|
|
||||||
Integer ref2 = GenomeLocParser.getContigIndex(secondContig,true);
|
|
||||||
return ref1.compareTo(ref2);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,25 +38,36 @@ import java.util.*;
|
||||||
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class);
|
private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class);
|
||||||
|
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
// our private storage for the GenomeLoc's
|
// our private storage for the GenomeLoc's
|
||||||
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
/** default constructor */
|
/** default constructor */
|
||||||
public GenomeLocSortedSet() {
|
public GenomeLocSortedSet(GenomeLocParser parser) {
|
||||||
|
this.genomeLocParser = parser;
|
||||||
}
|
}
|
||||||
|
|
||||||
public GenomeLocSortedSet(GenomeLoc e) {
|
public GenomeLocSortedSet(GenomeLocParser parser,GenomeLoc e) {
|
||||||
this();
|
this(parser);
|
||||||
add(e);
|
add(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
public GenomeLocSortedSet(Collection<GenomeLoc> l) {
|
public GenomeLocSortedSet(GenomeLocParser parser,Collection<GenomeLoc> l) {
|
||||||
this();
|
this(parser);
|
||||||
|
|
||||||
for ( GenomeLoc e : l )
|
for ( GenomeLoc e : l )
|
||||||
add(e);
|
add(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the GenomeLocParser used to create this sorted set.
|
||||||
|
* @return The parser. Will never be null.
|
||||||
|
*/
|
||||||
|
public GenomeLocParser getGenomeLocParser() {
|
||||||
|
return genomeLocParser;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get an iterator over this collection
|
* get an iterator over this collection
|
||||||
*
|
*
|
||||||
|
|
@ -201,7 +212,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
logger.debug("removeRegions operation: i = " + i);
|
logger.debug("removeRegions operation: i = " + i);
|
||||||
}
|
}
|
||||||
|
|
||||||
return GenomeLocSortedSet.createSetFromList(good);
|
return createSetFromList(genomeLocParser,good);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final List<GenomeLoc> EMPTY_LIST = new ArrayList<GenomeLoc>();
|
private static final List<GenomeLoc> EMPTY_LIST = new ArrayList<GenomeLoc>();
|
||||||
|
|
@ -221,8 +232,8 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
* |------| + |--------|
|
* |------| + |--------|
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
GenomeLoc before = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
|
GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1);
|
||||||
GenomeLoc after = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
|
GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop());
|
||||||
if (after.getStop() - after.getStart() >= 0) {
|
if (after.getStop() - after.getStart() >= 0) {
|
||||||
l.add(after);
|
l.add(after);
|
||||||
}
|
}
|
||||||
|
|
@ -255,9 +266,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
|
|
||||||
GenomeLoc n;
|
GenomeLoc n;
|
||||||
if (e.getStart() < g.getStart()) {
|
if (e.getStart() < g.getStart()) {
|
||||||
n = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
|
n = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop());
|
||||||
} else {
|
} else {
|
||||||
n = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
|
n = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// replace g with the new region
|
// replace g with the new region
|
||||||
|
|
@ -283,9 +294,10 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
* @return the GenomeLocSet of all references sequences as GenomeLoc's
|
* @return the GenomeLocSet of all references sequences as GenomeLoc's
|
||||||
*/
|
*/
|
||||||
public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
|
public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
|
||||||
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet();
|
GenomeLocParser parser = new GenomeLocParser(dict);
|
||||||
|
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(parser);
|
||||||
for (SAMSequenceRecord record : dict.getSequences()) {
|
for (SAMSequenceRecord record : dict.getSequences()) {
|
||||||
returnSortedSet.add(GenomeLocParser.createGenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength()));
|
returnSortedSet.add(parser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength()));
|
||||||
}
|
}
|
||||||
return returnSortedSet;
|
return returnSortedSet;
|
||||||
}
|
}
|
||||||
|
|
@ -297,8 +309,8 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
*
|
*
|
||||||
* @return the sorted genome loc list
|
* @return the sorted genome loc list
|
||||||
*/
|
*/
|
||||||
public static GenomeLocSortedSet createSetFromList(List<GenomeLoc> locs) {
|
public static GenomeLocSortedSet createSetFromList(GenomeLocParser parser,List<GenomeLoc> locs) {
|
||||||
GenomeLocSortedSet set = new GenomeLocSortedSet();
|
GenomeLocSortedSet set = new GenomeLocSortedSet(parser);
|
||||||
set.addAll(locs);
|
set.addAll(locs);
|
||||||
return set;
|
return set;
|
||||||
}
|
}
|
||||||
|
|
@ -307,13 +319,13 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
/**
|
/**
|
||||||
* return a deep copy of this collection.
|
* return a deep copy of this collection.
|
||||||
*
|
*
|
||||||
* @return a new GenomeLocSortedSet, indentical to the current GenomeLocSortedSet.
|
* @return a new GenomeLocSortedSet, identical to the current GenomeLocSortedSet.
|
||||||
*/
|
*/
|
||||||
public GenomeLocSortedSet clone() {
|
public GenomeLocSortedSet clone() {
|
||||||
GenomeLocSortedSet ret = new GenomeLocSortedSet();
|
GenomeLocSortedSet ret = new GenomeLocSortedSet(genomeLocParser);
|
||||||
for (GenomeLoc loc : this.mArray) {
|
for (GenomeLoc loc : this.mArray) {
|
||||||
// ensure a deep copy
|
// ensure a deep copy
|
||||||
ret.mArray.add(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()));
|
ret.mArray.add(genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStop()));
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,8 @@ public class BedParser {
|
||||||
// the buffered reader input
|
// the buffered reader input
|
||||||
private final BufferedReader mIn;
|
private final BufferedReader mIn;
|
||||||
|
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
// our array of locations
|
// our array of locations
|
||||||
private List<GenomeLoc> mLocations;
|
private List<GenomeLoc> mLocations;
|
||||||
|
|
||||||
|
|
@ -28,7 +30,8 @@ public class BedParser {
|
||||||
*
|
*
|
||||||
* @param fl
|
* @param fl
|
||||||
*/
|
*/
|
||||||
public BedParser(File fl) {
|
public BedParser(GenomeLocParser genomeLocParser,File fl) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
try {
|
try {
|
||||||
mIn = new BufferedReader(new FileReader(fl));
|
mIn = new BufferedReader(new FileReader(fl));
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
|
|
@ -57,7 +60,7 @@ public class BedParser {
|
||||||
List<GenomeLoc> locArray = new ArrayList<GenomeLoc>();
|
List<GenomeLoc> locArray = new ArrayList<GenomeLoc>();
|
||||||
try {
|
try {
|
||||||
while ((line = mIn.readLine()) != null) {
|
while ((line = mIn.readLine()) != null) {
|
||||||
locArray.add(parseLocation(line));
|
locArray.add(parseLocation(genomeLocParser,line));
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new UserException.MalformedFile("Unable to parse line in BED file.");
|
throw new UserException.MalformedFile("Unable to parse line in BED file.");
|
||||||
|
|
@ -71,7 +74,7 @@ public class BedParser {
|
||||||
* @param line the line, as a string
|
* @param line the line, as a string
|
||||||
* @return a parsed genome loc
|
* @return a parsed genome loc
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc parseLocation(String line) {
|
public static GenomeLoc parseLocation(GenomeLocParser genomeLocParser,String line) {
|
||||||
String contig;
|
String contig;
|
||||||
int start;
|
int start;
|
||||||
int stop;
|
int stop;
|
||||||
|
|
@ -85,7 +88,7 @@ public class BedParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
// we currently drop the rest of the bed record, which can contain names, scores, etc
|
// we currently drop the rest of the bed record, which can contain names, scores, etc
|
||||||
return GenomeLocParser.createGenomeLoc(contig, start, stop);
|
return genomeLocParser.createGenomeLoc(contig, start, stop);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ public class DupUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static SAMRecord combineDuplicates(List<SAMRecord> duplicates, int maxQScore) {
|
public static SAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<SAMRecord> duplicates, int maxQScore) {
|
||||||
if ( duplicates.size() == 0 )
|
if ( duplicates.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class DupUtils {
|
||||||
//for ( SAMRecord read : duplicates ) {
|
//for ( SAMRecord read : duplicates ) {
|
||||||
// System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]);
|
// System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]);
|
||||||
//}
|
//}
|
||||||
Pair<Byte, Byte> baseAndQual = combineBaseProbs(duplicates, i, maxQScore);
|
Pair<Byte, Byte> baseAndQual = combineBaseProbs(genomeLocParser,duplicates, i, maxQScore);
|
||||||
bases[i] = baseAndQual.getFirst();
|
bases[i] = baseAndQual.getFirst();
|
||||||
quals[i] = baseAndQual.getSecond();
|
quals[i] = baseAndQual.getSecond();
|
||||||
}
|
}
|
||||||
|
|
@ -114,8 +114,8 @@ public class DupUtils {
|
||||||
System.out.printf("%n");
|
System.out.printf("%n");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) {
|
private static Pair<Byte, Byte> combineBaseProbs(GenomeLocParser genomeLocParser,List<SAMRecord> duplicates, int readOffset, int maxQScore) {
|
||||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0));
|
GenomeLoc loc = genomeLocParser.createGenomeLoc(duplicates.get(0));
|
||||||
ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset);
|
ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset);
|
||||||
|
|
||||||
final boolean debug = false;
|
final boolean debug = false;
|
||||||
|
|
|
||||||
|
|
@ -114,11 +114,10 @@ public class Haplotype {
|
||||||
|
|
||||||
|
|
||||||
// Create location for all haplotypes
|
// Create location for all haplotypes
|
||||||
long startLoc = ref.getWindow().getStart() + startIdxInReference;
|
int startLoc = ref.getWindow().getStart() + startIdxInReference;
|
||||||
long stopLoc = startLoc + haplotypeSize-1;
|
int stopLoc = startLoc + haplotypeSize-1;
|
||||||
|
|
||||||
GenomeLoc locus = GenomeLocParser.createGenomeLoc(ref.getLocus().getContigIndex(),startLoc,
|
GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
|
||||||
stopLoc);
|
|
||||||
|
|
||||||
|
|
||||||
for (Allele a : vc.getAlleles()) {
|
for (Allele a : vc.getAlleles()) {
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.utils.interval;
|
package org.broadinstitute.sting.utils.interval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||||
|
|
@ -56,17 +57,17 @@ public class IntervalFileMergingIterator implements Iterator<GenomeLoc> {
|
||||||
private IntervalMergingRule myRule;
|
private IntervalMergingRule myRule;
|
||||||
private File myFile;
|
private File myFile;
|
||||||
|
|
||||||
public IntervalFileMergingIterator(File f, IntervalMergingRule rule) {
|
public IntervalFileMergingIterator(GenomeLocParser genomeLocParser,File f, IntervalMergingRule rule) {
|
||||||
myFile = f;
|
myFile = f;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
XReadLines reader = new XReadLines(f);
|
XReadLines reader = new XReadLines(f);
|
||||||
|
|
||||||
if (f.getName().toUpperCase().endsWith(".BED")) {
|
if (f.getName().toUpperCase().endsWith(".BED")) {
|
||||||
it = new PushbackIterator<GenomeLoc>( new StringToGenomeLocIteratorAdapter( reader.iterator(),
|
it = new PushbackIterator<GenomeLoc>( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(),
|
||||||
StringToGenomeLocIteratorAdapter.FORMAT.BED ) ) ;
|
StringToGenomeLocIteratorAdapter.FORMAT.BED ) ) ;
|
||||||
} else {
|
} else {
|
||||||
it = new PushbackIterator<GenomeLoc>( new StringToGenomeLocIteratorAdapter( reader.iterator(),
|
it = new PushbackIterator<GenomeLoc>( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(),
|
||||||
StringToGenomeLocIteratorAdapter.FORMAT.GATK ) ) ;
|
StringToGenomeLocIteratorAdapter.FORMAT.GATK ) ) ;
|
||||||
}
|
}
|
||||||
} catch ( FileNotFoundException e ) {
|
} catch ( FileNotFoundException e ) {
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ public class IntervalUtils {
|
||||||
* @param allowEmptyIntervalList If false instead of an empty interval list will return null.
|
* @param allowEmptyIntervalList If false instead of an empty interval list will return null.
|
||||||
* @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used.
|
* @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used.
|
||||||
*/
|
*/
|
||||||
public static List<GenomeLoc> parseIntervalArguments(List<String> argList, boolean allowEmptyIntervalList) {
|
public static List<GenomeLoc> parseIntervalArguments(GenomeLocParser parser, List<String> argList, boolean allowEmptyIntervalList) {
|
||||||
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
|
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
|
||||||
|
|
||||||
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
|
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
|
||||||
|
|
@ -51,7 +51,7 @@ public class IntervalUtils {
|
||||||
// if it's a file, add items to raw interval list
|
// if it's a file, add items to raw interval list
|
||||||
if (isIntervalFile(fileOrInterval)) {
|
if (isIntervalFile(fileOrInterval)) {
|
||||||
try {
|
try {
|
||||||
rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval, allowEmptyIntervalList));
|
rawIntervals.addAll(parser.intervalFileToList(fileOrInterval, allowEmptyIntervalList));
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e);
|
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e);
|
||||||
|
|
@ -60,7 +60,7 @@ public class IntervalUtils {
|
||||||
|
|
||||||
// otherwise treat as an interval -> parse and add to raw interval list
|
// otherwise treat as an interval -> parse and add to raw interval list
|
||||||
else {
|
else {
|
||||||
rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval));
|
rawIntervals.add(parser.parseGenomeInterval(fileOrInterval));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -121,13 +121,13 @@ public class IntervalUtils {
|
||||||
* @param mergingRule A descriptor for the type of merging to perform.
|
* @param mergingRule A descriptor for the type of merging to perform.
|
||||||
* @return A sorted, merged version of the intervals passed in.
|
* @return A sorted, merged version of the intervals passed in.
|
||||||
*/
|
*/
|
||||||
public static GenomeLocSortedSet sortAndMergeIntervals(List<GenomeLoc> intervals, IntervalMergingRule mergingRule) {
|
public static GenomeLocSortedSet sortAndMergeIntervals(GenomeLocParser parser, List<GenomeLoc> intervals, IntervalMergingRule mergingRule) {
|
||||||
// sort raw interval list
|
// sort raw interval list
|
||||||
Collections.sort(intervals);
|
Collections.sort(intervals);
|
||||||
// now merge raw interval list
|
// now merge raw interval list
|
||||||
intervals = GenomeLocParser.mergeIntervalLocations(intervals, mergingRule);
|
intervals = parser.mergeIntervalLocations(intervals, mergingRule);
|
||||||
|
|
||||||
return GenomeLocSortedSet.createSetFromList(intervals);
|
return GenomeLocSortedSet.createSetFromList(parser,intervals);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -90,10 +90,6 @@ public class ExtendedEventPileupElement extends PileupElement {
|
||||||
|
|
||||||
public Type getType() { return type; }
|
public Type getType() { return type; }
|
||||||
|
|
||||||
public GenomeLoc getLocation() {
|
|
||||||
return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
|
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
|
||||||
// a valid base quality. The following code attempts to compensate for that.'
|
// a valid base quality. The following code attempts to compensate for that.'
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,6 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.SAMRecordIterator;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
|
|
@ -31,17 +28,23 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ArtificialSAMFileReader extends SAMFileReader {
|
public class ArtificialSAMFileReader extends SAMFileReader {
|
||||||
|
/**
|
||||||
|
* The parser, for GenomeLocs.
|
||||||
|
*/
|
||||||
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Backing data store of reads.
|
* Backing data store of reads.
|
||||||
*/
|
*/
|
||||||
private List<SAMRecord> reads = null;
|
private final List<SAMRecord> reads;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct an artificial SAM file reader.
|
* Construct an artificial SAM file reader.
|
||||||
* @param reads Reads to use as backing data source.
|
* @param reads Reads to use as backing data source.
|
||||||
*/
|
*/
|
||||||
public ArtificialSAMFileReader(SAMRecord... reads) {
|
public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... reads) {
|
||||||
super( createEmptyInputStream(),true );
|
super( createEmptyInputStream(),true );
|
||||||
|
this.genomeLocParser = new GenomeLocParser(sequenceDictionary);
|
||||||
this.reads = Arrays.asList(reads);
|
this.reads = Arrays.asList(reads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -50,11 +53,11 @@ public class ArtificialSAMFileReader extends SAMFileReader {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
|
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
|
||||||
GenomeLoc region = GenomeLocParser.createGenomeLoc(sequence, start, end);
|
GenomeLoc region = genomeLocParser.createGenomeLoc(sequence, start, end);
|
||||||
List<SAMRecord> coveredSubset = new ArrayList<SAMRecord>();
|
List<SAMRecord> coveredSubset = new ArrayList<SAMRecord>();
|
||||||
|
|
||||||
for( SAMRecord read: reads ) {
|
for( SAMRecord read: reads ) {
|
||||||
GenomeLoc readPosition = GenomeLocParser.createGenomeLoc(read);
|
GenomeLoc readPosition = genomeLocParser.createGenomeLoc(read);
|
||||||
if( contained && region.containsP(readPosition) ) coveredSubset.add(read);
|
if( contained && region.containsP(readPosition) ) coveredSubset.add(read);
|
||||||
else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read);
|
else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue